Work around LND bug 6039
author	Matt Corallo <git@bluematt.me>
Thu, 17 Aug 2023 22:34:24 +0000 (22:34 +0000)
committer	Matt Corallo <git@bluematt.me>
Thu, 17 Aug 2023 22:47:01 +0000 (22:47 +0000)
LND hasn't properly handled shutdown messages ever, and
force-closes any time we send one while HTLCs are still present.
The issue is tracked at
https://github.com/lightningnetwork/lnd/issues/6039 and has had
multiple patches to fix it but none so far have managed to land
upstream. The issue appears to be very low priority for the LND
team despite being marked "P1".

We're not going to bother handling this in a sensible way, instead
simply repeating the Shutdown message on repeat until morale
improves.
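
The shape of the workaround, as applied in the channelmanager.rs hunk below, is
simply: recognize the two error strings LND sends in this situation and queue our
outbound Shutdown again instead of treating the error as fatal. A minimal,
self-contained sketch of that logic follows; the Channel and Shutdown types, the
handle_error signature, and the pending_msgs queue here are illustrative stand-ins
rather than LDK's actual structures:

    // Illustrative stand-in types -- not the lightning crate's structures.
    #[derive(Debug)]
    struct Shutdown { channel_id: [u8; 32] }

    struct Channel { channel_id: [u8; 32], local_shutdown_sent: bool }

    impl Channel {
        // Mirrors the Channel::get_outbound_shutdown() accessor added in channel.rs:
        // only hand back a Shutdown if we previously sent one ourselves.
        fn get_outbound_shutdown(&self) -> Option<Shutdown> {
            if self.local_shutdown_sent {
                Some(Shutdown { channel_id: self.channel_id })
            } else { None }
        }
    }

    // Mirrors the new early-return arm in ChannelManager::handle_error(): the two
    // LND error strings trigger a re-send of our Shutdown rather than a force-close.
    fn handle_error(chan: &Channel, err_data: &str, pending_msgs: &mut Vec<Shutdown>) {
        match err_data {
            "cannot co-op close channel w/ active htlcs" | "link failed to shutdown" => {
                if let Some(msg) = chan.get_outbound_shutdown() {
                    pending_msgs.push(msg);
                }
            }
            _ => { /* normal error handling: force-close the offending channel(s) */ }
        }
    }

    fn main() {
        let chan = Channel { channel_id: [1; 32], local_shutdown_sent: true };
        let mut pending_msgs = Vec::new();
        handle_error(&chan, "link failed to shutdown", &mut pending_msgs);
        // The same Shutdown is queued again, "on repeat until morale improves".
        assert_eq!(pending_msgs.len(), 1);
        assert_eq!(pending_msgs[0].channel_id, [1; 32]);
    }

In the real patch the channel lookup goes through per_peer_state, and the message is
pushed onto peer_state.pending_msg_events as a MessageSendEvent::SendShutdown.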

lightning/src/ln/channel.rs
lightning/src/ln/channelmanager.rs
lightning/src/ln/shutdown_tests.rs

lightning/src/ln/channel.rs
index d5cefc943616624ea2dbe1ad2a97e078764cc419..9ab152f5d77a64dd3f54d0a2fd4d5c290e7135e0 100644 (file)
@@ -3811,6 +3811,17 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
                }
        }
 
+       /// Gets the `Shutdown` message we should send our peer on reconnect, if any.
+       pub fn get_outbound_shutdown(&self) -> Option<msgs::Shutdown> {
+               if self.context.channel_state & (ChannelState::LocalShutdownSent as u32) != 0 {
+                       assert!(self.context.shutdown_scriptpubkey.is_some());
+                       Some(msgs::Shutdown {
+                               channel_id: self.context.channel_id,
+                               scriptpubkey: self.get_closing_scriptpubkey(),
+                       })
+               } else { None }
+       }
+
        /// May panic if some calls other than message-handling calls (which will all Err immediately)
        /// have been called between remove_uncommitted_htlcs_and_mark_paused and this call.
        ///
@@ -3877,13 +3888,7 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
                self.context.channel_state &= !(ChannelState::PeerDisconnected as u32);
                self.context.sent_message_awaiting_response = None;
 
-               let shutdown_msg = if self.context.channel_state & (ChannelState::LocalShutdownSent as u32) != 0 {
-                       assert!(self.context.shutdown_scriptpubkey.is_some());
-                       Some(msgs::Shutdown {
-                               channel_id: self.context.channel_id,
-                               scriptpubkey: self.get_closing_scriptpubkey(),
-                       })
-               } else { None };
+               let shutdown_msg = self.get_outbound_shutdown();
 
                let announcement_sigs = self.get_announcement_sigs(node_signer, genesis_block_hash, user_config, best_block.height(), logger);
 
lightning/src/ln/channelmanager.rs
index 45fffd5407c9caec711f34cd236d01a0dd5ea83d..9794dc905f4c62a8bea91f71ea56bea56be15678 100644 (file)
@@ -7489,6 +7489,36 @@ where
        fn handle_error(&self, counterparty_node_id: &PublicKey, msg: &msgs::ErrorMessage) {
                let _persistence_guard = PersistenceNotifierGuard::notify_on_drop(self);
 
+               match &msg.data as &str {
+                       "cannot co-op close channel w/ active htlcs"|
+                       "link failed to shutdown" =>
+                       {
+                               // LND hasn't properly handled shutdown messages ever, and force-closes any time we
+                               // send one while HTLCs are still present. The issue is tracked at
+                               // https://github.com/lightningnetwork/lnd/issues/6039 and has had multiple patches
+                               // to fix it but none so far have managed to land upstream. The issue appears to be
+                               // very low priority for the LND team despite being marked "P1".
+                               // We're not going to bother handling this in a sensible way, instead simply
+                               // repeating the Shutdown message on repeat until morale improves.
+                               if msg.channel_id != [0; 32] {
+                                       let per_peer_state = self.per_peer_state.read().unwrap();
+                                       let peer_state_mutex_opt = per_peer_state.get(counterparty_node_id);
+                                       if peer_state_mutex_opt.is_none() { return; }
+                                       let mut peer_state = peer_state_mutex_opt.unwrap().lock().unwrap();
+                                       if let Some(chan) = peer_state.channel_by_id.get(&msg.channel_id) {
+                                               if let Some(msg) = chan.get_outbound_shutdown() {
+                                                       peer_state.pending_msg_events.push(events::MessageSendEvent::SendShutdown {
+                                                               node_id: *counterparty_node_id,
+                                                               msg,
+                                                       });
+                                               }
+                                       }
+                               }
+                               return;
+                       }
+                       _ => {}
+               }
+
                if msg.channel_id == [0; 32] {
                        let channel_ids: Vec<[u8; 32]> = {
                                let per_peer_state = self.per_peer_state.read().unwrap();
lightning/src/ln/shutdown_tests.rs
index bd4df0af082102e8078787b1d4b642d19301f6cf..23d3a30dc8314fb83bac214c250c3c73abdb487b 100644 (file)
@@ -198,6 +198,55 @@ fn expect_channel_shutdown_state_with_htlc() {
        assert!(nodes[0].node.list_channels().is_empty());
 }
 
+#[test]
+fn test_lnd_bug_6039() {
+       // LND sends a nonsense error message any time it gets a shutdown if there are still HTLCs
+       // pending. We currently swallow that error to work around LND's bug #6039. This test emulates
+       // the LND nonsense and ensures we at least kinda handle it.
+       let chanmon_cfgs = create_chanmon_cfgs(2);
+       let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+       let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+       let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+       let chan = create_announced_chan_between_nodes(&nodes, 0, 1);
+
+       let (payment_preimage, _payment_hash, _) = route_payment(&nodes[0], &[&nodes[1]], 100_000);
+
+       nodes[0].node.close_channel(&chan.2, &nodes[1].node.get_our_node_id()).unwrap();
+       let node_0_shutdown = get_event_msg!(nodes[0], MessageSendEvent::SendShutdown, nodes[1].node.get_our_node_id());
+       nodes[1].node.handle_shutdown(&nodes[0].node.get_our_node_id(), &node_0_shutdown);
+
+       // Generate an lnd-like error message and check that we respond by simply screaming louder to
+       // see if LND will accept our protocol compliance.
+       let err_msg = msgs::ErrorMessage { channel_id: chan.2, data: "link failed to shutdown".to_string() };
+       nodes[0].node.handle_error(&nodes[1].node.get_our_node_id(), &err_msg);
+       let _node_0_repeated_shutdown = get_event_msg!(nodes[0], MessageSendEvent::SendShutdown, nodes[1].node.get_our_node_id());
+
+       let node_1_shutdown = get_event_msg!(nodes[1], MessageSendEvent::SendShutdown, nodes[0].node.get_our_node_id());
+
+       assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
+       assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+
+       claim_payment(&nodes[0], &[&nodes[1]], payment_preimage);
+
+       // Assume that LND will eventually respond to our Shutdown if we clear all the remaining HTLCs
+       nodes[0].node.handle_shutdown(&nodes[1].node.get_our_node_id(), &node_1_shutdown);
+
+       // ClosingSigned negotiation process
+       let node_0_closing_signed = get_event_msg!(nodes[0], MessageSendEvent::SendClosingSigned, nodes[1].node.get_our_node_id());
+       nodes[1].node.handle_closing_signed(&nodes[0].node.get_our_node_id(), &node_0_closing_signed);
+       let node_1_closing_signed = get_event_msg!(nodes[1], MessageSendEvent::SendClosingSigned, nodes[0].node.get_our_node_id());
+       nodes[0].node.handle_closing_signed(&nodes[1].node.get_our_node_id(), &node_1_closing_signed);
+       let (_, node_0_2nd_closing_signed) = get_closing_signed_broadcast!(nodes[0].node, nodes[1].node.get_our_node_id());
+       nodes[1].node.handle_closing_signed(&nodes[0].node.get_our_node_id(), &node_0_2nd_closing_signed.unwrap());
+       let (_, node_1_none) = get_closing_signed_broadcast!(nodes[1].node, nodes[0].node.get_our_node_id());
+       assert!(node_1_none.is_none());
+       check_closed_event!(nodes[0], 1, ClosureReason::CooperativeClosure, [nodes[1].node.get_our_node_id()], 100000);
+       check_closed_event!(nodes[1], 1, ClosureReason::CooperativeClosure, [nodes[0].node.get_our_node_id()], 100000);
+
+       // Shutdown basically removes the ChannelDetails, so testing the shutdown-complete state is unnecessary
+       assert!(nodes[0].node.list_channels().is_empty());
+}
+
 #[test]
 fn expect_channel_shutdown_state_with_force_closure() {
        // Test sending a shutdown prior to channel_ready after funding generation