Workaround lnd sending funding_locked before channel_reestablish 2021-06-workaround-broken-lnd
authorMatt Corallo <git@bluematt.me>
Wed, 23 Jun 2021 16:39:27 +0000 (16:39 +0000)
committerMatt Corallo <git@bluematt.me>
Mon, 28 Jun 2021 02:05:33 +0000 (02:05 +0000)
lnd has a long-standing bug where, upon reconnection, if the
channel is not yet confirmed they will not send a
channel_reestablish until the channel locks in. Then, they will
send a funding_locked *before* sending the channel_reestablish
(which is clearly a violation of the BOLT specs). We copy
c-lightning's workaround here and simply store the funding_locked
message until we receive a channel_reestablish.

See-also https://github.com/lightningnetwork/lnd/issues/4006

Fixes #963

lightning/src/ln/channel.rs
lightning/src/ln/channelmanager.rs
lightning/src/ln/functional_tests.rs

index f8c8c15be95ad660f2803125092a6bcac288f8ee..c46330b525a3c183fbfbd158653c9b6efa9abd4c 100644 (file)
@@ -434,6 +434,15 @@ pub(super) struct Channel<Signer: Sign> {
        next_local_commitment_tx_fee_info_cached: Mutex<Option<CommitmentTxInfoCached>>,
        #[cfg(any(test, feature = "fuzztarget"))]
        next_remote_commitment_tx_fee_info_cached: Mutex<Option<CommitmentTxInfoCached>>,
+
+       /// lnd has a long-standing bug where, upon reconnection, if the channel is not yet confirmed
+       /// they will not send a channel_reestablish until the channel locks in. Then, they will send a
+       /// funding_locked *before* sending the channel_reestablish (which is clearly a violation of
+       /// the BOLT specs). We copy c-lightning's workaround here and simply store the funding_locked
+       /// message until we receive a channel_reestablish.
+       ///
+       /// See-also <https://github.com/lightningnetwork/lnd/issues/4006>
+       pub workaround_lnd_bug_4006: Option<msgs::FundingLocked>,
 }
 
 #[cfg(any(test, feature = "fuzztarget"))]
@@ -633,6 +642,8 @@ impl<Signer: Sign> Channel<Signer> {
                        next_local_commitment_tx_fee_info_cached: Mutex::new(None),
                        #[cfg(any(test, feature = "fuzztarget"))]
                        next_remote_commitment_tx_fee_info_cached: Mutex::new(None),
+
+                       workaround_lnd_bug_4006: None,
                })
        }
 
@@ -876,6 +887,8 @@ impl<Signer: Sign> Channel<Signer> {
                        next_local_commitment_tx_fee_info_cached: Mutex::new(None),
                        #[cfg(any(test, feature = "fuzztarget"))]
                        next_remote_commitment_tx_fee_info_cached: Mutex::new(None),
+
+                       workaround_lnd_bug_4006: None,
                };
 
                Ok(chan)
@@ -1691,7 +1704,8 @@ impl<Signer: Sign> Channel<Signer> {
 
        pub fn funding_locked(&mut self, msg: &msgs::FundingLocked) -> Result<(), ChannelError> {
                if self.channel_state & (ChannelState::PeerDisconnected as u32) == ChannelState::PeerDisconnected as u32 {
-                       return Err(ChannelError::Close("Peer sent funding_locked when we needed a channel_reestablish".to_owned()));
+                       self.workaround_lnd_bug_4006 = Some(msg.clone());
+                       return Err(ChannelError::Ignore("Peer sent funding_locked when we needed a channel_reestablish. The peer is likely lnd, see https://github.com/lightningnetwork/lnd/issues/4006".to_owned()));
                }
 
                let non_shutdown_state = self.channel_state & (!MULTI_STATE_FLAGS);
@@ -4863,6 +4877,8 @@ impl<'a, Signer: Sign, K: Deref> ReadableArgs<&'a K> for Channel<Signer>
                        next_local_commitment_tx_fee_info_cached: Mutex::new(None),
                        #[cfg(any(test, feature = "fuzztarget"))]
                        next_remote_commitment_tx_fee_info_cached: Mutex::new(None),
+
+                       workaround_lnd_bug_4006: None,
                })
        }
 }
index ee266849b8bd8d813cf3757279ebacc5018dc60c..dc472b52f34883de434072c8e7625894188fc246 100644 (file)
@@ -3365,7 +3365,7 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
        }
 
        fn internal_channel_reestablish(&self, counterparty_node_id: &PublicKey, msg: &msgs::ChannelReestablish) -> Result<(), MsgHandleErrInternal> {
-               let (htlcs_failed_forward, chan_restoration_res) = {
+               let (htlcs_failed_forward, need_lnd_workaround, chan_restoration_res) = {
                        let mut channel_state_lock = self.channel_state.lock().unwrap();
                        let channel_state = &mut *channel_state_lock;
 
@@ -3386,13 +3386,19 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
                                                        msg,
                                                });
                                        }
-                                       (htlcs_failed_forward, handle_chan_restoration_locked!(self, channel_state_lock, channel_state, chan, revoke_and_ack, commitment_update, order, monitor_update_opt, Vec::new(), None, funding_locked))
+                                       let need_lnd_workaround = chan.get_mut().workaround_lnd_bug_4006.take();
+                                       (htlcs_failed_forward, need_lnd_workaround,
+                                               handle_chan_restoration_locked!(self, channel_state_lock, channel_state, chan, revoke_and_ack, commitment_update, order, monitor_update_opt, Vec::new(), None, funding_locked))
                                },
                                hash_map::Entry::Vacant(_) => return Err(MsgHandleErrInternal::send_err_msg_no_close("Failed to find corresponding channel".to_owned(), msg.channel_id))
                        }
                };
                post_handle_chan_restoration!(self, chan_restoration_res);
                self.fail_holding_cell_htlcs(htlcs_failed_forward, msg.channel_id);
+
+               if let Some(funding_locked_msg) = need_lnd_workaround {
+                       self.internal_funding_locked(counterparty_node_id, &funding_locked_msg)?;
+               }
                Ok(())
        }
 
index 6298f654965f2ddf1fbea336b366315186246646..0471cd081182af4e0fad028ca5d044e02b0fcc95 100644 (file)
@@ -3605,15 +3605,20 @@ fn test_simple_peer_disconnect() {
        fail_payment(&nodes[0], &vec!(&nodes[1], &nodes[2]), payment_hash_6);
 }
 
-fn do_test_drop_messages_peer_disconnect(messages_delivered: u8) {
+fn do_test_drop_messages_peer_disconnect(messages_delivered: u8, simulate_broken_lnd: bool) {
        // Test that we can reconnect when in-flight HTLC updates get dropped
        let chanmon_cfgs = create_chanmon_cfgs(2);
        let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
        let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
        let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+
+       let mut as_funding_locked = None;
        if messages_delivered == 0 {
-               create_chan_between_nodes_with_value_a(&nodes[0], &nodes[1], 100000, 10001, InitFeatures::known(), InitFeatures::known());
+               let (funding_locked, _, _) = create_chan_between_nodes_with_value_a(&nodes[0], &nodes[1], 100000, 10001, InitFeatures::known(), InitFeatures::known());
+               as_funding_locked = Some(funding_locked);
                // nodes[1] doesn't receive the funding_locked message (it'll be re-sent on reconnect)
+               // Note that we store it so that if we're running with `simulate_broken_lnd` we can deliver
+               // it before the channel_reestablish message.
        } else {
                create_announced_chan_between_nodes(&nodes, 0, 1, InitFeatures::known(), InitFeatures::known());
        }
@@ -3668,6 +3673,17 @@ fn do_test_drop_messages_peer_disconnect(messages_delivered: u8) {
        nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false);
        nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false);
        if messages_delivered < 3 {
+               if simulate_broken_lnd {
+                       // lnd has a long-standing bug where they send a funding_locked prior to a
+                       // channel_reestablish if you reconnect prior to funding_locked time.
+                       //
+                       // Here we simulate that behavior, delivering a funding_locked immediately on
+                       // reconnect. Note that we don't bother skipping the now-duplicate funding_locked sent
+                       // in `reconnect_nodes` but we currently don't fail based on that.
+                       //
+                       // See-also <https://github.com/lightningnetwork/lnd/issues/4006>
+                       nodes[1].node.handle_funding_locked(&nodes[0].node.get_our_node_id(), &as_funding_locked.as_ref().unwrap().0);
+               }
                // Even if the funding_locked messages get exchanged, as long as nothing further was
                // received on either side, both sides will need to resend them.
                reconnect_nodes(&nodes[0], &nodes[1], (true, true), (0, 1), (0, 0), (0, 0), (0, 0), (false, false));
@@ -3811,17 +3827,18 @@ fn do_test_drop_messages_peer_disconnect(messages_delivered: u8) {
 
 #[test]
 fn test_drop_messages_peer_disconnect_a() {
-       do_test_drop_messages_peer_disconnect(0);
-       do_test_drop_messages_peer_disconnect(1);
-       do_test_drop_messages_peer_disconnect(2);
-       do_test_drop_messages_peer_disconnect(3);
+       do_test_drop_messages_peer_disconnect(0, true);
+       do_test_drop_messages_peer_disconnect(0, false);
+       do_test_drop_messages_peer_disconnect(1, false);
+       do_test_drop_messages_peer_disconnect(2, false);
 }
 
 #[test]
 fn test_drop_messages_peer_disconnect_b() {
-       do_test_drop_messages_peer_disconnect(4);
-       do_test_drop_messages_peer_disconnect(5);
-       do_test_drop_messages_peer_disconnect(6);
+       do_test_drop_messages_peer_disconnect(3, false);
+       do_test_drop_messages_peer_disconnect(4, false);
+       do_test_drop_messages_peer_disconnect(5, false);
+       do_test_drop_messages_peer_disconnect(6, false);
 }
 
 #[test]