From: Matt Corallo
Date: Wed, 23 Jun 2021 16:39:27 +0000 (+0000)
Subject: Workaround lnd sending funding_locked before channel_reestablish
X-Git-Tag: v0.0.99~14^2
X-Git-Url: http://git.bitcoin.ninja/?a=commitdiff_plain;h=refs%2Fheads%2F2021-06-workaround-broken-lnd;p=rust-lightning

Workaround lnd sending funding_locked before channel_reestablish

lnd has a long-standing bug where, upon reconnection, if the channel is
not yet confirmed they will not send a channel_reestablish until the
channel locks in. Then, they will send a funding_locked *before* sending
the channel_reestablish (which is clearly a violation of the BOLT
specs). We copy c-lightning's workaround here and simply store the
funding_locked message until we receive a channel_reestablish.

See-also https://github.com/lightningnetwork/lnd/issues/4006

Fixes #963
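The fix boils down to a small state-machine tweak: an out-of-order
funding_locked is stashed on the channel instead of triggering a close,
then replayed once the channel_reestablish handshake completes. As a
minimal sketch, with simplified stand-in types rather than the real
rust-lightning API (only the workaround_lnd_bug_4006 field name matches
the actual code):

    #[derive(Clone)]
    struct FundingLocked; // stand-in for msgs::FundingLocked

    struct Channel {
        peer_disconnected: bool,
        // Stashed out-of-order funding_locked, if any.
        workaround_lnd_bug_4006: Option<FundingLocked>,
    }

    impl Channel {
        fn funding_locked(&mut self, msg: &FundingLocked) -> Result<(), ()> {
            if self.peer_disconnected {
                // Out of order: stash the message and ignore it for now,
                // rather than force-closing the channel.
                self.workaround_lnd_bug_4006 = Some(msg.clone());
                return Err(());
            }
            // ... normal funding_locked handling ...
            Ok(())
        }

        fn channel_reestablish(&mut self) -> Result<(), ()> {
            self.peer_disconnected = false;
            // ... normal channel_reestablish handling ...
            // Replay the stashed message now that the handshake is done.
            if let Some(msg) = self.workaround_lnd_bug_4006.take() {
                self.funding_locked(&msg)?;
            }
            Ok(())
        }
    }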
---

diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs
index f8c8c15be..c46330b52 100644
--- a/lightning/src/ln/channel.rs
+++ b/lightning/src/ln/channel.rs
@@ -434,6 +434,15 @@ pub(super) struct Channel<Signer: Sign> {
 	next_local_commitment_tx_fee_info_cached: Mutex<Option<CommitmentTxInfoCached>>,
 	#[cfg(any(test, feature = "fuzztarget"))]
 	next_remote_commitment_tx_fee_info_cached: Mutex<Option<CommitmentTxInfoCached>>,
+
+	/// lnd has a long-standing bug where, upon reconnection, if the channel is not yet confirmed
+	/// they will not send a channel_reestablish until the channel locks in. Then, they will send a
+	/// funding_locked *before* sending the channel_reestablish (which is clearly a violation of
+	/// the BOLT specs). We copy c-lightning's workaround here and simply store the funding_locked
+	/// message until we receive a channel_reestablish.
+	///
+	/// See-also <https://github.com/lightningnetwork/lnd/issues/4006>
+	pub workaround_lnd_bug_4006: Option<msgs::FundingLocked>,
 }
 
 #[cfg(any(test, feature = "fuzztarget"))]
@@ -633,6 +642,8 @@ impl<Signer: Sign> Channel<Signer> {
 			next_local_commitment_tx_fee_info_cached: Mutex::new(None),
 			#[cfg(any(test, feature = "fuzztarget"))]
 			next_remote_commitment_tx_fee_info_cached: Mutex::new(None),
+
+			workaround_lnd_bug_4006: None,
 		})
 	}
 
@@ -876,6 +887,8 @@ impl<Signer: Sign> Channel<Signer> {
 			next_local_commitment_tx_fee_info_cached: Mutex::new(None),
 			#[cfg(any(test, feature = "fuzztarget"))]
 			next_remote_commitment_tx_fee_info_cached: Mutex::new(None),
+
+			workaround_lnd_bug_4006: None,
 		};
 
 		Ok(chan)
@@ -1691,7 +1704,8 @@ impl<Signer: Sign> Channel<Signer> {
 	pub fn funding_locked(&mut self, msg: &msgs::FundingLocked) -> Result<(), ChannelError> {
 		if self.channel_state & (ChannelState::PeerDisconnected as u32) == ChannelState::PeerDisconnected as u32 {
-			return Err(ChannelError::Close("Peer sent funding_locked when we needed a channel_reestablish".to_owned()));
+			self.workaround_lnd_bug_4006 = Some(msg.clone());
+			return Err(ChannelError::Ignore("Peer sent funding_locked when we needed a channel_reestablish. The peer is likely lnd, see https://github.com/lightningnetwork/lnd/issues/4006".to_owned()));
 		}
 
 		let non_shutdown_state = self.channel_state & (!MULTI_STATE_FLAGS);
@@ -4863,6 +4877,8 @@ impl<'a, Signer: Sign, K: Deref> ReadableArgs<&'a K> for Channel<Signer>
 			next_local_commitment_tx_fee_info_cached: Mutex::new(None),
 			#[cfg(any(test, feature = "fuzztarget"))]
 			next_remote_commitment_tx_fee_info_cached: Mutex::new(None),
+
+			workaround_lnd_bug_4006: None,
 		})
 	}
 }
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index ee266849b..dc472b52f 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -3365,7 +3365,7 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
 	}
 
 	fn internal_channel_reestablish(&self, counterparty_node_id: &PublicKey, msg: &msgs::ChannelReestablish) -> Result<(), MsgHandleErrInternal> {
-		let (htlcs_failed_forward, chan_restoration_res) = {
+		let (htlcs_failed_forward, need_lnd_workaround, chan_restoration_res) = {
 			let mut channel_state_lock = self.channel_state.lock().unwrap();
 			let channel_state = &mut *channel_state_lock;
@@ -3386,13 +3386,19 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
 							msg,
 						});
 					}
-					(htlcs_failed_forward, handle_chan_restoration_locked!(self, channel_state_lock, channel_state, chan, revoke_and_ack, commitment_update, order, monitor_update_opt, Vec::new(), None, funding_locked))
+					let need_lnd_workaround = chan.get_mut().workaround_lnd_bug_4006.take();
+					(htlcs_failed_forward, need_lnd_workaround,
+						handle_chan_restoration_locked!(self, channel_state_lock, channel_state, chan, revoke_and_ack, commitment_update, order, monitor_update_opt, Vec::new(), None, funding_locked))
 				},
 				hash_map::Entry::Vacant(_) => return Err(MsgHandleErrInternal::send_err_msg_no_close("Failed to find corresponding channel".to_owned(), msg.channel_id))
 			}
 		};
 		post_handle_chan_restoration!(self, chan_restoration_res);
 		self.fail_holding_cell_htlcs(htlcs_failed_forward, msg.channel_id);
+
+		if let Some(funding_locked_msg) = need_lnd_workaround {
+			self.internal_funding_locked(counterparty_node_id, &funding_locked_msg)?;
+		}
 		Ok(())
 	}
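One detail in the channelmanager.rs hunks above is worth calling out:
the stored message is take()n while the channel state lock is held, but
only re-delivered after the lock scope ends, because re-delivery goes
through the normal internal_funding_locked path, which itself acquires
the channel state lock (std's Mutex is not re-entrant, so replaying
inside the scope could deadlock or panic). A condensed sketch of that
shape, with simplified stand-in types and signatures rather than the
real API:

    use std::collections::HashMap;
    use std::sync::Mutex;

    #[derive(Clone)]
    struct FundingLocked; // stand-in for msgs::FundingLocked

    struct Chan { workaround_lnd_bug_4006: Option<FundingLocked> }

    struct Manager { channels: Mutex<HashMap<[u8; 32], Chan>> }

    impl Manager {
        fn internal_funding_locked(&self, _msg: &FundingLocked) -> Result<(), ()> {
            // Takes the channel lock itself; calling it with the lock
            // already held could deadlock or panic.
            let _guard = self.channels.lock().unwrap();
            Ok(())
        }

        fn internal_channel_reestablish(&self, channel_id: &[u8; 32]) -> Result<(), ()> {
            let need_lnd_workaround = {
                let mut channels = self.channels.lock().unwrap();
                let chan = channels.get_mut(channel_id).ok_or(())?;
                // ... normal channel_reestablish handling ...
                chan.workaround_lnd_bug_4006.take()
            }; // channel lock released here
            if let Some(funding_locked_msg) = need_lnd_workaround {
                self.internal_funding_locked(&funding_locked_msg)?;
            }
            Ok(())
        }
    }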
diff --git a/lightning/src/ln/functional_tests.rs b/lightning/src/ln/functional_tests.rs
index 6298f6549..0471cd081 100644
--- a/lightning/src/ln/functional_tests.rs
+++ b/lightning/src/ln/functional_tests.rs
@@ -3605,15 +3605,20 @@ fn test_simple_peer_disconnect() {
 	fail_payment(&nodes[0], &vec!(&nodes[1], &nodes[2]), payment_hash_6);
 }
 
-fn do_test_drop_messages_peer_disconnect(messages_delivered: u8) {
+fn do_test_drop_messages_peer_disconnect(messages_delivered: u8, simulate_broken_lnd: bool) {
 	// Test that we can reconnect when in-flight HTLC updates get dropped
 	let chanmon_cfgs = create_chanmon_cfgs(2);
 	let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
 	let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
 	let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+
+	let mut as_funding_locked = None;
 	if messages_delivered == 0 {
-		create_chan_between_nodes_with_value_a(&nodes[0], &nodes[1], 100000, 10001, InitFeatures::known(), InitFeatures::known());
+		let (funding_locked, _, _) = create_chan_between_nodes_with_value_a(&nodes[0], &nodes[1], 100000, 10001, InitFeatures::known(), InitFeatures::known());
+		as_funding_locked = Some(funding_locked); // nodes[1] doesn't receive the funding_locked message (it'll be re-sent on reconnect)
+		// Note that we store it so that if we're running with `simulate_broken_lnd` we can deliver
+		// it before the channel_reestablish message.
 	} else {
 		create_announced_chan_between_nodes(&nodes, 0, 1, InitFeatures::known(), InitFeatures::known());
 	}
@@ -3668,6 +3673,17 @@ fn do_test_drop_messages_peer_disconnect(messages_delivered: u8) {
 	nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false);
 	nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false);
 	if messages_delivered < 3 {
+		if simulate_broken_lnd {
+			// lnd has a long-standing bug where they send a funding_locked prior to a
+			// channel_reestablish if you reconnect prior to funding_locked time.
+			//
+			// Here we simulate that behavior, delivering a funding_locked immediately on
+			// reconnect. Note that we don't bother skipping the now-duplicate funding_locked sent
+			// in `reconnect_nodes` but we currently don't fail based on that.
+			//
+			// See-also <https://github.com/lightningnetwork/lnd/issues/4006>
+			nodes[1].node.handle_funding_locked(&nodes[0].node.get_our_node_id(), &as_funding_locked.as_ref().unwrap().0);
+		}
 		// Even if the funding_locked messages get exchanged, as long as nothing further was
 		// received on either side, both sides will need to resend them.
 		reconnect_nodes(&nodes[0], &nodes[1], (true, true), (0, 1), (0, 0), (0, 0), (0, 0), (false, false));
@@ -3811,17 +3827,18 @@ fn do_test_drop_messages_peer_disconnect(messages_delivered: u8) {
 
 #[test]
 fn test_drop_messages_peer_disconnect_a() {
-	do_test_drop_messages_peer_disconnect(0);
-	do_test_drop_messages_peer_disconnect(1);
-	do_test_drop_messages_peer_disconnect(2);
-	do_test_drop_messages_peer_disconnect(3);
+	do_test_drop_messages_peer_disconnect(0, true);
+	do_test_drop_messages_peer_disconnect(0, false);
+	do_test_drop_messages_peer_disconnect(1, false);
+	do_test_drop_messages_peer_disconnect(2, false);
 }
 
 #[test]
 fn test_drop_messages_peer_disconnect_b() {
-	do_test_drop_messages_peer_disconnect(4);
-	do_test_drop_messages_peer_disconnect(5);
-	do_test_drop_messages_peer_disconnect(6);
+	do_test_drop_messages_peer_disconnect(3, false);
+	do_test_drop_messages_peer_disconnect(4, false);
+	do_test_drop_messages_peer_disconnect(5, false);
+	do_test_drop_messages_peer_disconnect(6, false);
 }
 
 #[test]
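A note on the test matrix: simulate_broken_lnd is only paired with
messages_delivered == 0 because as_funding_locked is only populated when
the initial funding_locked goes undelivered (the simulation unwrap()s it
on reconnect); the other cases keep their previous coverage, with the 3
case moving from _a to _b to rebalance the two test functions. Since
BOLT 2 requires a reconnecting peer to exchange channel_reestablish for
a channel before any other messages for it, the simulated (buggy) lnd
behavior reduces to the two calls from the diff above, in this order:

    // Simulated lnd: deliver the held funding_locked first...
    nodes[1].node.handle_funding_locked(&nodes[0].node.get_our_node_id(), &as_funding_locked.as_ref().unwrap().0);
    // ...then run the normal reconnection flow, whose channel_reestablish
    // exchange re-sends funding_locked; the now-duplicate copy is tolerated.
    reconnect_nodes(&nodes[0], &nodes[1], (true, true), (0, 1), (0, 0), (0, 0), (0, 0), (false, false));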