From bee42b1659c5989516e5d76501b85b9ff970c647 Mon Sep 17 00:00:00 2001
From: Matt Corallo
Date: Sat, 20 Aug 2022 01:03:27 +0000
Subject: [PATCH] Handle async initial ChannelMonitor persistence failing on
 restart

If the initial ChannelMonitor persistence is done asynchronously but
does not complete before the node restarts (with the ChannelManager
having been persisted), we'll start back up with a channel present but
no corresponding ChannelMonitor.

Because the Channel is pending-monitor-update and has not yet
broadcasted its initial funding transaction or sent channel_ready, this
is not a violation of our API contract nor a safety violation. However,
the previous code would refuse to deserialize the ChannelManager,
treating it as an API contract violation.

The solution is to test for this case explicitly and drop the channel
entirely, as if the peer had disconnected before we received the
funding_signed (for outbound channels) or before we sent the
channel_ready (for inbound channels).
---
 lightning/src/ln/chanmon_update_fail_tests.rs | 218 ++++++++++++++++++
 lightning/src/ln/channel.rs                   |  36 +++
 lightning/src/ln/channelmanager.rs            |  10 +
 lightning/src/util/events.rs                  |  12 +-
 4 files changed, 274 insertions(+), 2 deletions(-)

diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs
index 15d46b046..ef7efdd22 100644
--- a/lightning/src/ln/chanmon_update_fail_tests.rs
+++ b/lightning/src/ln/chanmon_update_fail_tests.rs
@@ -2745,3 +2745,221 @@ fn double_temp_error() {
 	commitment_signed_dance!(nodes[0], nodes[1], commitment_signed_b2, false);
 	expect_payment_sent!(nodes[0], payment_preimage_2);
 }
+
+fn do_test_outbound_reload_without_init_mon(use_0conf: bool) {
+	// Test that if the monitor update generated in funding_signed is stored async and we restart
+	// with the latest ChannelManager but the ChannelMonitor persistence never completed we happily
+	// drop the channel and move on.
+	let chanmon_cfgs = create_chanmon_cfgs(2);
+	let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+
+	let persister: test_utils::TestPersister;
+	let new_chain_monitor: test_utils::TestChainMonitor;
+	let nodes_0_deserialized: ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>;
+
+	let mut chan_config = test_default_channel_config();
+	chan_config.manually_accept_inbound_channels = true;
+	chan_config.channel_handshake_limits.trust_own_funding_0conf = true;
+
+	let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[Some(chan_config), Some(chan_config)]);
+	let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+
+	nodes[0].node.create_channel(nodes[1].node.get_our_node_id(), 100000, 10001, 43, None).unwrap();
+	nodes[1].node.handle_open_channel(&nodes[0].node.get_our_node_id(), channelmanager::provided_init_features(), &get_event_msg!(nodes[0], MessageSendEvent::SendOpenChannel, nodes[1].node.get_our_node_id()));
+
+	let events = nodes[1].node.get_and_clear_pending_events();
+	assert_eq!(events.len(), 1);
+	match events[0] {
+		Event::OpenChannelRequest { temporary_channel_id, .. } => {
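+			// manually_accept_inbound_channels is set above, so the open_channel surfaces here
+			// for manual acceptance; in the 0conf case we accept with a minimum depth of zero.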
+			if use_0conf {
+				nodes[1].node.accept_inbound_channel_from_trusted_peer_0conf(&temporary_channel_id, &nodes[0].node.get_our_node_id(), 0).unwrap();
+			} else {
+				nodes[1].node.accept_inbound_channel(&temporary_channel_id, &nodes[0].node.get_our_node_id(), 0).unwrap();
+			}
+		},
+		_ => panic!("Unexpected event"),
+	};
+
+	nodes[0].node.handle_accept_channel(&nodes[1].node.get_our_node_id(), channelmanager::provided_init_features(), &get_event_msg!(nodes[1], MessageSendEvent::SendAcceptChannel, nodes[0].node.get_our_node_id()));
+
+	let (temporary_channel_id, funding_tx, ..) = create_funding_transaction(&nodes[0], &nodes[1].node.get_our_node_id(), 100000, 43);
+
+	nodes[0].node.funding_transaction_generated(&temporary_channel_id, &nodes[1].node.get_our_node_id(), funding_tx.clone()).unwrap();
+	check_added_monitors!(nodes[0], 0);
+
+	let funding_created_msg = get_event_msg!(nodes[0], MessageSendEvent::SendFundingCreated, nodes[1].node.get_our_node_id());
+	nodes[1].node.handle_funding_created(&nodes[0].node.get_our_node_id(), &funding_created_msg);
+	check_added_monitors!(nodes[1], 1);
+
+	let bs_signed_locked = nodes[1].node.get_and_clear_pending_msg_events();
+	assert_eq!(bs_signed_locked.len(), if use_0conf { 2 } else { 1 });
+	match &bs_signed_locked[0] {
+		MessageSendEvent::SendFundingSigned { msg, .. } => {
+			chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+
+			nodes[0].node.handle_funding_signed(&nodes[1].node.get_our_node_id(), &msg);
+			check_added_monitors!(nodes[0], 1);
+		}
+		_ => panic!("Unexpected event"),
+	}
+	if use_0conf {
+		match &bs_signed_locked[1] {
+			MessageSendEvent::SendChannelReady { msg, .. } => {
+				nodes[0].node.handle_channel_ready(&nodes[1].node.get_our_node_id(), &msg);
+			}
+			_ => panic!("Unexpected event"),
+		}
+	}
+
+	assert!(nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().is_empty());
+	assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
+	assert!(nodes[0].node.get_and_clear_pending_events().is_empty());
+
+	// nodes[0] is now waiting on the first ChannelMonitor persistence to complete in order to
+	// broadcast the funding transaction. If nodes[0] restarts at this point with the
+	// ChannelMonitor lost, we should simply discard the channel.
+
+	// The test framework checks that watched_txn/outputs match the monitor set, which they will
+	// not, so we have to clear them here.
+	nodes[0].chain_source.watched_txn.lock().unwrap().clear();
+	nodes[0].chain_source.watched_outputs.lock().unwrap().clear();
+
+	let nodes_0_serialized = nodes[0].node.encode();
+	persister = test_utils::TestPersister::new();
+	let keys_manager = &chanmon_cfgs[0].keys_manager;
+	new_chain_monitor = test_utils::TestChainMonitor::new(Some(nodes[0].chain_source), nodes[0].tx_broadcaster.clone(), nodes[0].logger, node_cfgs[0].fee_estimator, &persister, keys_manager);
+	nodes[0].chain_monitor = &new_chain_monitor;
+
+	let mut nodes_0_read = &nodes_0_serialized[..];
+	let config = UserConfig::default();
+	nodes_0_deserialized = {
+		<(BlockHash, ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>)>::read(&mut nodes_0_read, ChannelManagerReadArgs {
+			default_config: config,
+			keys_manager,
+			fee_estimator: node_cfgs[0].fee_estimator,
+			chain_monitor: nodes[0].chain_monitor,
+			tx_broadcaster: nodes[0].tx_broadcaster.clone(),
+			logger: nodes[0].logger,
+			channel_monitors: HashMap::new(),
+		}).unwrap().1
+	};
+	nodes[0].node = &nodes_0_deserialized;
+	assert!(nodes_0_read.is_empty());
+
+	check_closed_event!(nodes[0], 1, ClosureReason::DisconnectedPeer);
+	assert!(nodes[0].node.list_channels().is_empty());
+}
+
+#[test]
+fn test_outbound_reload_without_init_mon() {
+	do_test_outbound_reload_without_init_mon(true);
+	do_test_outbound_reload_without_init_mon(false);
+}
+
+fn do_test_inbound_reload_without_init_mon(use_0conf: bool, lock_commitment: bool) {
+	// Test that if the monitor update generated when handling funding_created is stored async
+	// and we restart with the latest ChannelManager but the ChannelMonitor persistence never
+	// completed we happily drop the channel and move on.
+	let chanmon_cfgs = create_chanmon_cfgs(2);
+	let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+
+	let persister: test_utils::TestPersister;
+	let new_chain_monitor: test_utils::TestChainMonitor;
+	let nodes_1_deserialized: ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>;
+
+	let mut chan_config = test_default_channel_config();
+	chan_config.manually_accept_inbound_channels = true;
+	chan_config.channel_handshake_limits.trust_own_funding_0conf = true;
+
+	let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[Some(chan_config), Some(chan_config)]);
+	let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+
+	nodes[0].node.create_channel(nodes[1].node.get_our_node_id(), 100000, 10001, 43, None).unwrap();
+	nodes[1].node.handle_open_channel(&nodes[0].node.get_our_node_id(), channelmanager::provided_init_features(), &get_event_msg!(nodes[0], MessageSendEvent::SendOpenChannel, nodes[1].node.get_our_node_id()));
+
+	let events = nodes[1].node.get_and_clear_pending_events();
+	assert_eq!(events.len(), 1);
+	match events[0] {
+		Event::OpenChannelRequest { temporary_channel_id, .. } => {
+			if use_0conf {
+				nodes[1].node.accept_inbound_channel_from_trusted_peer_0conf(&temporary_channel_id, &nodes[0].node.get_our_node_id(), 0).unwrap();
+			} else {
+				nodes[1].node.accept_inbound_channel(&temporary_channel_id, &nodes[0].node.get_our_node_id(), 0).unwrap();
+			}
+		},
+		_ => panic!("Unexpected event"),
+	};
+
+	nodes[0].node.handle_accept_channel(&nodes[1].node.get_our_node_id(), channelmanager::provided_init_features(), &get_event_msg!(nodes[1], MessageSendEvent::SendAcceptChannel, nodes[0].node.get_our_node_id()));
+
+	let (temporary_channel_id, funding_tx, ..) = create_funding_transaction(&nodes[0], &nodes[1].node.get_our_node_id(), 100000, 43);
+
+	nodes[0].node.funding_transaction_generated(&temporary_channel_id, &nodes[1].node.get_our_node_id(), funding_tx.clone()).unwrap();
+	check_added_monitors!(nodes[0], 0);
+
+	let funding_created_msg = get_event_msg!(nodes[0], MessageSendEvent::SendFundingCreated, nodes[1].node.get_our_node_id());
+	chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+	nodes[1].node.handle_funding_created(&nodes[0].node.get_our_node_id(), &funding_created_msg);
+	check_added_monitors!(nodes[1], 1);
+
+	// nodes[1] happily sends its funding_signed even though it's awaiting the persistence of the
+	// initial ChannelMonitor, but it will decline to send its channel_ready even if the funding
+	// transaction is confirmed.
+	let funding_signed_msg = get_event_msg!(nodes[1], MessageSendEvent::SendFundingSigned, nodes[0].node.get_our_node_id());
+
+	nodes[0].node.handle_funding_signed(&nodes[1].node.get_our_node_id(), &funding_signed_msg);
+	check_added_monitors!(nodes[0], 1);
+
+	let as_funding_tx = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+	if lock_commitment {
+		confirm_transaction(&nodes[0], &as_funding_tx[0]);
+		confirm_transaction(&nodes[1], &as_funding_tx[0]);
+	}
+	if use_0conf || lock_commitment {
+		let as_ready = get_event_msg!(nodes[0], MessageSendEvent::SendChannelReady, nodes[1].node.get_our_node_id());
+		nodes[1].node.handle_channel_ready(&nodes[0].node.get_our_node_id(), &as_ready);
+	}
+	assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+
+	// nodes[1] is now waiting on the first ChannelMonitor persistence to complete in order to
+	// move the channel to ready (or is waiting on the funding transaction to confirm). If nodes[1]
+	// restarts at this point with the ChannelMonitor lost, we should simply discard the channel.
+
+	// The test framework checks that watched_txn/outputs match the monitor set, which they will
+	// not, so we have to clear them here.
+	nodes[1].chain_source.watched_txn.lock().unwrap().clear();
+	nodes[1].chain_source.watched_outputs.lock().unwrap().clear();
+
+	let nodes_1_serialized = nodes[1].node.encode();
+	persister = test_utils::TestPersister::new();
+	let keys_manager = &chanmon_cfgs[1].keys_manager;
+	new_chain_monitor = test_utils::TestChainMonitor::new(Some(nodes[1].chain_source), nodes[1].tx_broadcaster.clone(), nodes[1].logger, node_cfgs[1].fee_estimator, &persister, keys_manager);
+	nodes[1].chain_monitor = &new_chain_monitor;
+
+	let mut nodes_1_read = &nodes_1_serialized[..];
+	let config = UserConfig::default();
+	nodes_1_deserialized = {
+		<(BlockHash, ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>)>::read(&mut nodes_1_read, ChannelManagerReadArgs {
+			default_config: config,
+			keys_manager,
+			fee_estimator: node_cfgs[1].fee_estimator,
+			chain_monitor: nodes[1].chain_monitor,
+			tx_broadcaster: nodes[1].tx_broadcaster.clone(),
+			logger: nodes[1].logger,
+			channel_monitors: HashMap::new(),
+		}).unwrap().1
+	};
+	nodes[1].node = &nodes_1_deserialized;
+	assert!(nodes_1_read.is_empty());
+
+	check_closed_event!(nodes[1], 1, ClosureReason::DisconnectedPeer);
+	assert!(nodes[1].node.list_channels().is_empty());
+}
+
+#[test]
+fn test_inbound_reload_without_init_mon() {
+	do_test_inbound_reload_without_init_mon(true, true);
+	do_test_inbound_reload_without_init_mon(true, false);
+	do_test_inbound_reload_without_init_mon(false, true);
+	do_test_inbound_reload_without_init_mon(false, false);
+}
diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs
index 48a701e3b..55732d704 100644
--- a/lightning/src/ln/channel.rs
+++ b/lightning/src/ln/channel.rs
@@ -4788,6 +4788,42 @@ impl<Signer: Sign> Channel<Signer> {
 		self.channel_state >= ChannelState::FundingSent as u32
 	}
 
+	/// Returns true if the channel is awaiting the persistence of the initial ChannelMonitor.
+	/// If the channel is outbound, this implies we have not yet broadcasted the funding
+	/// transaction. If the channel is inbound, this implies only that the channel has not yet
+	/// advanced past the initial funding state.
+	pub fn is_awaiting_initial_mon_persist(&self) -> bool {
+		if !self.is_awaiting_monitor_update() { return false; }
+		if self.channel_state &
+			!(ChannelState::TheirChannelReady as u32 | ChannelState::PeerDisconnected as u32 | ChannelState::MonitorUpdateFailed as u32)
+				== ChannelState::FundingSent as u32 {
+			// If we're not a 0conf channel, we'll be waiting on a monitor update with only
+			// FundingSent set, though our peer could have sent their channel_ready.
+			debug_assert!(self.minimum_depth.unwrap_or(1) > 0);
+			return true;
+		}
+		if self.cur_holder_commitment_transaction_number == INITIAL_COMMITMENT_NUMBER - 1 &&
+			self.cur_counterparty_commitment_transaction_number == INITIAL_COMMITMENT_NUMBER - 1 {
+			// If we're a 0-conf channel, we'll move beyond FundingSent immediately even while
+			// waiting for the initial monitor persistence. Thus, we check whether both commitment
+			// transaction numbers have advanced exactly once (for the funding_signed), and that
+			// we're still awaiting the monitor update.
+			//
+			// If we got here, we shouldn't have yet broadcasted the funding transaction (as the
+			// only way to get an awaiting-monitor-update state during initial funding is if the
+			// initial monitor persistence is still pending).
+			//
+			// Because spuriously deciding that we're awaiting the initial broadcast could result
+			// in loss of funds (we'd have no monitor while the funding transaction confirms), we
+			// hard-assert here, even in production builds.
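+			// (Concretely: an outbound channel must still hold its unbroadcasted funding
+			// transaction, our channel_ready must be queued behind the pending monitor update,
+			// and no monitor update beyond the initial write (update_id 0) may exist.)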
+			if self.is_outbound() { assert!(self.funding_transaction.is_some()); }
+			assert!(self.monitor_pending_channel_ready);
+			assert_eq!(self.latest_monitor_update_id, 0);
+			return true;
+		}
+		false
+	}
+
 	/// Returns true if our channel_ready has been sent
 	pub fn is_our_channel_ready(&self) -> bool {
 		(self.channel_state & ChannelState::OurChannelReady as u32) != 0 || self.channel_state >= ChannelState::ChannelFunded as u32
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index f234ad9f9..8f0f3811b 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -6921,6 +6921,16 @@ impl<'a, Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
 					}
 					by_id.insert(channel.channel_id(), channel);
 				}
+			} else if channel.is_awaiting_initial_mon_persist() {
+				// If we were persisted and shut down while the initial ChannelMonitor persistence
+				// was in-progress, we never broadcasted the funding transaction and can still
+				// safely discard the channel.
+				let _ = channel.force_shutdown(false);
+				channel_closures.push(events::Event::ChannelClosed {
+					channel_id: channel.channel_id(),
+					user_channel_id: channel.get_user_id(),
+					reason: ClosureReason::DisconnectedPeer,
+				});
 			} else {
 				log_error!(args.logger, "Missing ChannelMonitor for channel {} needed by ChannelManager.", log_bytes!(channel.channel_id()));
 				log_error!(args.logger, " The chain::Watch API *requires* that monitors are persisted durably before returning,");
diff --git a/lightning/src/util/events.rs b/lightning/src/util/events.rs
index 8ddd762e9..d2aa076bd 100644
--- a/lightning/src/util/events.rs
+++ b/lightning/src/util/events.rs
@@ -111,11 +111,19 @@ pub enum ClosureReason {
 	/// The peer disconnected prior to funding completing. In this case the spec mandates that we
 	/// forget the channel entirely - we can attempt again if the peer reconnects.
 	///
+	/// This includes cases where we restarted prior to funding completion, including prior to the
+	/// initial [`ChannelMonitor`] persistence completing.
+	///
 	/// In LDK versions prior to 0.0.107 this could also occur if we were unable to connect to the
 	/// peer because of mutual incompatibility between us and our channel counterparty.
+	///
+	/// [`ChannelMonitor`]: crate::chain::channelmonitor::ChannelMonitor
 	DisconnectedPeer,
-	/// Closure generated from `ChannelManager::read` if the ChannelMonitor is newer than
-	/// the ChannelManager deserialized.
+	/// Closure generated from `ChannelManager::read` if the [`ChannelMonitor`] is newer than
+	/// the [`ChannelManager`] deserialized.
+	///
+	/// [`ChannelMonitor`]: crate::chain::channelmonitor::ChannelMonitor
+	/// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
 	OutdatedChannelManager
 }
-- 
2.39.5