From f9521a4bdaa4cfdaf08f6797d1a84e96475c9f49 Mon Sep 17 00:00:00 2001
From: Matt Corallo
Date: Wed, 5 Jul 2023 02:15:42 +0000
Subject: [PATCH] Run monitor update completion actions for pre-startup
 completion

If a `ChannelMonitorUpdate` completes being persisted, but the
`ChannelManager` isn't informed thereof (or isn't persisted) before
shutdown, on startup we may still have it listed as in-flight. When we
compare the available `ChannelMonitor` with the in-flight set, we'll
notice it completed and remove it; however, this may leave some
post-update actions dangling which need to complete.

Here we handle this with a new `BackgroundEvent` indicating we need to
handle any post-update action(s) for a given channel.
---
 lightning/src/ln/chanmon_update_fail_tests.rs | 15 ++++++---
 lightning/src/ln/channelmanager.rs            | 33 +++++++++++++++++++
 2 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs
index 707c2790..8d4c40b6 100644
--- a/lightning/src/ln/chanmon_update_fail_tests.rs
+++ b/lightning/src/ln/chanmon_update_fail_tests.rs
@@ -2346,6 +2346,7 @@ fn do_channel_holding_cell_serialize(disconnect: bool, reload_a: bool) {
 		RecipientOnionFields::secret_only(payment_secret_2), PaymentId(payment_hash_2.0)).unwrap();
 	check_added_monitors!(nodes[0], 0);
 
+	let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode();
 	chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
 	chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
 	nodes[0].node.claim_funds(payment_preimage_0);
@@ -2365,8 +2366,9 @@ fn do_channel_holding_cell_serialize(disconnect: bool, reload_a: bool) {
 	// disconnect the peers. Note that the fuzzer originally found this issue because
 	// deserializing a ChannelManager in this state causes an assertion failure.
 	if reload_a {
-		let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode();
 		reload_node!(nodes[0], &nodes[0].node.encode(), &[&chan_0_monitor_serialized], persister, new_chain_monitor, nodes_0_deserialized);
+		persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+		persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
 	} else {
 		nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id());
 	}
@@ -2406,9 +2408,14 @@ fn do_channel_holding_cell_serialize(disconnect: bool, reload_a: bool) {
 			assert_eq!(pending_cs.commitment_signed, cs);
 		} else { panic!(); }
 
-		// There should be no monitor updates as we are still pending awaiting a failed one.
-		check_added_monitors!(nodes[0], 0);
-		check_added_monitors!(nodes[1], 0);
+		if reload_a {
+			// The two pending monitor updates were replayed (but are still pending).
+			check_added_monitors(&nodes[0], 2);
+		} else {
+			// There should be no monitor updates as we are still pending awaiting a failed one.
+			check_added_monitors(&nodes[0], 0);
+		}
+		check_added_monitors(&nodes[1], 0);
 	}
 
 	// If we finish updating the monitor, we should free the holding cell right away (this did
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index d4d1a926..b4eed11f 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -530,6 +530,13 @@ enum BackgroundEvent {
 		funding_txo: OutPoint,
 		update: ChannelMonitorUpdate
 	},
+	/// Some [`ChannelMonitorUpdate`] (s) completed before we were serialized but we still have
+	/// them marked pending, thus we need to run any [`MonitorUpdateCompletionAction`] (s) pending
+	/// on a channel.
+	MonitorUpdatesComplete {
+		counterparty_node_id: PublicKey,
+		channel_id: [u8; 32],
+	},
 }
 
 #[derive(Debug)]
@@ -4191,6 +4198,22 @@ where
 					}
 					let _ = handle_error!(self, res, counterparty_node_id);
 				},
+				BackgroundEvent::MonitorUpdatesComplete { counterparty_node_id, channel_id } => {
+					let per_peer_state = self.per_peer_state.read().unwrap();
+					if let Some(peer_state_mutex) = per_peer_state.get(&counterparty_node_id) {
+						let mut peer_state_lock = peer_state_mutex.lock().unwrap();
+						let peer_state = &mut *peer_state_lock;
+						if let Some(chan) = peer_state.channel_by_id.get_mut(&channel_id) {
+							handle_monitor_update_completion!(self, peer_state_lock, peer_state, per_peer_state, chan);
+						} else {
+							let update_actions = peer_state.monitor_update_blocked_actions
+								.remove(&channel_id).unwrap_or(Vec::new());
+							mem::drop(peer_state_lock);
+							mem::drop(per_peer_state);
+							self.handle_monitor_update_completion_actions(update_actions);
+						}
+					}
+				},
 			}
 		}
 		NotifyOption::DoPersist
@@ -8518,6 +8541,16 @@ where
 						update: update.clone(),
 					});
 				}
+				if $chan_in_flight_upds.is_empty() {
+					// We had some updates to apply, but it turns out they had completed before we
+					// were serialized, we just weren't notified of that. Thus, we may have to run
+					// the completion actions for any monitor updates, but otherwise are done.
+					pending_background_events.push(
+						BackgroundEvent::MonitorUpdatesComplete {
+							counterparty_node_id: $counterparty_node_id,
+							channel_id: $funding_txo.to_channel_id(),
+						});
+				}
 				if $peer_state.in_flight_monitor_updates.insert($funding_txo, $chan_in_flight_upds).is_some() {
 					log_error!(args.logger, "Duplicate in-flight monitor update set for the same channel!");
 					return Err(DecodeError::InvalidValue);
-- 
2.30.2
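
For orientation, below is a condensed sketch of the startup reconciliation this patch adds to
`ChannelManager` deserialization. It uses simplified stand-in types (`InFlightUpdate`, a
`[u8; 32]` channel id, a trimmed `BackgroundEvent`) rather than LDK's actual structures, which
key in-flight updates by funding outpoint and also carry the counterparty node id; it is an
illustration of the idea, not the patch's code.

use std::collections::HashMap;

type ChannelId = [u8; 32];

/// Stand-in for a ChannelMonitorUpdate that was still marked in-flight when the
/// manager was serialized.
struct InFlightUpdate {
	update_id: u64,
}

/// Stand-in for the BackgroundEvent variant the patch introduces.
enum BackgroundEvent {
	MonitorUpdatesComplete { channel_id: ChannelId },
}

/// Drop in-flight updates the persisted ChannelMonitor has already applied. If that
/// empties a channel's in-flight set, the updates completed before we were serialized,
/// so queue an event to run any pending completion actions for that channel.
fn reconcile_in_flight_updates(
	monitor_latest_update_id: &HashMap<ChannelId, u64>,
	in_flight: &mut HashMap<ChannelId, Vec<InFlightUpdate>>,
	pending_background_events: &mut Vec<BackgroundEvent>,
) {
	for (channel_id, updates) in in_flight.iter_mut() {
		let applied_up_to = *monitor_latest_update_id.get(channel_id).unwrap_or(&0);
		let had_updates = !updates.is_empty();
		updates.retain(|upd| upd.update_id > applied_up_to);
		if had_updates && updates.is_empty() {
			// Everything completed pre-startup; the completion actions still need to run.
			pending_background_events.push(BackgroundEvent::MonitorUpdatesComplete {
				channel_id: *channel_id,
			});
		}
	}
}

fn main() {
	let chan: ChannelId = [1; 32];
	// The persisted ChannelMonitor reports it already applied updates up to id 5.
	let monitor_ids = HashMap::from([(chan, 5u64)]);
	// The serialized ChannelManager still lists updates 4 and 5 as in-flight.
	let mut in_flight = HashMap::from([(
		chan,
		vec![InFlightUpdate { update_id: 4 }, InFlightUpdate { update_id: 5 }],
	)]);
	let mut events = Vec::new();
	reconcile_in_flight_updates(&monitor_ids, &mut in_flight, &mut events);
	// Both updates completed pre-startup, so one completion event is queued.
	assert_eq!(events.len(), 1);
	assert!(in_flight[&chan].is_empty());
}

In the patch itself, each queued `MonitorUpdatesComplete` event is later handled in
`process_background_events`: via `handle_monitor_update_completion!` when the channel still
exists, or by draining `monitor_update_blocked_actions` for the channel otherwise.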