From c40504a0fc1f651f79c046b50e20f54746851ae0 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Thu, 25 Apr 2024 14:30:05 +0000 Subject: [PATCH] Drop completed blocked `ChannelMonitorUpdate`s on startup If a user receives a payment preimage for an outbound payment, the `PaymentSent` event will block any eventual RAA `ChannelMonitorUpdate` from the same channel, assuming it comes in before the event can be processed. If this blocking kicks in, but the flow eventually completes with the RAA `ChannelMonitorUpdate` being persisted, while the `ChannelManager` is only persisted prior to the event being handled, on startup we'll have a fully up-to-date `ChannelMonitor` but a pending, blocked `ChannelMonitorUpdate`. When the `PaymentSent` event is replayed we'll end up trying to apply a redundant `ChannelMonitorUpdate` which will panic. See the test added in this commit for a demonstration of this situation. In this commit we fix this issue by simply dropping blocked `ChannelMonitorUpdate`s the same as we do pending ones. --- lightning/src/ln/channel.rs | 20 ++++++++++++++++ lightning/src/ln/channelmanager.rs | 5 ++-- lightning/src/ln/monitor_tests.rs | 37 ++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index ec4f2666..0608db10 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -5179,6 +5179,26 @@ impl Channel where } } + /// On startup, it's possible we detect some monitor updates have actually completed (and the + /// ChannelManager was simply stale). In that case, we should simply drop them, which we do + /// here after logging them. 
+ pub fn on_startup_drop_completed_blocked_mon_updates_through(&mut self, logger: &L, loaded_mon_update_id: u64) { + let channel_id = self.context.channel_id(); + self.context.blocked_monitor_updates.retain(|update| { + if update.update.update_id <= loaded_mon_update_id { + log_info!( + logger, + "Dropping completed ChannelMonitorUpdate id {} on channel {} due to a stale ChannelManager", + update.update.update_id, + channel_id, + ); + false + } else { + true + } + }); + } + pub fn blocked_monitor_updates_pending(&self) -> usize { self.context.blocked_monitor_updates.len() } diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 3dbbc978..9d807f85 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -10360,9 +10360,10 @@ where } } } else { - log_info!(logger, "Successfully loaded channel {} at update_id {} against monitor at update id {}", + channel.on_startup_drop_completed_blocked_mon_updates_through(&logger, monitor.get_latest_update_id()); + log_info!(logger, "Successfully loaded channel {} at update_id {} against monitor at update id {} with {} blocked updates", &channel.context.channel_id(), channel.context.get_latest_monitor_update_id(), - monitor.get_latest_update_id()); + monitor.get_latest_update_id(), channel.blocked_monitor_updates_pending()); if let Some(short_channel_id) = channel.context.get_short_channel_id() { short_to_chan_info.insert(short_channel_id, (channel.context.get_counterparty_node_id(), channel.context.channel_id())); } diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index b6270181..26bec030 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -2821,3 +2821,40 @@ fn test_monitor_claims_with_random_signatures() { do_test_monitor_claims_with_random_signatures(true, false); do_test_monitor_claims_with_random_signatures(true, true); } + +#[test] +fn test_event_replay_causing_monitor_replay() { + // 
In LDK 0.0.121 there was a bug where if a `PaymentSent` event caused an RAA + // `ChannelMonitorUpdate` hold and then the node was restarted after the `PaymentSent` event + // and `ChannelMonitorUpdate` both completed but without persisting the `ChannelManager` we'd + // replay the `ChannelMonitorUpdate` on restart (which is fine, but triggered a safety panic). + let chanmon_cfgs = create_chanmon_cfgs(2); + let node_cfgs = create_node_cfgs(2, &chanmon_cfgs); + let persister; + let new_chain_monitor; + let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]); + let node_deserialized; + let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs); + + let chan = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 500_000_000); + + let payment_preimage = route_payment(&nodes[0], &[&nodes[1]], 1_000_000).0; + + do_claim_payment_along_route(&nodes[0], &[&[&nodes[1]]], false, payment_preimage); + + // At this point the `PaymentSent` event has not been processed but the full commitment signed + // dance has completed. + let serialized_channel_manager = nodes[0].node.encode(); + + // Now process the `PaymentSent` to get the final RAA `ChannelMonitorUpdate`, checking that it + // resulted in a `ChannelManager` persistence request. + nodes[0].node.get_and_clear_needs_persistence(); + expect_payment_sent(&nodes[0], payment_preimage, None, true, true /* expected post-event monitor update*/); + assert!(nodes[0].node.get_and_clear_needs_persistence()); + + let serialized_monitor = get_monitor!(nodes[0], chan.2).encode(); + reload_node!(nodes[0], &serialized_channel_manager, &[&serialized_monitor], persister, new_chain_monitor, node_deserialized); + + // Expect the `PaymentSent` to get replayed, this time without the duplicate monitor update + expect_payment_sent(&nodes[0], payment_preimage, None, false, false /* expected post-event monitor update*/); +} -- 2.30.2