From c40504a0fc1f651f79c046b50e20f54746851ae0 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Thu, 25 Apr 2024 14:30:05 +0000 Subject: [PATCH] Drop completed blocked `ChannelMonitorUpdate`s on startup If a user receives a payment preimage for an outbound payment, the `PaymentSent` event will block any eventual RAA `ChannelMonitorUpdate` from the same channel, assuming it comes in before the event can be processed. If this blocking kicks in, but the flow eventually completes with the RAA `ChannelMonitorUpdate` being persisted, while the `ChannelManager` is only persisted prior to the event being handled, on startup we'll have a fully up-to-date `ChannelMonitor` but a pending, blocked `ChannelMonitorUpdate`. When the `PaymentSent` event is replayed we'll end up trying to apply a redundant `ChannelMonitorUpdate` which will panic. See the test added in this commit for a demonstration of this situation. In this commit we fix this issue by simply dropping blocked `ChannelMonitorUpdate`s the same as we do pending ones. --- lightning/src/ln/channel.rs | 20 ++++++++++++++++ lightning/src/ln/channelmanager.rs | 5 ++-- lightning/src/ln/monitor_tests.rs | 37 ++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 2 deletions(-) diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index ec4f2666..0608db10 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -5179,6 +5179,26 @@ impl Channel where } } + /// On startup, it's possible we detect some monitor updates have actually completed (and the + /// ChannelManager was simply stale). In that case, we should simply drop them, which we do + /// here after logging them. 
+ pub fn on_startup_drop_completed_blocked_mon_updates_through(&mut self, logger: &L, loaded_mon_update_id: u64) { + let channel_id = self.context.channel_id(); + self.context.blocked_monitor_updates.retain(|update| { + if update.update.update_id <= loaded_mon_update_id { + log_info!( + logger, + "Dropping completed ChannelMonitorUpdate id {} on channel {} due to a stale ChannelManager", + update.update.update_id, + channel_id, + ); + false + } else { + true + } + }); + } + pub fn blocked_monitor_updates_pending(&self) -> usize { self.context.blocked_monitor_updates.len() } diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 3dbbc978..9d807f85 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -10360,9 +10360,10 @@ where } } } else { - log_info!(logger, "Successfully loaded channel {} at update_id {} against monitor at update id {}", + channel.on_startup_drop_completed_blocked_mon_updates_through(&logger, monitor.get_latest_update_id()); + log_info!(logger, "Successfully loaded channel {} at update_id {} against monitor at update id {} with {} blocked updates", &channel.context.channel_id(), channel.context.get_latest_monitor_update_id(), - monitor.get_latest_update_id()); + monitor.get_latest_update_id(), channel.blocked_monitor_updates_pending()); if let Some(short_channel_id) = channel.context.get_short_channel_id() { short_to_chan_info.insert(short_channel_id, (channel.context.get_counterparty_node_id(), channel.context.channel_id())); } diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index b6270181..26bec030 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -2821,3 +2821,40 @@ fn test_monitor_claims_with_random_signatures() { do_test_monitor_claims_with_random_signatures(true, false); do_test_monitor_claims_with_random_signatures(true, true); } + +#[test] +fn test_event_replay_causing_monitor_replay() { + // 
In LDK 0.0.121 there was a bug where if a `PaymentSent` event caused an RAA + // `ChannelMonitorUpdate` hold and then the node was restarted after the `PaymentSent` event + // and `ChannelMonitorUpdate` both completed but without persisting the `ChannelManager` we'd + // replay the `ChannelMonitorUpdate` on restart (which is fine, but triggered a safety panic). + let chanmon_cfgs = create_chanmon_cfgs(2); + let node_cfgs = create_node_cfgs(2, &chanmon_cfgs); + let persister; + let new_chain_monitor; + let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]); + let node_deserialized; + let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs); + + let chan = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 500_000_000); + + let payment_preimage = route_payment(&nodes[0], &[&nodes[1]], 1_000_000).0; + + do_claim_payment_along_route(&nodes[0], &[&[&nodes[1]]], false, payment_preimage); + + // At this point the `PaymentSent` event has not been processed but the full commitment signed + // dance has completed. + let serialized_channel_manager = nodes[0].node.encode(); + + // Now process the `PaymentSent` to get the final RAA `ChannelMonitorUpdate`, checking that it + // resulted in a `ChannelManager` persistence request. + nodes[0].node.get_and_clear_needs_persistence(); + expect_payment_sent(&nodes[0], payment_preimage, None, true, true /* expected post-event monitor update*/); + assert!(nodes[0].node.get_and_clear_needs_persistence()); + + let serialized_monitor = get_monitor!(nodes[0], chan.2).encode(); + reload_node!(nodes[0], &serialized_channel_manager, &[&serialized_monitor], persister, new_chain_monitor, node_deserialized); + + // Expect the `PaymentSent` to get replayed, this time without the duplicate monitor update + expect_payment_sent(&nodes[0], payment_preimage, None, false, false /* expected post-event monitor update*/); +} -- 2.30.2