Run monitor update completion actions for pre-startup completion
author Matt Corallo <git@bluematt.me>
Wed, 5 Jul 2023 02:15:42 +0000 (02:15 +0000)
committer Matt Corallo <git@bluematt.me>
Wed, 12 Jul 2023 20:53:10 +0000 (20:53 +0000)
If a `ChannelMonitorUpdate` completes being persisted, but the
`ChannelManager` isn't informed thereof (or isn't persisted) before
shutdown, on startup we may still have it listed as in-flight. When
we compare the available `ChannelMonitor` with the in-flight set,
we'll notice it completed and remove it; however, this may leave
some post-update actions dangling which still need to run.

Here we handle this with a new `BackgroundEvent` indicating we need
to run any post-update action(s) for a given channel.
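
For illustration, the startup reconciliation described above can be sketched
roughly as follows. This is a simplified, hypothetical stand-in, not the
actual rust-lightning internals; the real logic lives in the
channelmanager.rs hunks below, where the in-flight update set is compared
against the reloaded `ChannelMonitor`.

// Simplified stand-in types; names here are illustrative only.
struct MonitorUpdate { update_id: u64 }

enum BackgroundEvent {
    // All in-flight updates for a channel turned out to have completed
    // before we were serialized; run any pending completion actions.
    MonitorUpdatesComplete { channel_id: [u8; 32] },
}

// Drop any in-flight update the reloaded ChannelMonitor already contains -
// it must have completed before shutdown, we just were never told.
fn reconcile_in_flight(
    channel_id: [u8; 32],
    latest_persisted_update_id: u64,
    in_flight: &mut Vec<MonitorUpdate>,
) -> Option<BackgroundEvent> {
    in_flight.retain(|upd| upd.update_id > latest_persisted_update_id);
    if in_flight.is_empty() {
        // Every update completed, but the ChannelManager was never notified,
        // so its post-update actions may still be dangling - queue an event
        // to run them once startup finishes.
        Some(BackgroundEvent::MonitorUpdatesComplete { channel_id })
    } else {
        None
    }
}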

lightning/src/ln/chanmon_update_fail_tests.rs
lightning/src/ln/channelmanager.rs

diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs
index 707c27908eddabd5efdbabeda8278af225b02950..8d4c40b64d06b303c7aab3b183adbc98dcf8829d 100644
@@ -2346,6 +2346,7 @@ fn do_channel_holding_cell_serialize(disconnect: bool, reload_a: bool) {
                RecipientOnionFields::secret_only(payment_secret_2), PaymentId(payment_hash_2.0)).unwrap();
        check_added_monitors!(nodes[0], 0);
 
+       let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode();
        chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
        chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
        nodes[0].node.claim_funds(payment_preimage_0);
@@ -2365,8 +2366,9 @@ fn do_channel_holding_cell_serialize(disconnect: bool, reload_a: bool) {
                // disconnect the peers. Note that the fuzzer originally found this issue because
                // deserializing a ChannelManager in this state causes an assertion failure.
                if reload_a {
-                       let chan_0_monitor_serialized = get_monitor!(nodes[0], chan_id).encode();
                        reload_node!(nodes[0], &nodes[0].node.encode(), &[&chan_0_monitor_serialized], persister, new_chain_monitor, nodes_0_deserialized);
+                       persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+                       persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
                } else {
                        nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id());
                }
@@ -2406,9 +2408,14 @@ fn do_channel_holding_cell_serialize(disconnect: bool, reload_a: bool) {
                        assert_eq!(pending_cs.commitment_signed, cs);
                } else { panic!(); }
 
-               // There should be no monitor updates as we are still pending awaiting a failed one.
-               check_added_monitors!(nodes[0], 0);
-               check_added_monitors!(nodes[1], 0);
+               if reload_a {
+                       // The two pending monitor updates were replayed (but are still pending).
+                       check_added_monitors(&nodes[0], 2);
+               } else {
+                       // There should be no monitor updates as we are still awaiting a failed one.
+                       check_added_monitors(&nodes[0], 0);
+               }
+               check_added_monitors(&nodes[1], 0);
        }
 
        // If we finish updating the monitor, we should free the holding cell right away (this did
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index d4d1a926825d3fe89bb560370ba3c068e956156b..b4eed11fc39e9916fa8564bdf4f5799a81fcec2a 100644
@@ -530,6 +530,13 @@ enum BackgroundEvent {
                funding_txo: OutPoint,
                update: ChannelMonitorUpdate
        },
+       /// Some [`ChannelMonitorUpdate`]s completed before we were serialized but we still have
+       /// them marked as pending, so we need to run any [`MonitorUpdateCompletionAction`]s pending
+       /// on a channel.
+       MonitorUpdatesComplete {
+               counterparty_node_id: PublicKey,
+               channel_id: [u8; 32],
+       },
 }
 
 #[derive(Debug)]
@@ -4191,6 +4198,22 @@ where
                                        }
                                        let _ = handle_error!(self, res, counterparty_node_id);
                                },
+                               BackgroundEvent::MonitorUpdatesComplete { counterparty_node_id, channel_id } => {
+                                       let per_peer_state = self.per_peer_state.read().unwrap();
+                                       if let Some(peer_state_mutex) = per_peer_state.get(&counterparty_node_id) {
+                                               let mut peer_state_lock = peer_state_mutex.lock().unwrap();
+                                               let peer_state = &mut *peer_state_lock;
+                                               if let Some(chan) = peer_state.channel_by_id.get_mut(&channel_id) {
+                                                       handle_monitor_update_completion!(self, peer_state_lock, peer_state, per_peer_state, chan);
+                                               } else {
+                                                       let update_actions = peer_state.monitor_update_blocked_actions
+                                                               .remove(&channel_id).unwrap_or(Vec::new());
+                                                       mem::drop(peer_state_lock);
+                                                       mem::drop(per_peer_state);
+                                                       self.handle_monitor_update_completion_actions(update_actions);
+                                               }
+                                       }
+                               },
                        }
                }
                NotifyOption::DoPersist
@@ -8518,6 +8541,16 @@ where
                                                        update: update.clone(),
                                                });
                                }
+                               if $chan_in_flight_upds.is_empty() {
+                                       // We had some updates to apply, but it turns out they had completed before we
+                                       // were serialized; we just weren't notified of that. Thus, we may have to run
+                                       // the completion actions for any monitor updates, but otherwise are done.
+                                       pending_background_events.push(
+                                               BackgroundEvent::MonitorUpdatesComplete {
+                                                       counterparty_node_id: $counterparty_node_id,
+                                                       channel_id: $funding_txo.to_channel_id(),
+                                               });
+                               }
                                if $peer_state.in_flight_monitor_updates.insert($funding_txo, $chan_in_flight_upds).is_some() {
                                        log_error!(args.logger, "Duplicate in-flight monitor update set for the same channel!");
                                        return Err(DecodeError::InvalidValue);
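
A note on the `MonitorUpdatesComplete` handling above: when the channel is
already gone, the blocked completion actions are drained under the peer-state
lock and only run after both locks are dropped (the two mem::drop calls). A
minimal, self-contained sketch of that drain-then-run pattern, using
simplified stand-in types rather than the real rust-lightning ones (the
`CompletionAction::PaymentClaimed` variant here is purely illustrative):

use std::collections::HashMap;
use std::sync::Mutex;

type ChannelId = [u8; 32];

#[derive(Debug)]
enum CompletionAction {
    // e.g. surface a payment-claimed event to the user (illustrative)
    PaymentClaimed { payment_hash: [u8; 32] },
}

struct PeerState {
    monitor_update_blocked_actions: HashMap<ChannelId, Vec<CompletionAction>>,
}

fn run_pending_actions(peer_state: &Mutex<PeerState>, channel_id: ChannelId) {
    // Take the actions out while holding the lock; the temporary guard is
    // dropped at the end of this statement, releasing the lock...
    let actions = peer_state.lock().unwrap()
        .monitor_update_blocked_actions
        .remove(&channel_id)
        .unwrap_or_default();
    // ...so the actions run lock-free, since the handlers may themselves
    // need to take locks (mirroring the mem::drop calls in the diff).
    for action in actions {
        println!("running completion action: {:?}", action);
    }
}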