Move in-flight `ChannelMonitorUpdate`s to `ChannelManager`
author Matt Corallo <git@bluematt.me>
Mon, 19 Jun 2023 06:26:39 +0000 (06:26 +0000)
committer Matt Corallo <git@bluematt.me>
Fri, 23 Jun 2023 19:25:40 +0000 (19:25 +0000)
Because `ChannelMonitorUpdate`s can be generated for a
channel which is already closed, and must still be tracked
through their completion, storing them in a `Channel`
doesn't make sense - we'd need a redundant place to put
them post-closure and would have to handle both storage
locations equivalently.

Instead, here, we move to storing in-flight
`ChannelMonitorUpdate`s in the `ChannelManager`, leaving
blocked `ChannelMonitorUpdate`s in the `Channel` as they
were.
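
Concretely, the in-flight updates move into a per-peer map keyed by the
channel's funding outpoint (see the `PeerState` change in the diff below).
A minimal sketch of the resulting shape, using stand-in types rather than
the real LDK ones:

    use std::collections::BTreeMap;

    // Stand-ins for the real types, just to make the layout concrete.
    struct OutPoint { txid: [u8; 32], index: u16 }
    struct ChannelMonitorUpdate { update_id: u64 }

    struct PeerState {
        // ...existing fields elided...
        // Updates handed to the persister but not yet completed, keyed by
        // funding outpoint. The channel itself may already be closed; an
        // on-chain HTLC claim can still generate updates for it.
        in_flight_monitor_updates: BTreeMap<OutPoint, Vec<ChannelMonitorUpdate>>,
    }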

lightning/src/ln/channel.rs
lightning/src/ln/channelmanager.rs
lightning/src/util/ser.rs

lightning/src/ln/channel.rs
index 26b70f286257a447ca0487819833c710303370df..d68108b3155dc6e03988eb4be6be2e9b662f4331 100644 (file)
@@ -2264,7 +2264,7 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
        }
 
        pub fn get_update_fulfill_htlc_and_commit<L: Deref>(&mut self, htlc_id: u64, payment_preimage: PaymentPreimage, logger: &L) -> UpdateFulfillCommitFetch where L::Target: Logger {
-               let release_cs_monitor = self.context.pending_monitor_updates.iter().all(|upd| !upd.blocked);
+               let release_cs_monitor = self.context.pending_monitor_updates.is_empty();
                match self.get_update_fulfill_htlc(htlc_id, payment_preimage, logger) {
                        UpdateFulfillFetch::NewClaim { mut monitor_update, htlc_value_msat, msg } => {
                                // Even if we aren't supposed to let new monitor updates with commitment state
@@ -2272,26 +2272,17 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
                                // matter what. Sadly, to push a new monitor update which flies before others
                                // already queued, we have to insert it into the pending queue and update the
                                // update_ids of all the following monitors.
-                               let unblocked_update_pos = if release_cs_monitor && msg.is_some() {
+                               if release_cs_monitor && msg.is_some() {
                                        let mut additional_update = self.build_commitment_no_status_check(logger);
                                        // build_commitment_no_status_check may bump latest_monitor_id but we want them
                                        // to be strictly increasing by one, so decrement it here.
                                        self.context.latest_monitor_update_id = monitor_update.update_id;
                                        monitor_update.updates.append(&mut additional_update.updates);
-                                       self.context.pending_monitor_updates.push(PendingChannelMonitorUpdate {
-                                               update: monitor_update, blocked: false,
-                                       });
-                                       self.context.pending_monitor_updates.len() - 1
                                } else {
-                                       let insert_pos = self.context.pending_monitor_updates.iter().position(|upd| upd.blocked)
-                                               .unwrap_or(self.context.pending_monitor_updates.len());
-                                       let new_mon_id = self.context.pending_monitor_updates.get(insert_pos)
+                                       let new_mon_id = self.context.pending_monitor_updates.get(0)
                                                .map(|upd| upd.update.update_id).unwrap_or(monitor_update.update_id);
                                        monitor_update.update_id = new_mon_id;
-                                       self.context.pending_monitor_updates.insert(insert_pos, PendingChannelMonitorUpdate {
-                                               update: monitor_update, blocked: false,
-                                       });
-                                       for held_update in self.context.pending_monitor_updates.iter_mut().skip(insert_pos + 1) {
+                                       for held_update in self.context.pending_monitor_updates.iter_mut() {
                                                held_update.update.update_id += 1;
                                        }
                                        if msg.is_some() {
@@ -2301,14 +2292,10 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
                                                        update, blocked: true,
                                                });
                                        }
-                                       insert_pos
-                               };
-                               self.monitor_updating_paused(false, msg.is_some(), false, Vec::new(), Vec::new(), Vec::new());
-                               UpdateFulfillCommitFetch::NewClaim {
-                                       monitor_update: self.context.pending_monitor_updates.get(unblocked_update_pos)
-                                               .expect("We just pushed the monitor update").update.clone(),
-                                       htlc_value_msat,
                                }
+
+                               self.monitor_updating_paused(false, msg.is_some(), false, Vec::new(), Vec::new(), Vec::new());
+                               UpdateFulfillCommitFetch::NewClaim { monitor_update, htlc_value_msat, }
                        },
                        UpdateFulfillFetch::DuplicateClaim {} => UpdateFulfillCommitFetch::DuplicateClaim {},
                }
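
The renumbering in this hunk keeps monitor `update_id`s strictly increasing
even when a preimage claim must fly ahead of already-queued blocked updates:
the claim takes over the id at the front of the queue and every held update
shifts up by one (e.g. a queue of ids [5, 6] plus a new claim yields claim
id 5 and queue [6, 7]). A minimal model of the idiom, with a hypothetical
stand-in type:

    // Minimal model of the id-renumbering above (stand-in type, not LDK's).
    struct Upd { update_id: u64 }

    /// Give the jumping update the id at the front of the blocked queue (if
    /// any) and bump every held update by one, keeping ids gapless.
    fn jump_queue(new_upd: &mut Upd, blocked: &mut Vec<Upd>) {
        if let Some(front) = blocked.first() {
            new_upd.update_id = front.update_id;
        }
        for held in blocked.iter_mut() {
            held.update_id += 1;
        }
    }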
@@ -3349,8 +3336,7 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
                }
 
                match self.free_holding_cell_htlcs(logger) {
-                       (Some(_), htlcs_to_fail) => {
-                               let mut additional_update = self.context.pending_monitor_updates.pop().unwrap().update;
+                       (Some(mut additional_update), htlcs_to_fail) => {
                                // free_holding_cell_htlcs may bump latest_monitor_id multiple times but we want them to be
                                // strictly increasing by one, so decrement it here.
                                self.context.latest_monitor_update_id = monitor_update.update_id;
@@ -3566,12 +3552,9 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
        {
                assert_eq!(self.context.channel_state & ChannelState::MonitorUpdateInProgress as u32, ChannelState::MonitorUpdateInProgress as u32);
                self.context.channel_state &= !(ChannelState::MonitorUpdateInProgress as u32);
-               let mut found_blocked = false;
-               self.context.pending_monitor_updates.retain(|upd| {
-                       if found_blocked { debug_assert!(upd.blocked, "No mons may be unblocked after a blocked one"); }
-                       if upd.blocked { found_blocked = true; }
-                       upd.blocked
-               });
+               for upd in self.context.pending_monitor_updates.iter() {
+                       debug_assert!(upd.blocked);
+               }
 
                // If we're past (or at) the FundingSent stage on an outbound channel, try to
                // (re-)broadcast the funding transaction as we may have declined to broadcast it when we
@@ -4439,48 +4422,31 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
        /// Returns the next blocked monitor update, if one exists, and a bool which indicates a
        /// further blocked monitor update exists after the next.
        pub fn unblock_next_blocked_monitor_update(&mut self) -> Option<(ChannelMonitorUpdate, bool)> {
-               for i in 0..self.context.pending_monitor_updates.len() {
-                       if self.context.pending_monitor_updates[i].blocked {
-                               self.context.pending_monitor_updates[i].blocked = false;
-                               return Some((self.context.pending_monitor_updates[i].update.clone(),
-                                       self.context.pending_monitor_updates.len() > i + 1));
-                       }
+               for upd in self.context.pending_monitor_updates.iter() {
+                       debug_assert!(upd.blocked);
                }
-               None
+               if self.context.pending_monitor_updates.is_empty() { return None; }
+               Some((self.context.pending_monitor_updates.remove(0).update,
+                       !self.context.pending_monitor_updates.is_empty()))
        }
 
        /// Pushes a new monitor update into our monitor update queue, returning it if it should be
        /// immediately given to the user for persisting or `None` if it should be held as blocked.
        fn push_ret_blockable_mon_update(&mut self, update: ChannelMonitorUpdate)
        -> Option<ChannelMonitorUpdate> {
-               let release_monitor = self.context.pending_monitor_updates.iter().all(|upd| !upd.blocked);
-               self.context.pending_monitor_updates.push(PendingChannelMonitorUpdate {
-                       update, blocked: !release_monitor,
-               });
-               if release_monitor { self.context.pending_monitor_updates.last().map(|upd| upd.update.clone()) } else { None }
-       }
-
-       pub fn no_monitor_updates_pending(&self) -> bool {
-               self.context.pending_monitor_updates.is_empty()
-       }
-
-       pub fn complete_all_mon_updates_through(&mut self, update_id: u64) {
-               self.context.pending_monitor_updates.retain(|upd| {
-                       if upd.update.update_id <= update_id {
-                               assert!(!upd.blocked, "Completed update must have flown");
-                               false
-                       } else { true }
-               });
-       }
-
-       pub fn complete_one_mon_update(&mut self, update_id: u64) {
-               self.context.pending_monitor_updates.retain(|upd| upd.update.update_id != update_id);
+               let release_monitor = self.context.pending_monitor_updates.is_empty();
+               if !release_monitor {
+                       self.context.pending_monitor_updates.push(PendingChannelMonitorUpdate {
+                               update, blocked: true,
+                       });
+                       None
+               } else {
+                       Some(update)
+               }
        }
 
-       /// Returns an iterator over all unblocked monitor updates which have not yet completed.
-       pub fn uncompleted_unblocked_mon_updates(&self) -> impl Iterator<Item=&ChannelMonitorUpdate> {
-               self.context.pending_monitor_updates.iter()
-                       .filter_map(|upd| if upd.blocked { None } else { Some(&upd.update) })
+       pub fn blocked_monitor_updates_pending(&self) -> usize {
+               self.context.pending_monitor_updates.len()
        }
 
        /// Returns true if the channel is awaiting the persistence of the initial ChannelMonitor.
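
With the unblocked updates gone, `Channel`'s `pending_monitor_updates` now
holds only blocked updates, and the two methods above reduce to plain queue
operations. A compact model of the new semantics (stand-in types and names,
not the real API):

    // Compact model of the post-change queue semantics (stand-in type).
    struct Upd { update_id: u64 }

    #[derive(Default)]
    struct BlockedQueue { pending: Vec<Upd> }

    impl BlockedQueue {
        /// Hand the update straight back for persistence if nothing blocks
        /// it, otherwise queue it as blocked and return None.
        fn push_ret_blockable(&mut self, upd: Upd) -> Option<Upd> {
            if self.pending.is_empty() { Some(upd) } else { self.pending.push(upd); None }
        }
        /// Pop the next blocked update plus whether more remain behind it.
        fn unblock_next(&mut self) -> Option<(Upd, bool)> {
            if self.pending.is_empty() { return None; }
            Some((self.pending.remove(0), !self.pending.is_empty()))
        }
    }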
lightning/src/ln/channelmanager.rs
index f51de289b9424e875407c0e1633f7ae78892f82a..0aca388eaa3d2e0f06debd95312ac0f3507fac04 100644 (file)
@@ -633,6 +633,13 @@ pub(super) struct PeerState<Signer: ChannelSigner> {
        /// Messages to send to the peer - pushed to in the same lock that they are generated in (except
        /// for broadcast messages, where ordering isn't as strict).
        pub(super) pending_msg_events: Vec<MessageSendEvent>,
+       /// Map from a channel's funding outpoint to pending [`ChannelMonitorUpdate`]s which have
+       /// been passed to the user but which have not yet completed.
+       ///
+       /// Note that the channel may no longer exist. For example, if the channel was closed but we
+       /// later needed to claim an HTLC which is pending on-chain, we may generate a monitor update
+       /// for a missing channel.
+       in_flight_monitor_updates: BTreeMap<OutPoint, Vec<ChannelMonitorUpdate>>,
        /// Map from a specific channel to some action(s) that should be taken when all pending
        /// [`ChannelMonitorUpdate`]s for the channel complete updating.
        ///
@@ -668,6 +675,7 @@ impl <Signer: ChannelSigner> PeerState<Signer> {
                        return false
                }
                self.channel_by_id.is_empty() && self.monitor_update_blocked_actions.is_empty()
+                       && self.in_flight_monitor_updates.is_empty()
        }
 
        // Returns a count of all channels we have with this peer, including pending channels.
@@ -1860,7 +1868,7 @@ macro_rules! handle_monitor_update_completion {
 }
 
 macro_rules! handle_new_monitor_update {
-       ($self: ident, $update_res: expr, $update_id: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr, MANUALLY_REMOVING_INITIAL_MONITOR, $remove: expr) => { {
+       ($self: ident, $update_res: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr, _internal, $remove: expr, $completed: expr) => { {
                // update_maps_on_chan_removal needs to be able to take id_to_peer, so make sure we can in
                // any case so that it won't deadlock.
                debug_assert_ne!($self.id_to_peer.held_by_thread(), LockHeldState::HeldByThread);
@@ -1885,20 +1893,40 @@ macro_rules! handle_new_monitor_update {
                                res
                        },
                        ChannelMonitorUpdateStatus::Completed => {
-                               $chan.complete_one_mon_update($update_id);
-                               if $chan.no_monitor_updates_pending() {
-                                       handle_monitor_update_completion!($self, $update_id, $peer_state_lock, $peer_state, $per_peer_state_lock, $chan);
-                               }
+                               $completed;
                                Ok(true)
                        },
                }
        } };
+       ($self: ident, $update_res: expr, $update_id: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr, MANUALLY_REMOVING_INITIAL_MONITOR, $remove: expr) => {
+               handle_new_monitor_update!($self, $update_res, $peer_state_lock, $peer_state,
+                       $per_peer_state_lock, $chan, _internal, $remove,
+                       handle_monitor_update_completion!($self, $update_id, $peer_state_lock, $peer_state, $per_peer_state_lock, $chan))
+       };
        ($self: ident, $update_res: expr, $update_id: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan_entry: expr, INITIAL_MONITOR) => {
                handle_new_monitor_update!($self, $update_res, $update_id, $peer_state_lock, $peer_state, $per_peer_state_lock, $chan_entry.get_mut(), MANUALLY_REMOVING_INITIAL_MONITOR, $chan_entry.remove_entry())
        };
        ($self: ident, $funding_txo: expr, $update: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr, MANUALLY_REMOVING, $remove: expr) => { {
-               let update_res = $self.chain_monitor.update_channel($funding_txo, &$update);
-               handle_new_monitor_update!($self, update_res, $update.update_id, $peer_state_lock, $peer_state, $per_peer_state_lock, $chan, MANUALLY_REMOVING_INITIAL_MONITOR, $remove)
+               let update_id = $update.update_id;
+               let in_flight_updates = $peer_state.in_flight_monitor_updates.entry($funding_txo)
+                       .or_insert_with(Vec::new);
+               // During startup, we push monitor updates as background events through to here in
+               // order to replay updates that were in-flight when we shut down. Thus, we have to
+               // filter for uniqueness here.
+               let idx = in_flight_updates.iter().position(|upd| upd == &$update)
+                       .unwrap_or_else(|| {
+                               in_flight_updates.push($update);
+                               in_flight_updates.len() - 1
+                       });
+               let update_res = $self.chain_monitor.update_channel($funding_txo, &in_flight_updates[idx]);
+               handle_new_monitor_update!($self, update_res, $peer_state_lock, $peer_state,
+                       $per_peer_state_lock, $chan, _internal, $remove,
+                       {
+                               let _ = in_flight_updates.remove(idx);
+                               if in_flight_updates.is_empty() && $chan.blocked_monitor_updates_pending() == 0 {
+                                       handle_monitor_update_completion!($self, update_id, $peer_state_lock, $peer_state, $per_peer_state_lock, $chan);
+                               }
+                       })
        } };
        ($self: ident, $funding_txo: expr, $update: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan_entry: expr) => {
                handle_new_monitor_update!($self, $funding_txo, $update, $peer_state_lock, $peer_state, $per_peer_state_lock, $chan_entry.get_mut(), MANUALLY_REMOVING, $chan_entry.remove_entry())
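
The `position(..).unwrap_or_else(..)` pair above makes insertion idempotent:
updates replayed as background events on startup may already be in the
in-flight list, and are then found rather than pushed twice. The idiom in
isolation, as a hypothetical generic helper:

    // The dedup-insert idiom in isolation: return an existing equal
    // element's index, or push the item and return its new index.
    fn insert_unique<T: PartialEq>(v: &mut Vec<T>, item: T) -> usize {
        v.iter().position(|x| *x == item).unwrap_or_else(|| {
            v.push(item);
            v.len() - 1
        })
    }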
@@ -4096,7 +4124,7 @@ where
                                                        match peer_state.channel_by_id.entry(funding_txo.to_channel_id()) {
                                                                hash_map::Entry::Occupied(mut chan) => {
                                                                        updated_chan = true;
-                                                                       handle_new_monitor_update!(self, funding_txo, update,
+                                                                       handle_new_monitor_update!(self, funding_txo, update.clone(),
                                                                                peer_state_lock, peer_state, per_peer_state, chan).map(|_| ())
                                                                },
                                                                hash_map::Entry::Vacant(_) => Ok(()),
@@ -4895,8 +4923,14 @@ where
                                hash_map::Entry::Vacant(_) => return,
                        }
                };
-               log_trace!(self.logger, "ChannelMonitor updated to {}. Current highest is {}",
-                       highest_applied_update_id, channel.get().context.get_latest_monitor_update_id());
+               let remaining_in_flight =
+                       if let Some(pending) = peer_state.in_flight_monitor_updates.get_mut(funding_txo) {
+                               pending.retain(|upd| upd.update_id > highest_applied_update_id);
+                               pending.len()
+                       } else { 0 };
+               log_trace!(self.logger, "ChannelMonitor updated to {}. Current highest is {}. {} pending in-flight updates.",
+                       highest_applied_update_id, channel.get().context.get_latest_monitor_update_id(),
+                       remaining_in_flight);
                if !channel.get().is_awaiting_monitor_update() || channel.get().context.get_latest_monitor_update_id() != highest_applied_update_id {
                        return;
                }
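
Completion now prunes the peer's in-flight list up to the applied id and
reports what is left; a channel only counts as fully updated once both this
list and its own blocked queue are empty. A tiny sketch of the pruning,
with a stand-in type:

    // Sketch of the completion-side pruning (stand-in type).
    struct Upd { update_id: u64 }

    /// Drop every in-flight update the ChannelMonitor has now applied,
    /// returning how many remain outstanding.
    fn prune_completed(pending: &mut Vec<Upd>, highest_applied_update_id: u64) -> usize {
        pending.retain(|upd| upd.update_id > highest_applied_update_id);
        pending.len()
    }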
@@ -7029,6 +7063,7 @@ where
                                                inbound_v1_channel_by_id: HashMap::new(),
                                                latest_features: init_msg.features.clone(),
                                                pending_msg_events: Vec::new(),
+                                               in_flight_monitor_updates: BTreeMap::new(),
                                                monitor_update_blocked_actions: BTreeMap::new(),
                                                actions_blocking_raa_monitor_updates: BTreeMap::new(),
                                                is_connected: true,
@@ -7860,6 +7895,16 @@ where
                        pending_claiming_payments = None;
                }
 
+               let mut in_flight_monitor_updates: Option<HashMap<(&PublicKey, &OutPoint), &Vec<ChannelMonitorUpdate>>> = None;
+               for ((counterparty_id, _), peer_state) in per_peer_state.iter().zip(peer_states.iter()) {
+                       for (funding_outpoint, updates) in peer_state.in_flight_monitor_updates.iter() {
+                               if !updates.is_empty() {
+                                       if in_flight_monitor_updates.is_none() { in_flight_monitor_updates = Some(HashMap::new()); }
+                                       in_flight_monitor_updates.as_mut().unwrap().insert((counterparty_id, funding_outpoint), updates);
+                               }
+                       }
+               }
+
                write_tlv_fields!(writer, {
                        (1, pending_outbound_payments_no_retry, required),
                        (2, pending_intercepted_htlcs, option),
@@ -7870,6 +7915,7 @@ where
                        (7, self.fake_scid_rand_bytes, required),
                        (8, if events_not_backwards_compatible { Some(&*events) } else { None }, option),
                        (9, htlc_purposes, vec_type),
+                       (10, in_flight_monitor_updates, option),
                        (11, self.probing_cookie_secret, required),
                        (13, htlc_onion_fields, optional_vec),
                });
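
Note that `10` is an even TLV type, so (per LDK's even-means-required TLV
convention) an older deserializer that does not understand in-flight updates
will refuse to load this state rather than silently dropping updates it
should have replayed. Because the map is only `Some` when at least one
update is actually in flight, a manager with nothing in flight remains
readable by older versions.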
@@ -8086,7 +8132,7 @@ where
                let mut id_to_peer = HashMap::with_capacity(cmp::min(channel_count as usize, 128));
                let mut short_to_chan_info = HashMap::with_capacity(cmp::min(channel_count as usize, 128));
                let mut channel_closures = VecDeque::new();
-               let mut pending_background_events = Vec::new();
+               let mut close_background_events = Vec::new();
                for _ in 0..channel_count {
                        let mut channel: Channel<<SP::Target as SignerProvider>::Signer> = Channel::read(reader, (
                                &args.entropy_source, &args.signer_provider, best_block_height, &provided_channel_type_features(&args.default_config)
@@ -8094,17 +8140,7 @@ where
                        let funding_txo = channel.context.get_funding_txo().ok_or(DecodeError::InvalidValue)?;
                        funding_txo_set.insert(funding_txo.clone());
                        if let Some(ref mut monitor) = args.channel_monitors.get_mut(&funding_txo) {
-                               if channel.get_latest_complete_monitor_update_id() > monitor.get_latest_update_id() {
-                                       // If the channel is ahead of the monitor, return InvalidValue:
-                                       log_error!(args.logger, "A ChannelMonitor is stale compared to the current ChannelManager! This indicates a potentially-critical violation of the chain::Watch API!");
-                                       log_error!(args.logger, " The ChannelMonitor for channel {} is at update_id {} but the ChannelManager is at update_id {}.",
-                                               log_bytes!(channel.context.channel_id()), monitor.get_latest_update_id(), channel.get_latest_complete_monitor_update_id());
-                                       log_error!(args.logger, " The chain::Watch API *requires* that monitors are persisted durably before returning,");
-                                       log_error!(args.logger, " client applications must ensure that ChannelMonitor data is always available and the latest to avoid funds loss!");
-                                       log_error!(args.logger, " Without the latest ChannelMonitor we cannot continue without risking funds.");
-                                       log_error!(args.logger, " Please ensure the chain::Watch API requirements are met and file a bug report at https://github.com/lightningdevkit/rust-lightning");
-                                       return Err(DecodeError::InvalidValue);
-                               } else if channel.get_cur_holder_commitment_transaction_number() > monitor.get_cur_holder_commitment_number() ||
+                               if channel.get_cur_holder_commitment_transaction_number() > monitor.get_cur_holder_commitment_number() ||
                                                channel.get_revoked_counterparty_commitment_transaction_number() > monitor.get_min_seen_secret() ||
                                                channel.get_cur_counterparty_commitment_transaction_number() > monitor.get_cur_counterparty_commitment_number() ||
                                                channel.context.get_latest_monitor_update_id() < monitor.get_latest_update_id() {
@@ -8115,7 +8151,7 @@ where
                                                log_bytes!(channel.context.channel_id()), monitor.get_latest_update_id(), channel.context.get_latest_monitor_update_id());
                                        let (monitor_update, mut new_failed_htlcs) = channel.context.force_shutdown(true);
                                        if let Some((counterparty_node_id, funding_txo, update)) = monitor_update {
-                                               pending_background_events.push(BackgroundEvent::MonitorUpdateRegeneratedOnStartup {
+                                               close_background_events.push(BackgroundEvent::MonitorUpdateRegeneratedOnStartup {
                                                        counterparty_node_id, funding_txo, update
                                                });
                                        }
@@ -8148,7 +8184,6 @@ where
                                        log_info!(args.logger, "Successfully loaded channel {} at update_id {} against monitor at update id {}",
                                                log_bytes!(channel.context.channel_id()), channel.context.get_latest_monitor_update_id(),
                                                monitor.get_latest_update_id());
-                                       channel.complete_all_mon_updates_through(monitor.get_latest_update_id());
                                        if let Some(short_channel_id) = channel.context.get_short_channel_id() {
                                                short_to_chan_info.insert(short_channel_id, (channel.context.get_counterparty_node_id(), channel.context.channel_id()));
                                        }
@@ -8195,7 +8230,7 @@ where
                                        update_id: CLOSED_CHANNEL_UPDATE_ID,
                                        updates: vec![ChannelMonitorUpdateStep::ChannelForceClosed { should_broadcast: true }],
                                };
-                               pending_background_events.push(BackgroundEvent::ClosingMonitorUpdateRegeneratedOnStartup((*funding_txo, monitor_update)));
+                               close_background_events.push(BackgroundEvent::ClosingMonitorUpdateRegeneratedOnStartup((*funding_txo, monitor_update)));
                        }
                }
 
@@ -8224,20 +8259,27 @@ where
                        claimable_htlcs_list.push((payment_hash, previous_hops));
                }
 
-               let peer_count: u64 = Readable::read(reader)?;
-               let mut per_peer_state = HashMap::with_capacity(cmp::min(peer_count as usize, MAX_ALLOC_SIZE/mem::size_of::<(PublicKey, Mutex<PeerState<<SP::Target as SignerProvider>::Signer>>)>()));
-               for _ in 0..peer_count {
-                       let peer_pubkey = Readable::read(reader)?;
-                       let peer_state = PeerState {
-                               channel_by_id: peer_channels.remove(&peer_pubkey).unwrap_or(HashMap::new()),
+               let peer_state_from_chans = |channel_by_id| {
+                       PeerState {
+                               channel_by_id,
                                outbound_v1_channel_by_id: HashMap::new(),
                                inbound_v1_channel_by_id: HashMap::new(),
-                               latest_features: Readable::read(reader)?,
+                               latest_features: InitFeatures::empty(),
                                pending_msg_events: Vec::new(),
+                               in_flight_monitor_updates: BTreeMap::new(),
                                monitor_update_blocked_actions: BTreeMap::new(),
                                actions_blocking_raa_monitor_updates: BTreeMap::new(),
                                is_connected: false,
-                       };
+                       }
+               };
+
+               let peer_count: u64 = Readable::read(reader)?;
+               let mut per_peer_state = HashMap::with_capacity(cmp::min(peer_count as usize, MAX_ALLOC_SIZE/mem::size_of::<(PublicKey, Mutex<PeerState<<SP::Target as SignerProvider>::Signer>>)>()));
+               for _ in 0..peer_count {
+                       let peer_pubkey = Readable::read(reader)?;
+                       let peer_chans = peer_channels.remove(&peer_pubkey).unwrap_or(HashMap::new());
+                       let mut peer_state = peer_state_from_chans(peer_chans);
+                       peer_state.latest_features = Readable::read(reader)?;
                        per_peer_state.insert(peer_pubkey, Mutex::new(peer_state));
                }
 
@@ -8265,24 +8307,6 @@ where
                        }
                }
 
-               for (node_id, peer_mtx) in per_peer_state.iter() {
-                       let peer_state = peer_mtx.lock().unwrap();
-                       for (_, chan) in peer_state.channel_by_id.iter() {
-                               for update in chan.uncompleted_unblocked_mon_updates() {
-                                       if let Some(funding_txo) = chan.context.get_funding_txo() {
-                                               log_trace!(args.logger, "Replaying ChannelMonitorUpdate {} for channel {}",
-                                                       update.update_id, log_bytes!(funding_txo.to_channel_id()));
-                                               pending_background_events.push(
-                                                       BackgroundEvent::MonitorUpdateRegeneratedOnStartup {
-                                                               counterparty_node_id: *node_id, funding_txo, update: update.clone(),
-                                                       });
-                                       } else {
-                                               return Err(DecodeError::InvalidValue);
-                                       }
-                               }
-                       }
-               }
-
                let _last_node_announcement_serial: u32 = Readable::read(reader)?; // Only used < 0.0.111
                let highest_seen_timestamp: u32 = Readable::read(reader)?;
 
@@ -8319,6 +8343,7 @@ where
                let mut pending_claiming_payments = Some(HashMap::new());
                let mut monitor_update_blocked_actions_per_peer: Option<Vec<(_, BTreeMap<_, Vec<_>>)>> = Some(Vec::new());
                let mut events_override = None;
+               let mut in_flight_monitor_updates: Option<HashMap<(PublicKey, OutPoint), Vec<ChannelMonitorUpdate>>> = None;
                read_tlv_fields!(reader, {
                        (1, pending_outbound_payments_no_retry, option),
                        (2, pending_intercepted_htlcs, option),
@@ -8329,6 +8354,7 @@ where
                        (7, fake_scid_rand_bytes, option),
                        (8, events_override, option),
                        (9, claimable_htlc_purposes, vec_type),
+                       (10, in_flight_monitor_updates, option),
                        (11, probing_cookie_secret, option),
                        (13, claimable_htlc_onion_fields, optional_vec),
                });
@@ -8362,6 +8388,103 @@ where
                        retry_lock: Mutex::new(())
                };
 
+               // We have to replay (or skip, if they were completed after we wrote the `ChannelManager`)
+               // each `ChannelMonitorUpdate` in `in_flight_monitor_updates`. After doing so, we have to
+               // check that each channel we have isn't newer than the latest `ChannelMonitorUpdate`(s) we
+               // replayed, and for each monitor update we have to replay we have to ensure there's a
+               // `ChannelMonitor` for it.
+               //
+               // In order to do so we first walk all of our live channels (so that we can check their
+               // state immediately after doing the update replays, when we have the `update_id`s
+               // available) and then walk any remaining in-flight updates.
+               //
+               // Because the actual handling of the in-flight updates is the same, it's macro'ized here:
+               let mut pending_background_events = Vec::new();
+               macro_rules! handle_in_flight_updates {
+                       ($counterparty_node_id: expr, $chan_in_flight_upds: expr, $funding_txo: expr,
+                        $monitor: expr, $peer_state: expr, $channel_info_log: expr
+                       ) => { {
+                               let mut max_in_flight_update_id = 0;
+                               $chan_in_flight_upds.retain(|upd| upd.update_id > $monitor.get_latest_update_id());
+                               for update in $chan_in_flight_upds.iter() {
+                                       log_trace!(args.logger, "Replaying ChannelMonitorUpdate {} for {}channel {}",
+                                               update.update_id, $channel_info_log, log_bytes!($funding_txo.to_channel_id()));
+                                       max_in_flight_update_id = cmp::max(max_in_flight_update_id, update.update_id);
+                                       pending_background_events.push(
+                                               BackgroundEvent::MonitorUpdateRegeneratedOnStartup {
+                                                       counterparty_node_id: $counterparty_node_id,
+                                                       funding_txo: $funding_txo,
+                                                       update: update.clone(),
+                                               });
+                               }
+                               if $peer_state.in_flight_monitor_updates.insert($funding_txo, $chan_in_flight_upds).is_some() {
+                                       log_error!(args.logger, "Duplicate in-flight monitor update set for the same channel!");
+                                       return Err(DecodeError::InvalidValue);
+                               }
+                               max_in_flight_update_id
+                       } }
+               }
+
+               for (counterparty_id, peer_state_mtx) in per_peer_state.iter_mut() {
+                       let mut peer_state_lock = peer_state_mtx.lock().unwrap();
+                       let peer_state = &mut *peer_state_lock;
+                       for (_, chan) in peer_state.channel_by_id.iter() {
+                               // Channels that were persisted have to be funded, otherwise they should have been
+                               // discarded.
+                               let funding_txo = chan.context.get_funding_txo().ok_or(DecodeError::InvalidValue)?;
+                               let monitor = args.channel_monitors.get(&funding_txo)
+                                       .expect("We already checked for monitor presence when loading channels");
+                               let mut max_in_flight_update_id = monitor.get_latest_update_id();
+                               if let Some(in_flight_upds) = &mut in_flight_monitor_updates {
+                                       if let Some(mut chan_in_flight_upds) = in_flight_upds.remove(&(*counterparty_id, funding_txo)) {
+                                               max_in_flight_update_id = cmp::max(max_in_flight_update_id,
+                                                       handle_in_flight_updates!(*counterparty_id, chan_in_flight_upds,
+                                                               funding_txo, monitor, peer_state, ""));
+                                       }
+                               }
+                               if chan.get_latest_complete_monitor_update_id() > max_in_flight_update_id {
+                                       // If the channel is ahead of the monitor, return InvalidValue:
+                                       log_error!(args.logger, "A ChannelMonitor is stale compared to the current ChannelManager! This indicates a potentially-critical violation of the chain::Watch API!");
+                                       log_error!(args.logger, " The ChannelMonitor for channel {} is at update_id {} with update_id through {} in-flight",
+                                               log_bytes!(chan.context.channel_id()), monitor.get_latest_update_id(), max_in_flight_update_id);
+                                       log_error!(args.logger, " but the ChannelManager is at update_id {}.", chan.get_latest_complete_monitor_update_id());
+                                       log_error!(args.logger, " The chain::Watch API *requires* that monitors are persisted durably before returning,");
+                                       log_error!(args.logger, " client applications must ensure that ChannelMonitor data is always available and the latest to avoid funds loss!");
+                                       log_error!(args.logger, " Without the latest ChannelMonitor we cannot continue without risking funds.");
+                                       log_error!(args.logger, " Please ensure the chain::Watch API requirements are met and file a bug report at https://github.com/lightningdevkit/rust-lightning");
+                                       return Err(DecodeError::InvalidValue);
+                               }
+                       }
+               }
+
+               if let Some(in_flight_upds) = in_flight_monitor_updates {
+                       for ((counterparty_id, funding_txo), mut chan_in_flight_updates) in in_flight_upds {
+                               if let Some(monitor) = args.channel_monitors.get(&funding_txo) {
+                                       // Now that we've removed all the in-flight monitor updates for channels that are
+                                       // still open, we need to replay any monitor updates that are for closed channels,
+                                       // creating the necessary peer_state entries as we go.
+                                       let peer_state_mutex = per_peer_state.entry(counterparty_id).or_insert_with(|| {
+                                               Mutex::new(peer_state_from_chans(HashMap::new()))
+                                       });
+                                       let mut peer_state = peer_state_mutex.lock().unwrap();
+                                       handle_in_flight_updates!(counterparty_id, chan_in_flight_updates,
+                                               funding_txo, monitor, peer_state, "closed ");
+                               } else {
+                                       log_error!(args.logger, "A ChannelMonitor is missing even though we have in-flight updates for it! This indicates a potentially-critical violation of the chain::Watch API!");
+                                       log_error!(args.logger, " The ChannelMonitor for channel {} is missing.",
+                                               log_bytes!(funding_txo.to_channel_id()));
+                                       log_error!(args.logger, " The chain::Watch API *requires* that monitors are persisted durably before returning,");
+                                       log_error!(args.logger, " client applications must ensure that ChannelMonitor data is always available and the latest to avoid funds loss!");
+                                       log_error!(args.logger, " Without the latest ChannelMonitor we cannot continue without risking funds.");
+                                       log_error!(args.logger, " Please ensure the chain::Watch API requirements are met and file a bug report at https://github.com/lightningdevkit/rust-lightning");
+                                       return Err(DecodeError::InvalidValue);
+                               }
+                       }
+               }
+
+               // Note that we have to do the above replays before we push new monitor updates.
+               pending_background_events.append(&mut close_background_events);
+
                {
                        // If we're tracking pending payments, ensure we haven't lost any by looking at the
                        // ChannelMonitor data for any channels for which we do not have authorative state
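
The replay rule in `handle_in_flight_updates!` boils down to: drop anything
the `ChannelMonitor` has already applied, re-queue the rest as background
events, and keep the largest surviving `update_id` for the staleness check
against the channel. As a standalone sketch (hypothetical stand-in type):

    // Standalone sketch of the startup replay rule (stand-in type).
    struct Upd { update_id: u64 }

    /// Drop updates the monitor already applied; return the highest
    /// update_id still to be replayed (0 if none), as the macro's
    /// max_in_flight_update_id does.
    fn filter_replays(in_flight: &mut Vec<Upd>, monitor_latest_update_id: u64) -> u64 {
        in_flight.retain(|upd| upd.update_id > monitor_latest_update_id);
        in_flight.iter().map(|upd| upd.update_id).max().unwrap_or(0)
    }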
lightning/src/util/ser.rs
index 6d66d353ff568f8958ac72cd383ef951e568822f..0e510760e62a3725055cecf3394a17b5b879af9a 100644 (file)
@@ -848,6 +848,7 @@ impl Readable for Vec<u8> {
 }
 
 impl_for_vec!(ecdsa::Signature);
+impl_for_vec!(crate::chain::channelmonitor::ChannelMonitorUpdate);
 impl_for_vec!(crate::ln::channelmanager::MonitorUpdateCompletionAction);
 impl_for_vec!((A, B), A, B);
 impl_writeable_for_vec!(&crate::routing::router::BlindedTail);
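
This one-liner is what lets the new TLV field round-trip: it instantiates
the crate's existing `impl_for_vec!` helper for `Vec<ChannelMonitorUpdate>`.
A minimal, hypothetical round-trip sketch, assuming the crate's usual
`Writeable`/`Readable` import paths:

    use lightning::chain::channelmonitor::ChannelMonitorUpdate;
    use lightning::util::ser::{Readable, Writeable};

    // Round-trip a vec of updates through an in-memory buffer (error
    // handling elided; the impl above provides both directions).
    fn roundtrip(updates: &Vec<ChannelMonitorUpdate>) -> Vec<ChannelMonitorUpdate> {
        let mut buf = Vec::new();
        updates.write(&mut buf).expect("in-memory writes cannot fail");
        Readable::read(&mut &buf[..]).expect("we just wrote these bytes")
    }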