From: Matt Corallo <git@bluematt.me>
Date: Mon, 19 Jun 2023 06:26:39 +0000 (+0000)
Subject: Move in-flight `ChannelMonitorUpdate`s to `ChannelManager`
X-Git-Tag: v0.0.116-alpha1~3^2~4
X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=4041f0899f86eaf6a0a4576a91918fa54026ac46;p=rust-lightning

Move in-flight `ChannelMonitorUpdate`s to `ChannelManager`

Because `ChannelMonitorUpdate`s can be generated for a
channel which is already closed, and must still be tracked
through their completion, storing them in a `Channel`
doesn't make sense - we'd have to have a redundant place to
put them post-closure and handle both storage locations
equivalently.

Instead, here, we move to storing in-flight
`ChannelMonitorUpdate`s to the `ChannelManager`, leaving
blocked `ChannelMonitorUpdate`s in the `Channel` as they
were.
---

diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs
index 26b70f286..d68108b31 100644
--- a/lightning/src/ln/channel.rs
+++ b/lightning/src/ln/channel.rs
@@ -2264,7 +2264,7 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
 	}
 
 	pub fn get_update_fulfill_htlc_and_commit<L: Deref>(&mut self, htlc_id: u64, payment_preimage: PaymentPreimage, logger: &L) -> UpdateFulfillCommitFetch where L::Target: Logger {
-		let release_cs_monitor = self.context.pending_monitor_updates.iter().all(|upd| !upd.blocked);
+		let release_cs_monitor = self.context.pending_monitor_updates.is_empty();
 		match self.get_update_fulfill_htlc(htlc_id, payment_preimage, logger) {
 			UpdateFulfillFetch::NewClaim { mut monitor_update, htlc_value_msat, msg } => {
 				// Even if we aren't supposed to let new monitor updates with commitment state
@@ -2272,26 +2272,17 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
 				// matter what. Sadly, to push a new monitor update which flies before others
 				// already queued, we have to insert it into the pending queue and update the
 				// update_ids of all the following monitors.
-				let unblocked_update_pos = if release_cs_monitor && msg.is_some() {
+				if release_cs_monitor && msg.is_some() {
 					let mut additional_update = self.build_commitment_no_status_check(logger);
 					// build_commitment_no_status_check may bump latest_monitor_id but we want them
 					// to be strictly increasing by one, so decrement it here.
 					self.context.latest_monitor_update_id = monitor_update.update_id;
 					monitor_update.updates.append(&mut additional_update.updates);
-					self.context.pending_monitor_updates.push(PendingChannelMonitorUpdate {
-						update: monitor_update, blocked: false,
-					});
-					self.context.pending_monitor_updates.len() - 1
 				} else {
-					let insert_pos = self.context.pending_monitor_updates.iter().position(|upd| upd.blocked)
-						.unwrap_or(self.context.pending_monitor_updates.len());
-					let new_mon_id = self.context.pending_monitor_updates.get(insert_pos)
+					let new_mon_id = self.context.pending_monitor_updates.get(0)
 						.map(|upd| upd.update.update_id).unwrap_or(monitor_update.update_id);
 					monitor_update.update_id = new_mon_id;
-					self.context.pending_monitor_updates.insert(insert_pos, PendingChannelMonitorUpdate {
-						update: monitor_update, blocked: false,
-					});
-					for held_update in self.context.pending_monitor_updates.iter_mut().skip(insert_pos + 1) {
+					for held_update in self.context.pending_monitor_updates.iter_mut() {
 						held_update.update.update_id += 1;
 					}
 					if msg.is_some() {
@@ -2301,14 +2292,10 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
 							update, blocked: true,
 						});
 					}
-					insert_pos
-				};
-				self.monitor_updating_paused(false, msg.is_some(), false, Vec::new(), Vec::new(), Vec::new());
-				UpdateFulfillCommitFetch::NewClaim {
-					monitor_update: self.context.pending_monitor_updates.get(unblocked_update_pos)
-						.expect("We just pushed the monitor update").update.clone(),
-					htlc_value_msat,
 				}
+
+				self.monitor_updating_paused(false, msg.is_some(), false, Vec::new(), Vec::new(), Vec::new());
+				UpdateFulfillCommitFetch::NewClaim { monitor_update, htlc_value_msat, }
 			},
 			UpdateFulfillFetch::DuplicateClaim {} => UpdateFulfillCommitFetch::DuplicateClaim {},
 		}
@@ -3349,8 +3336,7 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
 		}
 
 		match self.free_holding_cell_htlcs(logger) {
-			(Some(_), htlcs_to_fail) => {
-				let mut additional_update = self.context.pending_monitor_updates.pop().unwrap().update;
+			(Some(mut additional_update), htlcs_to_fail) => {
 				// free_holding_cell_htlcs may bump latest_monitor_id multiple times but we want them to be
 				// strictly increasing by one, so decrement it here.
 				self.context.latest_monitor_update_id = monitor_update.update_id;
@@ -3566,12 +3552,9 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
 	{
 		assert_eq!(self.context.channel_state & ChannelState::MonitorUpdateInProgress as u32, ChannelState::MonitorUpdateInProgress as u32);
 		self.context.channel_state &= !(ChannelState::MonitorUpdateInProgress as u32);
-		let mut found_blocked = false;
-		self.context.pending_monitor_updates.retain(|upd| {
-			if found_blocked { debug_assert!(upd.blocked, "No mons may be unblocked after a blocked one"); }
-			if upd.blocked { found_blocked = true; }
-			upd.blocked
-		});
+		for upd in self.context.pending_monitor_updates.iter() {
+			debug_assert!(upd.blocked);
+		}
 
 		// If we're past (or at) the FundingSent stage on an outbound channel, try to
 		// (re-)broadcast the funding transaction as we may have declined to broadcast it when we
@@ -4439,48 +4422,31 @@ impl<Signer: WriteableEcdsaChannelSigner> Channel<Signer> {
 	/// Returns the next blocked monitor update, if one exists, and a bool which indicates a
 	/// further blocked monitor update exists after the next.
 	pub fn unblock_next_blocked_monitor_update(&mut self) -> Option<(ChannelMonitorUpdate, bool)> {
-		for i in 0..self.context.pending_monitor_updates.len() {
-			if self.context.pending_monitor_updates[i].blocked {
-				self.context.pending_monitor_updates[i].blocked = false;
-				return Some((self.context.pending_monitor_updates[i].update.clone(),
-					self.context.pending_monitor_updates.len() > i + 1));
-			}
+		for upd in self.context.pending_monitor_updates.iter() {
+			debug_assert!(upd.blocked);
 		}
-		None
+		if self.context.pending_monitor_updates.is_empty() { return None; }
+		Some((self.context.pending_monitor_updates.remove(0).update,
+			!self.context.pending_monitor_updates.is_empty()))
 	}
 
 	/// Pushes a new monitor update into our monitor update queue, returning it if it should be
 	/// immediately given to the user for persisting or `None` if it should be held as blocked.
 	fn push_ret_blockable_mon_update(&mut self, update: ChannelMonitorUpdate)
 	-> Option<ChannelMonitorUpdate> {
-		let release_monitor = self.context.pending_monitor_updates.iter().all(|upd| !upd.blocked);
-		self.context.pending_monitor_updates.push(PendingChannelMonitorUpdate {
-			update, blocked: !release_monitor,
-		});
-		if release_monitor { self.context.pending_monitor_updates.last().map(|upd| upd.update.clone()) } else { None }
-	}
-
-	pub fn no_monitor_updates_pending(&self) -> bool {
-		self.context.pending_monitor_updates.is_empty()
-	}
-
-	pub fn complete_all_mon_updates_through(&mut self, update_id: u64) {
-		self.context.pending_monitor_updates.retain(|upd| {
-			if upd.update.update_id <= update_id {
-				assert!(!upd.blocked, "Completed update must have flown");
-				false
-			} else { true }
-		});
-	}
-
-	pub fn complete_one_mon_update(&mut self, update_id: u64) {
-		self.context.pending_monitor_updates.retain(|upd| upd.update.update_id != update_id);
+		let release_monitor = self.context.pending_monitor_updates.is_empty();
+		if !release_monitor {
+			self.context.pending_monitor_updates.push(PendingChannelMonitorUpdate {
+				update, blocked: true,
+			});
+			None
+		} else {
+			Some(update)
+		}
 	}
 
-	/// Returns an iterator over all unblocked monitor updates which have not yet completed.
-	pub fn uncompleted_unblocked_mon_updates(&self) -> impl Iterator<Item=&ChannelMonitorUpdate> {
-		self.context.pending_monitor_updates.iter()
-			.filter_map(|upd| if upd.blocked { None } else { Some(&upd.update) })
+	pub fn blocked_monitor_updates_pending(&self) -> usize {
+		self.context.pending_monitor_updates.len()
 	}
 
 	/// Returns true if the channel is awaiting the persistence of the initial ChannelMonitor.
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index f51de289b..0aca388ea 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -633,6 +633,13 @@ pub(super) struct PeerState<Signer: ChannelSigner> {
 	/// Messages to send to the peer - pushed to in the same lock that they are generated in (except
 	/// for broadcast messages, where ordering isn't as strict).
 	pub(super) pending_msg_events: Vec<MessageSendEvent>,
+	/// Map from Channel IDs to pending [`ChannelMonitorUpdate`]s which have been passed to the
+	/// user but which have not yet completed.
+	///
+	/// Note that the channel may no longer exist. For example if the channel was closed but we
+	/// later needed to claim an HTLC which is pending on-chain, we may generate a monitor update
+	/// for a missing channel.
+	in_flight_monitor_updates: BTreeMap<OutPoint, Vec<ChannelMonitorUpdate>>,
 	/// Map from a specific channel to some action(s) that should be taken when all pending
 	/// [`ChannelMonitorUpdate`]s for the channel complete updating.
 	///
@@ -668,6 +675,7 @@ impl <Signer: ChannelSigner> PeerState<Signer> {
 			return false
 		}
 		self.channel_by_id.is_empty() && self.monitor_update_blocked_actions.is_empty()
+			&& self.in_flight_monitor_updates.is_empty()
 	}
 
 	// Returns a count of all channels we have with this peer, including pending channels.
@@ -1860,7 +1868,7 @@ macro_rules! handle_monitor_update_completion {
 }
 
 macro_rules! handle_new_monitor_update {
-	($self: ident, $update_res: expr, $update_id: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr, MANUALLY_REMOVING_INITIAL_MONITOR, $remove: expr) => { {
+	($self: ident, $update_res: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr, _internal, $remove: expr, $completed: expr) => { {
 		// update_maps_on_chan_removal needs to be able to take id_to_peer, so make sure we can in
 		// any case so that it won't deadlock.
 		debug_assert_ne!($self.id_to_peer.held_by_thread(), LockHeldState::HeldByThread);
@@ -1885,20 +1893,40 @@ macro_rules! handle_new_monitor_update {
 				res
 			},
 			ChannelMonitorUpdateStatus::Completed => {
-				$chan.complete_one_mon_update($update_id);
-				if $chan.no_monitor_updates_pending() {
-					handle_monitor_update_completion!($self, $update_id, $peer_state_lock, $peer_state, $per_peer_state_lock, $chan);
-				}
+				$completed;
 				Ok(true)
 			},
 		}
 	} };
+	($self: ident, $update_res: expr, $update_id: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr, MANUALLY_REMOVING_INITIAL_MONITOR, $remove: expr) => {
+		handle_new_monitor_update!($self, $update_res, $peer_state_lock, $peer_state,
+			$per_peer_state_lock, $chan, _internal, $remove,
+			handle_monitor_update_completion!($self, $update_id, $peer_state_lock, $peer_state, $per_peer_state_lock, $chan))
+	};
 	($self: ident, $update_res: expr, $update_id: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan_entry: expr, INITIAL_MONITOR) => {
 		handle_new_monitor_update!($self, $update_res, $update_id, $peer_state_lock, $peer_state, $per_peer_state_lock, $chan_entry.get_mut(), MANUALLY_REMOVING_INITIAL_MONITOR, $chan_entry.remove_entry())
 	};
 	($self: ident, $funding_txo: expr, $update: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr, MANUALLY_REMOVING, $remove: expr) => { {
-		let update_res = $self.chain_monitor.update_channel($funding_txo, &$update);
-		handle_new_monitor_update!($self, update_res, $update.update_id, $peer_state_lock, $peer_state, $per_peer_state_lock, $chan, MANUALLY_REMOVING_INITIAL_MONITOR, $remove)
+		let update_id = $update.update_id;
+		let in_flight_updates = $peer_state.in_flight_monitor_updates.entry($funding_txo)
+			.or_insert_with(Vec::new);
+		// During startup, we push monitor updates as background events through to here in
+		// order to replay updates that were in-flight when we shut down. Thus, we have to
+		// filter for uniqueness here.
+		let idx = in_flight_updates.iter().position(|upd| upd == &$update)
+			.unwrap_or_else(|| {
+				in_flight_updates.push($update);
+				in_flight_updates.len() - 1
+			});
+		let update_res = $self.chain_monitor.update_channel($funding_txo, &in_flight_updates[idx]);
+		handle_new_monitor_update!($self, update_res, $peer_state_lock, $peer_state,
+			$per_peer_state_lock, $chan, _internal, $remove,
+			{
+				let _ = in_flight_updates.remove(idx);
+				if in_flight_updates.is_empty() && $chan.blocked_monitor_updates_pending() == 0 {
+					handle_monitor_update_completion!($self, update_id, $peer_state_lock, $peer_state, $per_peer_state_lock, $chan);
+				}
+			})
 	} };
 	($self: ident, $funding_txo: expr, $update: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan_entry: expr) => {
 		handle_new_monitor_update!($self, $funding_txo, $update, $peer_state_lock, $peer_state, $per_peer_state_lock, $chan_entry.get_mut(), MANUALLY_REMOVING, $chan_entry.remove_entry())
@@ -4096,7 +4124,7 @@ where
 							match peer_state.channel_by_id.entry(funding_txo.to_channel_id()) {
 								hash_map::Entry::Occupied(mut chan) => {
 									updated_chan = true;
-									handle_new_monitor_update!(self, funding_txo, update,
+									handle_new_monitor_update!(self, funding_txo, update.clone(),
 										peer_state_lock, peer_state, per_peer_state, chan).map(|_| ())
 								},
 								hash_map::Entry::Vacant(_) => Ok(()),
@@ -4895,8 +4923,14 @@ where
 				hash_map::Entry::Vacant(_) => return,
 			}
 		};
-		log_trace!(self.logger, "ChannelMonitor updated to {}. Current highest is {}",
-			highest_applied_update_id, channel.get().context.get_latest_monitor_update_id());
+		let remaining_in_flight =
+			if let Some(pending) = peer_state.in_flight_monitor_updates.get_mut(funding_txo) {
+				pending.retain(|upd| upd.update_id > highest_applied_update_id);
+				pending.len()
+			} else { 0 };
+		log_trace!(self.logger, "ChannelMonitor updated to {}. Current highest is {}. {} pending in-flight updates.",
+			highest_applied_update_id, channel.get().context.get_latest_monitor_update_id(),
+			remaining_in_flight);
 		if !channel.get().is_awaiting_monitor_update() || channel.get().context.get_latest_monitor_update_id() != highest_applied_update_id {
 			return;
 		}
@@ -7029,6 +7063,7 @@ where
 						inbound_v1_channel_by_id: HashMap::new(),
 						latest_features: init_msg.features.clone(),
 						pending_msg_events: Vec::new(),
+						in_flight_monitor_updates: BTreeMap::new(),
 						monitor_update_blocked_actions: BTreeMap::new(),
 						actions_blocking_raa_monitor_updates: BTreeMap::new(),
 						is_connected: true,
@@ -7860,6 +7895,16 @@ where
 			pending_claiming_payments = None;
 		}
 
+		let mut in_flight_monitor_updates: Option<HashMap<(&PublicKey, &OutPoint), &Vec<ChannelMonitorUpdate>>> = None;
+		for ((counterparty_id, _), peer_state) in per_peer_state.iter().zip(peer_states.iter()) {
+			for (funding_outpoint, updates) in peer_state.in_flight_monitor_updates.iter() {
+				if !updates.is_empty() {
+					if in_flight_monitor_updates.is_none() { in_flight_monitor_updates = Some(HashMap::new()); }
+					in_flight_monitor_updates.as_mut().unwrap().insert((counterparty_id, funding_outpoint), updates);
+				}
+			}
+		}
+
 		write_tlv_fields!(writer, {
 			(1, pending_outbound_payments_no_retry, required),
 			(2, pending_intercepted_htlcs, option),
@@ -7870,6 +7915,7 @@ where
 			(7, self.fake_scid_rand_bytes, required),
 			(8, if events_not_backwards_compatible { Some(&*events) } else { None }, option),
 			(9, htlc_purposes, vec_type),
+			(10, in_flight_monitor_updates, option),
 			(11, self.probing_cookie_secret, required),
 			(13, htlc_onion_fields, optional_vec),
 		});
@@ -8086,7 +8132,7 @@ where
 		let mut id_to_peer = HashMap::with_capacity(cmp::min(channel_count as usize, 128));
 		let mut short_to_chan_info = HashMap::with_capacity(cmp::min(channel_count as usize, 128));
 		let mut channel_closures = VecDeque::new();
-		let mut pending_background_events = Vec::new();
+		let mut close_background_events = Vec::new();
 		for _ in 0..channel_count {
 			let mut channel: Channel<<SP::Target as SignerProvider>::Signer> = Channel::read(reader, (
 				&args.entropy_source, &args.signer_provider, best_block_height, &provided_channel_type_features(&args.default_config)
@@ -8094,17 +8140,7 @@ where
 			let funding_txo = channel.context.get_funding_txo().ok_or(DecodeError::InvalidValue)?;
 			funding_txo_set.insert(funding_txo.clone());
 			if let Some(ref mut monitor) = args.channel_monitors.get_mut(&funding_txo) {
-				if channel.get_latest_complete_monitor_update_id() > monitor.get_latest_update_id() {
-					// If the channel is ahead of the monitor, return InvalidValue:
-					log_error!(args.logger, "A ChannelMonitor is stale compared to the current ChannelManager! This indicates a potentially-critical violation of the chain::Watch API!");
-					log_error!(args.logger, " The ChannelMonitor for channel {} is at update_id {} but the ChannelManager is at update_id {}.",
-						log_bytes!(channel.context.channel_id()), monitor.get_latest_update_id(), channel.get_latest_complete_monitor_update_id());
-					log_error!(args.logger, " The chain::Watch API *requires* that monitors are persisted durably before returning,");
-					log_error!(args.logger, " client applications must ensure that ChannelMonitor data is always available and the latest to avoid funds loss!");
-					log_error!(args.logger, " Without the latest ChannelMonitor we cannot continue without risking funds.");
-					log_error!(args.logger, " Please ensure the chain::Watch API requirements are met and file a bug report at https://github.com/lightningdevkit/rust-lightning");
-					return Err(DecodeError::InvalidValue);
-				} else if channel.get_cur_holder_commitment_transaction_number() > monitor.get_cur_holder_commitment_number() ||
+				if channel.get_cur_holder_commitment_transaction_number() > monitor.get_cur_holder_commitment_number() ||
 						channel.get_revoked_counterparty_commitment_transaction_number() > monitor.get_min_seen_secret() ||
 						channel.get_cur_counterparty_commitment_transaction_number() > monitor.get_cur_counterparty_commitment_number() ||
 						channel.context.get_latest_monitor_update_id() < monitor.get_latest_update_id() {
@@ -8115,7 +8151,7 @@ where
 						log_bytes!(channel.context.channel_id()), monitor.get_latest_update_id(), channel.context.get_latest_monitor_update_id());
 					let (monitor_update, mut new_failed_htlcs) = channel.context.force_shutdown(true);
 					if let Some((counterparty_node_id, funding_txo, update)) = monitor_update {
-						pending_background_events.push(BackgroundEvent::MonitorUpdateRegeneratedOnStartup {
+						close_background_events.push(BackgroundEvent::MonitorUpdateRegeneratedOnStartup {
 							counterparty_node_id, funding_txo, update
 						});
 					}
@@ -8148,7 +8184,6 @@ where
 					log_info!(args.logger, "Successfully loaded channel {} at update_id {} against monitor at update id {}",
 						log_bytes!(channel.context.channel_id()), channel.context.get_latest_monitor_update_id(),
 						monitor.get_latest_update_id());
-					channel.complete_all_mon_updates_through(monitor.get_latest_update_id());
 					if let Some(short_channel_id) = channel.context.get_short_channel_id() {
 						short_to_chan_info.insert(short_channel_id, (channel.context.get_counterparty_node_id(), channel.context.channel_id()));
 					}
@@ -8195,7 +8230,7 @@ where
 					update_id: CLOSED_CHANNEL_UPDATE_ID,
 					updates: vec![ChannelMonitorUpdateStep::ChannelForceClosed { should_broadcast: true }],
 				};
-				pending_background_events.push(BackgroundEvent::ClosingMonitorUpdateRegeneratedOnStartup((*funding_txo, monitor_update)));
+				close_background_events.push(BackgroundEvent::ClosingMonitorUpdateRegeneratedOnStartup((*funding_txo, monitor_update)));
 			}
 		}
 
@@ -8224,20 +8259,27 @@ where
 			claimable_htlcs_list.push((payment_hash, previous_hops));
 		}
 
-		let peer_count: u64 = Readable::read(reader)?;
-		let mut per_peer_state = HashMap::with_capacity(cmp::min(peer_count as usize, MAX_ALLOC_SIZE/mem::size_of::<(PublicKey, Mutex<PeerState<<SP::Target as SignerProvider>::Signer>>)>()));
-		for _ in 0..peer_count {
-			let peer_pubkey = Readable::read(reader)?;
-			let peer_state = PeerState {
-				channel_by_id: peer_channels.remove(&peer_pubkey).unwrap_or(HashMap::new()),
+		let peer_state_from_chans = |channel_by_id| {
+			PeerState {
+				channel_by_id,
 				outbound_v1_channel_by_id: HashMap::new(),
 				inbound_v1_channel_by_id: HashMap::new(),
-				latest_features: Readable::read(reader)?,
+				latest_features: InitFeatures::empty(),
 				pending_msg_events: Vec::new(),
+				in_flight_monitor_updates: BTreeMap::new(),
 				monitor_update_blocked_actions: BTreeMap::new(),
 				actions_blocking_raa_monitor_updates: BTreeMap::new(),
 				is_connected: false,
-			};
+			}
+		};
+
+		let peer_count: u64 = Readable::read(reader)?;
+		let mut per_peer_state = HashMap::with_capacity(cmp::min(peer_count as usize, MAX_ALLOC_SIZE/mem::size_of::<(PublicKey, Mutex<PeerState<<SP::Target as SignerProvider>::Signer>>)>()));
+		for _ in 0..peer_count {
+			let peer_pubkey = Readable::read(reader)?;
+			let peer_chans = peer_channels.remove(&peer_pubkey).unwrap_or(HashMap::new());
+			let mut peer_state = peer_state_from_chans(peer_chans);
+			peer_state.latest_features = Readable::read(reader)?;
 			per_peer_state.insert(peer_pubkey, Mutex::new(peer_state));
 		}
 
@@ -8265,24 +8307,6 @@ where
 			}
 		}
 
-		for (node_id, peer_mtx) in per_peer_state.iter() {
-			let peer_state = peer_mtx.lock().unwrap();
-			for (_, chan) in peer_state.channel_by_id.iter() {
-				for update in chan.uncompleted_unblocked_mon_updates() {
-					if let Some(funding_txo) = chan.context.get_funding_txo() {
-						log_trace!(args.logger, "Replaying ChannelMonitorUpdate {} for channel {}",
-							update.update_id, log_bytes!(funding_txo.to_channel_id()));
-						pending_background_events.push(
-							BackgroundEvent::MonitorUpdateRegeneratedOnStartup {
-								counterparty_node_id: *node_id, funding_txo, update: update.clone(),
-							});
-					} else {
-						return Err(DecodeError::InvalidValue);
-					}
-				}
-			}
-		}
-
 		let _last_node_announcement_serial: u32 = Readable::read(reader)?; // Only used < 0.0.111
 		let highest_seen_timestamp: u32 = Readable::read(reader)?;
 
@@ -8319,6 +8343,7 @@ where
 		let mut pending_claiming_payments = Some(HashMap::new());
 		let mut monitor_update_blocked_actions_per_peer: Option<Vec<(_, BTreeMap<_, Vec<_>>)>> = Some(Vec::new());
 		let mut events_override = None;
+		let mut in_flight_monitor_updates: Option<HashMap<(PublicKey, OutPoint), Vec<ChannelMonitorUpdate>>> = None;
 		read_tlv_fields!(reader, {
 			(1, pending_outbound_payments_no_retry, option),
 			(2, pending_intercepted_htlcs, option),
@@ -8329,6 +8354,7 @@ where
 			(7, fake_scid_rand_bytes, option),
 			(8, events_override, option),
 			(9, claimable_htlc_purposes, vec_type),
+			(10, in_flight_monitor_updates, option),
 			(11, probing_cookie_secret, option),
 			(13, claimable_htlc_onion_fields, optional_vec),
 		});
@@ -8362,6 +8388,103 @@ where
 			retry_lock: Mutex::new(())
 		};
 
+		// We have to replay (or skip, if they were completed after we wrote the `ChannelManager`)
+		// each `ChannelMonitorUpdate` in `in_flight_monitor_updates`. After doing so, we have to
+		// check that each channel we have isn't newer than the latest `ChannelMonitorUpdate`(s) we
+		// replayed, and for each monitor update we have to replay we have to ensure there's a
+		// `ChannelMonitor` for it.
+		//
+		// In order to do so we first walk all of our live channels (so that we can check their
+		// state immediately after doing the update replays, when we have the `update_id`s
+		// available) and then walk any remaining in-flight updates.
+		//
+		// Because the actual handling of the in-flight updates is the same, it's macro'ized here:
+		let mut pending_background_events = Vec::new();
+		macro_rules! handle_in_flight_updates {
+			($counterparty_node_id: expr, $chan_in_flight_upds: expr, $funding_txo: expr,
+			 $monitor: expr, $peer_state: expr, $channel_info_log: expr
+			) => { {
+				let mut max_in_flight_update_id = 0;
+				$chan_in_flight_upds.retain(|upd| upd.update_id > $monitor.get_latest_update_id());
+				for update in $chan_in_flight_upds.iter() {
+					log_trace!(args.logger, "Replaying ChannelMonitorUpdate {} for {}channel {}",
+						update.update_id, $channel_info_log, log_bytes!($funding_txo.to_channel_id()));
+					max_in_flight_update_id = cmp::max(max_in_flight_update_id, update.update_id);
+					pending_background_events.push(
+						BackgroundEvent::MonitorUpdateRegeneratedOnStartup {
+							counterparty_node_id: $counterparty_node_id,
+							funding_txo: $funding_txo,
+							update: update.clone(),
+						});
+				}
+				if $peer_state.in_flight_monitor_updates.insert($funding_txo, $chan_in_flight_upds).is_some() {
+					log_error!(args.logger, "Duplicate in-flight monitor update set for the same channel!");
+					return Err(DecodeError::InvalidValue);
+				}
+				max_in_flight_update_id
+			} }
+		}
+
+		for (counterparty_id, peer_state_mtx) in per_peer_state.iter_mut() {
+			let mut peer_state_lock = peer_state_mtx.lock().unwrap();
+			let peer_state = &mut *peer_state_lock;
+			for (_, chan) in peer_state.channel_by_id.iter() {
+				// Channels that were persisted have to be funded, otherwise they should have been
+				// discarded.
+				let funding_txo = chan.context.get_funding_txo().ok_or(DecodeError::InvalidValue)?;
+				let monitor = args.channel_monitors.get(&funding_txo)
+					.expect("We already checked for monitor presence when loading channels");
+				let mut max_in_flight_update_id = monitor.get_latest_update_id();
+				if let Some(in_flight_upds) = &mut in_flight_monitor_updates {
+					if let Some(mut chan_in_flight_upds) = in_flight_upds.remove(&(*counterparty_id, funding_txo)) {
+						max_in_flight_update_id = cmp::max(max_in_flight_update_id,
+							handle_in_flight_updates!(*counterparty_id, chan_in_flight_upds,
+								funding_txo, monitor, peer_state, ""));
+					}
+				}
+				if chan.get_latest_complete_monitor_update_id() > max_in_flight_update_id {
+					// If the channel is ahead of the monitor, return InvalidValue:
+					log_error!(args.logger, "A ChannelMonitor is stale compared to the current ChannelManager! This indicates a potentially-critical violation of the chain::Watch API!");
+					log_error!(args.logger, " The ChannelMonitor for channel {} is at update_id {} with update_id through {} in-flight",
+						log_bytes!(chan.context.channel_id()), monitor.get_latest_update_id(), max_in_flight_update_id);
+					log_error!(args.logger, " but the ChannelManager is at update_id {}.", chan.get_latest_complete_monitor_update_id());
+					log_error!(args.logger, " The chain::Watch API *requires* that monitors are persisted durably before returning,");
+					log_error!(args.logger, " client applications must ensure that ChannelMonitor data is always available and the latest to avoid funds loss!");
+					log_error!(args.logger, " Without the latest ChannelMonitor we cannot continue without risking funds.");
+					log_error!(args.logger, " Please ensure the chain::Watch API requirements are met and file a bug report at https://github.com/lightningdevkit/rust-lightning");
+					return Err(DecodeError::InvalidValue);
+				}
+			}
+		}
+
+		if let Some(in_flight_upds) = in_flight_monitor_updates {
+			for ((counterparty_id, funding_txo), mut chan_in_flight_updates) in in_flight_upds {
+				if let Some(monitor) = args.channel_monitors.get(&funding_txo) {
+					// Now that we've removed all the in-flight monitor updates for channels that are
+					// still open, we need to replay any monitor updates that are for closed channels,
+					// creating the neccessary peer_state entries as we go.
+					let peer_state_mutex = per_peer_state.entry(counterparty_id).or_insert_with(|| {
+						Mutex::new(peer_state_from_chans(HashMap::new()))
+					});
+					let mut peer_state = peer_state_mutex.lock().unwrap();
+					handle_in_flight_updates!(counterparty_id, chan_in_flight_updates,
+						funding_txo, monitor, peer_state, "closed ");
+				} else {
+					log_error!(args.logger, "A ChannelMonitor is missing even though we have in-flight updates for it! This indicates a potentially-critical violation of the chain::Watch API!");
+					log_error!(args.logger, " The ChannelMonitor for channel {} is missing.",
+						log_bytes!(funding_txo.to_channel_id()));
+					log_error!(args.logger, " The chain::Watch API *requires* that monitors are persisted durably before returning,");
+					log_error!(args.logger, " client applications must ensure that ChannelMonitor data is always available and the latest to avoid funds loss!");
+					log_error!(args.logger, " Without the latest ChannelMonitor we cannot continue without risking funds.");
+					log_error!(args.logger, " Please ensure the chain::Watch API requirements are met and file a bug report at https://github.com/lightningdevkit/rust-lightning");
+					return Err(DecodeError::InvalidValue);
+				}
+			}
+		}
+
+		// Note that we have to do the above replays before we push new monitor updates.
+		pending_background_events.append(&mut close_background_events);
+
 		{
 			// If we're tracking pending payments, ensure we haven't lost any by looking at the
 			// ChannelMonitor data for any channels for which we do not have authorative state
diff --git a/lightning/src/util/ser.rs b/lightning/src/util/ser.rs
index 6d66d353f..0e510760e 100644
--- a/lightning/src/util/ser.rs
+++ b/lightning/src/util/ser.rs
@@ -848,6 +848,7 @@ impl Readable for Vec<u8> {
 }
 
 impl_for_vec!(ecdsa::Signature);
+impl_for_vec!(crate::chain::channelmonitor::ChannelMonitorUpdate);
 impl_for_vec!(crate::ln::channelmanager::MonitorUpdateCompletionAction);
 impl_for_vec!((A, B), A, B);
 impl_writeable_for_vec!(&crate::routing::router::BlindedTail);