X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=lightning%2Fsrc%2Fchain%2Fchainmonitor.rs;h=aae260e735bdbae5c0a538af8024e7e2ef2a78cb;hb=f53d13bcb8220b3ce39e51a4d20beb23b3930d1f;hp=9e92264b0425dffe07049d9acaf4c80e75df20bc;hpb=4500270488e6ed918c5f6e07310eb4a384eb6e21;p=rust-lightning

diff --git a/lightning/src/chain/chainmonitor.rs b/lightning/src/chain/chainmonitor.rs
index 9e92264b..aae260e7 100644
--- a/lightning/src/chain/chainmonitor.rs
+++ b/lightning/src/chain/chainmonitor.rs
@@ -23,54 +23,104 @@
 //! events. The remote server would make use of [`ChainMonitor`] for block processing and for
 //! servicing [`ChannelMonitor`] updates from the client.
 
-use bitcoin::blockdata::block::{Block, BlockHeader};
+use bitcoin::blockdata::block::BlockHeader;
 use bitcoin::hash_types::Txid;
 
 use chain;
 use chain::{ChannelMonitorUpdateErr, Filter, WatchedOutput};
 use chain::chaininterface::{BroadcasterInterface, FeeEstimator};
-use chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, Balance, MonitorEvent, TransactionOutputs};
+use chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, Balance, MonitorEvent, TransactionOutputs, LATENCY_GRACE_PERIOD_BLOCKS};
 use chain::transaction::{OutPoint, TransactionData};
 use chain::keysinterface::Sign;
+use util::atomic_counter::AtomicCounter;
 use util::logger::Logger;
+use util::errors::APIError;
 use util::events;
 use util::events::EventHandler;
 use ln::channelmanager::ChannelDetails;
 
 use prelude::*;
-use sync::{RwLock, RwLockReadGuard, Mutex};
+use sync::{RwLock, RwLockReadGuard, Mutex, MutexGuard};
 use core::ops::Deref;
+use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+
+#[derive(Clone, Copy, Hash, PartialEq, Eq)]
+/// A specific update's ID stored in a `MonitorUpdateId`, separated out to make the contents
+/// entirely opaque.
+enum UpdateOrigin {
+	/// An update that was generated by the `ChannelManager` (via our `chain::Watch`
+	/// implementation). This corresponds to an actual [`ChannelMonitorUpdate::update_id`] field
+	/// and [`ChannelMonitor::get_latest_update_id`].
+	OffChain(u64),
+	/// An update that was generated during blockchain processing. The ID here is specific to the
+	/// generating [`ChainMonitor`] and does *not* correspond to any on-disk IDs.
+	ChainSync(u64),
+}
+
+/// An opaque identifier describing a specific [`Persist`] method call.
+#[derive(Clone, Copy, Hash, PartialEq, Eq)]
+pub struct MonitorUpdateId {
+	contents: UpdateOrigin,
+}
+
+impl MonitorUpdateId {
+	pub(crate) fn from_monitor_update(update: &ChannelMonitorUpdate) -> Self {
+		Self { contents: UpdateOrigin::OffChain(update.update_id) }
+	}
+	pub(crate) fn from_new_monitor<ChannelSigner: Sign>(monitor: &ChannelMonitor<ChannelSigner>) -> Self {
+		Self { contents: UpdateOrigin::OffChain(monitor.get_latest_update_id()) }
+	}
+}
 
 /// `Persist` defines behavior for persisting channel monitors: this could mean
 /// writing once to disk, and/or uploading to one or more backup services.
 ///
-/// Note that for every new monitor, you **must** persist the new `ChannelMonitor`
-/// to disk/backups. And, on every update, you **must** persist either the
-/// `ChannelMonitorUpdate` or the updated monitor itself. Otherwise, there is risk
-/// of situations such as revoking a transaction, then crashing before this
-/// revocation can be persisted, then unintentionally broadcasting a revoked
-/// transaction and losing money. This is a risk because previous channel states
-/// are toxic, so it's important that whatever channel state is persisted is
-/// kept up-to-date.
+/// Each method can return three possible values:
+///  * If persistence (including any relevant `fsync()` calls) happens immediately, the
+///    implementation should return `Ok(())`, indicating normal channel operation should continue.
+///  * If persistence happens asynchronously, implementations should first ensure the
+///    [`ChannelMonitor`] or [`ChannelMonitorUpdate`] are written durably to disk, and then return
+///    `Err(ChannelMonitorUpdateErr::TemporaryFailure)` while the update continues in the
+///    background. Once the update completes, [`ChainMonitor::channel_monitor_updated`] should be
+///    called with the corresponding [`MonitorUpdateId`].
+///
+///    Note that unlike the direct [`chain::Watch`] interface,
+///    [`ChainMonitor::channel_monitor_updated`] must be called once for *each* update which occurs.
+///
+///  * If persistence fails for some reason, implementations should return
+///    `Err(ChannelMonitorUpdateErr::PermanentFailure)`, in which case the channel will likely be
+///    closed without broadcasting the latest state. See
+///    [`ChannelMonitorUpdateErr::PermanentFailure`] for more details.
 pub trait Persist<ChannelSigner: Sign> {
-	/// Persist a new channel's data. The data can be stored any way you want, but
-	/// the identifier provided by Rust-Lightning is the channel's outpoint (and
-	/// it is up to you to maintain a correct mapping between the outpoint and the
-	/// stored channel data). Note that you **must** persist every new monitor to
-	/// disk. See the `Persist` trait documentation for more details.
+	/// Persist a new channel's data in response to a [`chain::Watch::watch_channel`] call. This is
+	/// called by [`ChannelManager`] for new channels, or may be called directly, e.g. on startup.
+	///
+	/// The data can be stored any way you want, but the identifier provided by LDK is the
+	/// channel's outpoint (and it is up to you to maintain a correct mapping between the outpoint
+	/// and the stored channel data). Note that you **must** persist every new monitor to disk.
+	///
+	/// The `update_id` is used to identify this call to [`ChainMonitor::channel_monitor_updated`],
+	/// if you return [`ChannelMonitorUpdateErr::TemporaryFailure`].
 	///
 	/// See [`Writeable::write`] on [`ChannelMonitor`] for writing out a `ChannelMonitor`
 	/// and [`ChannelMonitorUpdateErr`] for requirements when returning errors.
 	///
+	/// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
 	/// [`Writeable::write`]: crate::util::ser::Writeable::write
-	fn persist_new_channel(&self, id: OutPoint, data: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr>;
+	fn persist_new_channel(&self, channel_id: OutPoint, data: &ChannelMonitor<ChannelSigner>, update_id: MonitorUpdateId) -> Result<(), ChannelMonitorUpdateErr>;
 
-	/// Update one channel's data. The provided `ChannelMonitor` has already
-	/// applied the given update.
+	/// Update one channel's data. The provided [`ChannelMonitor`] has already applied the given
+	/// update.
 	///
-	/// Note that on every update, you **must** persist either the
-	/// `ChannelMonitorUpdate` or the updated monitor itself to disk/backups. See
-	/// the `Persist` trait documentation for more details.
+	/// Note that on every update, you **must** persist either the [`ChannelMonitorUpdate`] or the
+	/// updated monitor itself to disk/backups. See the [`Persist`] trait documentation for more
+	/// details.
+	///
+	/// During blockchain synchronization operations, this may be called with no
+	/// [`ChannelMonitorUpdate`], in which case the full [`ChannelMonitor`] needs to be persisted.
+	/// Note that after the full [`ChannelMonitor`] is persisted any previous
+	/// [`ChannelMonitorUpdate`]s which were persisted should be discarded - they can no longer be
+	/// applied to the persisted [`ChannelMonitor`] as they were already applied.
 	///
 	/// If an implementer chooses to persist the updates only, they need to make
 	/// sure that all the updates are applied to the `ChannelMonitors` *before*
@@ -84,16 +134,61 @@ pub trait Persist<ChannelSigner: Sign> {
 	/// them in batches. The size of each monitor grows `O(number of state updates)`
 	/// whereas updates are small and `O(1)`.
 	///
+	/// The `update_id` is used to identify this call to [`ChainMonitor::channel_monitor_updated`],
+	/// if you return [`ChannelMonitorUpdateErr::TemporaryFailure`].
+	///
 	/// See [`Writeable::write`] on [`ChannelMonitor`] for writing out a `ChannelMonitor`,
 	/// [`Writeable::write`] on [`ChannelMonitorUpdate`] for writing out an update, and
 	/// [`ChannelMonitorUpdateErr`] for requirements when returning errors.
 	///
 	/// [`Writeable::write`]: crate::util::ser::Writeable::write
-	fn update_persisted_channel(&self, id: OutPoint, update: &ChannelMonitorUpdate, data: &ChannelMonitor<ChannelSigner>) -> Result<(), ChannelMonitorUpdateErr>;
+	fn update_persisted_channel(&self, channel_id: OutPoint, update: &Option<ChannelMonitorUpdate>, data: &ChannelMonitor<ChannelSigner>, update_id: MonitorUpdateId) -> Result<(), ChannelMonitorUpdateErr>;
 }
 
 struct MonitorHolder<ChannelSigner: Sign> {
 	monitor: ChannelMonitor<ChannelSigner>,
+	/// The full set of pending monitor updates for this Channel.
+	///
+	/// Note that this lock must be held during updates to prevent a race where we call
+	/// update_persisted_channel, the user returns a TemporaryFailure, and then calls
+	/// channel_monitor_updated immediately, racing our insertion of the pending update into the
+	/// contained Vec.
+	///
+	/// Beyond the synchronization of updates themselves, we cannot handle user events until after
+	/// any chain updates have been stored on disk. Thus, we scan this list when returning updates
+	/// to the ChannelManager, refusing to return any updates for a ChannelMonitor which is still
+	/// being persisted fully to disk after a chain update.
+	///
+	/// This avoids the possibility of handling, e.g. an on-chain claim, generating a claim monitor
+	/// event, resulting in the relevant ChannelManager generating a PaymentSent event and dropping
+	/// the pending payment entry, and then reloading before the monitor is persisted, resulting in
+	/// the ChannelManager re-adding the same payment entry, before the same block is replayed,
+	/// resulting in a duplicate PaymentSent event.
+	pending_monitor_updates: Mutex<Vec<MonitorUpdateId>>,
+	/// When the user returns a PermanentFailure error from an update_persisted_channel call during
+	/// block processing, we inform the ChannelManager that the channel should be closed
+	/// asynchronously. In order to ensure no further changes happen before the ChannelManager has
+	/// processed the closure event, we set this to true and return PermanentFailure for any other
+	/// chain::Watch events.
+	channel_perm_failed: AtomicBool,
+	/// The last block height at which no [`UpdateOrigin::ChainSync`] monitor updates were present
+	/// in `pending_monitor_updates`.
+	/// If it's been more than [`LATENCY_GRACE_PERIOD_BLOCKS`] since we started waiting on a chain
+	/// sync event, we let monitor events return to `ChannelManager` because we cannot hold them up
+	/// forever or we'll end up with HTLC preimages waiting to feed back into an upstream channel
+	/// forever, risking funds loss.
+	last_chain_persist_height: AtomicUsize,
+}
+
+impl<ChannelSigner: Sign> MonitorHolder<ChannelSigner> {
+	fn has_pending_offchain_updates(&self, pending_monitor_updates_lock: &MutexGuard<Vec<MonitorUpdateId>>) -> bool {
+		pending_monitor_updates_lock.iter().any(|update_id|
+			if let UpdateOrigin::OffChain(_) = update_id.contents { true } else { false })
+	}
+	fn has_pending_chainsync_updates(&self, pending_monitor_updates_lock: &MutexGuard<Vec<MonitorUpdateId>>) -> bool {
+		pending_monitor_updates_lock.iter().any(|update_id|
+			if let UpdateOrigin::ChainSync(_) = update_id.contents { true } else { false })
+	}
 }
 
 /// A read-only reference to a current ChannelMonitor.
@@ -129,12 +224,20 @@ pub struct ChainMonitor<ChannelSigner: Sign, C: Deref, T: Deref, F: Deref, L: De
         P::Target: Persist<ChannelSigner>,
 {
 	monitors: RwLock<HashMap<OutPoint, MonitorHolder<ChannelSigner>>>,
+	/// When we generate a [`MonitorUpdateId`] for a chain-event monitor persistence, we need a
+	/// unique ID, which we calculate by simply getting the next value from this counter. Note that
+	/// the ID is never persisted so it's ok that they reset on restart.
+	sync_persistence_id: AtomicCounter,
 	chain_source: Option<C>,
 	broadcaster: T,
 	logger: L,
 	fee_estimator: F,
 	persister: P,
+	/// "User-provided" (ie persistence-completion/-failed) [`MonitorEvent`]s. These came directly
+	/// from the user and not from a [`ChannelMonitor`].
 	pending_monitor_events: Mutex<Vec<MonitorEvent>>,
+	/// The best block height seen, used as a proxy for the passage of time.
+	highest_chain_height: AtomicUsize,
 }
 
 impl<ChannelSigner: Sign, C: Deref, T: Deref, F: Deref, L: Deref, P: Deref> ChainMonitor<ChannelSigner, C, T, F, L, P>
@@ -153,31 +256,75 @@ where C::Target: chain::Filter,
 	/// calls must not exclude any transactions matching the new outputs nor any in-block
 	/// descendants of such transactions. It is not necessary to re-fetch the block to obtain
 	/// updated `txdata`.
-	fn process_chain_data<FN>(&self, header: &BlockHeader, txdata: &TransactionData, process: FN)
+	///
+	/// Calls which represent a new blockchain tip height should set `best_height`.
+	fn process_chain_data<FN>(&self, header: &BlockHeader, best_height: Option<u32>, txdata: &TransactionData, process: FN)
 	where
 		FN: Fn(&ChannelMonitor<ChannelSigner>, &TransactionData) -> Vec<TransactionOutputs>
 	{
 		let mut dependent_txdata = Vec::new();
-		let monitor_states = self.monitors.read().unwrap();
-		for monitor_state in monitor_states.values() {
-			let mut txn_outputs = process(&monitor_state.monitor, txdata);
-
-			// Register any new outputs with the chain source for filtering, storing any dependent
-			// transactions from within the block that previously had not been included in txdata.
-			if let Some(ref chain_source) = self.chain_source {
-				let block_hash = header.block_hash();
-				for (txid, mut outputs) in txn_outputs.drain(..) {
-					for (idx, output) in outputs.drain(..) {
-						// Register any new outputs with the chain source for filtering and recurse
-						// if it indicates that there are dependent transactions within the block
-						// that had not been previously included in txdata.
-						let output = WatchedOutput {
-							block_hash: Some(block_hash),
-							outpoint: OutPoint { txid, index: idx as u16 },
-							script_pubkey: output.script_pubkey,
-						};
-						if let Some(tx) = chain_source.register_output(output) {
-							dependent_txdata.push(tx);
+		{
+			let monitor_states = self.monitors.write().unwrap();
+			if let Some(height) = best_height {
+				// If the best block height is being updated, update highest_chain_height under the
+				// monitors write lock.
+				let old_height = self.highest_chain_height.load(Ordering::Acquire);
+				let new_height = height as usize;
+				if new_height > old_height {
+					self.highest_chain_height.store(new_height, Ordering::Release);
+				}
+			}
+
+			for (funding_outpoint, monitor_state) in monitor_states.iter() {
+				let monitor = &monitor_state.monitor;
+				let mut txn_outputs;
+				{
+					txn_outputs = process(monitor, txdata);
+					let update_id = MonitorUpdateId {
+						contents: UpdateOrigin::ChainSync(self.sync_persistence_id.get_increment()),
+					};
+					let mut pending_monitor_updates = monitor_state.pending_monitor_updates.lock().unwrap();
+					if let Some(height) = best_height {
+						if !monitor_state.has_pending_chainsync_updates(&pending_monitor_updates) {
+							// If there are no ChainSync persists awaiting completion, go ahead and
+							// set last_chain_persist_height here - we wouldn't want the first
+							// TemporaryFailure to always immediately be considered "overly delayed".
+							monitor_state.last_chain_persist_height.store(height as usize, Ordering::Release);
+						}
+					}
+
+					log_trace!(self.logger, "Syncing Channel Monitor for channel {}", log_funding_info!(monitor));
+					match self.persister.update_persisted_channel(*funding_outpoint, &None, monitor, update_id) {
+						Ok(()) =>
+							log_trace!(self.logger, "Finished syncing Channel Monitor for channel {}", log_funding_info!(monitor)),
+						Err(ChannelMonitorUpdateErr::PermanentFailure) => {
+							monitor_state.channel_perm_failed.store(true, Ordering::Release);
+							self.pending_monitor_events.lock().unwrap().push(MonitorEvent::UpdateFailed(*funding_outpoint));
+						},
+						Err(ChannelMonitorUpdateErr::TemporaryFailure) => {
+							log_debug!(self.logger, "Channel Monitor sync for channel {} in progress, holding events until completion!", log_funding_info!(monitor));
+							pending_monitor_updates.push(update_id);
+						},
+					}
+				}
+
+				// Register any new outputs with the chain source for filtering, storing any dependent
+				// transactions from within the block that previously had not been included in txdata.
+				if let Some(ref chain_source) = self.chain_source {
+					let block_hash = header.block_hash();
+					for (txid, mut outputs) in txn_outputs.drain(..) {
+						for (idx, output) in outputs.drain(..) {
+							// Register any new outputs with the chain source for filtering and recurse
+							// if it indicates that there are dependent transactions within the block
+							// that had not been previously included in txdata.
+							let output = WatchedOutput {
+								block_hash: Some(block_hash),
+								outpoint: OutPoint { txid, index: idx as u16 },
+								script_pubkey: output.script_pubkey,
+							};
+							if let Some(tx) = chain_source.register_output(output) {
+								dependent_txdata.push(tx);
+							}
 						}
 					}
 				}
@@ -189,7 +336,7 @@ where C::Target: chain::Filter,
 			dependent_txdata.sort_unstable_by_key(|(index, _tx)| *index);
 			dependent_txdata.dedup_by_key(|(index, _tx)| *index);
 			let txdata: Vec<_> = dependent_txdata.iter().map(|(index, tx)| (*index, tx)).collect();
-			self.process_chain_data(header, &txdata, process);
+			self.process_chain_data(header, None, &txdata, process); // We skip the best height the second go-around
 		}
 	}
 
@@ -203,12 +350,14 @@ where C::Target: chain::Filter,
 	pub fn new(chain_source: Option<C>, broadcaster: T, logger: L, feeest: F, persister: P) -> Self {
 		Self {
 			monitors: RwLock::new(HashMap::new()),
+			sync_persistence_id: AtomicCounter::new(),
 			chain_source,
 			broadcaster,
 			logger,
 			fee_estimator: feeest,
 			persister,
 			pending_monitor_events: Mutex::new(Vec::new()),
+			highest_chain_height: AtomicUsize::new(0),
 		}
 	}
 
@@ -267,27 +416,73 @@ where C::Target: chain::Filter,
 	/// Indicates the persistence of a [`ChannelMonitor`] has completed after
 	/// [`ChannelMonitorUpdateErr::TemporaryFailure`] was returned from an update operation.
 	///
-	/// All ChannelMonitor updates up to and including highest_applied_update_id must have been
-	/// fully committed in every copy of the given channels' ChannelMonitors.
-	///
-	/// Note that there is no effect to calling with a highest_applied_update_id other than the
-	/// current latest ChannelMonitorUpdate and one call to this function after multiple
-	/// ChannelMonitorUpdateErr::TemporaryFailures is fine. The highest_applied_update_id field
-	/// exists largely only to prevent races between this and concurrent update_monitor calls.
-	///
 	/// Thus, the anticipated use is, at a high level:
 	///  1) This [`ChainMonitor`] calls [`Persist::update_persisted_channel`] which stores the
 	///     update to disk and begins updating any remote (e.g. watchtower/backup) copies,
 	///     returning [`ChannelMonitorUpdateErr::TemporaryFailure`],
-	///  2) once all remote copies are updated, you call this function with the update_id that
-	///     completed, and once it is the latest the Channel will be re-enabled.
-	pub fn channel_monitor_updated(&self, funding_txo: OutPoint, highest_applied_update_id: u64) {
+	///  2) once all remote copies are updated, you call this function with the
+	///     `completed_update_id` that completed, and once all pending updates have completed the
+	///     channel will be re-enabled.
+	//      Note that we re-enable only after `UpdateOrigin::OffChain` updates complete, we don't
+	//      care about `UpdateOrigin::ChainSync` updates for the channel state being updated. We
+	//      only care about `UpdateOrigin::ChainSync` for returning `MonitorEvent`s.
+	///
+	/// Returns an [`APIError::APIMisuseError`] if `funding_txo` does not match any currently
+	/// registered [`ChannelMonitor`]s.
+	pub fn channel_monitor_updated(&self, funding_txo: OutPoint, completed_update_id: MonitorUpdateId) -> Result<(), APIError> {
+		let monitors = self.monitors.read().unwrap();
+		let monitor_data = if let Some(mon) = monitors.get(&funding_txo) { mon } else {
+			return Err(APIError::APIMisuseError { err: format!("No ChannelMonitor matching funding outpoint {:?} found", funding_txo) });
+		};
+		let mut pending_monitor_updates = monitor_data.pending_monitor_updates.lock().unwrap();
+		pending_monitor_updates.retain(|update_id| *update_id != completed_update_id);
+
+		match completed_update_id {
+			MonitorUpdateId { contents: UpdateOrigin::OffChain(_) } => {
+				// Note that we only check for `UpdateOrigin::OffChain` failures here - if
+				// we're being told that an `UpdateOrigin::OffChain` monitor update completed,
+				// we only care about ensuring we don't tell the `ChannelManager` to restore
+				// the channel to normal operation until all `UpdateOrigin::OffChain` updates
+				// complete.
+				// If there's some `UpdateOrigin::ChainSync` update still pending that's okay
+				// - we can still update our channel state, just as long as we don't return
+				// `MonitorEvent`s from the monitor back to the `ChannelManager` until they
+				// complete.
+				let monitor_is_pending_updates = monitor_data.has_pending_offchain_updates(&pending_monitor_updates);
+				if monitor_is_pending_updates || monitor_data.channel_perm_failed.load(Ordering::Acquire) {
+					// If there are still monitor updates pending (or an old monitor update
+					// finished after a later one perm-failed), we cannot yet construct an
+					// UpdateCompleted event.
+					return Ok(());
+				}
+				self.pending_monitor_events.lock().unwrap().push(MonitorEvent::UpdateCompleted {
+					funding_txo,
+					monitor_update_id: monitor_data.monitor.get_latest_update_id(),
+				});
+			},
+			MonitorUpdateId { contents: UpdateOrigin::ChainSync(_) } => {
+				if !monitor_data.has_pending_chainsync_updates(&pending_monitor_updates) {
+					monitor_data.last_chain_persist_height.store(self.highest_chain_height.load(Ordering::Acquire), Ordering::Release);
+					// The next time release_pending_monitor_events is called, any events for this
+					// ChannelMonitor will be returned.
+				}
+			},
+		}
+		Ok(())
+	}
+
+	/// This wrapper avoids having to update some of our tests for now as they assume the direct
+	/// chain::Watch API wherein we mark a monitor fully-updated by just calling
+	/// channel_monitor_updated once with the highest ID.
+	#[cfg(any(test, fuzzing))]
+	pub fn force_channel_monitor_updated(&self, funding_txo: OutPoint, monitor_update_id: u64) {
 		self.pending_monitor_events.lock().unwrap().push(MonitorEvent::UpdateCompleted {
-			funding_txo, monitor_update_id: highest_applied_update_id
+			funding_txo,
+			monitor_update_id,
 		});
 	}
 
-	#[cfg(any(test, feature = "fuzztarget", feature = "_test_utils"))]
+	#[cfg(any(test, fuzzing, feature = "_test_utils"))]
 	pub fn get_and_clear_pending_events(&self) -> Vec<events::Event> {
 		use util::events::EventsProvider;
 		let events = core::cell::RefCell::new(Vec::new());
@@ -306,11 +501,9 @@ where
 	L::Target: Logger,
 	P::Target: Persist<ChannelSigner>,
 {
-	fn block_connected(&self, block: &Block, height: u32) {
-		let header = &block.header;
-		let txdata: Vec<_> = block.txdata.iter().enumerate().collect();
+	fn filtered_block_connected(&self, header: &BlockHeader, txdata: &TransactionData, height: u32) {
 		log_debug!(self.logger, "New best block {} at height {} provided via block_connected", header.block_hash(), height);
-		self.process_chain_data(header, &txdata, |monitor, txdata| {
+		self.process_chain_data(header, Some(height), &txdata, |monitor, txdata| {
 			monitor.block_connected(
 				header, txdata, height, &*self.broadcaster, &*self.fee_estimator, &*self.logger)
 		});
@@ -337,7 +530,7 @@ where
 {
 	fn transactions_confirmed(&self, header: &BlockHeader, txdata: &TransactionData, height: u32) {
 		log_debug!(self.logger, "{} provided transactions confirmed at height {} in block {}", txdata.len(), height, header.block_hash());
-		self.process_chain_data(header, txdata, |monitor, txdata| {
+		self.process_chain_data(header, None, txdata, |monitor, txdata| {
 			monitor.transactions_confirmed(
 				header, txdata, height, &*self.broadcaster, &*self.fee_estimator, &*self.logger)
 		});
@@ -353,7 +546,7 @@ where
 
 	fn best_block_updated(&self, header: &BlockHeader, height: u32) {
 		log_debug!(self.logger, "New best block {} at height {} provided via best_block_updated", header.block_hash(), height);
-		self.process_chain_data(header, &[], |monitor, txdata| {
+		self.process_chain_data(header, Some(height), &[], |monitor, txdata| {
 			// While in practice there shouldn't be any recursive calls when given empty txdata,
 			// it's still possible if a chain::Filter implementation returns a transaction.
 			debug_assert!(txdata.is_empty());
@@ -397,22 +590,29 @@ where C::Target: chain::Filter,
 				return Err(ChannelMonitorUpdateErr::PermanentFailure)},
 			hash_map::Entry::Vacant(e) => e,
 		};
-		let persist_res = self.persister.persist_new_channel(funding_outpoint, &monitor);
+		log_trace!(self.logger, "Got new ChannelMonitor for channel {}", log_funding_info!(monitor));
+		let update_id = MonitorUpdateId::from_new_monitor(&monitor);
+		let mut pending_monitor_updates = Vec::new();
+		let persist_res = self.persister.persist_new_channel(funding_outpoint, &monitor, update_id);
 		if persist_res.is_err() {
-			log_error!(self.logger, "Failed to persist new channel data: {:?}", persist_res);
+			log_error!(self.logger, "Failed to persist new ChannelMonitor for channel {}: {:?}", log_funding_info!(monitor), persist_res);
+		} else {
+			log_trace!(self.logger, "Finished persisting new ChannelMonitor for channel {}", log_funding_info!(monitor));
 		}
 		if persist_res == Err(ChannelMonitorUpdateErr::PermanentFailure) {
 			return persist_res;
+		} else if persist_res.is_err() {
+			pending_monitor_updates.push(update_id);
 		}
-		{
-			let funding_txo = monitor.get_funding_txo();
-			log_trace!(self.logger, "Got new Channel Monitor for channel {}", log_bytes!(funding_txo.0.to_channel_id()[..]));
-
-			if let Some(ref chain_source) = self.chain_source {
-				monitor.load_outputs_to_watch(chain_source);
-			}
+		if let Some(ref chain_source) = self.chain_source {
+			monitor.load_outputs_to_watch(chain_source);
 		}
-		entry.insert(MonitorHolder { monitor });
+		entry.insert(MonitorHolder {
+			monitor,
+			pending_monitor_updates: Mutex::new(pending_monitor_updates),
+			channel_perm_failed: AtomicBool::new(false),
+			last_chain_persist_height: AtomicUsize::new(self.highest_chain_height.load(Ordering::Acquire)),
+		});
 		persist_res
 	}
 
@@ -428,26 +628,37 @@ where C::Target: chain::Filter,
 				// We should never ever trigger this from within ChannelManager. Technically a
 				// user could use this object with some proxying in between which makes this
 				// possible, but in tests and fuzzing, this should be a panic.
-				#[cfg(any(test, feature = "fuzztarget"))]
+				#[cfg(any(test, fuzzing))]
 				panic!("ChannelManager generated a channel update for a channel that was not yet registered!");
-				#[cfg(not(any(test, feature = "fuzztarget")))]
+				#[cfg(not(any(test, fuzzing)))]
 				Err(ChannelMonitorUpdateErr::PermanentFailure)
 			},
 			Some(monitor_state) => {
 				let monitor = &monitor_state.monitor;
-				log_trace!(self.logger, "Updating Channel Monitor for channel {}", log_funding_info!(monitor));
+				log_trace!(self.logger, "Updating ChannelMonitor for channel {}", log_funding_info!(monitor));
 				let update_res = monitor.update_monitor(&update, &self.broadcaster, &self.fee_estimator, &self.logger);
-				if let Err(e) = &update_res {
-					log_error!(self.logger, "Failed to update channel monitor: {:?}", e);
+				if update_res.is_err() {
+					log_error!(self.logger, "Failed to update ChannelMonitor for channel {}.", log_funding_info!(monitor));
 				}
 				// Even if updating the monitor returns an error, the monitor's state will
 				// still be changed. So, persist the updated monitor despite the error.
-				let persist_res = self.persister.update_persisted_channel(funding_txo, &update, monitor);
-				if let Err(ref e) = persist_res {
-					log_error!(self.logger, "Failed to persist channel monitor update: {:?}", e);
+				let update_id = MonitorUpdateId::from_monitor_update(&update);
+				let mut pending_monitor_updates = monitor_state.pending_monitor_updates.lock().unwrap();
+				let persist_res = self.persister.update_persisted_channel(funding_txo, &Some(update), monitor, update_id);
+				if let Err(e) = persist_res {
+					if e == ChannelMonitorUpdateErr::TemporaryFailure {
+						pending_monitor_updates.push(update_id);
+					} else {
+						monitor_state.channel_perm_failed.store(true, Ordering::Release);
+					}
+					log_error!(self.logger, "Failed to persist ChannelMonitor update for channel {}: {:?}", log_funding_info!(monitor), e);
+				} else {
+					log_trace!(self.logger, "Finished persisting ChannelMonitor update for channel {}", log_funding_info!(monitor));
 				}
 				if update_res.is_err() {
 					Err(ChannelMonitorUpdateErr::PermanentFailure)
+				} else if monitor_state.channel_perm_failed.load(Ordering::Acquire) {
+					Err(ChannelMonitorUpdateErr::PermanentFailure)
 				} else {
 					persist_res
 				}
@@ -458,7 +669,31 @@ where C::Target: chain::Filter,
 	fn release_pending_monitor_events(&self) -> Vec<MonitorEvent> {
 		let mut pending_monitor_events = self.pending_monitor_events.lock().unwrap().split_off(0);
 		for monitor_state in self.monitors.read().unwrap().values() {
-			pending_monitor_events.append(&mut monitor_state.monitor.get_and_clear_pending_monitor_events());
+			let is_pending_monitor_update = monitor_state.has_pending_chainsync_updates(&monitor_state.pending_monitor_updates.lock().unwrap());
+			if is_pending_monitor_update &&
+					monitor_state.last_chain_persist_height.load(Ordering::Acquire) + LATENCY_GRACE_PERIOD_BLOCKS as usize
+						> self.highest_chain_height.load(Ordering::Acquire)
+			{
+				log_info!(self.logger, "A Channel Monitor sync is still in progress, refusing to provide monitor events!");
+			} else {
+				if monitor_state.channel_perm_failed.load(Ordering::Acquire) {
+					// If an `UpdateOrigin::ChainSync` persistence failed with `PermanentFailure`,
+					// we don't really know if the latest `ChannelMonitor` state is on disk or not.
+					// We're supposed to hold monitor updates until the latest state is on disk to
+					// avoid duplicate events, but the user told us persistence is unreliable and may
+					// not complete. We can't hold events forever because we may learn some payment
+					// preimage, so instead we just log and hope the user complied with the
+					// `PermanentFailure` requirements of having at least the local-disk copy
+					// updated.
+					log_info!(self.logger, "A Channel Monitor sync returned PermanentFailure. Returning monitor events but duplicate events may appear after reload!");
+				}
+				if is_pending_monitor_update {
+					log_error!(self.logger, "A ChannelMonitor sync took longer than {} blocks to complete.", LATENCY_GRACE_PERIOD_BLOCKS);
+					log_error!(self.logger, "   To avoid funds-loss, we are allowing monitor updates to be released.");
+					log_error!(self.logger, "   This may cause duplicate payment events to be generated.");
+				}
+				pending_monitor_events.append(&mut monitor_state.monitor.get_and_clear_pending_monitor_events());
+			}
 		}
 		pending_monitor_events
 	}
@@ -490,10 +725,18 @@ impl<ChannelSigner: Sign, C: Deref, T: Deref, F: Deref, L: Deref, P: Deref> even
 
 #[cfg(test)]
 mod tests {
-	use ::{check_added_monitors, get_local_commitment_txn};
+	use bitcoin::BlockHeader;
+	use ::{check_added_monitors, check_closed_broadcast, check_closed_event};
+	use ::{expect_payment_sent, expect_payment_sent_without_paths, expect_payment_path_successful, get_event_msg};
+	use ::{get_htlc_update_msgs, get_local_commitment_txn, get_revoke_commit_msgs, get_route_and_payment_hash, unwrap_send_err};
+	use chain::{ChannelMonitorUpdateErr, Confirm, Watch};
+	use chain::channelmonitor::LATENCY_GRACE_PERIOD_BLOCKS;
+	use ln::channelmanager::PaymentSendFailure;
 	use ln::features::InitFeatures;
 	use ln::functional_test_utils::*;
-	use util::events::MessageSendEventsProvider;
+	use ln::msgs::ChannelMessageHandler;
+	use util::errors::APIError;
+	use util::events::{ClosureReason, MessageSendEvent, MessageSendEventsProvider};
 	use util::test_utils::{OnRegisterOutput, TxOutReference};
 
 	/// Tests that in-block dependent transactions are processed by `block_connected` when not
@@ -538,4 +781,180 @@ mod tests {
 		nodes[1].node.get_and_clear_pending_msg_events();
 		nodes[1].node.get_and_clear_pending_events();
 	}
+
+	#[test]
+	fn test_async_ooo_offchain_updates() {
+		// Test that if we have multiple offchain updates being persisted and they complete
+		// out-of-order, the ChainMonitor waits until all have completed before informing the
+		// ChannelManager.
+		let chanmon_cfgs = create_chanmon_cfgs(2);
+		let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+		let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+		let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+		create_announced_chan_between_nodes(&nodes, 0, 1, InitFeatures::known(), InitFeatures::known());
+
+		// Route two payments to be claimed at the same time.
+		let payment_preimage_1 = route_payment(&nodes[0], &[&nodes[1]], 1_000_000).0;
+		let payment_preimage_2 = route_payment(&nodes[0], &[&nodes[1]], 1_000_000).0;
+
+		chanmon_cfgs[1].persister.offchain_monitor_updates.lock().unwrap().clear();
+		chanmon_cfgs[1].persister.set_update_ret(Err(ChannelMonitorUpdateErr::TemporaryFailure));
+
+		nodes[1].node.claim_funds(payment_preimage_1);
+		check_added_monitors!(nodes[1], 1);
+		nodes[1].node.claim_funds(payment_preimage_2);
+		check_added_monitors!(nodes[1], 1);
+
+		chanmon_cfgs[1].persister.set_update_ret(Ok(()));
+
+		let persistences = chanmon_cfgs[1].persister.offchain_monitor_updates.lock().unwrap().clone();
+		assert_eq!(persistences.len(), 1);
+		let (funding_txo, updates) = persistences.iter().next().unwrap();
+		assert_eq!(updates.len(), 2);
+
+		// Note that `updates` is a hash-based collection, so the iteration order here is random.
+		// The test should pass either way; if it fails intermittently, it depends on that ordering.
+		let mut update_iter = updates.iter();
+		nodes[1].chain_monitor.chain_monitor.channel_monitor_updated(*funding_txo, update_iter.next().unwrap().clone()).unwrap();
+		assert!(nodes[1].chain_monitor.release_pending_monitor_events().is_empty());
+		assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+		nodes[1].chain_monitor.chain_monitor.channel_monitor_updated(*funding_txo, update_iter.next().unwrap().clone()).unwrap();
+
+		// Now manually walk the commitment signed dance - because we claimed two payments
+		// back-to-back it doesn't fit into the neat walk commitment_signed_dance does.
+
+		let updates = get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id());
+		nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &updates.update_fulfill_htlcs[0]);
+		expect_payment_sent_without_paths!(nodes[0], payment_preimage_1);
+		nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &updates.commitment_signed);
+		check_added_monitors!(nodes[0], 1);
+		let (as_first_raa, as_first_update) = get_revoke_commit_msgs!(nodes[0], nodes[1].node.get_our_node_id());
+
+		nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_first_raa);
+		check_added_monitors!(nodes[1], 1);
+		let bs_second_updates = get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id());
+		nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_first_update);
+		check_added_monitors!(nodes[1], 1);
+		let bs_first_raa = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, nodes[0].node.get_our_node_id());
+
+		nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_second_updates.update_fulfill_htlcs[0]);
+		expect_payment_sent_without_paths!(nodes[0], payment_preimage_2);
+		nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_second_updates.commitment_signed);
+		check_added_monitors!(nodes[0], 1);
+		nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_first_raa);
+		expect_payment_path_successful!(nodes[0]);
+		check_added_monitors!(nodes[0], 1);
+		let (as_second_raa, as_second_update) = get_revoke_commit_msgs!(nodes[0], nodes[1].node.get_our_node_id());
+
+		nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_second_raa);
+		check_added_monitors!(nodes[1], 1);
+		nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_second_update);
+		check_added_monitors!(nodes[1], 1);
+		let bs_second_raa = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, nodes[0].node.get_our_node_id());
+
+		nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_second_raa);
+		expect_payment_path_successful!(nodes[0]);
+		check_added_monitors!(nodes[0], 1);
+	}
+
+	fn do_chainsync_pauses_events(block_timeout: bool) {
+		// When a chainsync monitor update occurs, any `MonitorEvent`s should be held before being
+		// passed upstream to a `ChannelManager` via `Watch::release_pending_monitor_events`. This
+		// tests that behavior, as well as some ways it might go wrong.
+		let chanmon_cfgs = create_chanmon_cfgs(2);
+		let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+		let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+		let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+		let channel = create_announced_chan_between_nodes(
+			&nodes, 0, 1, InitFeatures::known(), InitFeatures::known());
+
+		// Get a route for later and rebalance the channel somewhat
+		send_payment(&nodes[0], &[&nodes[1]], 10_000_000);
+		let (route, second_payment_hash, _, second_payment_secret) = get_route_and_payment_hash!(nodes[0], nodes[1], 100_000);
+
+		// First route a payment that we will claim on chain and give the recipient the preimage.
+		let payment_preimage = route_payment(&nodes[0], &[&nodes[1]], 1_000_000).0;
+		nodes[1].node.claim_funds(payment_preimage);
+		nodes[1].node.get_and_clear_pending_msg_events();
+		check_added_monitors!(nodes[1], 1);
+		let remote_txn = get_local_commitment_txn!(nodes[1], channel.2);
+		assert_eq!(remote_txn.len(), 2);
+
+		// Temp-fail the block connection which will hold the channel-closed event
+		chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
+		chanmon_cfgs[0].persister.set_update_ret(Err(ChannelMonitorUpdateErr::TemporaryFailure));
+
+		// Connect B's commitment transaction, but only to the ChainMonitor/ChannelMonitor. The
+		// channel is now closed, but the ChannelManager doesn't know that yet.
+		let new_header = BlockHeader {
+			version: 2, time: 0, bits: 0, nonce: 0,
+			prev_blockhash: nodes[0].best_block_info().0,
+			merkle_root: Default::default() };
+		nodes[0].chain_monitor.chain_monitor.transactions_confirmed(&new_header,
+			&[(0, &remote_txn[0]), (1, &remote_txn[1])], nodes[0].best_block_info().1 + 1);
+		assert!(nodes[0].chain_monitor.release_pending_monitor_events().is_empty());
+		nodes[0].chain_monitor.chain_monitor.best_block_updated(&new_header, nodes[0].best_block_info().1 + 1);
+		assert!(nodes[0].chain_monitor.release_pending_monitor_events().is_empty());
+
+		// If the ChannelManager tries to update the channel, however, the ChainMonitor will pass
+		// the update through to the ChannelMonitor which will refuse it (as the channel is closed).
+		chanmon_cfgs[0].persister.set_update_ret(Ok(()));
+		unwrap_send_err!(nodes[0].node.send_payment(&route, second_payment_hash, &Some(second_payment_secret)),
+			true, APIError::ChannelUnavailable { ref err },
+			assert!(err.contains("ChannelMonitor storage failure")));
+		check_added_monitors!(nodes[0], 2); // After the failure we generate a close-channel monitor update
+		check_closed_broadcast!(nodes[0], true);
+		check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError { err: "ChannelMonitor storage failure".to_string() });
+
+		// However, as the ChainMonitor is still waiting for the original persistence to complete,
+		// it won't yet release the MonitorEvents.
+		assert!(nodes[0].chain_monitor.release_pending_monitor_events().is_empty());
+
+		if block_timeout {
+			// After `LATENCY_GRACE_PERIOD_BLOCKS` blocks, pending `MonitorEvent`s should be released either way.
+			let latest_header = BlockHeader {
+				version: 2, time: 0, bits: 0, nonce: 0,
+				prev_blockhash: nodes[0].best_block_info().0,
+				merkle_root: Default::default() };
+			nodes[0].chain_monitor.chain_monitor.best_block_updated(&latest_header, nodes[0].best_block_info().1 + LATENCY_GRACE_PERIOD_BLOCKS);
+		} else {
+			let persistences = chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clone();
+			for (funding_outpoint, update_ids) in persistences {
+				for update_id in update_ids {
+					nodes[0].chain_monitor.chain_monitor.channel_monitor_updated(funding_outpoint, update_id).unwrap();
+				}
+			}
+		}
+
+		expect_payment_sent!(nodes[0], payment_preimage);
+	}
+
+	#[test]
+	fn chainsync_pauses_events() {
+		do_chainsync_pauses_events(false);
+		do_chainsync_pauses_events(true);
+	}
+
+	#[test]
+	fn update_during_chainsync_fails_channel() {
+		let chanmon_cfgs = create_chanmon_cfgs(2);
+		let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+		let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+		let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+		create_announced_chan_between_nodes(&nodes, 0, 1, InitFeatures::known(), InitFeatures::known());
+
+		chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
+		chanmon_cfgs[0].persister.set_update_ret(Err(ChannelMonitorUpdateErr::PermanentFailure));
+
+		connect_blocks(&nodes[0], 1);
+		// Before processing events, the ChannelManager will still think the Channel is open and
+		// there won't be any ChannelMonitorUpdates
+		assert_eq!(nodes[0].node.list_channels().len(), 1);
+		check_added_monitors!(nodes[0], 0);
+		// ... however once we process events, the channel will close, creating a channel-closed
+		// ChannelMonitorUpdate.
+		check_closed_broadcast!(nodes[0], true);
+		check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError { err: "Failed to persist ChannelMonitor update during chain sync".to_string() });
+		check_added_monitors!(nodes[0], 1);
+	}
 }