Merge pull request #1286 from tnull/add_random_cltv_offsets
[rust-lightning] / lightning / src / chain / chainmonitor.rs
index 323598f2061c32829b1c13e42c50bde30fee2432..19095fa2375a6d8c3a52a61262736af9ac2fd61c 100644 (file)
@@ -29,9 +29,10 @@ use bitcoin::hash_types::Txid;
 use chain;
 use chain::{ChannelMonitorUpdateErr, Filter, WatchedOutput};
 use chain::chaininterface::{BroadcasterInterface, FeeEstimator};
-use chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, Balance, MonitorEvent, TransactionOutputs};
+use chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, Balance, MonitorEvent, TransactionOutputs, LATENCY_GRACE_PERIOD_BLOCKS};
 use chain::transaction::{OutPoint, TransactionData};
 use chain::keysinterface::Sign;
+use util::atomic_counter::AtomicCounter;
 use util::logger::Logger;
 use util::errors::APIError;
 use util::events;
@@ -41,10 +42,19 @@ use ln::channelmanager::ChannelDetails;
 use prelude::*;
 use sync::{RwLock, RwLockReadGuard, Mutex, MutexGuard};
 use core::ops::Deref;
+use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 
 #[derive(Clone, Copy, Hash, PartialEq, Eq)]
+/// A specific update's ID stored in a `MonitorUpdateId`, separated out to make the contents
+/// entirely opaque.
 enum UpdateOrigin {
+       /// An update that was generated by the `ChannelManager` (via our `chain::Watch`
+       /// implementation). This corresponds to an actual [`ChannelMonitorUpdate::update_id`] field
+       /// and [`ChannelMonitor::get_latest_update_id`].
        OffChain(u64),
+       /// An update that was generated during blockchain processing. The ID here is specific to the
+       /// generating [`ChainMonitor`] and does *not* correspond to any on-disk IDs.
+       ChainSync(u64),
 }
 
 /// An opaque identifier describing a specific [`Persist`] method call.
@@ -82,10 +92,12 @@ impl MonitorUpdateId {
 ///    closed without broadcasting the latest state. See
 ///    [`ChannelMonitorUpdateErr::PermanentFailure`] for more details.
 pub trait Persist<ChannelSigner: Sign> {
-       /// Persist a new channel's data. The data can be stored any way you want, but the identifier
-       /// provided by LDK is the channel's outpoint (and it is up to you to maintain a correct
-       /// mapping between the outpoint and the stored channel data). Note that you **must** persist
-       /// every new monitor to disk.
+       /// Persist a new channel's data in response to a [`chain::Watch::watch_channel`] call. This is
+       /// called by [`ChannelManager`] for new channels, or may be called directly, e.g. on startup.
+       ///
+       /// The data can be stored any way you want, but the identifier provided by LDK is the
+       /// channel's outpoint (and it is up to you to maintain a correct mapping between the outpoint
+       /// and the stored channel data). Note that you **must** persist every new monitor to disk.
        ///
        /// The `update_id` is used to identify this call to [`ChainMonitor::channel_monitor_updated`],
        /// if you return [`ChannelMonitorUpdateErr::TemporaryFailure`].
@@ -93,6 +105,7 @@ pub trait Persist<ChannelSigner: Sign> {
        /// See [`Writeable::write`] on [`ChannelMonitor`] for writing out a `ChannelMonitor`
        /// and [`ChannelMonitorUpdateErr`] for requirements when returning errors.
        ///
+       /// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
        /// [`Writeable::write`]: crate::util::ser::Writeable::write
        fn persist_new_channel(&self, channel_id: OutPoint, data: &ChannelMonitor<ChannelSigner>, update_id: MonitorUpdateId) -> Result<(), ChannelMonitorUpdateErr>;
 
@@ -103,6 +116,12 @@ pub trait Persist<ChannelSigner: Sign> {
        /// updated monitor itself to disk/backups. See the [`Persist`] trait documentation for more
        /// details.
        ///
+       /// During blockchain synchronization operations, this may be called with no
+       /// [`ChannelMonitorUpdate`], in which case the full [`ChannelMonitor`] needs to be persisted.
+       /// Note that after the full [`ChannelMonitor`] is persisted any previous
+       /// [`ChannelMonitorUpdate`]s which were persisted should be discarded - they can no longer be
+       /// applied to the persisted [`ChannelMonitor`] as they were already applied.
+       ///
        /// If an implementer chooses to persist the updates only, they need to make
        /// sure that all the updates are applied to the `ChannelMonitors` *before*
        /// the set of channel monitors is given to the `ChannelManager`
@@ -123,7 +142,7 @@ pub trait Persist<ChannelSigner: Sign> {
        /// [`ChannelMonitorUpdateErr`] for requirements when returning errors.
        ///
        /// [`Writeable::write`]: crate::util::ser::Writeable::write
-       fn update_persisted_channel(&self, channel_id: OutPoint, update: &ChannelMonitorUpdate, data: &ChannelMonitor<ChannelSigner>, update_id: MonitorUpdateId) -> Result<(), ChannelMonitorUpdateErr>;
+       fn update_persisted_channel(&self, channel_id: OutPoint, update: &Option<ChannelMonitorUpdate>, data: &ChannelMonitor<ChannelSigner>, update_id: MonitorUpdateId) -> Result<(), ChannelMonitorUpdateErr>;
 }
 
 struct MonitorHolder<ChannelSigner: Sign> {
@@ -134,7 +153,31 @@ struct MonitorHolder<ChannelSigner: Sign> {
        /// update_persisted_channel, the user returns a TemporaryFailure, and then calls
        /// channel_monitor_updated immediately, racing our insertion of the pending update into the
        /// contained Vec.
+       ///
+       /// Beyond the synchronization of updates themselves, we cannot handle user events until after
+       /// any chain updates have been stored on disk. Thus, we scan this list when returning updates
+       /// to the ChannelManager, refusing to return any updates for a ChannelMonitor which is still
+       /// being persisted fully to disk after a chain update.
+       ///
+       /// This avoids the possibility of handling, e.g. an on-chain claim, generating a claim monitor
+       /// event, resulting in the relevant ChannelManager generating a PaymentSent event and dropping
+       /// the pending payment entry, and then reloading before the monitor is persisted, resulting in
+       /// the ChannelManager re-adding the same payment entry, before the same block is replayed,
+       /// resulting in a duplicate PaymentSent event.
        pending_monitor_updates: Mutex<Vec<MonitorUpdateId>>,
+       /// When the user returns a PermanentFailure error from an update_persisted_channel call during
+       /// block processing, we inform the ChannelManager that the channel should be closed
+       /// asynchronously. In order to ensure no further changes happen before the ChannelManager has
+       /// processed the closure event, we set this to true and return PermanentFailure for any other
+       /// chain::Watch events.
+       channel_perm_failed: AtomicBool,
+       /// The last block height at which no [`UpdateOrigin::ChainSync`] monitor updates were present
+       /// in `pending_monitor_updates`.
+       /// If it's been more than [`LATENCY_GRACE_PERIOD_BLOCKS`] since we started waiting on a chain
+       /// sync event, we let monitor events return to `ChannelManager` because we cannot hold them up
+       /// forever or we'll end up with HTLC preimages waiting to feed back into an upstream channel
+       /// forever, risking funds loss.
+       last_chain_persist_height: AtomicUsize,
 }
 
 impl<ChannelSigner: Sign> MonitorHolder<ChannelSigner> {
@@ -142,6 +185,10 @@ impl<ChannelSigner: Sign> MonitorHolder<ChannelSigner> {
                pending_monitor_updates_lock.iter().any(|update_id|
                        if let UpdateOrigin::OffChain(_) = update_id.contents { true } else { false })
        }
+       fn has_pending_chainsync_updates(&self, pending_monitor_updates_lock: &MutexGuard<Vec<MonitorUpdateId>>) -> bool {
+               pending_monitor_updates_lock.iter().any(|update_id|
+                       if let UpdateOrigin::ChainSync(_) = update_id.contents { true } else { false })
+       }
 }
 
 /// A read-only reference to a current ChannelMonitor.
@@ -177,12 +224,20 @@ pub struct ChainMonitor<ChannelSigner: Sign, C: Deref, T: Deref, F: Deref, L: De
         P::Target: Persist<ChannelSigner>,
 {
        monitors: RwLock<HashMap<OutPoint, MonitorHolder<ChannelSigner>>>,
+       /// When we generate a [`MonitorUpdateId`] for a chain-event monitor persistence, we need a
+       /// unique ID, which we calculate by simply getting the next value from this counter. Note that
+       /// the IDs are never persisted so it's ok that they reset on restart.
+       sync_persistence_id: AtomicCounter,
        chain_source: Option<C>,
        broadcaster: T,
        logger: L,
        fee_estimator: F,
        persister: P,
+       /// "User-provided" (i.e. persistence-completion/-failed) [`MonitorEvent`]s. These came directly
+       /// from the user and not from a [`ChannelMonitor`].
        pending_monitor_events: Mutex<Vec<MonitorEvent>>,
+       /// The best block height seen, used as a proxy for the passage of time.
+       highest_chain_height: AtomicUsize,
 }
 
 impl<ChannelSigner: Sign, C: Deref, T: Deref, F: Deref, L: Deref, P: Deref> ChainMonitor<ChannelSigner, C, T, F, L, P>
@@ -201,31 +256,75 @@ where C::Target: chain::Filter,
        /// calls must not exclude any transactions matching the new outputs nor any in-block
        /// descendants of such transactions. It is not necessary to re-fetch the block to obtain
        /// updated `txdata`.
-       fn process_chain_data<FN>(&self, header: &BlockHeader, txdata: &TransactionData, process: FN)
+       ///
+       /// Calls which represent a new blockchain tip height should set `best_height`.
+       fn process_chain_data<FN>(&self, header: &BlockHeader, best_height: Option<u32>, txdata: &TransactionData, process: FN)
        where
                FN: Fn(&ChannelMonitor<ChannelSigner>, &TransactionData) -> Vec<TransactionOutputs>
        {
                let mut dependent_txdata = Vec::new();
-               let monitor_states = self.monitors.read().unwrap();
-               for monitor_state in monitor_states.values() {
-                       let mut txn_outputs = process(&monitor_state.monitor, txdata);
-
-                       // Register any new outputs with the chain source for filtering, storing any dependent
-                       // transactions from within the block that previously had not been included in txdata.
-                       if let Some(ref chain_source) = self.chain_source {
-                               let block_hash = header.block_hash();
-                               for (txid, mut outputs) in txn_outputs.drain(..) {
-                                       for (idx, output) in outputs.drain(..) {
-                                               // Register any new outputs with the chain source for filtering and recurse
-                                               // if it indicates that there are dependent transactions within the block
-                                               // that had not been previously included in txdata.
-                                               let output = WatchedOutput {
-                                                       block_hash: Some(block_hash),
-                                                       outpoint: OutPoint { txid, index: idx as u16 },
-                                                       script_pubkey: output.script_pubkey,
-                                               };
-                                               if let Some(tx) = chain_source.register_output(output) {
-                                                       dependent_txdata.push(tx);
+               {
+                       let monitor_states = self.monitors.write().unwrap();
+                       if let Some(height) = best_height {
+                               // If the best block height is being updated, update highest_chain_height under the
+                               // monitors write lock.
+                               let old_height = self.highest_chain_height.load(Ordering::Acquire);
+                               let new_height = height as usize;
+                               if new_height > old_height {
+                                       self.highest_chain_height.store(new_height, Ordering::Release);
+                               }
+                       }
+
+                       for (funding_outpoint, monitor_state) in monitor_states.iter() {
+                               let monitor = &monitor_state.monitor;
+                               let mut txn_outputs;
+                               {
+                                       txn_outputs = process(monitor, txdata);
+                                       let update_id = MonitorUpdateId {
+                                               contents: UpdateOrigin::ChainSync(self.sync_persistence_id.get_increment()),
+                                       };
+                                       let mut pending_monitor_updates = monitor_state.pending_monitor_updates.lock().unwrap();
+                                       if let Some(height) = best_height {
+                                               if !monitor_state.has_pending_chainsync_updates(&pending_monitor_updates) {
+                                                       // If there are no ChainSync persists awaiting completion, go ahead and
+                                                       // set last_chain_persist_height here - we wouldn't want the first
+                                                       // TemporaryFailure to always immediately be considered "overly delayed".
+                                                       monitor_state.last_chain_persist_height.store(height as usize, Ordering::Release);
+                                               }
+                                       }
+
+                                       log_trace!(self.logger, "Syncing Channel Monitor for channel {}", log_funding_info!(monitor));
+                                       match self.persister.update_persisted_channel(*funding_outpoint, &None, monitor, update_id) {
+                                               Ok(()) =>
+                                                       log_trace!(self.logger, "Finished syncing Channel Monitor for channel {}", log_funding_info!(monitor)),
+                                               Err(ChannelMonitorUpdateErr::PermanentFailure) => {
+                                                       monitor_state.channel_perm_failed.store(true, Ordering::Release);
+                                                       self.pending_monitor_events.lock().unwrap().push(MonitorEvent::UpdateFailed(*funding_outpoint));
+                                               },
+                                               Err(ChannelMonitorUpdateErr::TemporaryFailure) => {
+                                                       log_debug!(self.logger, "Channel Monitor sync for channel {} in progress, holding events until completion!", log_funding_info!(monitor));
+                                                       pending_monitor_updates.push(update_id);
+                                               },
+                                       }
+                               }
+
+                               // Register any new outputs with the chain source for filtering, storing any dependent
+                               // transactions from within the block that previously had not been included in txdata.
+                               if let Some(ref chain_source) = self.chain_source {
+                                       let block_hash = header.block_hash();
+                                       for (txid, mut outputs) in txn_outputs.drain(..) {
+                                               for (idx, output) in outputs.drain(..) {
+                                                       // Register any new outputs with the chain source for filtering and recurse
+                                                       // if it indicates that there are dependent transactions within the block
+                                                       // that had not been previously included in txdata.
+                                                       let output = WatchedOutput {
+                                                               block_hash: Some(block_hash),
+                                                               outpoint: OutPoint { txid, index: idx as u16 },
+                                                               script_pubkey: output.script_pubkey,
+                                                       };
+                                                       if let Some(tx) = chain_source.register_output(output) {
+                                                               dependent_txdata.push(tx);
+                                                       }
                                                }
                                        }
                                }
@@ -237,7 +336,7 @@ where C::Target: chain::Filter,
                        dependent_txdata.sort_unstable_by_key(|(index, _tx)| *index);
                        dependent_txdata.dedup_by_key(|(index, _tx)| *index);
                        let txdata: Vec<_> = dependent_txdata.iter().map(|(index, tx)| (*index, tx)).collect();
-                       self.process_chain_data(header, &txdata, process);
+                       self.process_chain_data(header, None, &txdata, process); // We skip the best height the second go-around
                }
        }
 
@@ -251,12 +350,14 @@ where C::Target: chain::Filter,
        pub fn new(chain_source: Option<C>, broadcaster: T, logger: L, feeest: F, persister: P) -> Self {
                Self {
                        monitors: RwLock::new(HashMap::new()),
+                       sync_persistence_id: AtomicCounter::new(),
                        chain_source,
                        broadcaster,
                        logger,
                        fee_estimator: feeest,
                        persister,
                        pending_monitor_events: Mutex::new(Vec::new()),
+                       highest_chain_height: AtomicUsize::new(0),
                }
        }
 
@@ -337,7 +438,7 @@ where C::Target: chain::Filter,
                pending_monitor_updates.retain(|update_id| *update_id != completed_update_id);
 
                match completed_update_id {
-                       MonitorUpdateId { .. } => {
+                       MonitorUpdateId { contents: UpdateOrigin::OffChain(_) } => {
                                // Note that we only check for `UpdateOrigin::OffChain` failures here - if
                                // we're being told that a `UpdateOrigin::OffChain` monitor update completed,
                                // we only care about ensuring we don't tell the `ChannelManager` to restore
@@ -348,8 +449,9 @@ where C::Target: chain::Filter,
                                // `MonitorEvent`s from the monitor back to the `ChannelManager` until they
                                // complete.
                                let monitor_is_pending_updates = monitor_data.has_pending_offchain_updates(&pending_monitor_updates);
-                               if monitor_is_pending_updates {
-                                       // If there are still monitor updates pending, we cannot yet construct an
+                               if monitor_is_pending_updates || monitor_data.channel_perm_failed.load(Ordering::Acquire) {
+                                       // If there are still monitor updates pending (or an old monitor update
+                                       // finished after a later one perm-failed), we cannot yet construct an
                                        // UpdateCompleted event.
                                        return Ok(());
                                }
@@ -357,7 +459,14 @@ where C::Target: chain::Filter,
                                        funding_txo,
                                        monitor_update_id: monitor_data.monitor.get_latest_update_id(),
                                });
-                       }
+                       },
+                       MonitorUpdateId { contents: UpdateOrigin::ChainSync(_) } => {
+                               if !monitor_data.has_pending_chainsync_updates(&pending_monitor_updates) {
+                                       monitor_data.last_chain_persist_height.store(self.highest_chain_height.load(Ordering::Acquire), Ordering::Release);
+                                       // The next time release_pending_monitor_events is called, any events for this
+                                       // ChannelMonitor will be returned.
+                               }
+                       },
                }
                Ok(())
        }
@@ -365,7 +474,7 @@ where C::Target: chain::Filter,
        /// This wrapper avoids having to update some of our tests for now as they assume the direct
        /// chain::Watch API wherein we mark a monitor fully-updated by just calling
        /// channel_monitor_updated once with the highest ID.
-       #[cfg(any(test, feature = "fuzztarget"))]
+       #[cfg(any(test, fuzzing))]
        pub fn force_channel_monitor_updated(&self, funding_txo: OutPoint, monitor_update_id: u64) {
                self.pending_monitor_events.lock().unwrap().push(MonitorEvent::UpdateCompleted {
                        funding_txo,
@@ -373,7 +482,7 @@ where C::Target: chain::Filter,
                });
        }
 
-       #[cfg(any(test, feature = "fuzztarget", feature = "_test_utils"))]
+       #[cfg(any(test, fuzzing, feature = "_test_utils"))]
        pub fn get_and_clear_pending_events(&self) -> Vec<events::Event> {
                use util::events::EventsProvider;
                let events = core::cell::RefCell::new(Vec::new());
@@ -396,7 +505,7 @@ where
                let header = &block.header;
                let txdata: Vec<_> = block.txdata.iter().enumerate().collect();
                log_debug!(self.logger, "New best block {} at height {} provided via block_connected", header.block_hash(), height);
-               self.process_chain_data(header, &txdata, |monitor, txdata| {
+               self.process_chain_data(header, Some(height), &txdata, |monitor, txdata| {
                        monitor.block_connected(
                                header, txdata, height, &*self.broadcaster, &*self.fee_estimator, &*self.logger)
                });
@@ -423,7 +532,7 @@ where
 {
        fn transactions_confirmed(&self, header: &BlockHeader, txdata: &TransactionData, height: u32) {
                log_debug!(self.logger, "{} provided transactions confirmed at height {} in block {}", txdata.len(), height, header.block_hash());
-               self.process_chain_data(header, txdata, |monitor, txdata| {
+               self.process_chain_data(header, None, txdata, |monitor, txdata| {
                        monitor.transactions_confirmed(
                                header, txdata, height, &*self.broadcaster, &*self.fee_estimator, &*self.logger)
                });
@@ -439,7 +548,7 @@ where
 
        fn best_block_updated(&self, header: &BlockHeader, height: u32) {
                log_debug!(self.logger, "New best block {} at height {} provided via best_block_updated", header.block_hash(), height);
-               self.process_chain_data(header, &[], |monitor, txdata| {
+               self.process_chain_data(header, Some(height), &[], |monitor, txdata| {
                        // While in practice there shouldn't be any recursive calls when given empty txdata,
                        // it's still possible if a chain::Filter implementation returns a transaction.
                        debug_assert!(txdata.is_empty());
@@ -483,26 +592,29 @@ where C::Target: chain::Filter,
                                return Err(ChannelMonitorUpdateErr::PermanentFailure)},
                        hash_map::Entry::Vacant(e) => e,
                };
+               log_trace!(self.logger, "Got new ChannelMonitor for channel {}", log_funding_info!(monitor));
                let update_id = MonitorUpdateId::from_new_monitor(&monitor);
                let mut pending_monitor_updates = Vec::new();
                let persist_res = self.persister.persist_new_channel(funding_outpoint, &monitor, update_id);
                if persist_res.is_err() {
-                       log_error!(self.logger, "Failed to persist new channel data: {:?}", persist_res);
+                       log_error!(self.logger, "Failed to persist new ChannelMonitor for channel {}: {:?}", log_funding_info!(monitor), persist_res);
+               } else {
+                       log_trace!(self.logger, "Finished persisting new ChannelMonitor for channel {}", log_funding_info!(monitor));
                }
                if persist_res == Err(ChannelMonitorUpdateErr::PermanentFailure) {
                        return persist_res;
                } else if persist_res.is_err() {
                        pending_monitor_updates.push(update_id);
                }
-               {
-                       let funding_txo = monitor.get_funding_txo();
-                       log_trace!(self.logger, "Got new Channel Monitor for channel {}", log_bytes!(funding_txo.0.to_channel_id()[..]));
-
-                       if let Some(ref chain_source) = self.chain_source {
-                               monitor.load_outputs_to_watch(chain_source);
-                       }
+               if let Some(ref chain_source) = self.chain_source {
+                       monitor.load_outputs_to_watch(chain_source);
                }
-               entry.insert(MonitorHolder { monitor, pending_monitor_updates: Mutex::new(pending_monitor_updates) });
+               entry.insert(MonitorHolder {
+                       monitor,
+                       pending_monitor_updates: Mutex::new(pending_monitor_updates),
+                       channel_perm_failed: AtomicBool::new(false),
+                       last_chain_persist_height: AtomicUsize::new(self.highest_chain_height.load(Ordering::Acquire)),
+               });
                persist_res
        }
 
@@ -518,31 +630,37 @@ where C::Target: chain::Filter,
                                // We should never ever trigger this from within ChannelManager. Technically a
                                // user could use this object with some proxying in between which makes this
                                // possible, but in tests and fuzzing, this should be a panic.
-                               #[cfg(any(test, feature = "fuzztarget"))]
+                               #[cfg(any(test, fuzzing))]
                                panic!("ChannelManager generated a channel update for a channel that was not yet registered!");
-                               #[cfg(not(any(test, feature = "fuzztarget")))]
+                               #[cfg(not(any(test, fuzzing)))]
                                Err(ChannelMonitorUpdateErr::PermanentFailure)
                        },
                        Some(monitor_state) => {
                                let monitor = &monitor_state.monitor;
-                               log_trace!(self.logger, "Updating Channel Monitor for channel {}", log_funding_info!(monitor));
+                               log_trace!(self.logger, "Updating ChannelMonitor for channel {}", log_funding_info!(monitor));
                                let update_res = monitor.update_monitor(&update, &self.broadcaster, &self.fee_estimator, &self.logger);
-                               if let Err(e) = &update_res {
-                                       log_error!(self.logger, "Failed to update channel monitor: {:?}", e);
+                               if update_res.is_err() {
+                                       log_error!(self.logger, "Failed to update ChannelMonitor for channel {}.", log_funding_info!(monitor));
                                }
                                // Even if updating the monitor returns an error, the monitor's state will
                                // still be changed. So, persist the updated monitor despite the error.
                                let update_id = MonitorUpdateId::from_monitor_update(&update);
                                let mut pending_monitor_updates = monitor_state.pending_monitor_updates.lock().unwrap();
-                               let persist_res = self.persister.update_persisted_channel(funding_txo, &update, monitor, update_id);
+                               let persist_res = self.persister.update_persisted_channel(funding_txo, &Some(update), monitor, update_id);
                                if let Err(e) = persist_res {
                                        if e == ChannelMonitorUpdateErr::TemporaryFailure {
                                                pending_monitor_updates.push(update_id);
+                                       } else {
+                                               monitor_state.channel_perm_failed.store(true, Ordering::Release);
                                        }
-                                       log_error!(self.logger, "Failed to persist channel monitor update: {:?}", e);
+                                       log_error!(self.logger, "Failed to persist ChannelMonitor update for channel {}: {:?}", log_funding_info!(monitor), e);
+                               } else {
+                                       log_trace!(self.logger, "Finished persisting ChannelMonitor update for channel {}", log_funding_info!(monitor));
                                }
                                if update_res.is_err() {
                                        Err(ChannelMonitorUpdateErr::PermanentFailure)
+                               } else if monitor_state.channel_perm_failed.load(Ordering::Acquire) {
+                                       Err(ChannelMonitorUpdateErr::PermanentFailure)
                                } else {
                                        persist_res
                                }
@@ -553,7 +671,31 @@ where C::Target: chain::Filter,
        fn release_pending_monitor_events(&self) -> Vec<MonitorEvent> {
                let mut pending_monitor_events = self.pending_monitor_events.lock().unwrap().split_off(0);
                for monitor_state in self.monitors.read().unwrap().values() {
-                       pending_monitor_events.append(&mut monitor_state.monitor.get_and_clear_pending_monitor_events());
+                       let is_pending_monitor_update = monitor_state.has_pending_chainsync_updates(&monitor_state.pending_monitor_updates.lock().unwrap());
+                       if is_pending_monitor_update &&
+                                       monitor_state.last_chain_persist_height.load(Ordering::Acquire) + LATENCY_GRACE_PERIOD_BLOCKS as usize
+                                               > self.highest_chain_height.load(Ordering::Acquire)
+                       {
+                               log_info!(self.logger, "A Channel Monitor sync is still in progress, refusing to provide monitor events!");
+                       } else {
+                               if monitor_state.channel_perm_failed.load(Ordering::Acquire) {
+                                       // If a `UpdateOrigin::ChainSync` persistence failed with `PermanentFailure`,
+                                       // we don't really know if the latest `ChannelMonitor` state is on disk or not.
+                                       // We're supposed to hold monitor updates until the latest state is on disk to
+                                       // avoid duplicate events, but the user told us persistence is screw-y and may
+                                       // not complete. We can't hold events forever because we may learn some payment
+                                       // preimage, so instead we just log and hope the user complied with the
+                                       // `PermanentFailure` requirements of having at least the local-disk copy
+                                       // updated.
+                                       log_info!(self.logger, "A Channel Monitor sync returned PermanentFailure. Returning monitor events but duplicate events may appear after reload!");
+                               }
+                               if is_pending_monitor_update {
+                                       log_error!(self.logger, "A ChannelMonitor sync took longer than {} blocks to complete.", LATENCY_GRACE_PERIOD_BLOCKS);
+                                       log_error!(self.logger, "   To avoid funds-loss, we are allowing monitor updates to be released.");
+                                       log_error!(self.logger, "   This may cause duplicate payment events to be generated.");
+                               }
+                               pending_monitor_events.append(&mut monitor_state.monitor.get_and_clear_pending_monitor_events());
+                       }
                }
                pending_monitor_events
        }
@@ -585,10 +727,18 @@ impl<ChannelSigner: Sign, C: Deref, T: Deref, F: Deref, L: Deref, P: Deref> even
 
 #[cfg(test)]
 mod tests {
-       use ::{check_added_monitors, get_local_commitment_txn};
+       use bitcoin::BlockHeader;
+       use ::{check_added_monitors, check_closed_broadcast, check_closed_event};
+       use ::{expect_payment_sent, expect_payment_sent_without_paths, expect_payment_path_successful, get_event_msg};
+       use ::{get_htlc_update_msgs, get_local_commitment_txn, get_revoke_commit_msgs, get_route_and_payment_hash, unwrap_send_err};
+       use chain::{ChannelMonitorUpdateErr, Confirm, Watch};
+       use chain::channelmonitor::LATENCY_GRACE_PERIOD_BLOCKS;
+       use ln::channelmanager::PaymentSendFailure;
        use ln::features::InitFeatures;
        use ln::functional_test_utils::*;
-       use util::events::MessageSendEventsProvider;
+       use ln::msgs::ChannelMessageHandler;
+       use util::errors::APIError;
+       use util::events::{ClosureReason, MessageSendEvent, MessageSendEventsProvider};
        use util::test_utils::{OnRegisterOutput, TxOutReference};
 
        /// Tests that in-block dependent transactions are processed by `block_connected` when not
@@ -633,4 +783,180 @@ mod tests {
                nodes[1].node.get_and_clear_pending_msg_events();
                nodes[1].node.get_and_clear_pending_events();
        }
+
+       #[test]
+       fn test_async_ooo_offchain_updates() {
+               // Test that if we have multiple offchain updates being persisted and they complete
+               // out-of-order, the ChainMonitor waits until all have completed before informing the
+               // ChannelManager.
+               let chanmon_cfgs = create_chanmon_cfgs(2);
+               let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+               let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+               let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+               create_announced_chan_between_nodes(&nodes, 0, 1, InitFeatures::known(), InitFeatures::known());
+
+               // Route two payments to be claimed at the same time.
+               let payment_preimage_1 = route_payment(&nodes[0], &[&nodes[1]], 1_000_000).0;
+               let payment_preimage_2 = route_payment(&nodes[0], &[&nodes[1]], 1_000_000).0;
+
+               chanmon_cfgs[1].persister.offchain_monitor_updates.lock().unwrap().clear(); // forget updates recorded during setup
+               chanmon_cfgs[1].persister.set_update_ret(Err(ChannelMonitorUpdateErr::TemporaryFailure));
+
+               nodes[1].node.claim_funds(payment_preimage_1);
+               check_added_monitors!(nodes[1], 1);
+               nodes[1].node.claim_funds(payment_preimage_2);
+               check_added_monitors!(nodes[1], 1);
+
+               chanmon_cfgs[1].persister.set_update_ret(Ok(()));
+
+               let persistences = chanmon_cfgs[1].persister.offchain_monitor_updates.lock().unwrap().clone();
+               assert_eq!(persistences.len(), 1); // a single funding outpoint, as only one channel exists
+               let (funding_txo, updates) = persistences.iter().next().unwrap();
+               assert_eq!(updates.len(), 2); // one recorded update per claim above
+
+               // Note that updates is a HashMap so the ordering here is actually random. The test should pass
+               // with either ordering; if it fails intermittently, it depends on the ordering of updates.
+               let mut update_iter = updates.iter();
+               nodes[1].chain_monitor.chain_monitor.channel_monitor_updated(*funding_txo, update_iter.next().unwrap().clone()).unwrap();
+               assert!(nodes[1].chain_monitor.release_pending_monitor_events().is_empty()); // one update is still outstanding
+               assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); // so no messages may be sent yet
+               nodes[1].chain_monitor.chain_monitor.channel_monitor_updated(*funding_txo, update_iter.next().unwrap().clone()).unwrap();
+
+               // Now manually walk the commitment signed dance - because we claimed two payments
+               // back-to-back it doesn't fit into the neat walk commitment_signed_dance does.
+
+               let updates = get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id());
+               nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &updates.update_fulfill_htlcs[0]);
+               expect_payment_sent_without_paths!(nodes[0], payment_preimage_1);
+               nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &updates.commitment_signed);
+               check_added_monitors!(nodes[0], 1);
+               let (as_first_raa, as_first_update) = get_revoke_commit_msgs!(nodes[0], nodes[1].node.get_our_node_id());
+
+               nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_first_raa);
+               check_added_monitors!(nodes[1], 1);
+               let bs_second_updates = get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id());
+               nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_first_update);
+               check_added_monitors!(nodes[1], 1);
+               let bs_first_raa = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, nodes[0].node.get_our_node_id());
+
+               nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_second_updates.update_fulfill_htlcs[0]);
+               expect_payment_sent_without_paths!(nodes[0], payment_preimage_2);
+               nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_second_updates.commitment_signed);
+               check_added_monitors!(nodes[0], 1);
+               nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_first_raa);
+               expect_payment_path_successful!(nodes[0]);
+               check_added_monitors!(nodes[0], 1);
+               let (as_second_raa, as_second_update) = get_revoke_commit_msgs!(nodes[0], nodes[1].node.get_our_node_id());
+
+               nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_second_raa);
+               check_added_monitors!(nodes[1], 1);
+               nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_second_update);
+               check_added_monitors!(nodes[1], 1);
+               let bs_second_raa = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, nodes[0].node.get_our_node_id());
+
+               nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_second_raa);
+               expect_payment_path_successful!(nodes[0]);
+               check_added_monitors!(nodes[0], 1);
+       }
+
+       fn do_chainsync_pauses_events(block_timeout: bool) {
+               // When a chainsync monitor update is pending, any `MonitorEvent`s should be held before being
+               // passed upstream to a `ChannelManager` via `Watch::release_pending_monitor_events`. This
+               // tests that behavior, with release forced by later blocks (`block_timeout`) or by completion.
+               let chanmon_cfgs = create_chanmon_cfgs(2);
+               let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+               let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+               let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+               let channel = create_announced_chan_between_nodes(
+                       &nodes, 0, 1, InitFeatures::known(), InitFeatures::known());
+
+               // Get a route for later and rebalance the channel somewhat
+               send_payment(&nodes[0], &[&nodes[1]], 10_000_000);
+               let (route, second_payment_hash, _, second_payment_secret) = get_route_and_payment_hash!(nodes[0], nodes[1], 100_000);
+
+               // First route a payment that we will claim on chain and give the recipient the preimage.
+               let payment_preimage = route_payment(&nodes[0], &[&nodes[1]], 1_000_000).0;
+               nodes[1].node.claim_funds(payment_preimage);
+               nodes[1].node.get_and_clear_pending_msg_events();
+               check_added_monitors!(nodes[1], 1);
+               let remote_txn = get_local_commitment_txn!(nodes[1], channel.2);
+               assert_eq!(remote_txn.len(), 2);
+
+               // Temp-fail the block connection which will hold the channel-closed event
+               chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
+               chanmon_cfgs[0].persister.set_update_ret(Err(ChannelMonitorUpdateErr::TemporaryFailure));
+
+               // Connect B's commitment transaction, but only to the ChainMonitor/ChannelMonitor. The
+               // channel is now closed, but the ChannelManager doesn't know that yet.
+               let new_header = BlockHeader {
+                       version: 2, time: 0, bits: 0, nonce: 0,
+                       prev_blockhash: nodes[0].best_block_info().0,
+                       merkle_root: Default::default() };
+               nodes[0].chain_monitor.chain_monitor.transactions_confirmed(&new_header,
+                       &[(0, &remote_txn[0]), (1, &remote_txn[1])], nodes[0].best_block_info().1 + 1);
+               assert!(nodes[0].chain_monitor.release_pending_monitor_events().is_empty());
+               nodes[0].chain_monitor.chain_monitor.best_block_updated(&new_header, nodes[0].best_block_info().1 + 1);
+               assert!(nodes[0].chain_monitor.release_pending_monitor_events().is_empty());
+
+               // If the ChannelManager tries to update the channel, however, the ChainMonitor will pass
+               // the update through to the ChannelMonitor which will refuse it (as the channel is closed).
+               chanmon_cfgs[0].persister.set_update_ret(Ok(()));
+               unwrap_send_err!(nodes[0].node.send_payment(&route, second_payment_hash, &Some(second_payment_secret)),
+                       true, APIError::ChannelUnavailable { ref err },
+                       assert!(err.contains("ChannelMonitor storage failure")));
+               check_added_monitors!(nodes[0], 2); // After the failure we generate a close-channel monitor update
+               check_closed_broadcast!(nodes[0], true);
+               check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError { err: "ChannelMonitor storage failure".to_string() });
+
+               // However, as the ChainMonitor is still waiting for the original persistence to complete,
+               // it won't yet release the MonitorEvents.
+               assert!(nodes[0].chain_monitor.release_pending_monitor_events().is_empty());
+
+               if block_timeout {
+                       // After `LATENCY_GRACE_PERIOD_BLOCKS` blocks, pending MonitorEvents should be released either way.
+                       let latest_header = BlockHeader {
+                               version: 2, time: 0, bits: 0, nonce: 0,
+                               prev_blockhash: nodes[0].best_block_info().0,
+                               merkle_root: Default::default() };
+                       nodes[0].chain_monitor.chain_monitor.best_block_updated(&latest_header, nodes[0].best_block_info().1 + LATENCY_GRACE_PERIOD_BLOCKS);
+               } else { // Otherwise report every recorded chain-sync persistence as completed.
+                       let persistences = chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clone();
+                       for (funding_outpoint, update_ids) in persistences {
+                               for update_id in update_ids {
+                                       nodes[0].chain_monitor.chain_monitor.channel_monitor_updated(funding_outpoint, update_id).unwrap();
+                               }
+                       }
+               }
+
+               expect_payment_sent!(nodes[0], payment_preimage); // either path must finally surface the on-chain claim
+       }
+
+       #[test]
+       fn chainsync_pauses_events() { // Runs the held-MonitorEvents scenario through both release paths.
+               do_chainsync_pauses_events(false); // released by explicit `channel_monitor_updated` calls
+               do_chainsync_pauses_events(true); // released by a later `best_block_updated` call
+       }
+
+       #[test]
+       fn update_during_chainsync_fails_channel() { // A PermanentFailure persist result during chain sync must close the channel.
+               let chanmon_cfgs = create_chanmon_cfgs(2);
+               let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+               let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+               let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+               create_announced_chan_between_nodes(&nodes, 0, 1, InitFeatures::known(), InitFeatures::known());
+
+               chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
+               chanmon_cfgs[0].persister.set_update_ret(Err(ChannelMonitorUpdateErr::PermanentFailure));
+
+               connect_blocks(&nodes[0], 1); // presumably triggers a chain-sync persist, which now fails
+               // Before processing events, the ChannelManager will still think the Channel is open and
+               // there won't be any ChannelMonitorUpdates
+               assert_eq!(nodes[0].node.list_channels().len(), 1);
+               check_added_monitors!(nodes[0], 0);
+               // ... however, once we fetch the pending events, the channel will close, creating a
+               // channel-closed ChannelMonitorUpdate.
+               check_closed_broadcast!(nodes[0], true);
+               check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError { err: "Failed to persist ChannelMonitor update during chain sync".to_string() });
+               check_added_monitors!(nodes[0], 1);
+       }
 }