Merge pull request #1149 from jkczyz/2021-11-network-graph
[rust-lightning] / lightning / src / chain / chainmonitor.rs
index 323598f2061c32829b1c13e42c50bde30fee2432..71b0b3e506456d36ec16345b70e10f8e1cf12746 100644 (file)
@@ -29,9 +29,10 @@ use bitcoin::hash_types::Txid;
 use chain;
 use chain::{ChannelMonitorUpdateErr, Filter, WatchedOutput};
 use chain::chaininterface::{BroadcasterInterface, FeeEstimator};
-use chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, Balance, MonitorEvent, TransactionOutputs};
+use chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, Balance, MonitorEvent, TransactionOutputs, LATENCY_GRACE_PERIOD_BLOCKS};
 use chain::transaction::{OutPoint, TransactionData};
 use chain::keysinterface::Sign;
+use util::atomic_counter::AtomicCounter;
 use util::logger::Logger;
 use util::errors::APIError;
 use util::events;
@@ -41,10 +42,19 @@ use ln::channelmanager::ChannelDetails;
 use prelude::*;
 use sync::{RwLock, RwLockReadGuard, Mutex, MutexGuard};
 use core::ops::Deref;
+use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 
 #[derive(Clone, Copy, Hash, PartialEq, Eq)]
+/// A specific update's ID stored in a `MonitorUpdateId`, separated out to make the contents
+/// entirely opaque.
 enum UpdateOrigin {
+       /// An update that was generated by the `ChannelManager` (via our `chain::Watch`
+       /// implementation). This corresponds to an actual [`ChannelMonitorUpdate::update_id`] field
+       /// and [`ChannelMonitor::get_latest_update_id`].
        OffChain(u64),
+       /// An update that was generated during blockchain processing. The ID here is specific to the
+       /// generating [`ChainMonitor`] and does *not* correspond to any on-disk IDs.
+       ChainSync(u64),
 }
 
 /// An opaque identifier describing a specific [`Persist`] method call.
@@ -82,10 +92,12 @@ impl MonitorUpdateId {
 ///    closed without broadcasting the latest state. See
 ///    [`ChannelMonitorUpdateErr::PermanentFailure`] for more details.
 pub trait Persist<ChannelSigner: Sign> {
-       /// Persist a new channel's data. The data can be stored any way you want, but the identifier
-       /// provided by LDK is the channel's outpoint (and it is up to you to maintain a correct
-       /// mapping between the outpoint and the stored channel data). Note that you **must** persist
-       /// every new monitor to disk.
+       /// Persist a new channel's data in response to a [`chain::Watch::watch_channel`] call. This is
+       /// called by [`ChannelManager`] for new channels, or may be called directly, e.g. on startup.
+       ///
+       /// The data can be stored any way you want, but the identifier provided by LDK is the
+       /// channel's outpoint (and it is up to you to maintain a correct mapping between the outpoint
+       /// and the stored channel data). Note that you **must** persist every new monitor to disk.
        ///
        /// The `update_id` is used to identify this call to [`ChainMonitor::channel_monitor_updated`],
        /// if you return [`ChannelMonitorUpdateErr::TemporaryFailure`].
@@ -93,6 +105,7 @@ pub trait Persist<ChannelSigner: Sign> {
        /// See [`Writeable::write`] on [`ChannelMonitor`] for writing out a `ChannelMonitor`
        /// and [`ChannelMonitorUpdateErr`] for requirements when returning errors.
        ///
+       /// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
        /// [`Writeable::write`]: crate::util::ser::Writeable::write
        fn persist_new_channel(&self, channel_id: OutPoint, data: &ChannelMonitor<ChannelSigner>, update_id: MonitorUpdateId) -> Result<(), ChannelMonitorUpdateErr>;
 
@@ -103,6 +116,12 @@ pub trait Persist<ChannelSigner: Sign> {
        /// updated monitor itself to disk/backups. See the [`Persist`] trait documentation for more
        /// details.
        ///
+       /// During blockchain synchronization operations, this may be called with no
+       /// [`ChannelMonitorUpdate`], in which case the full [`ChannelMonitor`] needs to be persisted.
+       /// Note that after the full [`ChannelMonitor`] is persisted any previous
+       /// [`ChannelMonitorUpdate`]s which were persisted should be discarded - they can no longer be
+       /// applied to the persisted [`ChannelMonitor`] as they were already applied.
+       ///
        /// If an implementer chooses to persist the updates only, they need to make
        /// sure that all the updates are applied to the `ChannelMonitors` *before*
        /// the set of channel monitors is given to the `ChannelManager`
@@ -123,7 +142,7 @@ pub trait Persist<ChannelSigner: Sign> {
        /// [`ChannelMonitorUpdateErr`] for requirements when returning errors.
        ///
        /// [`Writeable::write`]: crate::util::ser::Writeable::write
-       fn update_persisted_channel(&self, channel_id: OutPoint, update: &ChannelMonitorUpdate, data: &ChannelMonitor<ChannelSigner>, update_id: MonitorUpdateId) -> Result<(), ChannelMonitorUpdateErr>;
+       fn update_persisted_channel(&self, channel_id: OutPoint, update: &Option<ChannelMonitorUpdate>, data: &ChannelMonitor<ChannelSigner>, update_id: MonitorUpdateId) -> Result<(), ChannelMonitorUpdateErr>;
 }
 
 struct MonitorHolder<ChannelSigner: Sign> {
@@ -134,7 +153,31 @@ struct MonitorHolder<ChannelSigner: Sign> {
        /// update_persisted_channel, the user returns a TemporaryFailure, and then calls
        /// channel_monitor_updated immediately, racing our insertion of the pending update into the
        /// contained Vec.
+       ///
+       /// Beyond the synchronization of updates themselves, we cannot handle user events until after
+       /// any chain updates have been stored on disk. Thus, we scan this list when returning updates
+       /// to the ChannelManager, refusing to return any updates for a ChannelMonitor which is still
+       /// being persisted fully to disk after a chain update.
+       ///
+       /// This avoids the possibility of handling, e.g. an on-chain claim, generating a claim monitor
+       /// event, resulting in the relevant ChannelManager generating a PaymentSent event and dropping
+       /// the pending payment entry, and then reloading before the monitor is persisted, resulting in
+       /// the ChannelManager re-adding the same payment entry, before the same block is replayed,
+       /// resulting in a duplicate PaymentSent event.
        pending_monitor_updates: Mutex<Vec<MonitorUpdateId>>,
+       /// When the user returns a PermanentFailure error from an update_persisted_channel call during
+       /// block processing, we inform the ChannelManager that the channel should be closed
+       /// asynchronously. In order to ensure no further changes happen before the ChannelManager has
+       /// processed the closure event, we set this to true and return PermanentFailure for any other
+       /// chain::Watch events.
+       channel_perm_failed: AtomicBool,
+       /// The last block height at which no [`UpdateOrigin::ChainSync`] monitor updates were present
+       /// in `pending_monitor_updates`.
+       /// If it's been more than [`LATENCY_GRACE_PERIOD_BLOCKS`] since we started waiting on a chain
+       /// sync event, we let monitor events return to `ChannelManager` because we cannot hold them up
+       /// forever or we'll end up with HTLC preimages waiting to feed back into an upstream channel
+       /// forever, risking funds loss.
+       last_chain_persist_height: AtomicUsize,
 }
 
 impl<ChannelSigner: Sign> MonitorHolder<ChannelSigner> {
@@ -142,6 +185,10 @@ impl<ChannelSigner: Sign> MonitorHolder<ChannelSigner> {
                pending_monitor_updates_lock.iter().any(|update_id|
                        if let UpdateOrigin::OffChain(_) = update_id.contents { true } else { false })
        }
+       fn has_pending_chainsync_updates(&self, pending_monitor_updates_lock: &MutexGuard<Vec<MonitorUpdateId>>) -> bool {
+               pending_monitor_updates_lock.iter().any(|update_id|
+                       if let UpdateOrigin::ChainSync(_) = update_id.contents { true } else { false })
+       }
 }
 
 /// A read-only reference to a current ChannelMonitor.
@@ -177,12 +224,20 @@ pub struct ChainMonitor<ChannelSigner: Sign, C: Deref, T: Deref, F: Deref, L: De
         P::Target: Persist<ChannelSigner>,
 {
        monitors: RwLock<HashMap<OutPoint, MonitorHolder<ChannelSigner>>>,
+       /// When we generate a [`MonitorUpdateId`] for a chain-event monitor persistence, we need a
+       /// unique ID, which we calculate by simply getting the next value from this counter. Note that
+       /// the ID is never persisted so it's ok that they reset on restart.
+       sync_persistence_id: AtomicCounter,
        chain_source: Option<C>,
        broadcaster: T,
        logger: L,
        fee_estimator: F,
        persister: P,
+       /// "User-provided" (i.e. persistence-completion/-failed) [`MonitorEvent`]s. These came directly
+       /// from the user and not from a [`ChannelMonitor`].
        pending_monitor_events: Mutex<Vec<MonitorEvent>>,
+       /// The best block height seen, used as a proxy for the passage of time.
+       highest_chain_height: AtomicUsize,
 }
 
 impl<ChannelSigner: Sign, C: Deref, T: Deref, F: Deref, L: Deref, P: Deref> ChainMonitor<ChannelSigner, C, T, F, L, P>
@@ -201,31 +256,75 @@ where C::Target: chain::Filter,
        /// calls must not exclude any transactions matching the new outputs nor any in-block
        /// descendants of such transactions. It is not necessary to re-fetch the block to obtain
        /// updated `txdata`.
-       fn process_chain_data<FN>(&self, header: &BlockHeader, txdata: &TransactionData, process: FN)
+       ///
+       /// Calls which represent a new blockchain tip height should set `best_height`.
+       fn process_chain_data<FN>(&self, header: &BlockHeader, best_height: Option<u32>, txdata: &TransactionData, process: FN)
        where
                FN: Fn(&ChannelMonitor<ChannelSigner>, &TransactionData) -> Vec<TransactionOutputs>
        {
                let mut dependent_txdata = Vec::new();
-               let monitor_states = self.monitors.read().unwrap();
-               for monitor_state in monitor_states.values() {
-                       let mut txn_outputs = process(&monitor_state.monitor, txdata);
+               {
+                       let monitor_states = self.monitors.write().unwrap();
+                       if let Some(height) = best_height {
+                               // If the best block height is being updated, update highest_chain_height under the
+                               // monitors write lock.
+                               let old_height = self.highest_chain_height.load(Ordering::Acquire);
+                               let new_height = height as usize;
+                               if new_height > old_height {
+                                       self.highest_chain_height.store(new_height, Ordering::Release);
+                               }
+                       }
 
-                       // Register any new outputs with the chain source for filtering, storing any dependent
-                       // transactions from within the block that previously had not been included in txdata.
-                       if let Some(ref chain_source) = self.chain_source {
-                               let block_hash = header.block_hash();
-                               for (txid, mut outputs) in txn_outputs.drain(..) {
-                                       for (idx, output) in outputs.drain(..) {
-                                               // Register any new outputs with the chain source for filtering and recurse
-                                               // if it indicates that there are dependent transactions within the block
-                                               // that had not been previously included in txdata.
-                                               let output = WatchedOutput {
-                                                       block_hash: Some(block_hash),
-                                                       outpoint: OutPoint { txid, index: idx as u16 },
-                                                       script_pubkey: output.script_pubkey,
-                                               };
-                                               if let Some(tx) = chain_source.register_output(output) {
-                                                       dependent_txdata.push(tx);
+                       for (funding_outpoint, monitor_state) in monitor_states.iter() {
+                               let monitor = &monitor_state.monitor;
+                               let mut txn_outputs;
+                               {
+                                       txn_outputs = process(monitor, txdata);
+                                       let update_id = MonitorUpdateId {
+                                               contents: UpdateOrigin::ChainSync(self.sync_persistence_id.get_increment()),
+                                       };
+                                       let mut pending_monitor_updates = monitor_state.pending_monitor_updates.lock().unwrap();
+                                       if let Some(height) = best_height {
+                                               if !monitor_state.has_pending_chainsync_updates(&pending_monitor_updates) {
+                                                       // If there are no ChainSync persists awaiting completion, go ahead and
+                                                       // set last_chain_persist_height here - we wouldn't want the first
+                                                       // TemporaryFailure to always immediately be considered "overly delayed".
+                                                       monitor_state.last_chain_persist_height.store(height as usize, Ordering::Release);
+                                               }
+                                       }
+
+                                       log_trace!(self.logger, "Syncing Channel Monitor for channel {}", log_funding_info!(monitor));
+                                       match self.persister.update_persisted_channel(*funding_outpoint, &None, monitor, update_id) {
+                                               Ok(()) =>
+                                                       log_trace!(self.logger, "Finished syncing Channel Monitor for channel {}", log_funding_info!(monitor)),
+                                               Err(ChannelMonitorUpdateErr::PermanentFailure) => {
+                                                       monitor_state.channel_perm_failed.store(true, Ordering::Release);
+                                                       self.pending_monitor_events.lock().unwrap().push(MonitorEvent::UpdateFailed(*funding_outpoint));
+                                               },
+                                               Err(ChannelMonitorUpdateErr::TemporaryFailure) => {
+                                                       log_debug!(self.logger, "Channel Monitor sync for channel {} in progress, holding events until completion!", log_funding_info!(monitor));
+                                                       pending_monitor_updates.push(update_id);
+                                               },
+                                       }
+                               }
+
+                               // Register any new outputs with the chain source for filtering, storing any dependent
+                               // transactions from within the block that previously had not been included in txdata.
+                               if let Some(ref chain_source) = self.chain_source {
+                                       let block_hash = header.block_hash();
+                                       for (txid, mut outputs) in txn_outputs.drain(..) {
+                                               for (idx, output) in outputs.drain(..) {
+                                                       // Register any new outputs with the chain source for filtering and recurse
+                                                       // if it indicates that there are dependent transactions within the block
+                                                       // that had not been previously included in txdata.
+                                                       let output = WatchedOutput {
+                                                               block_hash: Some(block_hash),
+                                                               outpoint: OutPoint { txid, index: idx as u16 },
+                                                               script_pubkey: output.script_pubkey,
+                                                       };
+                                                       if let Some(tx) = chain_source.register_output(output) {
+                                                               dependent_txdata.push(tx);
+                                                       }
                                                }
                                        }
                                }
@@ -237,7 +336,7 @@ where C::Target: chain::Filter,
                        dependent_txdata.sort_unstable_by_key(|(index, _tx)| *index);
                        dependent_txdata.dedup_by_key(|(index, _tx)| *index);
                        let txdata: Vec<_> = dependent_txdata.iter().map(|(index, tx)| (*index, tx)).collect();
-                       self.process_chain_data(header, &txdata, process);
+                       self.process_chain_data(header, None, &txdata, process); // We skip the best height the second go-around
                }
        }
 
@@ -251,12 +350,14 @@ where C::Target: chain::Filter,
        pub fn new(chain_source: Option<C>, broadcaster: T, logger: L, feeest: F, persister: P) -> Self {
                Self {
                        monitors: RwLock::new(HashMap::new()),
+                       sync_persistence_id: AtomicCounter::new(),
                        chain_source,
                        broadcaster,
                        logger,
                        fee_estimator: feeest,
                        persister,
                        pending_monitor_events: Mutex::new(Vec::new()),
+                       highest_chain_height: AtomicUsize::new(0),
                }
        }
 
@@ -337,7 +438,7 @@ where C::Target: chain::Filter,
                pending_monitor_updates.retain(|update_id| *update_id != completed_update_id);
 
                match completed_update_id {
-                       MonitorUpdateId { .. } => {
+                       MonitorUpdateId { contents: UpdateOrigin::OffChain(_) } => {
                                // Note that we only check for `UpdateOrigin::OffChain` failures here - if
                                // we're being told that a `UpdateOrigin::OffChain` monitor update completed,
                                // we only care about ensuring we don't tell the `ChannelManager` to restore
@@ -348,8 +449,9 @@ where C::Target: chain::Filter,
                                // `MonitorEvent`s from the monitor back to the `ChannelManager` until they
                                // complete.
                                let monitor_is_pending_updates = monitor_data.has_pending_offchain_updates(&pending_monitor_updates);
-                               if monitor_is_pending_updates {
-                                       // If there are still monitor updates pending, we cannot yet construct an
+                               if monitor_is_pending_updates || monitor_data.channel_perm_failed.load(Ordering::Acquire) {
+                                       // If there are still monitor updates pending (or an old monitor update
+                                       // finished after a later one perm-failed), we cannot yet construct an
                                        // UpdateCompleted event.
                                        return Ok(());
                                }
@@ -357,7 +459,14 @@ where C::Target: chain::Filter,
                                        funding_txo,
                                        monitor_update_id: monitor_data.monitor.get_latest_update_id(),
                                });
-                       }
+                       },
+                       MonitorUpdateId { contents: UpdateOrigin::ChainSync(_) } => {
+                               if !monitor_data.has_pending_chainsync_updates(&pending_monitor_updates) {
+                                       monitor_data.last_chain_persist_height.store(self.highest_chain_height.load(Ordering::Acquire), Ordering::Release);
+                                       // The next time release_pending_monitor_events is called, any events for this
+                                       // ChannelMonitor will be returned.
+                               }
+                       },
                }
                Ok(())
        }
@@ -396,7 +505,7 @@ where
                let header = &block.header;
                let txdata: Vec<_> = block.txdata.iter().enumerate().collect();
                log_debug!(self.logger, "New best block {} at height {} provided via block_connected", header.block_hash(), height);
-               self.process_chain_data(header, &txdata, |monitor, txdata| {
+               self.process_chain_data(header, Some(height), &txdata, |monitor, txdata| {
                        monitor.block_connected(
                                header, txdata, height, &*self.broadcaster, &*self.fee_estimator, &*self.logger)
                });
@@ -423,7 +532,7 @@ where
 {
        fn transactions_confirmed(&self, header: &BlockHeader, txdata: &TransactionData, height: u32) {
                log_debug!(self.logger, "{} provided transactions confirmed at height {} in block {}", txdata.len(), height, header.block_hash());
-               self.process_chain_data(header, txdata, |monitor, txdata| {
+               self.process_chain_data(header, None, txdata, |monitor, txdata| {
                        monitor.transactions_confirmed(
                                header, txdata, height, &*self.broadcaster, &*self.fee_estimator, &*self.logger)
                });
@@ -439,7 +548,7 @@ where
 
        fn best_block_updated(&self, header: &BlockHeader, height: u32) {
                log_debug!(self.logger, "New best block {} at height {} provided via best_block_updated", header.block_hash(), height);
-               self.process_chain_data(header, &[], |monitor, txdata| {
+               self.process_chain_data(header, Some(height), &[], |monitor, txdata| {
                        // While in practice there shouldn't be any recursive calls when given empty txdata,
                        // it's still possible if a chain::Filter implementation returns a transaction.
                        debug_assert!(txdata.is_empty());
@@ -502,7 +611,12 @@ where C::Target: chain::Filter,
                                monitor.load_outputs_to_watch(chain_source);
                        }
                }
-               entry.insert(MonitorHolder { monitor, pending_monitor_updates: Mutex::new(pending_monitor_updates) });
+               entry.insert(MonitorHolder {
+                       monitor,
+                       pending_monitor_updates: Mutex::new(pending_monitor_updates),
+                       channel_perm_failed: AtomicBool::new(false),
+                       last_chain_persist_height: AtomicUsize::new(self.highest_chain_height.load(Ordering::Acquire)),
+               });
                persist_res
        }
 
@@ -534,15 +648,19 @@ where C::Target: chain::Filter,
                                // still be changed. So, persist the updated monitor despite the error.
                                let update_id = MonitorUpdateId::from_monitor_update(&update);
                                let mut pending_monitor_updates = monitor_state.pending_monitor_updates.lock().unwrap();
-                               let persist_res = self.persister.update_persisted_channel(funding_txo, &update, monitor, update_id);
+                               let persist_res = self.persister.update_persisted_channel(funding_txo, &Some(update), monitor, update_id);
                                if let Err(e) = persist_res {
                                        if e == ChannelMonitorUpdateErr::TemporaryFailure {
                                                pending_monitor_updates.push(update_id);
+                                       } else {
+                                               monitor_state.channel_perm_failed.store(true, Ordering::Release);
                                        }
                                        log_error!(self.logger, "Failed to persist channel monitor update: {:?}", e);
                                }
                                if update_res.is_err() {
                                        Err(ChannelMonitorUpdateErr::PermanentFailure)
+                               } else if monitor_state.channel_perm_failed.load(Ordering::Acquire) {
+                                       Err(ChannelMonitorUpdateErr::PermanentFailure)
                                } else {
                                        persist_res
                                }
@@ -553,7 +671,31 @@ where C::Target: chain::Filter,
        fn release_pending_monitor_events(&self) -> Vec<MonitorEvent> {
                let mut pending_monitor_events = self.pending_monitor_events.lock().unwrap().split_off(0);
                for monitor_state in self.monitors.read().unwrap().values() {
-                       pending_monitor_events.append(&mut monitor_state.monitor.get_and_clear_pending_monitor_events());
+                       let is_pending_monitor_update = monitor_state.has_pending_chainsync_updates(&monitor_state.pending_monitor_updates.lock().unwrap());
+                       if is_pending_monitor_update &&
+                                       monitor_state.last_chain_persist_height.load(Ordering::Acquire) + LATENCY_GRACE_PERIOD_BLOCKS as usize
+                                               > self.highest_chain_height.load(Ordering::Acquire)
+                       {
+                               log_info!(self.logger, "A Channel Monitor sync is still in progress, refusing to provide monitor events!");
+                       } else {
+                               if monitor_state.channel_perm_failed.load(Ordering::Acquire) {
+                                       // If an `UpdateOrigin::ChainSync` persistence failed with `PermanentFailure`,
+                                       // we don't really know if the latest `ChannelMonitor` state is on disk or not.
+                                       // We're supposed to hold monitor updates until the latest state is on disk to
+                                       // avoid duplicate events, but the user told us persistence is screw-y and may
+                                       // not complete. We can't hold events forever because we may learn some payment
+                                       // preimage, so instead we just log and hope the user complied with the
+                                       // `PermanentFailure` requirements of having at least the local-disk copy
+                                       // updated.
+                                       log_info!(self.logger, "A Channel Monitor sync returned PermanentFailure. Returning monitor events but duplicate events may appear after reload!");
+                               }
+                               if is_pending_monitor_update {
+                                       log_error!(self.logger, "A ChannelMonitor sync took longer than {} blocks to complete.", LATENCY_GRACE_PERIOD_BLOCKS);
+                                       log_error!(self.logger, "   To avoid funds-loss, we are allowing monitor updates to be released.");
+                                       log_error!(self.logger, "   This may cause duplicate payment events to be generated.");
+                               }
+                               pending_monitor_events.append(&mut monitor_state.monitor.get_and_clear_pending_monitor_events());
+                       }
                }
                pending_monitor_events
        }