X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=lightning%2Fsrc%2Fchain%2Fchainmonitor.rs;h=aae260e735bdbae5c0a538af8024e7e2ef2a78cb;hb=f53d13bcb8220b3ce39e51a4d20beb23b3930d1f;hp=9e92264b0425dffe07049d9acaf4c80e75df20bc;hpb=4500270488e6ed918c5f6e07310eb4a384eb6e21;p=rust-lightning diff --git a/lightning/src/chain/chainmonitor.rs b/lightning/src/chain/chainmonitor.rs index 9e92264b..aae260e7 100644 --- a/lightning/src/chain/chainmonitor.rs +++ b/lightning/src/chain/chainmonitor.rs @@ -23,54 +23,104 @@ //! events. The remote server would make use of [`ChainMonitor`] for block processing and for //! servicing [`ChannelMonitor`] updates from the client. -use bitcoin::blockdata::block::{Block, BlockHeader}; +use bitcoin::blockdata::block::BlockHeader; use bitcoin::hash_types::Txid; use chain; use chain::{ChannelMonitorUpdateErr, Filter, WatchedOutput}; use chain::chaininterface::{BroadcasterInterface, FeeEstimator}; -use chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, Balance, MonitorEvent, TransactionOutputs}; +use chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, Balance, MonitorEvent, TransactionOutputs, LATENCY_GRACE_PERIOD_BLOCKS}; use chain::transaction::{OutPoint, TransactionData}; use chain::keysinterface::Sign; +use util::atomic_counter::AtomicCounter; use util::logger::Logger; +use util::errors::APIError; use util::events; use util::events::EventHandler; use ln::channelmanager::ChannelDetails; use prelude::*; -use sync::{RwLock, RwLockReadGuard, Mutex}; +use sync::{RwLock, RwLockReadGuard, Mutex, MutexGuard}; use core::ops::Deref; +use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; + +#[derive(Clone, Copy, Hash, PartialEq, Eq)] +/// A specific update's ID stored in a `MonitorUpdateId`, separated out to make the contents +/// entirely opaque. +enum UpdateOrigin { + /// An update that was generated by the `ChannelManager` (via our `chain::Watch` + /// implementation). 
This corresponds to an actual [`ChannelMonitorUpdate::update_id`] field + /// and [`ChannelMonitor::get_latest_update_id`]. + OffChain(u64), + /// An update that was generated during blockchain processing. The ID here is specific to the + /// generating [`ChainMonitor`] and does *not* correspond to any on-disk IDs. + ChainSync(u64), +} + +/// An opaque identifier describing a specific [`Persist`] method call. +#[derive(Clone, Copy, Hash, PartialEq, Eq)] +pub struct MonitorUpdateId { + contents: UpdateOrigin, +} + +impl MonitorUpdateId { + pub(crate) fn from_monitor_update(update: &ChannelMonitorUpdate) -> Self { + Self { contents: UpdateOrigin::OffChain(update.update_id) } + } + pub(crate) fn from_new_monitor(monitor: &ChannelMonitor) -> Self { + Self { contents: UpdateOrigin::OffChain(monitor.get_latest_update_id()) } + } +} /// `Persist` defines behavior for persisting channel monitors: this could mean /// writing once to disk, and/or uploading to one or more backup services. /// -/// Note that for every new monitor, you **must** persist the new `ChannelMonitor` -/// to disk/backups. And, on every update, you **must** persist either the -/// `ChannelMonitorUpdate` or the updated monitor itself. Otherwise, there is risk -/// of situations such as revoking a transaction, then crashing before this -/// revocation can be persisted, then unintentionally broadcasting a revoked -/// transaction and losing money. This is a risk because previous channel states -/// are toxic, so it's important that whatever channel state is persisted is -/// kept up-to-date. +/// Each method can return three possible values: +/// * If persistence (including any relevant `fsync()` calls) happens immediately, the +/// implementation should return `Ok(())`, indicating normal channel operation should continue. 
+/// * If persistence happens asynchronously, implementations should first ensure the +/// [`ChannelMonitor`] or [`ChannelMonitorUpdate`] are written durably to disk, and then return +/// `Err(ChannelMonitorUpdateErr::TemporaryFailure)` while the update continues in the +/// background. Once the update completes, [`ChainMonitor::channel_monitor_updated`] should be +/// called with the corresponding [`MonitorUpdateId`]. +/// +/// Note that unlike the direct [`chain::Watch`] interface, +/// [`ChainMonitor::channel_monitor_updated`] must be called once for *each* update which occurs. +/// +/// * If persistence fails for some reason, implementations should return +/// `Err(ChannelMonitorUpdateErr::PermanentFailure)`, in which case the channel will likely be +/// closed without broadcasting the latest state. See +/// [`ChannelMonitorUpdateErr::PermanentFailure`] for more details. pub trait Persist { - /// Persist a new channel's data. The data can be stored any way you want, but - /// the identifier provided by Rust-Lightning is the channel's outpoint (and - /// it is up to you to maintain a correct mapping between the outpoint and the - /// stored channel data). Note that you **must** persist every new monitor to - /// disk. See the `Persist` trait documentation for more details. + /// Persist a new channel's data in response to a [`chain::Watch::watch_channel`] call. This is + /// called by [`ChannelManager`] for new channels, or may be called directly, e.g. on startup. + /// + /// The data can be stored any way you want, but the identifier provided by LDK is the + /// channel's outpoint (and it is up to you to maintain a correct mapping between the outpoint + /// and the stored channel data). Note that you **must** persist every new monitor to disk. + /// + /// The `update_id` is used to identify this call to [`ChainMonitor::channel_monitor_updated`], + /// if you return [`ChannelMonitorUpdateErr::TemporaryFailure`]. 
/// /// See [`Writeable::write`] on [`ChannelMonitor`] for writing out a `ChannelMonitor` /// and [`ChannelMonitorUpdateErr`] for requirements when returning errors. /// + /// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager /// [`Writeable::write`]: crate::util::ser::Writeable::write - fn persist_new_channel(&self, id: OutPoint, data: &ChannelMonitor) -> Result<(), ChannelMonitorUpdateErr>; + fn persist_new_channel(&self, channel_id: OutPoint, data: &ChannelMonitor, update_id: MonitorUpdateId) -> Result<(), ChannelMonitorUpdateErr>; - /// Update one channel's data. The provided `ChannelMonitor` has already - /// applied the given update. + /// Update one channel's data. The provided [`ChannelMonitor`] has already applied the given + /// update. /// - /// Note that on every update, you **must** persist either the - /// `ChannelMonitorUpdate` or the updated monitor itself to disk/backups. See - /// the `Persist` trait documentation for more details. + /// Note that on every update, you **must** persist either the [`ChannelMonitorUpdate`] or the + /// updated monitor itself to disk/backups. See the [`Persist`] trait documentation for more + /// details. + /// + /// During blockchain synchronization operations, this may be called with no + /// [`ChannelMonitorUpdate`], in which case the full [`ChannelMonitor`] needs to be persisted. + /// Note that after the full [`ChannelMonitor`] is persisted any previous + /// [`ChannelMonitorUpdate`]s which were persisted should be discarded - they can no longer be + /// applied to the persisted [`ChannelMonitor`] as they were already applied. /// /// If an implementer chooses to persist the updates only, they need to make /// sure that all the updates are applied to the `ChannelMonitors` *before* @@ -84,16 +134,61 @@ pub trait Persist { /// them in batches. The size of each monitor grows `O(number of state updates)` /// whereas updates are small and `O(1)`. 
/// + /// The `update_id` is used to identify this call to [`ChainMonitor::channel_monitor_updated`], + /// if you return [`ChannelMonitorUpdateErr::TemporaryFailure`]. + /// /// See [`Writeable::write`] on [`ChannelMonitor`] for writing out a `ChannelMonitor`, /// [`Writeable::write`] on [`ChannelMonitorUpdate`] for writing out an update, and /// [`ChannelMonitorUpdateErr`] for requirements when returning errors. /// /// [`Writeable::write`]: crate::util::ser::Writeable::write - fn update_persisted_channel(&self, id: OutPoint, update: &ChannelMonitorUpdate, data: &ChannelMonitor) -> Result<(), ChannelMonitorUpdateErr>; + fn update_persisted_channel(&self, channel_id: OutPoint, update: &Option, data: &ChannelMonitor, update_id: MonitorUpdateId) -> Result<(), ChannelMonitorUpdateErr>; } struct MonitorHolder { monitor: ChannelMonitor, + /// The full set of pending monitor updates for this Channel. + /// + /// Note that this lock must be held during updates to prevent a race where we call + /// update_persisted_channel, the user returns a TemporaryFailure, and then calls + /// channel_monitor_updated immediately, racing our insertion of the pending update into the + /// contained Vec. + /// + /// Beyond the synchronization of updates themselves, we cannot handle user events until after + /// any chain updates have been stored on disk. Thus, we scan this list when returning updates + /// to the ChannelManager, refusing to return any updates for a ChannelMonitor which is still + /// being persisted fully to disk after a chain update. + /// + /// This avoids the possibility of handling, e.g. an on-chain claim, generating a claim monitor + /// event, resulting in the relevant ChannelManager generating a PaymentSent event and dropping + /// the pending payment entry, and then reloading before the monitor is persisted, resulting in + /// the ChannelManager re-adding the same payment entry, before the same block is replayed, + /// resulting in a duplicate PaymentSent event. 
+ pending_monitor_updates: Mutex>, + /// When the user returns a PermanentFailure error from an update_persisted_channel call during + /// block processing, we inform the ChannelManager that the channel should be closed + /// asynchronously. In order to ensure no further changes happen before the ChannelManager has + /// processed the closure event, we set this to true and return PermanentFailure for any other + /// chain::Watch events. + channel_perm_failed: AtomicBool, + /// The last block height at which no [`UpdateOrigin::ChainSync`] monitor updates were present + /// in `pending_monitor_updates`. + /// If it's been more than [`LATENCY_GRACE_PERIOD_BLOCKS`] since we started waiting on a chain + /// sync event, we let monitor events return to `ChannelManager` because we cannot hold them up + /// forever or we'll end up with HTLC preimages waiting to feed back into an upstream channel + /// forever, risking funds loss. + last_chain_persist_height: AtomicUsize, +} + +impl MonitorHolder { + fn has_pending_offchain_updates(&self, pending_monitor_updates_lock: &MutexGuard>) -> bool { + pending_monitor_updates_lock.iter().any(|update_id| + if let UpdateOrigin::OffChain(_) = update_id.contents { true } else { false }) + } + fn has_pending_chainsync_updates(&self, pending_monitor_updates_lock: &MutexGuard>) -> bool { + pending_monitor_updates_lock.iter().any(|update_id| + if let UpdateOrigin::ChainSync(_) = update_id.contents { true } else { false }) + } } /// A read-only reference to a current ChannelMonitor. @@ -129,12 +224,20 @@ pub struct ChainMonitor, { monitors: RwLock>>, + /// When we generate a [`MonitorUpdateId`] for a chain-event monitor persistence, we need a + /// unique ID, which we calculate by simply getting the next value from this counter. Note that + /// the ID is never persisted so it's ok that they reset on restart. 
+ sync_persistence_id: AtomicCounter, chain_source: Option, broadcaster: T, logger: L, fee_estimator: F, persister: P, + /// "User-provided" (ie persistence-completion/-failed) [`MonitorEvent`]s. These came directly + /// from the user and not from a [`ChannelMonitor`]. pending_monitor_events: Mutex>, + /// The best block height seen, used as a proxy for the passage of time. + highest_chain_height: AtomicUsize, } impl ChainMonitor @@ -153,31 +256,75 @@ where C::Target: chain::Filter, /// calls must not exclude any transactions matching the new outputs nor any in-block /// descendants of such transactions. It is not necessary to re-fetch the block to obtain /// updated `txdata`. - fn process_chain_data(&self, header: &BlockHeader, txdata: &TransactionData, process: FN) + /// + /// Calls which represent a new blockchain tip height should set `best_height`. + fn process_chain_data(&self, header: &BlockHeader, best_height: Option, txdata: &TransactionData, process: FN) where FN: Fn(&ChannelMonitor, &TransactionData) -> Vec { let mut dependent_txdata = Vec::new(); - let monitor_states = self.monitors.read().unwrap(); - for monitor_state in monitor_states.values() { - let mut txn_outputs = process(&monitor_state.monitor, txdata); - - // Register any new outputs with the chain source for filtering, storing any dependent - // transactions from within the block that previously had not been included in txdata. - if let Some(ref chain_source) = self.chain_source { - let block_hash = header.block_hash(); - for (txid, mut outputs) in txn_outputs.drain(..) { - for (idx, output) in outputs.drain(..) { - // Register any new outputs with the chain source for filtering and recurse - // if it indicates that there are dependent transactions within the block - // that had not been previously included in txdata. 
- let output = WatchedOutput { - block_hash: Some(block_hash), - outpoint: OutPoint { txid, index: idx as u16 }, - script_pubkey: output.script_pubkey, - }; - if let Some(tx) = chain_source.register_output(output) { - dependent_txdata.push(tx); + { + let monitor_states = self.monitors.write().unwrap(); + if let Some(height) = best_height { + // If the best block height is being updated, update highest_chain_height under the + // monitors write lock. + let old_height = self.highest_chain_height.load(Ordering::Acquire); + let new_height = height as usize; + if new_height > old_height { + self.highest_chain_height.store(new_height, Ordering::Release); + } + } + + for (funding_outpoint, monitor_state) in monitor_states.iter() { + let monitor = &monitor_state.monitor; + let mut txn_outputs; + { + txn_outputs = process(monitor, txdata); + let update_id = MonitorUpdateId { + contents: UpdateOrigin::ChainSync(self.sync_persistence_id.get_increment()), + }; + let mut pending_monitor_updates = monitor_state.pending_monitor_updates.lock().unwrap(); + if let Some(height) = best_height { + if !monitor_state.has_pending_chainsync_updates(&pending_monitor_updates) { + // If there are not ChainSync persists awaiting completion, go ahead and + // set last_chain_persist_height here - we wouldn't want the first + // TemporaryFailure to always immediately be considered "overly delayed". 
+ monitor_state.last_chain_persist_height.store(height as usize, Ordering::Release); + } + } + + log_trace!(self.logger, "Syncing Channel Monitor for channel {}", log_funding_info!(monitor)); + match self.persister.update_persisted_channel(*funding_outpoint, &None, monitor, update_id) { + Ok(()) => + log_trace!(self.logger, "Finished syncing Channel Monitor for channel {}", log_funding_info!(monitor)), + Err(ChannelMonitorUpdateErr::PermanentFailure) => { + monitor_state.channel_perm_failed.store(true, Ordering::Release); + self.pending_monitor_events.lock().unwrap().push(MonitorEvent::UpdateFailed(*funding_outpoint)); + }, + Err(ChannelMonitorUpdateErr::TemporaryFailure) => { + log_debug!(self.logger, "Channel Monitor sync for channel {} in progress, holding events until completion!", log_funding_info!(monitor)); + pending_monitor_updates.push(update_id); + }, + } + } + + // Register any new outputs with the chain source for filtering, storing any dependent + // transactions from within the block that previously had not been included in txdata. + if let Some(ref chain_source) = self.chain_source { + let block_hash = header.block_hash(); + for (txid, mut outputs) in txn_outputs.drain(..) { + for (idx, output) in outputs.drain(..) { + // Register any new outputs with the chain source for filtering and recurse + // if it indicates that there are dependent transactions within the block + // that had not been previously included in txdata. 
+ let output = WatchedOutput { + block_hash: Some(block_hash), + outpoint: OutPoint { txid, index: idx as u16 }, + script_pubkey: output.script_pubkey, + }; + if let Some(tx) = chain_source.register_output(output) { + dependent_txdata.push(tx); + } } } } @@ -189,7 +336,7 @@ where C::Target: chain::Filter, dependent_txdata.sort_unstable_by_key(|(index, _tx)| *index); dependent_txdata.dedup_by_key(|(index, _tx)| *index); let txdata: Vec<_> = dependent_txdata.iter().map(|(index, tx)| (*index, tx)).collect(); - self.process_chain_data(header, &txdata, process); + self.process_chain_data(header, None, &txdata, process); // We skip the best height the second go-around } } @@ -203,12 +350,14 @@ where C::Target: chain::Filter, pub fn new(chain_source: Option, broadcaster: T, logger: L, feeest: F, persister: P) -> Self { Self { monitors: RwLock::new(HashMap::new()), + sync_persistence_id: AtomicCounter::new(), chain_source, broadcaster, logger, fee_estimator: feeest, persister, pending_monitor_events: Mutex::new(Vec::new()), + highest_chain_height: AtomicUsize::new(0), } } @@ -267,27 +416,73 @@ where C::Target: chain::Filter, /// Indicates the persistence of a [`ChannelMonitor`] has completed after /// [`ChannelMonitorUpdateErr::TemporaryFailure`] was returned from an update operation. /// - /// All ChannelMonitor updates up to and including highest_applied_update_id must have been - /// fully committed in every copy of the given channels' ChannelMonitors. - /// - /// Note that there is no effect to calling with a highest_applied_update_id other than the - /// current latest ChannelMonitorUpdate and one call to this function after multiple - /// ChannelMonitorUpdateErr::TemporaryFailures is fine. The highest_applied_update_id field - /// exists largely only to prevent races between this and concurrent update_monitor calls. 
- /// /// Thus, the anticipated use is, at a high level: /// 1) This [`ChainMonitor`] calls [`Persist::update_persisted_channel`] which stores the /// update to disk and begins updating any remote (e.g. watchtower/backup) copies, /// returning [`ChannelMonitorUpdateErr::TemporaryFailure`], - /// 2) once all remote copies are updated, you call this function with the update_id that - /// completed, and once it is the latest the Channel will be re-enabled. - pub fn channel_monitor_updated(&self, funding_txo: OutPoint, highest_applied_update_id: u64) { + /// 2) once all remote copies are updated, you call this function with the + /// `completed_update_id` that completed, and once all pending updates have completed the + /// channel will be re-enabled. + // Note that we re-enable only after `UpdateOrigin::OffChain` updates complete, we don't + // care about `UpdateOrigin::ChainSync` updates for the channel state being updated. We + // only care about `UpdateOrigin::ChainSync` for returning `MonitorEvent`s. + /// + /// Returns an [`APIError::APIMisuseError`] if `funding_txo` does not match any currently + /// registered [`ChannelMonitor`]s. 
+ pub fn channel_monitor_updated(&self, funding_txo: OutPoint, completed_update_id: MonitorUpdateId) -> Result<(), APIError> { + let monitors = self.monitors.read().unwrap(); + let monitor_data = if let Some(mon) = monitors.get(&funding_txo) { mon } else { + return Err(APIError::APIMisuseError { err: format!("No ChannelMonitor matching funding outpoint {:?} found", funding_txo) }); + }; + let mut pending_monitor_updates = monitor_data.pending_monitor_updates.lock().unwrap(); + pending_monitor_updates.retain(|update_id| *update_id != completed_update_id); + + match completed_update_id { + MonitorUpdateId { contents: UpdateOrigin::OffChain(_) } => { + // Note that we only check for `UpdateOrigin::OffChain` failures here - if + // we're being told that a `UpdateOrigin::OffChain` monitor update completed, + // we only care about ensuring we don't tell the `ChannelManager` to restore + // the channel to normal operation until all `UpdateOrigin::OffChain` updates + // complete. + // If there's some `UpdateOrigin::ChainSync` update still pending that's okay + // - we can still update our channel state, just as long as we don't return + // `MonitorEvent`s from the monitor back to the `ChannelManager` until they + // complete. + let monitor_is_pending_updates = monitor_data.has_pending_offchain_updates(&pending_monitor_updates); + if monitor_is_pending_updates || monitor_data.channel_perm_failed.load(Ordering::Acquire) { + // If there are still monitor updates pending (or an old monitor update + // finished after a later one perm-failed), we cannot yet construct an + // UpdateCompleted event. 
+ return Ok(()); + } + self.pending_monitor_events.lock().unwrap().push(MonitorEvent::UpdateCompleted { + funding_txo, + monitor_update_id: monitor_data.monitor.get_latest_update_id(), + }); + }, + MonitorUpdateId { contents: UpdateOrigin::ChainSync(_) } => { + if !monitor_data.has_pending_chainsync_updates(&pending_monitor_updates) { + monitor_data.last_chain_persist_height.store(self.highest_chain_height.load(Ordering::Acquire), Ordering::Release); + // The next time release_pending_monitor_events is called, any events for this + // ChannelMonitor will be returned. + } + }, + } + Ok(()) + } + + /// This wrapper avoids having to update some of our tests for now as they assume the direct + /// chain::Watch API wherein we mark a monitor fully-updated by just calling + /// channel_monitor_updated once with the highest ID. + #[cfg(any(test, fuzzing))] + pub fn force_channel_monitor_updated(&self, funding_txo: OutPoint, monitor_update_id: u64) { self.pending_monitor_events.lock().unwrap().push(MonitorEvent::UpdateCompleted { - funding_txo, monitor_update_id: highest_applied_update_id + funding_txo, + monitor_update_id, }); } - #[cfg(any(test, feature = "fuzztarget", feature = "_test_utils"))] + #[cfg(any(test, fuzzing, feature = "_test_utils"))] pub fn get_and_clear_pending_events(&self) -> Vec { use util::events::EventsProvider; let events = core::cell::RefCell::new(Vec::new()); @@ -306,11 +501,9 @@ where L::Target: Logger, P::Target: Persist, { - fn block_connected(&self, block: &Block, height: u32) { - let header = &block.header; - let txdata: Vec<_> = block.txdata.iter().enumerate().collect(); + fn filtered_block_connected(&self, header: &BlockHeader, txdata: &TransactionData, height: u32) { log_debug!(self.logger, "New best block {} at height {} provided via block_connected", header.block_hash(), height); - self.process_chain_data(header, &txdata, |monitor, txdata| { + self.process_chain_data(header, Some(height), &txdata, |monitor, txdata| { 
monitor.block_connected( header, txdata, height, &*self.broadcaster, &*self.fee_estimator, &*self.logger) }); @@ -337,7 +530,7 @@ where { fn transactions_confirmed(&self, header: &BlockHeader, txdata: &TransactionData, height: u32) { log_debug!(self.logger, "{} provided transactions confirmed at height {} in block {}", txdata.len(), height, header.block_hash()); - self.process_chain_data(header, txdata, |monitor, txdata| { + self.process_chain_data(header, None, txdata, |monitor, txdata| { monitor.transactions_confirmed( header, txdata, height, &*self.broadcaster, &*self.fee_estimator, &*self.logger) }); @@ -353,7 +546,7 @@ where fn best_block_updated(&self, header: &BlockHeader, height: u32) { log_debug!(self.logger, "New best block {} at height {} provided via best_block_updated", header.block_hash(), height); - self.process_chain_data(header, &[], |monitor, txdata| { + self.process_chain_data(header, Some(height), &[], |monitor, txdata| { // While in practice there shouldn't be any recursive calls when given empty txdata, // it's still possible if a chain::Filter implementation returns a transaction. 
debug_assert!(txdata.is_empty()); @@ -397,22 +590,29 @@ where C::Target: chain::Filter, return Err(ChannelMonitorUpdateErr::PermanentFailure)}, hash_map::Entry::Vacant(e) => e, }; - let persist_res = self.persister.persist_new_channel(funding_outpoint, &monitor); + log_trace!(self.logger, "Got new ChannelMonitor for channel {}", log_funding_info!(monitor)); + let update_id = MonitorUpdateId::from_new_monitor(&monitor); + let mut pending_monitor_updates = Vec::new(); + let persist_res = self.persister.persist_new_channel(funding_outpoint, &monitor, update_id); if persist_res.is_err() { - log_error!(self.logger, "Failed to persist new channel data: {:?}", persist_res); + log_error!(self.logger, "Failed to persist new ChannelMonitor for channel {}: {:?}", log_funding_info!(monitor), persist_res); + } else { + log_trace!(self.logger, "Finished persisting new ChannelMonitor for channel {}", log_funding_info!(monitor)); } if persist_res == Err(ChannelMonitorUpdateErr::PermanentFailure) { return persist_res; + } else if persist_res.is_err() { + pending_monitor_updates.push(update_id); } - { - let funding_txo = monitor.get_funding_txo(); - log_trace!(self.logger, "Got new Channel Monitor for channel {}", log_bytes!(funding_txo.0.to_channel_id()[..])); - - if let Some(ref chain_source) = self.chain_source { - monitor.load_outputs_to_watch(chain_source); - } + if let Some(ref chain_source) = self.chain_source { + monitor.load_outputs_to_watch(chain_source); } - entry.insert(MonitorHolder { monitor }); + entry.insert(MonitorHolder { + monitor, + pending_monitor_updates: Mutex::new(pending_monitor_updates), + channel_perm_failed: AtomicBool::new(false), + last_chain_persist_height: AtomicUsize::new(self.highest_chain_height.load(Ordering::Acquire)), + }); persist_res } @@ -428,26 +628,37 @@ where C::Target: chain::Filter, // We should never ever trigger this from within ChannelManager. 
Technically a // user could use this object with some proxying in between which makes this // possible, but in tests and fuzzing, this should be a panic. - #[cfg(any(test, feature = "fuzztarget"))] + #[cfg(any(test, fuzzing))] panic!("ChannelManager generated a channel update for a channel that was not yet registered!"); - #[cfg(not(any(test, feature = "fuzztarget")))] + #[cfg(not(any(test, fuzzing)))] Err(ChannelMonitorUpdateErr::PermanentFailure) }, Some(monitor_state) => { let monitor = &monitor_state.monitor; - log_trace!(self.logger, "Updating Channel Monitor for channel {}", log_funding_info!(monitor)); + log_trace!(self.logger, "Updating ChannelMonitor for channel {}", log_funding_info!(monitor)); let update_res = monitor.update_monitor(&update, &self.broadcaster, &self.fee_estimator, &self.logger); - if let Err(e) = &update_res { - log_error!(self.logger, "Failed to update channel monitor: {:?}", e); + if update_res.is_err() { + log_error!(self.logger, "Failed to update ChannelMonitor for channel {}.", log_funding_info!(monitor)); } // Even if updating the monitor returns an error, the monitor's state will // still be changed. So, persist the updated monitor despite the error. 
- let persist_res = self.persister.update_persisted_channel(funding_txo, &update, monitor); - if let Err(ref e) = persist_res { - log_error!(self.logger, "Failed to persist channel monitor update: {:?}", e); + let update_id = MonitorUpdateId::from_monitor_update(&update); + let mut pending_monitor_updates = monitor_state.pending_monitor_updates.lock().unwrap(); + let persist_res = self.persister.update_persisted_channel(funding_txo, &Some(update), monitor, update_id); + if let Err(e) = persist_res { + if e == ChannelMonitorUpdateErr::TemporaryFailure { + pending_monitor_updates.push(update_id); + } else { + monitor_state.channel_perm_failed.store(true, Ordering::Release); + } + log_error!(self.logger, "Failed to persist ChannelMonitor update for channel {}: {:?}", log_funding_info!(monitor), e); + } else { + log_trace!(self.logger, "Finished persisting ChannelMonitor update for channel {}", log_funding_info!(monitor)); } if update_res.is_err() { Err(ChannelMonitorUpdateErr::PermanentFailure) + } else if monitor_state.channel_perm_failed.load(Ordering::Acquire) { + Err(ChannelMonitorUpdateErr::PermanentFailure) } else { persist_res } @@ -458,7 +669,31 @@ where C::Target: chain::Filter, fn release_pending_monitor_events(&self) -> Vec { let mut pending_monitor_events = self.pending_monitor_events.lock().unwrap().split_off(0); for monitor_state in self.monitors.read().unwrap().values() { - pending_monitor_events.append(&mut monitor_state.monitor.get_and_clear_pending_monitor_events()); + let is_pending_monitor_update = monitor_state.has_pending_chainsync_updates(&monitor_state.pending_monitor_updates.lock().unwrap()); + if is_pending_monitor_update && + monitor_state.last_chain_persist_height.load(Ordering::Acquire) + LATENCY_GRACE_PERIOD_BLOCKS as usize + > self.highest_chain_height.load(Ordering::Acquire) + { + log_info!(self.logger, "A Channel Monitor sync is still in progress, refusing to provide monitor events!"); + } else { + if 
monitor_state.channel_perm_failed.load(Ordering::Acquire) { + // If a `UpdateOrigin::ChainSync` persistence failed with `PermanentFailure`, + // we don't really know if the latest `ChannelMonitor` state is on disk or not. + // We're supposed to hold monitor updates until the latest state is on disk to + // avoid duplicate events, but the user told us persistence is screw-y and may + // not complete. We can't hold events forever because we may learn some payment + // preimage, so instead we just log and hope the user complied with the + // `PermanentFailure` requirements of having at least the local-disk copy + // updated. + log_info!(self.logger, "A Channel Monitor sync returned PermanentFailure. Returning monitor events but duplicate events may appear after reload!"); + } + if is_pending_monitor_update { + log_error!(self.logger, "A ChannelMonitor sync took longer than {} blocks to complete.", LATENCY_GRACE_PERIOD_BLOCKS); + log_error!(self.logger, " To avoid funds-loss, we are allowing monitor updates to be released."); + log_error!(self.logger, " This may cause duplicate payment events to be generated."); + } + pending_monitor_events.append(&mut monitor_state.monitor.get_and_clear_pending_monitor_events()); + } } pending_monitor_events } @@ -490,10 +725,18 @@ impl even #[cfg(test)] mod tests { - use ::{check_added_monitors, get_local_commitment_txn}; + use bitcoin::BlockHeader; + use ::{check_added_monitors, check_closed_broadcast, check_closed_event}; + use ::{expect_payment_sent, expect_payment_sent_without_paths, expect_payment_path_successful, get_event_msg}; + use ::{get_htlc_update_msgs, get_local_commitment_txn, get_revoke_commit_msgs, get_route_and_payment_hash, unwrap_send_err}; + use chain::{ChannelMonitorUpdateErr, Confirm, Watch}; + use chain::channelmonitor::LATENCY_GRACE_PERIOD_BLOCKS; + use ln::channelmanager::PaymentSendFailure; use ln::features::InitFeatures; use ln::functional_test_utils::*; - use util::events::MessageSendEventsProvider; + use 
ln::msgs::ChannelMessageHandler; + use util::errors::APIError; + use util::events::{ClosureReason, MessageSendEvent, MessageSendEventsProvider}; use util::test_utils::{OnRegisterOutput, TxOutReference}; /// Tests that in-block dependent transactions are processed by `block_connected` when not @@ -538,4 +781,180 @@ mod tests { nodes[1].node.get_and_clear_pending_msg_events(); nodes[1].node.get_and_clear_pending_events(); } + + #[test] + fn test_async_ooo_offchain_updates() { + // Test that if we have multiple offchain updates being persisted and they complete + // out-of-order, the ChainMonitor waits until all have completed before informing the + // ChannelManager. + let chanmon_cfgs = create_chanmon_cfgs(2); + let node_cfgs = create_node_cfgs(2, &chanmon_cfgs); + let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]); + let nodes = create_network(2, &node_cfgs, &node_chanmgrs); + create_announced_chan_between_nodes(&nodes, 0, 1, InitFeatures::known(), InitFeatures::known()); + + // Route two payments to be claimed at the same time. + let payment_preimage_1 = route_payment(&nodes[0], &[&nodes[1]], 1_000_000).0; + let payment_preimage_2 = route_payment(&nodes[0], &[&nodes[1]], 1_000_000).0; + + chanmon_cfgs[1].persister.offchain_monitor_updates.lock().unwrap().clear(); + chanmon_cfgs[1].persister.set_update_ret(Err(ChannelMonitorUpdateErr::TemporaryFailure)); + + nodes[1].node.claim_funds(payment_preimage_1); + check_added_monitors!(nodes[1], 1); + nodes[1].node.claim_funds(payment_preimage_2); + check_added_monitors!(nodes[1], 1); + + chanmon_cfgs[1].persister.set_update_ret(Ok(())); + + let persistences = chanmon_cfgs[1].persister.offchain_monitor_updates.lock().unwrap().clone(); + assert_eq!(persistences.len(), 1); + let (funding_txo, updates) = persistences.iter().next().unwrap(); + assert_eq!(updates.len(), 2); + + // Note that updates is a HashMap so the ordering here is actually random. 
This shouldn't + // fail either way but if it fails intermittently it's depending on the ordering of updates. + let mut update_iter = updates.iter(); + nodes[1].chain_monitor.chain_monitor.channel_monitor_updated(*funding_txo, update_iter.next().unwrap().clone()).unwrap(); + assert!(nodes[1].chain_monitor.release_pending_monitor_events().is_empty()); + assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); + nodes[1].chain_monitor.chain_monitor.channel_monitor_updated(*funding_txo, update_iter.next().unwrap().clone()).unwrap(); + + // Now manually walk the commitment signed dance - because we claimed two payments + // back-to-back it doesn't fit into the neat walk commitment_signed_dance does. + + let updates = get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id()); + nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &updates.update_fulfill_htlcs[0]); + expect_payment_sent_without_paths!(nodes[0], payment_preimage_1); + nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &updates.commitment_signed); + check_added_monitors!(nodes[0], 1); + let (as_first_raa, as_first_update) = get_revoke_commit_msgs!(nodes[0], nodes[1].node.get_our_node_id()); + + nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_first_raa); + check_added_monitors!(nodes[1], 1); + let bs_second_updates = get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id()); + nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_first_update); + check_added_monitors!(nodes[1], 1); + let bs_first_raa = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, nodes[0].node.get_our_node_id()); + + nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_second_updates.update_fulfill_htlcs[0]); + expect_payment_sent_without_paths!(nodes[0], payment_preimage_2); + nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), 
&bs_second_updates.commitment_signed); + check_added_monitors!(nodes[0], 1); + nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_first_raa); + expect_payment_path_successful!(nodes[0]); + check_added_monitors!(nodes[0], 1); + let (as_second_raa, as_second_update) = get_revoke_commit_msgs!(nodes[0], nodes[1].node.get_our_node_id()); + + nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_second_raa); + check_added_monitors!(nodes[1], 1); + nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_second_update); + check_added_monitors!(nodes[1], 1); + let bs_second_raa = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, nodes[0].node.get_our_node_id()); + + nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_second_raa); + expect_payment_path_successful!(nodes[0]); + check_added_monitors!(nodes[0], 1); + } + + fn do_chainsync_pauses_events(block_timeout: bool) { + // When a chainsync monitor update occurs, any MonitorUpdates should be held before being + // passed upstream to a `ChannelManager` via `Watch::release_pending_monitor_events`. This + // tests that behavior, as well as some ways it might go wrong. + let chanmon_cfgs = create_chanmon_cfgs(2); + let node_cfgs = create_node_cfgs(2, &chanmon_cfgs); + let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]); + let nodes = create_network(2, &node_cfgs, &node_chanmgrs); + let channel = create_announced_chan_between_nodes( + &nodes, 0, 1, InitFeatures::known(), InitFeatures::known()); + + // Get a route for later and rebalance the channel somewhat + send_payment(&nodes[0], &[&nodes[1]], 10_000_000); + let (route, second_payment_hash, _, second_payment_secret) = get_route_and_payment_hash!(nodes[0], nodes[1], 100_000); + + // First route a payment that we will claim on chain and give the recipient the preimage. 
+ let payment_preimage = route_payment(&nodes[0], &[&nodes[1]], 1_000_000).0; + nodes[1].node.claim_funds(payment_preimage); + nodes[1].node.get_and_clear_pending_msg_events(); + check_added_monitors!(nodes[1], 1); + let remote_txn = get_local_commitment_txn!(nodes[1], channel.2); + assert_eq!(remote_txn.len(), 2); + + // Temp-fail the block connection which will hold the channel-closed event + chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear(); + chanmon_cfgs[0].persister.set_update_ret(Err(ChannelMonitorUpdateErr::TemporaryFailure)); + + // Connect B's commitment transaction, but only to the ChainMonitor/ChannelMonitor. The + // channel is now closed, but the ChannelManager doesn't know that yet. + let new_header = BlockHeader { + version: 2, time: 0, bits: 0, nonce: 0, + prev_blockhash: nodes[0].best_block_info().0, + merkle_root: Default::default() }; + nodes[0].chain_monitor.chain_monitor.transactions_confirmed(&new_header, + &[(0, &remote_txn[0]), (1, &remote_txn[1])], nodes[0].best_block_info().1 + 1); + assert!(nodes[0].chain_monitor.release_pending_monitor_events().is_empty()); + nodes[0].chain_monitor.chain_monitor.best_block_updated(&new_header, nodes[0].best_block_info().1 + 1); + assert!(nodes[0].chain_monitor.release_pending_monitor_events().is_empty()); + + // If the ChannelManager tries to update the channel, however, the ChainMonitor will pass + // the update through to the ChannelMonitor which will refuse it (as the channel is closed). 
+ chanmon_cfgs[0].persister.set_update_ret(Ok(())); + unwrap_send_err!(nodes[0].node.send_payment(&route, second_payment_hash, &Some(second_payment_secret)), + true, APIError::ChannelUnavailable { ref err }, + assert!(err.contains("ChannelMonitor storage failure"))); + check_added_monitors!(nodes[0], 2); // After the failure we generate a close-channel monitor update + check_closed_broadcast!(nodes[0], true); + check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError { err: "ChannelMonitor storage failure".to_string() }); + + // However, as the ChainMonitor is still waiting for the original persistence to complete, + // it won't yet release the MonitorEvents. + assert!(nodes[0].chain_monitor.release_pending_monitor_events().is_empty()); + + if block_timeout { + // After three blocks, pending MonitorEvents should be released either way. + let latest_header = BlockHeader { + version: 2, time: 0, bits: 0, nonce: 0, + prev_blockhash: nodes[0].best_block_info().0, + merkle_root: Default::default() }; + nodes[0].chain_monitor.chain_monitor.best_block_updated(&latest_header, nodes[0].best_block_info().1 + LATENCY_GRACE_PERIOD_BLOCKS); + } else { + let persistences = chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clone(); + for (funding_outpoint, update_ids) in persistences { + for update_id in update_ids { + nodes[0].chain_monitor.chain_monitor.channel_monitor_updated(funding_outpoint, update_id).unwrap(); + } + } + } + + expect_payment_sent!(nodes[0], payment_preimage); + } + + #[test] + fn chainsync_pauses_events() { + do_chainsync_pauses_events(false); + do_chainsync_pauses_events(true); + } + + #[test] + fn update_during_chainsync_fails_channel() { + let chanmon_cfgs = create_chanmon_cfgs(2); + let node_cfgs = create_node_cfgs(2, &chanmon_cfgs); + let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]); + let nodes = create_network(2, &node_cfgs, &node_chanmgrs); + create_announced_chan_between_nodes(&nodes, 0, 1,
InitFeatures::known(), InitFeatures::known()); + + chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear(); + chanmon_cfgs[0].persister.set_update_ret(Err(ChannelMonitorUpdateErr::PermanentFailure)); + + connect_blocks(&nodes[0], 1); + // Before processing events, the ChannelManager will still think the Channel is open and + // there won't be any ChannelMonitorUpdates + assert_eq!(nodes[0].node.list_channels().len(), 1); + check_added_monitors!(nodes[0], 0); + // ... however, as soon as we process events, the channel will close, creating a channel-closed + // ChannelMonitorUpdate. + check_closed_broadcast!(nodes[0], true); + check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError { err: "Failed to persist ChannelMonitor update during chain sync".to_string() }); + check_added_monitors!(nodes[0], 1); + } }