X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=lightning%2Fsrc%2Fchain%2Fchainmonitor.rs;h=e87d082d9a7bb9061894ce26fb15da0087a7c173;hb=3f416bc24e0e804e0cae1c8c5650b19500122b6d;hp=7dbf308a568c11c4abf45bc07ef1653aff6c4b51;hpb=aa9c601774b11d1ef2958961f7479599625e798b;p=rust-lightning diff --git a/lightning/src/chain/chainmonitor.rs b/lightning/src/chain/chainmonitor.rs index 7dbf308a..e87d082d 100644 --- a/lightning/src/chain/chainmonitor.rs +++ b/lightning/src/chain/chainmonitor.rs @@ -47,23 +47,35 @@ use core::ops::Deref; use core::sync::atomic::{AtomicUsize, Ordering}; use bitcoin::secp256k1::PublicKey; -#[derive(Clone, Copy, Hash, PartialEq, Eq)] -/// A specific update's ID stored in a `MonitorUpdateId`, separated out to make the contents -/// entirely opaque. -enum UpdateOrigin { - /// An update that was generated by the `ChannelManager` (via our `chain::Watch` - /// implementation). This corresponds to an actual [`ChannelMonitorUpdate::update_id`] field - /// and [`ChannelMonitor::get_latest_update_id`]. - OffChain(u64), - /// An update that was generated during blockchain processing. The ID here is specific to the - /// generating [`ChainMonitor`] and does *not* correspond to any on-disk IDs. - ChainSync(u64), +mod update_origin { + #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] + /// A specific update's ID stored in a `MonitorUpdateId`, separated out to make the contents + /// entirely opaque. + pub(crate) enum UpdateOrigin { + /// An update that was generated by the `ChannelManager` (via our [`crate::chain::Watch`] + /// implementation). This corresponds to an actual [ChannelMonitorUpdate::update_id] field + /// and [ChannelMonitor::get_latest_update_id]. + /// + /// [ChannelMonitor::get_latest_update_id]: crate::chain::channelmonitor::ChannelMonitor::get_latest_update_id + /// [ChannelMonitorUpdate::update_id]: crate::chain::channelmonitor::ChannelMonitorUpdate::update_id + OffChain(u64), + /// An update that was generated during blockchain processing. The ID here is specific to the + /// generating [ChannelMonitor] and does *not* correspond to any on-disk IDs. + /// + /// [ChannelMonitor]: crate::chain::channelmonitor::ChannelMonitor + ChainSync(u64), + } } +#[cfg(any(feature = "_test_utils", test))] +pub(crate) use update_origin::UpdateOrigin; +#[cfg(not(any(feature = "_test_utils", test)))] +use update_origin::UpdateOrigin; + /// An opaque identifier describing a specific [`Persist`] method call. -#[derive(Clone, Copy, Hash, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] pub struct MonitorUpdateId { - contents: UpdateOrigin, + pub(crate) contents: UpdateOrigin, } impl MonitorUpdateId { @@ -78,26 +90,51 @@ impl MonitorUpdateId { /// `Persist` defines behavior for persisting channel monitors: this could mean /// writing once to disk, and/or uploading to one or more backup services. /// -/// Each method can return two possible values: -/// * If persistence (including any relevant `fsync()` calls) happens immediately, the -/// implementation should return [`ChannelMonitorUpdateStatus::Completed`], indicating normal -/// channel operation should continue. +/// Persistence can happen in one of two ways - synchronously completing before the trait method +/// calls return or asynchronously in the background. +/// +/// # For those implementing synchronous persistence +/// +/// * If persistence completes fully (including any relevant `fsync()` calls), the implementation +/// should return [`ChannelMonitorUpdateStatus::Completed`], indicating normal channel operation +/// should continue. +/// +/// * If persistence fails for some reason, implementations should consider returning +/// [`ChannelMonitorUpdateStatus::InProgress`] and retry all pending persistence operations in +/// the background with [`ChainMonitor::list_pending_monitor_updates`] and +/// [`ChainMonitor::get_monitor`]. +/// +/// Once a full [`ChannelMonitor`] has been persisted, all pending updates for that channel can +/// be marked as complete via [`ChainMonitor::channel_monitor_updated`]. +/// +/// If at some point no further progress can be made towards persisting the pending updates, the +/// node should simply shut down. +/// +/// * If the persistence has failed and cannot be retried further (e.g. because of an outage), +/// [`ChannelMonitorUpdateStatus::UnrecoverableError`] can be used, though this will result in +/// an immediate panic and future operations in LDK generally failing. +/// +/// # For those implementing asynchronous persistence /// -/// * If persistence happens asynchronously, implementations can return -/// [`ChannelMonitorUpdateStatus::InProgress`] while the update continues in the background. -/// Once the update completes, [`ChainMonitor::channel_monitor_updated`] should be called with -/// the corresponding [`MonitorUpdateId`]. +/// All calls should generally spawn a background task and immediately return +/// [`ChannelMonitorUpdateStatus::InProgress`]. Once the update completes, +/// [`ChainMonitor::channel_monitor_updated`] should be called with the corresponding +/// [`MonitorUpdateId`]. /// -/// Note that unlike the direct [`chain::Watch`] interface, -/// [`ChainMonitor::channel_monitor_updated`] must be called once for *each* update which occurs. +/// Note that unlike the direct [`chain::Watch`] interface, +/// [`ChainMonitor::channel_monitor_updated`] must be called once for *each* update which occurs. /// -/// If persistence fails for some reason, implementations should still return -/// [`ChannelMonitorUpdateStatus::InProgress`] and attempt to shut down or otherwise resolve the -/// situation ASAP. +/// If at some point no further progress can be made towards persisting a pending update, the node +/// should simply shut down. Until then, the background task should either loop indefinitely, or +/// persistence should be regularly retried with [`ChainMonitor::list_pending_monitor_updates`] +/// and [`ChainMonitor::get_monitor`] (note that if a full monitor is persisted all pending +/// monitor updates may be marked completed). /// -/// Third-party watchtowers may be built as a part of an implementation of this trait, with the -/// advantage that you can control whether to resume channel operation depending on if an update -/// has been persisted to a watchtower. For this, you may find the following methods useful: +/// # Using remote watchtowers +/// +/// Watchtowers may be updated as a part of an implementation of this trait, utilizing the async +/// update process described above while the watchtower is being updated. The following methods are +/// provided for bulding transactions for a watchtower: /// [`ChannelMonitor::initial_counterparty_commitment_tx`], /// [`ChannelMonitor::counterparty_commitment_txs_from_update`], /// [`ChannelMonitor::sign_to_local_justice_tx`], [`TrustedCommitmentTransaction::revokeable_output_index`], @@ -130,8 +167,8 @@ pub trait Persist { /// updated monitor itself to disk/backups. See the [`Persist`] trait documentation for more /// details. /// - /// During blockchain synchronization operations, this may be called with no - /// [`ChannelMonitorUpdate`], in which case the full [`ChannelMonitor`] needs to be persisted. + /// During blockchain synchronization operations, and in some rare cases, this may be called with + /// no [`ChannelMonitorUpdate`], in which case the full [`ChannelMonitor`] needs to be persisted. /// Note that after the full [`ChannelMonitor`] is persisted any previous /// [`ChannelMonitorUpdate`]s which were persisted should be discarded - they can no longer be /// applied to the persisted [`ChannelMonitor`] as they were already applied. @@ -279,11 +316,19 @@ where C::Target: chain::Filter, where FN: Fn(&ChannelMonitor, &TransactionData) -> Vec { + let err_str = "ChannelMonitor[Update] persistence failed unrecoverably. This indicates we cannot continue normal operation and must shut down."; let funding_outpoints: HashSet = HashSet::from_iter(self.monitors.read().unwrap().keys().cloned()); for funding_outpoint in funding_outpoints.iter() { let monitor_lock = self.monitors.read().unwrap(); if let Some(monitor_state) = monitor_lock.get(funding_outpoint) { - self.update_monitor_with_chain_data(header, best_height, txdata, &process, funding_outpoint, &monitor_state); + if self.update_monitor_with_chain_data(header, best_height, txdata, &process, funding_outpoint, &monitor_state).is_err() { + // Take the monitors lock for writing so that we poison it and any future + // operations going forward fail immediately. + core::mem::drop(monitor_lock); + let _poison = self.monitors.write().unwrap(); + log_error!(self.logger, "{}", err_str); + panic!("{}", err_str); + } } } @@ -291,7 +336,10 @@ where C::Target: chain::Filter, let monitor_states = self.monitors.write().unwrap(); for (funding_outpoint, monitor_state) in monitor_states.iter() { if !funding_outpoints.contains(funding_outpoint) { - self.update_monitor_with_chain_data(header, best_height, txdata, &process, funding_outpoint, &monitor_state); + if self.update_monitor_with_chain_data(header, best_height, txdata, &process, funding_outpoint, &monitor_state).is_err() { + log_error!(self.logger, "{}", err_str); + panic!("{}", err_str); + } } } @@ -306,7 +354,10 @@ where C::Target: chain::Filter, } } - fn update_monitor_with_chain_data(&self, header: &BlockHeader, best_height: Option, txdata: &TransactionData, process: FN, funding_outpoint: &OutPoint, monitor_state: &MonitorHolder) where FN: Fn(&ChannelMonitor, &TransactionData) -> Vec { + fn update_monitor_with_chain_data( + &self, header: &BlockHeader, best_height: Option, txdata: &TransactionData, + process: FN, funding_outpoint: &OutPoint, monitor_state: &MonitorHolder + ) -> Result<(), ()> where FN: Fn(&ChannelMonitor, &TransactionData) -> Vec { let monitor = &monitor_state.monitor; let mut txn_outputs; { @@ -331,7 +382,10 @@ where C::Target: chain::Filter, ChannelMonitorUpdateStatus::InProgress => { log_debug!(self.logger, "Channel Monitor sync for channel {} in progress, holding events until completion!", log_funding_info!(monitor)); pending_monitor_updates.push(update_id); - } + }, + ChannelMonitorUpdateStatus::UnrecoverableError => { + return Err(()); + }, } } @@ -351,6 +405,7 @@ where C::Target: chain::Filter, } } } + Ok(()) } /// Creates a new `ChainMonitor` used to watch on-chain activity pertaining to channels. @@ -379,7 +434,8 @@ where C::Target: chain::Filter, /// claims which are awaiting confirmation. /// /// Includes the balances from each [`ChannelMonitor`] *except* those included in - /// `ignored_channels`. + /// `ignored_channels`, allowing you to filter out balances from channels which are still open + /// (and whose balance should likely be pulled from the [`ChannelDetails`]). /// /// See [`ChannelMonitor::get_claimable_balances`] for more details on the exact criteria for /// inclusion in the return value. @@ -674,7 +730,12 @@ where C::Target: chain::Filter, }, ChannelMonitorUpdateStatus::Completed => { log_info!(self.logger, "Persistence of new ChannelMonitor for channel {} completed", log_funding_info!(monitor)); - } + }, + ChannelMonitorUpdateStatus::UnrecoverableError => { + let err_str = "ChannelMonitor[Update] persistence failed unrecoverably. This indicates we cannot continue normal operation and must shut down."; + log_error!(self.logger, "{}", err_str); + panic!("{}", err_str); + }, } if let Some(ref chain_source) = self.chain_source { monitor.load_outputs_to_watch(chain_source); @@ -690,7 +751,7 @@ where C::Target: chain::Filter, fn update_channel(&self, funding_txo: OutPoint, update: &ChannelMonitorUpdate) -> ChannelMonitorUpdateStatus { // Update the monitor that watches the channel referred to by the given outpoint. let monitors = self.monitors.read().unwrap(); - match monitors.get(&funding_txo) { + let ret = match monitors.get(&funding_txo) { None => { log_error!(self.logger, "Failed to update channel monitor: no such monitor registered"); @@ -705,15 +766,21 @@ where C::Target: chain::Filter, Some(monitor_state) => { let monitor = &monitor_state.monitor; log_trace!(self.logger, "Updating ChannelMonitor for channel {}", log_funding_info!(monitor)); - let update_res = monitor.update_monitor(update, &self.broadcaster, &*self.fee_estimator, &self.logger); - if update_res.is_err() { - log_error!(self.logger, "Failed to update ChannelMonitor for channel {}.", log_funding_info!(monitor)); - } - // Even if updating the monitor returns an error, the monitor's state will - // still be changed. So, persist the updated monitor despite the error. + let update_res = monitor.update_monitor(update, &self.broadcaster, &self.fee_estimator, &self.logger); + let update_id = MonitorUpdateId::from_monitor_update(update); let mut pending_monitor_updates = monitor_state.pending_monitor_updates.lock().unwrap(); - let persist_res = self.persister.update_persisted_channel(funding_txo, Some(update), monitor, update_id); + let persist_res = if update_res.is_err() { + // Even if updating the monitor returns an error, the monitor's state will + // still be changed. Therefore, we should persist the updated monitor despite the error. + // We don't want to persist a `monitor_update` which results in a failure to apply later + // while reading `channel_monitor` with updates from storage. Instead, we should persist + // the entire `channel_monitor` here. + log_warn!(self.logger, "Failed to update ChannelMonitor for channel {}. Going ahead and persisting the entire ChannelMonitor", log_funding_info!(monitor)); + self.persister.update_persisted_channel(funding_txo, None, monitor, update_id) + } else { + self.persister.update_persisted_channel(funding_txo, Some(update), monitor, update_id) + }; match persist_res { ChannelMonitorUpdateStatus::InProgress => { pending_monitor_updates.push(update_id); @@ -722,6 +789,7 @@ where C::Target: chain::Filter, ChannelMonitorUpdateStatus::Completed => { log_debug!(self.logger, "Persistence of ChannelMonitorUpdate for channel {} completed", log_funding_info!(monitor)); }, + ChannelMonitorUpdateStatus::UnrecoverableError => { /* we'll panic in a moment */ }, } if update_res.is_err() { ChannelMonitorUpdateStatus::InProgress @@ -729,7 +797,17 @@ where C::Target: chain::Filter, persist_res } } + }; + if let ChannelMonitorUpdateStatus::UnrecoverableError = ret { + // Take the monitors lock for writing so that we poison it and any future + // operations going forward fail immediately. + core::mem::drop(monitors); + let _poison = self.monitors.write().unwrap(); + let err_str = "ChannelMonitor[Update] persistence failed unrecoverably. This indicates we cannot continue normal operation and must shut down."; + log_error!(self.logger, "{}", err_str); + panic!("{}", err_str); } + ret } fn release_pending_monitor_events(&self) -> Vec<(OutPoint, Vec, Option)> { @@ -973,4 +1051,26 @@ mod tests { do_chainsync_pauses_events(false); do_chainsync_pauses_events(true); } + + #[test] + #[cfg(feature = "std")] + fn update_during_chainsync_poisons_channel() { + let chanmon_cfgs = create_chanmon_cfgs(2); + let node_cfgs = create_node_cfgs(2, &chanmon_cfgs); + let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]); + let nodes = create_network(2, &node_cfgs, &node_chanmgrs); + create_announced_chan_between_nodes(&nodes, 0, 1); + + chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear(); + chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::UnrecoverableError); + + assert!(std::panic::catch_unwind(|| { + // Returning an UnrecoverableError should always panic immediately + connect_blocks(&nodes[0], 1); + }).is_err()); + assert!(std::panic::catch_unwind(|| { + // ...and also poison our locks causing later use to panic as well + core::mem::drop(nodes); + }).is_err()); + } }