X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=lightning%2Fsrc%2Fchain%2Fchainmonitor.rs;h=e87d082d9a7bb9061894ce26fb15da0087a7c173;hb=3f416bc24e0e804e0cae1c8c5650b19500122b6d;hp=7dbf308a568c11c4abf45bc07ef1653aff6c4b51;hpb=aa9c601774b11d1ef2958961f7479599625e798b;p=rust-lightning

diff --git a/lightning/src/chain/chainmonitor.rs b/lightning/src/chain/chainmonitor.rs
index 7dbf308a..e87d082d 100644
--- a/lightning/src/chain/chainmonitor.rs
+++ b/lightning/src/chain/chainmonitor.rs
@@ -47,23 +47,35 @@ use core::ops::Deref;
 use core::sync::atomic::{AtomicUsize, Ordering};
 use bitcoin::secp256k1::PublicKey;
 
-#[derive(Clone, Copy, Hash, PartialEq, Eq)]
-/// A specific update's ID stored in a `MonitorUpdateId`, separated out to make the contents
-/// entirely opaque.
-enum UpdateOrigin {
-	/// An update that was generated by the `ChannelManager` (via our `chain::Watch`
-	/// implementation). This corresponds to an actual [`ChannelMonitorUpdate::update_id`] field
-	/// and [`ChannelMonitor::get_latest_update_id`].
-	OffChain(u64),
-	/// An update that was generated during blockchain processing. The ID here is specific to the
-	/// generating [`ChainMonitor`] and does *not* correspond to any on-disk IDs.
-	ChainSync(u64),
+mod update_origin {
+	#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
+	/// A specific update's ID stored in a `MonitorUpdateId`, separated out to make the contents
+	/// entirely opaque.
+	pub(crate) enum UpdateOrigin {
+		/// An update that was generated by the `ChannelManager` (via our [`crate::chain::Watch`]
+		/// implementation). This corresponds to an actual [ChannelMonitorUpdate::update_id] field
+		/// and [ChannelMonitor::get_latest_update_id].
+		///
+		/// [ChannelMonitor::get_latest_update_id]: crate::chain::channelmonitor::ChannelMonitor::get_latest_update_id
+		/// [ChannelMonitorUpdate::update_id]: crate::chain::channelmonitor::ChannelMonitorUpdate::update_id
+		OffChain(u64),
+		/// An update that was generated during blockchain processing. The ID here is specific to the
+		/// generating [ChannelMonitor] and does *not* correspond to any on-disk IDs.
+		///
+		/// [ChannelMonitor]: crate::chain::channelmonitor::ChannelMonitor
+		ChainSync(u64),
+	}
 }
 
+#[cfg(any(feature = "_test_utils", test))]
+pub(crate) use update_origin::UpdateOrigin;
+#[cfg(not(any(feature = "_test_utils", test)))]
+use update_origin::UpdateOrigin;
+
 /// An opaque identifier describing a specific [`Persist`] method call.
-#[derive(Clone, Copy, Hash, PartialEq, Eq)]
+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
 pub struct MonitorUpdateId {
-	contents: UpdateOrigin,
+	pub(crate) contents: UpdateOrigin,
 }
 
 impl MonitorUpdateId {
@@ -78,26 +90,51 @@ impl MonitorUpdateId {
 /// `Persist` defines behavior for persisting channel monitors: this could mean
 /// writing once to disk, and/or uploading to one or more backup services.
 ///
-/// Each method can return two possible values:
-///  * If persistence (including any relevant `fsync()` calls) happens immediately, the
-///    implementation should return [`ChannelMonitorUpdateStatus::Completed`], indicating normal
-///    channel operation should continue.
+/// Persistence can happen in one of two ways - synchronously completing before the trait method
+/// calls return or asynchronously in the background.
+///
+/// # For those implementing synchronous persistence
+///
+///  * If persistence completes fully (including any relevant `fsync()` calls), the implementation
+///    should return [`ChannelMonitorUpdateStatus::Completed`], indicating normal channel operation
+///    should continue.
+///
+///  * If persistence fails for some reason, implementations should consider returning
+///    [`ChannelMonitorUpdateStatus::InProgress`] and retry all pending persistence operations in
+///    the background with [`ChainMonitor::list_pending_monitor_updates`] and
+///    [`ChainMonitor::get_monitor`].
+///
+///    Once a full [`ChannelMonitor`] has been persisted, all pending updates for that channel can
+///    be marked as complete via [`ChainMonitor::channel_monitor_updated`].
+///
+///    If at some point no further progress can be made towards persisting the pending updates, the
+///    node should simply shut down.
+///
+///  * If the persistence has failed and cannot be retried further (e.g. because of an outage),
+///    [`ChannelMonitorUpdateStatus::UnrecoverableError`] can be used, though this will result in
+///    an immediate panic and future operations in LDK generally failing.
+///
+/// # For those implementing asynchronous persistence
 ///
-///  * If persistence happens asynchronously, implementations can return
-///    [`ChannelMonitorUpdateStatus::InProgress`] while the update continues in the background.
-///    Once the update completes, [`ChainMonitor::channel_monitor_updated`] should be called with
-///    the corresponding [`MonitorUpdateId`].
+///  All calls should generally spawn a background task and immediately return
+///  [`ChannelMonitorUpdateStatus::InProgress`]. Once the update completes,
+///  [`ChainMonitor::channel_monitor_updated`] should be called with the corresponding
+///  [`MonitorUpdateId`].
 ///
-///    Note that unlike the direct [`chain::Watch`] interface,
-///    [`ChainMonitor::channel_monitor_updated`] must be called once for *each* update which occurs.
+///  Note that unlike the direct [`chain::Watch`] interface,
+///  [`ChainMonitor::channel_monitor_updated`] must be called once for *each* update which occurs.
 ///
-///    If persistence fails for some reason, implementations should still return
-///    [`ChannelMonitorUpdateStatus::InProgress`] and attempt to shut down or otherwise resolve the
-///    situation ASAP.
+///  If at some point no further progress can be made towards persisting a pending update, the node
+///  should simply shut down. Until then, the background task should either loop indefinitely, or
+///  persistence should be regularly retried with [`ChainMonitor::list_pending_monitor_updates`]
+///  and [`ChainMonitor::get_monitor`] (note that if a full monitor is persisted all pending
+///  monitor updates may be marked completed).
 ///
-/// Third-party watchtowers may be built as a part of an implementation of this trait, with the
-/// advantage that you can control whether to resume channel operation depending on if an update
-/// has been persisted to a watchtower. For this, you may find the following methods useful:
+/// # Using remote watchtowers
+///
+/// Watchtowers may be updated as a part of an implementation of this trait, utilizing the async
+/// update process described above while the watchtower is being updated. The following methods are
+/// provided for bulding transactions for a watchtower:
 /// [`ChannelMonitor::initial_counterparty_commitment_tx`],
 /// [`ChannelMonitor::counterparty_commitment_txs_from_update`],
 /// [`ChannelMonitor::sign_to_local_justice_tx`], [`TrustedCommitmentTransaction::revokeable_output_index`],
@@ -130,8 +167,8 @@ pub trait Persist<ChannelSigner: WriteableEcdsaChannelSigner> {
 	/// updated monitor itself to disk/backups. See the [`Persist`] trait documentation for more
 	/// details.
 	///
-	/// During blockchain synchronization operations, this may be called with no
-	/// [`ChannelMonitorUpdate`], in which case the full [`ChannelMonitor`] needs to be persisted.
+	/// During blockchain synchronization operations, and in some rare cases, this may be called with
+	/// no [`ChannelMonitorUpdate`], in which case the full [`ChannelMonitor`] needs to be persisted.
 	/// Note that after the full [`ChannelMonitor`] is persisted any previous
 	/// [`ChannelMonitorUpdate`]s which were persisted should be discarded - they can no longer be
 	/// applied to the persisted [`ChannelMonitor`] as they were already applied.
@@ -279,11 +316,19 @@ where C::Target: chain::Filter,
 	where
 		FN: Fn(&ChannelMonitor<ChannelSigner>, &TransactionData) -> Vec<TransactionOutputs>
 	{
+		let err_str = "ChannelMonitor[Update] persistence failed unrecoverably. This indicates we cannot continue normal operation and must shut down.";
 		let funding_outpoints: HashSet<OutPoint> = HashSet::from_iter(self.monitors.read().unwrap().keys().cloned());
 		for funding_outpoint in funding_outpoints.iter() {
 			let monitor_lock = self.monitors.read().unwrap();
 			if let Some(monitor_state) = monitor_lock.get(funding_outpoint) {
-				self.update_monitor_with_chain_data(header, best_height, txdata, &process, funding_outpoint, &monitor_state);
+				if self.update_monitor_with_chain_data(header, best_height, txdata, &process, funding_outpoint, &monitor_state).is_err() {
+					// Take the monitors lock for writing so that we poison it and any future
+					// operations going forward fail immediately.
+					core::mem::drop(monitor_lock);
+					let _poison = self.monitors.write().unwrap();
+					log_error!(self.logger, "{}", err_str);
+					panic!("{}", err_str);
+				}
 			}
 		}
 
@@ -291,7 +336,10 @@ where C::Target: chain::Filter,
 		let monitor_states = self.monitors.write().unwrap();
 		for (funding_outpoint, monitor_state) in monitor_states.iter() {
 			if !funding_outpoints.contains(funding_outpoint) {
-				self.update_monitor_with_chain_data(header, best_height, txdata, &process, funding_outpoint, &monitor_state);
+				if self.update_monitor_with_chain_data(header, best_height, txdata, &process, funding_outpoint, &monitor_state).is_err() {
+					log_error!(self.logger, "{}", err_str);
+					panic!("{}", err_str);
+				}
 			}
 		}
 
@@ -306,7 +354,10 @@ where C::Target: chain::Filter,
 		}
 	}
 
-	fn update_monitor_with_chain_data<FN>(&self, header: &BlockHeader, best_height: Option<u32>, txdata: &TransactionData, process: FN, funding_outpoint: &OutPoint, monitor_state: &MonitorHolder<ChannelSigner>) where FN: Fn(&ChannelMonitor<ChannelSigner>, &TransactionData) -> Vec<TransactionOutputs> {
+	fn update_monitor_with_chain_data<FN>(
+		&self, header: &BlockHeader, best_height: Option<u32>, txdata: &TransactionData,
+		process: FN, funding_outpoint: &OutPoint, monitor_state: &MonitorHolder<ChannelSigner>
+	) -> Result<(), ()> where FN: Fn(&ChannelMonitor<ChannelSigner>, &TransactionData) -> Vec<TransactionOutputs> {
 		let monitor = &monitor_state.monitor;
 		let mut txn_outputs;
 		{
@@ -331,7 +382,10 @@ where C::Target: chain::Filter,
 				ChannelMonitorUpdateStatus::InProgress => {
 					log_debug!(self.logger, "Channel Monitor sync for channel {} in progress, holding events until completion!", log_funding_info!(monitor));
 					pending_monitor_updates.push(update_id);
-				}
+				},
+				ChannelMonitorUpdateStatus::UnrecoverableError => {
+					return Err(());
+				},
 			}
 		}
 
@@ -351,6 +405,7 @@ where C::Target: chain::Filter,
 				}
 			}
 		}
+		Ok(())
 	}
 
 	/// Creates a new `ChainMonitor` used to watch on-chain activity pertaining to channels.
@@ -379,7 +434,8 @@ where C::Target: chain::Filter,
 	/// claims which are awaiting confirmation.
 	///
 	/// Includes the balances from each [`ChannelMonitor`] *except* those included in
-	/// `ignored_channels`.
+	/// `ignored_channels`, allowing you to filter out balances from channels which are still open
+	/// (and whose balance should likely be pulled from the [`ChannelDetails`]).
 	///
 	/// See [`ChannelMonitor::get_claimable_balances`] for more details on the exact criteria for
 	/// inclusion in the return value.
@@ -674,7 +730,12 @@ where C::Target: chain::Filter,
 			},
 			ChannelMonitorUpdateStatus::Completed => {
 				log_info!(self.logger, "Persistence of new ChannelMonitor for channel {} completed", log_funding_info!(monitor));
-			}
+			},
+			ChannelMonitorUpdateStatus::UnrecoverableError => {
+				let err_str = "ChannelMonitor[Update] persistence failed unrecoverably. This indicates we cannot continue normal operation and must shut down.";
+				log_error!(self.logger, "{}", err_str);
+				panic!("{}", err_str);
+			},
 		}
 		if let Some(ref chain_source) = self.chain_source {
 			monitor.load_outputs_to_watch(chain_source);
@@ -690,7 +751,7 @@ where C::Target: chain::Filter,
 	fn update_channel(&self, funding_txo: OutPoint, update: &ChannelMonitorUpdate) -> ChannelMonitorUpdateStatus {
 		// Update the monitor that watches the channel referred to by the given outpoint.
 		let monitors = self.monitors.read().unwrap();
-		match monitors.get(&funding_txo) {
+		let ret = match monitors.get(&funding_txo) {
 			None => {
 				log_error!(self.logger, "Failed to update channel monitor: no such monitor registered");
 
@@ -705,15 +766,21 @@ where C::Target: chain::Filter,
 			Some(monitor_state) => {
 				let monitor = &monitor_state.monitor;
 				log_trace!(self.logger, "Updating ChannelMonitor for channel {}", log_funding_info!(monitor));
-				let update_res = monitor.update_monitor(update, &self.broadcaster, &*self.fee_estimator, &self.logger);
-				if update_res.is_err() {
-					log_error!(self.logger, "Failed to update ChannelMonitor for channel {}.", log_funding_info!(monitor));
-				}
-				// Even if updating the monitor returns an error, the monitor's state will
-				// still be changed. So, persist the updated monitor despite the error.
+				let update_res = monitor.update_monitor(update, &self.broadcaster, &self.fee_estimator, &self.logger);
+
 				let update_id = MonitorUpdateId::from_monitor_update(update);
 				let mut pending_monitor_updates = monitor_state.pending_monitor_updates.lock().unwrap();
-				let persist_res = self.persister.update_persisted_channel(funding_txo, Some(update), monitor, update_id);
+				let persist_res = if update_res.is_err() {
+					// Even if updating the monitor returns an error, the monitor's state will
+					// still be changed. Therefore, we should persist the updated monitor despite the error.
+					// We don't want to persist a `monitor_update` which results in a failure to apply later
+					// while reading `channel_monitor` with updates from storage. Instead, we should persist
+					// the entire `channel_monitor` here.
+					log_warn!(self.logger, "Failed to update ChannelMonitor for channel {}. Going ahead and persisting the entire ChannelMonitor", log_funding_info!(monitor));
+					self.persister.update_persisted_channel(funding_txo, None, monitor, update_id)
+				} else {
+					self.persister.update_persisted_channel(funding_txo, Some(update), monitor, update_id)
+				};
 				match persist_res {
 					ChannelMonitorUpdateStatus::InProgress => {
 						pending_monitor_updates.push(update_id);
@@ -722,6 +789,7 @@ where C::Target: chain::Filter,
 					ChannelMonitorUpdateStatus::Completed => {
 						log_debug!(self.logger, "Persistence of ChannelMonitorUpdate for channel {} completed", log_funding_info!(monitor));
 					},
+					ChannelMonitorUpdateStatus::UnrecoverableError => { /* we'll panic in a moment */ },
 				}
 				if update_res.is_err() {
 					ChannelMonitorUpdateStatus::InProgress
@@ -729,7 +797,17 @@ where C::Target: chain::Filter,
 					persist_res
 				}
 			}
+		};
+		if let ChannelMonitorUpdateStatus::UnrecoverableError = ret {
+			// Take the monitors lock for writing so that we poison it and any future
+			// operations going forward fail immediately.
+			core::mem::drop(monitors);
+			let _poison = self.monitors.write().unwrap();
+			let err_str = "ChannelMonitor[Update] persistence failed unrecoverably. This indicates we cannot continue normal operation and must shut down.";
+			log_error!(self.logger, "{}", err_str);
+			panic!("{}", err_str);
 		}
+		ret
 	}
 
 	fn release_pending_monitor_events(&self) -> Vec<(OutPoint, Vec<MonitorEvent>, Option<PublicKey>)> {
@@ -973,4 +1051,26 @@ mod tests {
 		do_chainsync_pauses_events(false);
 		do_chainsync_pauses_events(true);
 	}
+
+	#[test]
+	#[cfg(feature = "std")]
+	fn update_during_chainsync_poisons_channel() {
+		let chanmon_cfgs = create_chanmon_cfgs(2);
+		let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+		let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+		let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+		create_announced_chan_between_nodes(&nodes, 0, 1);
+
+		chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
+		chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::UnrecoverableError);
+
+		assert!(std::panic::catch_unwind(|| {
+			// Returning an UnrecoverableError should always panic immediately
+			connect_blocks(&nodes[0], 1);
+		}).is_err());
+		assert!(std::panic::catch_unwind(|| {
+			// ...and also poison our locks causing later use to panic as well
+			core::mem::drop(nodes);
+		}).is_err());
+	}
 }