Merge pull request #2562 from TheBlueMatt/2023-08-no-perm-fail
author Matt Corallo <649246+TheBlueMatt@users.noreply.github.com>
Thu, 21 Sep 2023 20:22:16 +0000 (20:22 +0000)
committer GitHub <noreply@github.com>
Thu, 21 Sep 2023 20:22:16 +0000 (20:22 +0000)
Drop the ChannelMonitorUpdateStatus::PermanentFailure variant

15 files changed:
fuzz/src/chanmon_consistency.rs
lightning-persister/src/fs_store.rs
lightning/src/chain/chainmonitor.rs
lightning/src/chain/channelmonitor.rs
lightning/src/chain/mod.rs
lightning/src/ln/chanmon_update_fail_tests.rs
lightning/src/ln/channelmanager.rs
lightning/src/ln/functional_test_utils.rs
lightning/src/ln/functional_tests.rs
lightning/src/ln/monitor_tests.rs
lightning/src/ln/payment_tests.rs
lightning/src/ln/reload_tests.rs
lightning/src/ln/reorg_tests.rs
lightning/src/util/persist.rs
lightning/src/util/test_utils.rs

index b6d41fb99140cd2cb012ffbaf08c190efdd443fd..6a2007165d71773cbb3d39ea5ea0dd2187aca6bb 100644 (file)
@@ -138,7 +138,7 @@ impl TestChainMonitor {
        }
 }
 impl chain::Watch<TestChannelSigner> for TestChainMonitor {
-       fn watch_channel(&self, funding_txo: OutPoint, monitor: channelmonitor::ChannelMonitor<TestChannelSigner>) -> chain::ChannelMonitorUpdateStatus {
+       fn watch_channel(&self, funding_txo: OutPoint, monitor: channelmonitor::ChannelMonitor<TestChannelSigner>) -> Result<chain::ChannelMonitorUpdateStatus, ()> {
                let mut ser = VecWriter(Vec::new());
                monitor.write(&mut ser).unwrap();
                if let Some(_) = self.latest_monitors.lock().unwrap().insert(funding_txo, (monitor.get_latest_update_id(), ser.0)) {
@@ -500,7 +500,7 @@ pub fn do_test<Out: Output>(data: &[u8], underlying_out: Out) {
                        let res = (<(BlockHash, ChanMan)>::read(&mut Cursor::new(&$ser.0), read_args).expect("Failed to read manager").1, chain_monitor.clone());
                        for (funding_txo, mon) in monitors.drain() {
                                assert_eq!(chain_monitor.chain_monitor.watch_channel(funding_txo, mon),
-                                       ChannelMonitorUpdateStatus::Completed);
+                                       Ok(ChannelMonitorUpdateStatus::Completed));
                        }
                        res
                } }
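Note on the new calling convention: `chain::Watch::watch_channel` now returns `Result<chain::ChannelMonitorUpdateStatus, ()>` rather than a bare status. A rough, self-contained sketch of how a caller might branch on the new return type; the `UpdateStatus` enum and `handle_watch_result` below are local stand-ins for illustration only, not LDK types:

    // Local stand-in for chain::ChannelMonitorUpdateStatus, for illustration only.
    #[derive(Debug, PartialEq)]
    enum UpdateStatus { Completed, InProgress, UnrecoverableError }

    // Err(()) now means the monitor was rejected outright (e.g. a duplicate funding
    // outpoint) and the channel should be closed without broadcasting the funding
    // transaction, while Ok(InProgress) merely pauses the channel until persistence
    // completes.
    fn handle_watch_result(res: Result<UpdateStatus, ()>) {
        match res {
            Ok(UpdateStatus::Completed) => { /* persistence done, channel fully operational */ },
            Ok(UpdateStatus::InProgress) => { /* persistence pending, channel paused */ },
            // LDK's ChainMonitor panics before returning this variant (see the
            // chainmonitor.rs changes below), but other Watch implementations could.
            Ok(UpdateStatus::UnrecoverableError) => { /* shut down */ },
            Err(()) => { /* registration rejected, do not use the channel */ },
        }
    }

    fn main() {
        handle_watch_result(Ok(UpdateStatus::Completed));
        handle_watch_result(Err(()));
    }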
index 81f9709c45467d4e6725ef83f11ba10a33734b68..42b28018fe9202ce4435a41dc87907433b156e7d 100644 (file)
@@ -436,7 +436,7 @@ mod tests {
        }
 
        // Test that if the store's path to channel data is read-only, writing a
-       // monitor to it results in the store returning a PermanentFailure.
+       // monitor to it results in the store returning an UnrecoverableError.
        // Windows ignores the read-only flag for folders, so this test is Unix-only.
        #[cfg(not(target_os = "windows"))]
        #[test]
@@ -470,7 +470,7 @@ mod tests {
                        index: 0
                };
                match store.persist_new_channel(test_txo, &added_monitors[0].1, update_id.2) {
-                       ChannelMonitorUpdateStatus::PermanentFailure => {},
+                       ChannelMonitorUpdateStatus::UnrecoverableError => {},
                        _ => panic!("unexpected result from persisting new channel")
                }
 
@@ -507,7 +507,7 @@ mod tests {
                        index: 0
                };
                match store.persist_new_channel(test_txo, &added_monitors[0].1, update_id.2) {
-                       ChannelMonitorUpdateStatus::PermanentFailure => {},
+                       ChannelMonitorUpdateStatus::UnrecoverableError => {},
                        _ => panic!("unexpected result from persisting new channel")
                }
 
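The read-only-directory tests above work by flipping the directory's permission bits so writes fail. A standalone sketch of that technique (Unix-only, assuming the process is not running as root; the scratch path below is hypothetical):

    #[cfg(not(target_os = "windows"))]
    fn main() -> std::io::Result<()> {
        use std::fs;
        use std::os::unix::fs::PermissionsExt;

        let dir = std::env::temp_dir().join("ro_store_demo"); // hypothetical scratch dir
        fs::create_dir_all(&dir)?;
        // Drop the write bit so any write into the directory fails...
        fs::set_permissions(&dir, fs::Permissions::from_mode(0o555))?;
        let failed = fs::write(dir.join("monitor"), b"data").is_err();
        // ...then restore it so the directory can be cleaned up afterwards.
        fs::set_permissions(&dir, fs::Permissions::from_mode(0o755))?;
        assert!(failed, "write into a read-only directory should fail");
        Ok(())
    }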
index b3b62a2e8706f96a1ddf82146b7260e63bc91796..b6909cb3e416890895367532455edb127fdc038c 100644 (file)
@@ -44,7 +44,7 @@ use crate::prelude::*;
 use crate::sync::{RwLock, RwLockReadGuard, Mutex, MutexGuard};
 use core::iter::FromIterator;
 use core::ops::Deref;
-use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+use core::sync::atomic::{AtomicUsize, Ordering};
 use bitcoin::secp256k1::PublicKey;
 
 #[derive(Clone, Copy, Hash, PartialEq, Eq)]
@@ -78,27 +78,48 @@ impl MonitorUpdateId {
 /// `Persist` defines behavior for persisting channel monitors: this could mean
 /// writing once to disk, and/or uploading to one or more backup services.
 ///
-/// Each method can return three possible values:
-///  * If persistence (including any relevant `fsync()` calls) happens immediately, the
-///    implementation should return [`ChannelMonitorUpdateStatus::Completed`], indicating normal
-///    channel operation should continue.
-///  * If persistence happens asynchronously, implementations should first ensure the
-///    [`ChannelMonitor`] or [`ChannelMonitorUpdate`] are written durably to disk, and then return
-///    [`ChannelMonitorUpdateStatus::InProgress`] while the update continues in the background.
-///    Once the update completes, [`ChainMonitor::channel_monitor_updated`] should be called with
-///    the corresponding [`MonitorUpdateId`].
+/// Persistence can happen in one of two ways - synchronously completing before the trait method
+/// calls return or asynchronously in the background.
 ///
-///    Note that unlike the direct [`chain::Watch`] interface,
-///    [`ChainMonitor::channel_monitor_updated`] must be called once for *each* update which occurs.
+/// # For those implementing synchronous persistence
 ///
-///  * If persistence fails for some reason, implementations should return
-///    [`ChannelMonitorUpdateStatus::PermanentFailure`], in which case the channel will likely be
-///    closed without broadcasting the latest state. See
-///    [`ChannelMonitorUpdateStatus::PermanentFailure`] for more details.
+///  * If persistence completes fully (including any relevant `fsync()` calls), the implementation
+///    should return [`ChannelMonitorUpdateStatus::Completed`], indicating normal channel operation
+///    should continue.
 ///
-/// Third-party watchtowers may be built as a part of an implementation of this trait, with the
-/// advantage that you can control whether to resume channel operation depending on if an update
-/// has been persisted to a watchtower. For this, you may find the following methods useful:
+///  * If persistence fails for some reason, implementations should consider returning
+///    [`ChannelMonitorUpdateStatus::InProgress`] and retry all pending persistence operations in
+///    the background with [`ChainMonitor::list_pending_monitor_updates`] and
+///    [`ChainMonitor::get_monitor`].
+///
+///    Once a full [`ChannelMonitor`] has been persisted, all pending updates for that channel can
+///    be marked as complete via [`ChainMonitor::channel_monitor_updated`].
+///
+///    If at some point no further progress can be made towards persisting the pending updates, the
+///    node should simply shut down.
+///
+///  * If the persistence has failed and cannot be retried further (e.g. because of some timeout),
+///    [`ChannelMonitorUpdateStatus::UnrecoverableError`] can be used, though this will result in
+///    an immediate panic and future operations in LDK generally failing.
+///
+/// # For those implementing asynchronous persistence
+///
+///  All calls should generally spawn a background task and immediately return
+///  [`ChannelMonitorUpdateStatus::InProgress`]. Once the update completes,
+///  [`ChainMonitor::channel_monitor_updated`] should be called with the corresponding
+///  [`MonitorUpdateId`].
+///
+///  Note that unlike the direct [`chain::Watch`] interface,
+///  [`ChainMonitor::channel_monitor_updated`] must be called once for *each* update which occurs.
+///
+///  If at some point no further progress can be made towards persisting a pending update, the node
+///  should simply shut down.
+///
+/// # Using remote watchtowers
+///
+/// Watchtowers may be updated as a part of an implementation of this trait, utilizing the async
+/// update process described above while the watchtower is being updated. The following methods are
+/// provided for building transactions for a watchtower:
 /// [`ChannelMonitor::initial_counterparty_commitment_tx`],
 /// [`ChannelMonitor::counterparty_commitment_txs_from_update`],
 /// [`ChannelMonitor::sign_to_local_justice_tx`], [`TrustedCommitmentTransaction::revokeable_output_index`],
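For `Persist` implementers, the new documentation above boils down to: return `Completed` once the write (including any `fsync()`) has finished, `InProgress` while it is still pending or being retried in the background, and `UnrecoverableError` only when no further progress is possible. A minimal, self-contained sketch of that decision logic for a synchronous store; `Status`, `write_monitor`, and the path below are illustrative stand-ins, not LDK APIs:

    #[derive(Debug)]
    enum Status { Completed, InProgress, UnrecoverableError }

    // Stand-in for whatever durable (fsync'd) write the store performs.
    fn write_monitor(bytes: &[u8]) -> std::io::Result<()> {
        std::fs::write("/tmp/monitor.bin", bytes) // placeholder; a real store would also fsync
    }

    fn persist_sync(monitor_bytes: &[u8], retries_exhausted: bool) -> Status {
        match write_monitor(monitor_bytes) {
            Ok(()) => Status::Completed,
            // Prefer InProgress on failure: retry in the background (e.g. by re-reading
            // pending updates and monitors from the ChainMonitor) and mark the update
            // complete via channel_monitor_updated once it is durably on disk.
            Err(_) if !retries_exhausted => Status::InProgress,
            // Only when no further progress can be made; this will panic the node.
            Err(_) => Status::UnrecoverableError,
        }
    }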
@@ -180,12 +201,6 @@ struct MonitorHolder<ChannelSigner: WriteableEcdsaChannelSigner> {
        /// the ChannelManager re-adding the same payment entry, before the same block is replayed,
        /// resulting in a duplicate PaymentSent event.
        pending_monitor_updates: Mutex<Vec<MonitorUpdateId>>,
-       /// When the user returns a PermanentFailure error from an update_persisted_channel call during
-       /// block processing, we inform the ChannelManager that the channel should be closed
-       /// asynchronously. In order to ensure no further changes happen before the ChannelManager has
-       /// processed the closure event, we set this to true and return PermanentFailure for any other
-       /// chain::Watch events.
-       channel_perm_failed: AtomicBool,
        /// The last block height at which no [`UpdateOrigin::ChainSync`] monitor updates were present
        /// in `pending_monitor_updates`.
        /// If it's been more than [`LATENCY_GRACE_PERIOD_BLOCKS`] since we started waiting on a chain
@@ -286,11 +301,20 @@ where C::Target: chain::Filter,
        where
                FN: Fn(&ChannelMonitor<ChannelSigner>, &TransactionData) -> Vec<TransactionOutputs>
        {
+               let err_str = "ChannelMonitor[Update] persistence failed unrecoverably. This indicates we cannot continue normal operation and must shut down.";
                let funding_outpoints: HashSet<OutPoint> = HashSet::from_iter(self.monitors.read().unwrap().keys().cloned());
                for funding_outpoint in funding_outpoints.iter() {
                        let monitor_lock = self.monitors.read().unwrap();
                        if let Some(monitor_state) = monitor_lock.get(funding_outpoint) {
-                               self.update_monitor_with_chain_data(header, best_height, txdata, &process, funding_outpoint, &monitor_state);
+                               if self.update_monitor_with_chain_data(header, best_height, txdata, &process, funding_outpoint, &monitor_state).is_err() {
+                                       // Take the monitors lock for writing so that we poison it and any future
+                                       // operations going forward fail immediately.
+                                       core::mem::drop(monitor_state);
+                                       core::mem::drop(monitor_lock);
+                                       let _poison = self.monitors.write().unwrap();
+                                       log_error!(self.logger, "{}", err_str);
+                                       panic!("{}", err_str);
+                               }
                        }
                }
 
@@ -298,7 +322,10 @@ where C::Target: chain::Filter,
                let monitor_states = self.monitors.write().unwrap();
                for (funding_outpoint, monitor_state) in monitor_states.iter() {
                        if !funding_outpoints.contains(funding_outpoint) {
-                               self.update_monitor_with_chain_data(header, best_height, txdata, &process, funding_outpoint, &monitor_state);
+                               if self.update_monitor_with_chain_data(header, best_height, txdata, &process, funding_outpoint, &monitor_state).is_err() {
+                                       log_error!(self.logger, "{}", err_str);
+                                       panic!("{}", err_str);
+                               }
                        }
                }
 
@@ -313,7 +340,10 @@ where C::Target: chain::Filter,
                }
        }
 
-       fn update_monitor_with_chain_data<FN>(&self, header: &BlockHeader, best_height: Option<u32>, txdata: &TransactionData, process: FN, funding_outpoint: &OutPoint, monitor_state: &MonitorHolder<ChannelSigner>) where FN: Fn(&ChannelMonitor<ChannelSigner>, &TransactionData) -> Vec<TransactionOutputs> {
+       fn update_monitor_with_chain_data<FN>(
+               &self, header: &BlockHeader, best_height: Option<u32>, txdata: &TransactionData,
+               process: FN, funding_outpoint: &OutPoint, monitor_state: &MonitorHolder<ChannelSigner>
+       ) -> Result<(), ()> where FN: Fn(&ChannelMonitor<ChannelSigner>, &TransactionData) -> Vec<TransactionOutputs> {
                let monitor = &monitor_state.monitor;
                let mut txn_outputs;
                {
@@ -335,15 +365,13 @@ where C::Target: chain::Filter,
                        match self.persister.update_persisted_channel(*funding_outpoint, None, monitor, update_id) {
                                ChannelMonitorUpdateStatus::Completed =>
                                        log_trace!(self.logger, "Finished syncing Channel Monitor for channel {}", log_funding_info!(monitor)),
-                               ChannelMonitorUpdateStatus::PermanentFailure => {
-                                       monitor_state.channel_perm_failed.store(true, Ordering::Release);
-                                       self.pending_monitor_events.lock().unwrap().push((*funding_outpoint, vec![MonitorEvent::UpdateFailed(*funding_outpoint)], monitor.get_counterparty_node_id()));
-                                       self.event_notifier.notify();
-                               }
                                ChannelMonitorUpdateStatus::InProgress => {
                                        log_debug!(self.logger, "Channel Monitor sync for channel {} in progress, holding events until completion!", log_funding_info!(monitor));
                                        pending_monitor_updates.push(update_id);
-                               }
+                               },
+                               ChannelMonitorUpdateStatus::UnrecoverableError => {
+                                       return Err(());
+                               },
                        }
                }
 
@@ -363,6 +391,7 @@ where C::Target: chain::Filter,
                                }
                        }
                }
+               Ok(())
        }
 
        /// Creates a new `ChainMonitor` used to watch on-chain activity pertaining to channels.
@@ -491,9 +520,8 @@ where C::Target: chain::Filter,
                                // `MonitorEvent`s from the monitor back to the `ChannelManager` until they
                                // complete.
                                let monitor_is_pending_updates = monitor_data.has_pending_offchain_updates(&pending_monitor_updates);
-                               if monitor_is_pending_updates || monitor_data.channel_perm_failed.load(Ordering::Acquire) {
-                                       // If there are still monitor updates pending (or an old monitor update
-                                       // finished after a later one perm-failed), we cannot yet construct an
+                               if monitor_is_pending_updates {
+                                       // If there are still monitor updates pending, we cannot yet construct a
                                        // Completed event.
                                        return Ok(());
                                }
@@ -667,18 +695,12 @@ where C::Target: chain::Filter,
            L::Target: Logger,
            P::Target: Persist<ChannelSigner>,
 {
-       /// Adds the monitor that watches the channel referred to by the given outpoint.
-       ///
-       /// Calls back to [`chain::Filter`] with the funding transaction and outputs to watch.
-       ///
-       /// Note that we persist the given `ChannelMonitor` while holding the `ChainMonitor`
-       /// monitors lock.
-       fn watch_channel(&self, funding_outpoint: OutPoint, monitor: ChannelMonitor<ChannelSigner>) -> ChannelMonitorUpdateStatus {
+       fn watch_channel(&self, funding_outpoint: OutPoint, monitor: ChannelMonitor<ChannelSigner>) -> Result<ChannelMonitorUpdateStatus, ()> {
                let mut monitors = self.monitors.write().unwrap();
                let entry = match monitors.entry(funding_outpoint) {
                        hash_map::Entry::Occupied(_) => {
                                log_error!(self.logger, "Failed to add new channel data: channel monitor for given outpoint is already present");
-                               return ChannelMonitorUpdateStatus::PermanentFailure
+                               return Err(());
                        },
                        hash_map::Entry::Vacant(e) => e,
                };
@@ -691,13 +713,14 @@ where C::Target: chain::Filter,
                                log_info!(self.logger, "Persistence of new ChannelMonitor for channel {} in progress", log_funding_info!(monitor));
                                pending_monitor_updates.push(update_id);
                        },
-                       ChannelMonitorUpdateStatus::PermanentFailure => {
-                               log_error!(self.logger, "Persistence of new ChannelMonitor for channel {} failed", log_funding_info!(monitor));
-                               return persist_res;
-                       },
                        ChannelMonitorUpdateStatus::Completed => {
                                log_info!(self.logger, "Persistence of new ChannelMonitor for channel {} completed", log_funding_info!(monitor));
-                       }
+                       },
+                       ChannelMonitorUpdateStatus::UnrecoverableError => {
+                               let err_str = "ChannelMonitor[Update] persistence failed unrecoverably. This indicates we cannot continue normal operation and must shut down.";
+                               log_error!(self.logger, "{}", err_str);
+                               panic!("{}", err_str);
+                       },
                }
                if let Some(ref chain_source) = self.chain_source {
                        monitor.load_outputs_to_watch(chain_source);
@@ -705,28 +728,25 @@ where C::Target: chain::Filter,
                entry.insert(MonitorHolder {
                        monitor,
                        pending_monitor_updates: Mutex::new(pending_monitor_updates),
-                       channel_perm_failed: AtomicBool::new(false),
                        last_chain_persist_height: AtomicUsize::new(self.highest_chain_height.load(Ordering::Acquire)),
                });
-               persist_res
+               Ok(persist_res)
        }
 
-       /// Note that we persist the given `ChannelMonitor` update while holding the
-       /// `ChainMonitor` monitors lock.
        fn update_channel(&self, funding_txo: OutPoint, update: &ChannelMonitorUpdate) -> ChannelMonitorUpdateStatus {
                // Update the monitor that watches the channel referred to by the given outpoint.
                let monitors = self.monitors.read().unwrap();
-               match monitors.get(&funding_txo) {
+               let ret = match monitors.get(&funding_txo) {
                        None => {
                                log_error!(self.logger, "Failed to update channel monitor: no such monitor registered");
 
                                // We should never ever trigger this from within ChannelManager. Technically a
                                // user could use this object with some proxying in between which makes this
                                // possible, but in tests and fuzzing, this should be a panic.
-                               #[cfg(any(test, fuzzing))]
+                               #[cfg(debug_assertions)]
                                panic!("ChannelManager generated a channel update for a channel that was not yet registered!");
-                               #[cfg(not(any(test, fuzzing)))]
-                               ChannelMonitorUpdateStatus::PermanentFailure
+                               #[cfg(not(debug_assertions))]
+                               ChannelMonitorUpdateStatus::InProgress
                        },
                        Some(monitor_state) => {
                                let monitor = &monitor_state.monitor;
@@ -745,23 +765,28 @@ where C::Target: chain::Filter,
                                                pending_monitor_updates.push(update_id);
                                                log_debug!(self.logger, "Persistence of ChannelMonitorUpdate for channel {} in progress", log_funding_info!(monitor));
                                        },
-                                       ChannelMonitorUpdateStatus::PermanentFailure => {
-                                               monitor_state.channel_perm_failed.store(true, Ordering::Release);
-                                               log_error!(self.logger, "Persistence of ChannelMonitorUpdate for channel {} failed", log_funding_info!(monitor));
-                                       },
                                        ChannelMonitorUpdateStatus::Completed => {
                                                log_debug!(self.logger, "Persistence of ChannelMonitorUpdate for channel {} completed", log_funding_info!(monitor));
                                        },
+                                       ChannelMonitorUpdateStatus::UnrecoverableError => { /* we'll panic in a moment */ },
                                }
                                if update_res.is_err() {
-                                       ChannelMonitorUpdateStatus::PermanentFailure
-                               } else if monitor_state.channel_perm_failed.load(Ordering::Acquire) {
-                                       ChannelMonitorUpdateStatus::PermanentFailure
+                                       ChannelMonitorUpdateStatus::InProgress
                                } else {
                                        persist_res
                                }
                        }
+               };
+               if let ChannelMonitorUpdateStatus::UnrecoverableError = ret {
+                       // Take the monitors lock for writing so that we poison it and any future
+                       // operations going forward fail immediately.
+                       core::mem::drop(monitors);
+                       let _poison = self.monitors.write().unwrap();
+                       let err_str = "ChannelMonitor[Update] persistence failed unrecoverably. This indicates we cannot continue normal operation and must shut down.";
+                       log_error!(self.logger, "{}", err_str);
+                       panic!("{}", err_str);
                }
+               ret
        }
 
        fn release_pending_monitor_events(&self) -> Vec<(OutPoint, Vec<MonitorEvent>, Option<PublicKey>)> {
@@ -774,17 +799,6 @@ where C::Target: chain::Filter,
                        {
                                log_debug!(self.logger, "A Channel Monitor sync is still in progress, refusing to provide monitor events!");
                        } else {
-                               if monitor_state.channel_perm_failed.load(Ordering::Acquire) {
-                                       // If a `UpdateOrigin::ChainSync` persistence failed with `PermanantFailure`,
-                                       // we don't really know if the latest `ChannelMonitor` state is on disk or not.
-                                       // We're supposed to hold monitor updates until the latest state is on disk to
-                                       // avoid duplicate events, but the user told us persistence is screw-y and may
-                                       // not complete. We can't hold events forever because we may learn some payment
-                                       // preimage, so instead we just log and hope the user complied with the
-                                       // `PermanentFailure` requirements of having at least the local-disk copy
-                                       // updated.
-                                       log_info!(self.logger, "A Channel Monitor sync returned PermanentFailure. Returning monitor events but duplicate events may appear after reload!");
-                               }
                                if is_pending_monitor_update {
                                        log_error!(self.logger, "A ChannelMonitor sync took longer than {} blocks to complete.", LATENCY_GRACE_PERIOD_BLOCKS);
                                        log_error!(self.logger, "   To avoid funds-loss, we are allowing monitor updates to be released.");
@@ -831,12 +845,12 @@ impl<ChannelSigner: WriteableEcdsaChannelSigner, C: Deref, T: Deref, F: Deref, L
 
 #[cfg(test)]
 mod tests {
-       use crate::{check_added_monitors, check_closed_broadcast, check_closed_event};
+       use crate::check_added_monitors;
        use crate::{expect_payment_claimed, expect_payment_path_successful, get_event_msg};
        use crate::{get_htlc_update_msgs, get_local_commitment_txn, get_revoke_commit_msgs, get_route_and_payment_hash, unwrap_send_err};
        use crate::chain::{ChannelMonitorUpdateStatus, Confirm, Watch};
        use crate::chain::channelmonitor::LATENCY_GRACE_PERIOD_BLOCKS;
-       use crate::events::{Event, ClosureReason, MessageSendEvent, MessageSendEventsProvider};
+       use crate::events::{Event, MessageSendEvent, MessageSendEventsProvider};
        use crate::ln::channelmanager::{PaymentSendFailure, PaymentId, RecipientOnionFields};
        use crate::ln::functional_test_utils::*;
        use crate::ln::msgs::ChannelMessageHandler;
@@ -988,12 +1002,8 @@ mod tests {
                chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::Completed);
                unwrap_send_err!(nodes[0].node.send_payment_with_route(&route, second_payment_hash,
                                RecipientOnionFields::secret_only(second_payment_secret), PaymentId(second_payment_hash.0)
-                       ), true, APIError::ChannelUnavailable { ref err },
-                       assert!(err.contains("ChannelMonitor storage failure")));
-               check_added_monitors!(nodes[0], 2); // After the failure we generate a close-channel monitor update
-               check_closed_broadcast!(nodes[0], true);
-               check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError { err: "ChannelMonitor storage failure".to_string() },
-                       [nodes[1].node.get_our_node_id()], 100000);
+                       ), false, APIError::MonitorUpdateInProgress, {});
+               check_added_monitors!(nodes[0], 1);
 
                // However, as the ChainMonitor is still waiting for the original persistence to complete,
                // it won't yet release the MonitorEvents.
@@ -1022,7 +1032,8 @@ mod tests {
        }
 
        #[test]
-       fn update_during_chainsync_fails_channel() {
+       #[cfg(feature = "std")]
+       fn update_during_chainsync_poisons_channel() {
                let chanmon_cfgs = create_chanmon_cfgs(2);
                let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
                let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
@@ -1030,18 +1041,15 @@ mod tests {
                create_announced_chan_between_nodes(&nodes, 0, 1);
 
                chanmon_cfgs[0].persister.chain_sync_monitor_persistences.lock().unwrap().clear();
-               chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::PermanentFailure);
-
-               connect_blocks(&nodes[0], 1);
-               // Before processing events, the ChannelManager will still think the Channel is open and
-               // there won't be any ChannelMonitorUpdates
-               assert_eq!(nodes[0].node.list_channels().len(), 1);
-               check_added_monitors!(nodes[0], 0);
-               // ... however once we get events once, the channel will close, creating a channel-closed
-               // ChannelMonitorUpdate.
-               check_closed_broadcast!(nodes[0], true);
-               check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError { err: "Failed to persist ChannelMonitor update during chain sync".to_string() },
-                       [nodes[1].node.get_our_node_id()], 100000);
-               check_added_monitors!(nodes[0], 1);
+               chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::UnrecoverableError);
+
+               assert!(std::panic::catch_unwind(|| {
+                       // Returning an UnrecoverableError should always panic immediately
+                       connect_blocks(&nodes[0], 1);
+               }).is_err());
+               assert!(std::panic::catch_unwind(|| {
+                       // ...and also poison our locks causing later use to panic as well
+                       core::mem::drop(nodes);
+               }).is_err());
        }
 }
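The `update_during_chainsync_poisons_channel` test depends on standard-library lock poisoning: panicking while the monitors lock is held (as `update_channel` and chain-data processing now do on `UnrecoverableError`) makes every later `unwrap()` on that lock panic as well. A minimal standalone demonstration of the mechanism, unrelated to LDK types:

    use std::sync::{Arc, Mutex};

    fn main() {
        let monitors = Arc::new(Mutex::new(0u32));
        let clone = Arc::clone(&monitors);
        // Panic while holding the lock; the Mutex is now poisoned.
        let _ = std::thread::spawn(move || {
            let _guard = clone.lock().unwrap();
            panic!("simulated unrecoverable persistence failure");
        }).join();
        // Later lock attempts observe the poison, mirroring how the ChainMonitor's
        // poisoned monitors lock makes subsequent operations fail fast.
        assert!(monitors.lock().is_err());
    }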
index 8c92ab47cc00b6611075e7a65d72cf4653365664..3f9c83bb54393290c40b9a6784bcce7dd48c53da 100644 (file)
@@ -134,7 +134,7 @@ pub enum MonitorEvent {
        HTLCEvent(HTLCUpdate),
 
        /// A monitor event that the Channel's commitment transaction was confirmed.
-       CommitmentTxConfirmed(OutPoint),
+       HolderForceClosed(OutPoint),
 
        /// Indicates a [`ChannelMonitor`] update has completed. See
        /// [`ChannelMonitorUpdateStatus::InProgress`] for more information on how this is used.
@@ -150,24 +150,18 @@ pub enum MonitorEvent {
                /// same [`ChannelMonitor`] have been applied and persisted.
                monitor_update_id: u64,
        },
-
-       /// Indicates a [`ChannelMonitor`] update has failed. See
-       /// [`ChannelMonitorUpdateStatus::PermanentFailure`] for more information on how this is used.
-       ///
-       /// [`ChannelMonitorUpdateStatus::PermanentFailure`]: super::ChannelMonitorUpdateStatus::PermanentFailure
-       UpdateFailed(OutPoint),
 }
 impl_writeable_tlv_based_enum_upgradable!(MonitorEvent,
-       // Note that Completed and UpdateFailed are currently never serialized to disk as they are
-       // generated only in ChainMonitor
+       // Note that Completed is currently never serialized to disk as it is generated only in
+       // ChainMonitor.
        (0, Completed) => {
                (0, funding_txo, required),
                (2, monitor_update_id, required),
        },
 ;
        (2, HTLCEvent),
-       (4, CommitmentTxConfirmed),
-       (6, UpdateFailed),
+       (4, HolderForceClosed),
+       // 6 was `UpdateFailed` until LDK 0.0.117
 );
 
 /// Simple structure sent back by `chain::Watch` when an HTLC from a forward channel is detected on
@@ -1037,7 +1031,7 @@ impl<Signer: WriteableEcdsaChannelSigner> Writeable for ChannelMonitorImpl<Signe
 
                writer.write_all(&(self.pending_monitor_events.iter().filter(|ev| match ev {
                        MonitorEvent::HTLCEvent(_) => true,
-                       MonitorEvent::CommitmentTxConfirmed(_) => true,
+                       MonitorEvent::HolderForceClosed(_) => true,
                        _ => false,
                }).count() as u64).to_be_bytes())?;
                for event in self.pending_monitor_events.iter() {
@@ -1046,7 +1040,7 @@ impl<Signer: WriteableEcdsaChannelSigner> Writeable for ChannelMonitorImpl<Signe
                                        0u8.write(writer)?;
                                        upd.write(writer)?;
                                },
-                               MonitorEvent::CommitmentTxConfirmed(_) => 1u8.write(writer)?,
+                               MonitorEvent::HolderForceClosed(_) => 1u8.write(writer)?,
                                _ => {}, // Covered in the TLV writes below
                        }
                }
@@ -1488,21 +1482,20 @@ impl<Signer: WriteableEcdsaChannelSigner> ChannelMonitor<Signer> {
                self.inner.lock().unwrap().counterparty_node_id
        }
 
-       /// Used by ChannelManager deserialization to broadcast the latest holder state if its copy of
-       /// the Channel was out-of-date.
+       /// Used by [`ChannelManager`] deserialization to broadcast the latest holder state if its copy
+       /// of the channel state was out-of-date.
        ///
        /// You may also use this to broadcast the latest local commitment transaction, either because
-       /// a monitor update failed with [`ChannelMonitorUpdateStatus::PermanentFailure`] or because we've
-       /// fallen behind (i.e. we've received proof that our counterparty side knows a revocation
-       /// secret we gave them that they shouldn't know).
+       /// a monitor update failed or because we've fallen behind (i.e. we've received proof that our
+       /// counterparty side knows a revocation secret we gave them that they shouldn't know).
        ///
        /// Broadcasting these transactions in the second case is UNSAFE, as they allow counterparty
        /// side to punish you. Nevertheless you may want to broadcast them if counterparty doesn't
        /// close channel with their commitment transaction after a substantial amount of time. Best
        /// may be to contact the other node operator out-of-band to coordinate other options available
-       /// to you. In any-case, the choice is up to you.
+       /// to you.
        ///
-       /// [`ChannelMonitorUpdateStatus::PermanentFailure`]: super::ChannelMonitorUpdateStatus::PermanentFailure
+       /// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
        pub fn get_latest_holder_commitment_txn<L: Deref>(&self, logger: &L) -> Vec<Transaction>
        where L::Target: Logger {
                self.inner.lock().unwrap().get_latest_holder_commitment_txn(logger)
@@ -2533,7 +2526,7 @@ impl<Signer: WriteableEcdsaChannelSigner> ChannelMonitorImpl<Signer> {
                        txs.push(tx);
                }
                broadcaster.broadcast_transactions(&txs);
-               self.pending_monitor_events.push(MonitorEvent::CommitmentTxConfirmed(self.funding_info.0));
+               self.pending_monitor_events.push(MonitorEvent::HolderForceClosed(self.funding_info.0));
        }
 
        pub fn update_monitor<B: Deref, F: Deref, L: Deref>(&mut self, updates: &ChannelMonitorUpdate, broadcaster: &B, fee_estimator: F, logger: &L) -> Result<(), ()>
@@ -2599,6 +2592,7 @@ impl<Signer: WriteableEcdsaChannelSigner> ChannelMonitorImpl<Signer> {
                                ChannelMonitorUpdateStep::CommitmentSecret { idx, secret } => {
                                        log_trace!(logger, "Updating ChannelMonitor with commitment secret");
                                        if let Err(e) = self.provide_secret(*idx, *secret) {
+                                               debug_assert!(false, "Latest counterparty commitment secret was invalid");
                                                log_error!(logger, "Providing latest counterparty commitment secret failed/was refused:");
                                                log_error!(logger, "    {}", e);
                                                ret = Err(());
@@ -2645,7 +2639,7 @@ impl<Signer: WriteableEcdsaChannelSigner> ChannelMonitorImpl<Signer> {
                                                log_error!(logger, "    in channel monitor for channel {}!", &self.funding_info.0.to_channel_id());
                                                log_error!(logger, "    Read the docs for ChannelMonitor::get_latest_holder_commitment_txn and take manual action!");
                                        } else {
-                                               // If we generated a MonitorEvent::CommitmentTxConfirmed, the ChannelManager
+                                               // If we generated a MonitorEvent::HolderForceClosed, the ChannelManager
                                                // will still give us a ChannelForceClosed event with !should_broadcast, but we
                                                // shouldn't print the scary warning above.
                                                log_info!(logger, "Channel off-chain state closed after we broadcasted our latest commitment transaction.");
@@ -3490,7 +3484,7 @@ impl<Signer: WriteableEcdsaChannelSigner> ChannelMonitorImpl<Signer> {
                        let funding_outp = HolderFundingOutput::build(self.funding_redeemscript.clone(), self.channel_value_satoshis, self.onchain_tx_handler.channel_type_features().clone());
                        let commitment_package = PackageTemplate::build_package(self.funding_info.0.txid.clone(), self.funding_info.0.index as u32, PackageSolvingData::HolderFundingOutput(funding_outp), self.best_block.height(), self.best_block.height());
                        claimable_outpoints.push(commitment_package);
-                       self.pending_monitor_events.push(MonitorEvent::CommitmentTxConfirmed(self.funding_info.0));
+                       self.pending_monitor_events.push(MonitorEvent::HolderForceClosed(self.funding_info.0));
                        // Although we aren't signing the transaction directly here, the transaction will be signed
                        // in the claim that is queued to OnchainTxHandler. We set holder_tx_signed here to reject
                        // new channel updates.
@@ -4254,7 +4248,7 @@ impl<'a, 'b, ES: EntropySource, SP: SignerProvider> ReadableArgs<(&'a ES, &'b SP
                for _ in 0..pending_monitor_events_len {
                        let ev = match <u8 as Readable>::read(reader)? {
                                0 => MonitorEvent::HTLCEvent(Readable::read(reader)?),
-                               1 => MonitorEvent::CommitmentTxConfirmed(funding_info.0),
+                               1 => MonitorEvent::HolderForceClosed(funding_info.0),
                                _ => return Err(DecodeError::InvalidValue)
                        };
                        pending_monitor_events.as_mut().unwrap().push(ev);
@@ -4413,13 +4407,12 @@ mod tests {
        use crate::chain::chaininterface::LowerBoundedFeeEstimator;
 
        use super::ChannelMonitorUpdateStep;
-       use crate::{check_added_monitors, check_closed_broadcast, check_closed_event, check_spends, get_local_commitment_txn, get_monitor, get_route_and_payment_hash, unwrap_send_err};
+       use crate::{check_added_monitors, check_spends, get_local_commitment_txn, get_monitor, get_route_and_payment_hash, unwrap_send_err};
        use crate::chain::{BestBlock, Confirm};
        use crate::chain::channelmonitor::ChannelMonitor;
        use crate::chain::package::{weight_offered_htlc, weight_received_htlc, weight_revoked_offered_htlc, weight_revoked_received_htlc, WEIGHT_REVOKED_OUTPUT};
        use crate::chain::transaction::OutPoint;
        use crate::sign::InMemorySigner;
-       use crate::events::ClosureReason;
        use crate::ln::{PaymentPreimage, PaymentHash};
        use crate::ln::chan_utils;
        use crate::ln::chan_utils::{HTLCOutputInCommitment, ChannelPublicKeys, ChannelTransactionParameters, HolderCommitmentTransaction, CounterpartyChannelTransactionParameters};
@@ -4485,18 +4478,14 @@ mod tests {
                let (route, payment_hash, _, payment_secret) = get_route_and_payment_hash!(nodes[1], nodes[0], 100_000);
                unwrap_send_err!(nodes[1].node.send_payment_with_route(&route, payment_hash,
                                RecipientOnionFields::secret_only(payment_secret), PaymentId(payment_hash.0)
-                       ), true, APIError::ChannelUnavailable { ref err },
-                       assert!(err.contains("ChannelMonitor storage failure")));
-               check_added_monitors!(nodes[1], 2); // After the failure we generate a close-channel monitor update
-               check_closed_broadcast!(nodes[1], true);
-               check_closed_event!(nodes[1], 1, ClosureReason::ProcessingError { err: "ChannelMonitor storage failure".to_string() }, 
-                       [nodes[0].node.get_our_node_id()], 100000);
+                       ), false, APIError::MonitorUpdateInProgress, {});
+               check_added_monitors!(nodes[1], 1);
 
                // Build a new ChannelMonitorUpdate which contains both the failing commitment tx update
                // and provides the claim preimages for the two pending HTLCs. The first update generates
                // an error, but the point of this test is to ensure the later updates are still applied.
                let monitor_updates = nodes[1].chain_monitor.monitor_updates.lock().unwrap();
-               let mut replay_update = monitor_updates.get(&channel.2).unwrap().iter().rev().skip(1).next().unwrap().clone();
+               let mut replay_update = monitor_updates.get(&channel.2).unwrap().iter().rev().next().unwrap().clone();
                assert_eq!(replay_update.updates.len(), 1);
                if let ChannelMonitorUpdateStep::LatestCounterpartyCommitmentTXInfo { .. } = replay_update.updates[0] {
                } else { panic!(); }
index 236b10a7b19d1288b5d74f784d066c2a8bacc169..89e0b155cf68b2db3f24c1a0b23f1e01aacb67b1 100644 (file)
@@ -176,6 +176,25 @@ pub trait Confirm {
 }
 
 /// An enum representing the status of a channel monitor update persistence.
+///
+/// These are generally used as the return value for an implementation of [`Persist`] which is used
+/// as the storage layer for a [`ChainMonitor`]. See the docs on [`Persist`] for a high-level
+/// explanation of how to handle different cases.
+///
+/// While `UnrecoverableError` is provided as a failure variant, it is not truly "handled" on the
+/// calling side, and generally results in an immediate panic. For those who prefer to avoid
+/// panics, `InProgress` can be used and you can retry the update operation in the background or
+/// shut down cleanly.
+///
+/// Note that channels should generally *not* be force-closed after a persistence failure.
+/// Force-closing with the latest [`ChannelMonitorUpdate`] applied may result in a transaction
+/// being broadcast which can only be spent by the latest [`ChannelMonitor`]! Thus, if the
+/// latest [`ChannelMonitor`] is not durably persisted anywhere and exists only in memory, naively
+/// calling [`ChannelManager::force_close_broadcasting_latest_txn`] *may result in loss of funds*!
+///
+/// [`Persist`]: chainmonitor::Persist
+/// [`ChainMonitor`]: chainmonitor::ChainMonitor
+/// [`ChannelManager::force_close_broadcasting_latest_txn`]: crate::ln::channelmanager::ChannelManager::force_close_broadcasting_latest_txn
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum ChannelMonitorUpdateStatus {
        /// The update has been durably persisted and all copies of the relevant [`ChannelMonitor`]
@@ -184,17 +203,13 @@ pub enum ChannelMonitorUpdateStatus {
        /// This includes performing any `fsync()` calls required to ensure the update is guaranteed to
        /// be available on restart even if the application crashes.
        Completed,
-       /// Used to indicate a temporary failure (eg connection to a watchtower or remote backup of
-       /// our state failed, but is expected to succeed at some point in the future).
-       ///
-       /// Such a failure will "freeze" a channel, preventing us from revoking old states or
-       /// submitting new commitment transactions to the counterparty. Once the update(s) which failed
-       /// have been successfully applied, a [`MonitorEvent::Completed`] can be used to restore the
-       /// channel to an operational state.
+       /// Indicates that the update will happen asynchronously in the background or that a transient
+       /// failure occurred which is being retried in the background and will eventually complete.
        ///
-       /// Note that a given [`ChannelManager`] will *never* re-generate a [`ChannelMonitorUpdate`].
-       /// If you return this error you must ensure that it is written to disk safely before writing
-       /// the latest [`ChannelManager`] state, or you should return [`PermanentFailure`] instead.
+       /// This will "freeze" a channel, preventing us from revoking old states or submitting a new
+       /// commitment transaction to the counterparty. Once the update(s) which are `InProgress` have
+       /// been completed, a [`MonitorEvent::Completed`] can be used to restore the channel to an
+       /// operational state.
        ///
        /// Even when a channel has been "frozen", updates to the [`ChannelMonitor`] can continue to
        /// occur (e.g. if an inbound HTLC which we forwarded was claimed upstream, resulting in us
@@ -204,74 +219,40 @@ pub enum ChannelMonitorUpdateStatus {
        /// until a [`MonitorEvent::Completed`] is provided, even if you return no error on a later
        /// monitor update for the same channel.
        ///
-       /// For deployments where a copy of ChannelMonitors and other local state are backed up in a
-       /// remote location (with local copies persisted immediately), it is anticipated that all
+       /// For deployments where a copy of [`ChannelMonitor`]s and other local state are backed up in a
+       /// remote location (with local copies persisted immediately), it is anticipated that all
        /// updates will return [`InProgress`] until the remote copies could be updated.
        ///
-       /// [`PermanentFailure`]: ChannelMonitorUpdateStatus::PermanentFailure
+       /// Note that while fully asynchronous persistence of [`ChannelMonitor`] data is generally
+       /// reliable, this feature is considered beta, and a handful of edge-cases remain. Until the
+       /// remaining cases are fixed, in rare cases, *using this feature may lead to funds loss*.
+       ///
        /// [`InProgress`]: ChannelMonitorUpdateStatus::InProgress
-       /// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
        InProgress,
-       /// Used to indicate no further channel monitor updates will be allowed (likely a disk failure
-       /// or a remote copy of this [`ChannelMonitor`] is no longer reachable and thus not updatable).
-       ///
-       /// When this is returned, [`ChannelManager`] will force-close the channel but *not* broadcast
-       /// our current commitment transaction. This avoids a dangerous case where a local disk failure
-       /// (e.g. the Linux-default remounting of the disk as read-only) causes [`PermanentFailure`]s
-       /// for all monitor updates. If we were to broadcast our latest commitment transaction and then
-       /// restart, we could end up reading a previous [`ChannelMonitor`] and [`ChannelManager`],
-       /// revoking our now-broadcasted state before seeing it confirm and losing all our funds.
-       ///
-       /// Note that this is somewhat of a tradeoff - if the disk is really gone and we may have lost
-       /// the data permanently, we really should broadcast immediately. If the data can be recovered
-       /// with manual intervention, we'd rather close the channel, rejecting future updates to it,
-       /// and broadcast the latest state only if we have HTLCs to claim which are timing out (which
-       /// we do as long as blocks are connected).
+       /// Indicates that an update has failed and will not complete at any point in the future.
        ///
-       /// In order to broadcast the latest local commitment transaction, you'll need to call
-       /// [`ChannelMonitor::get_latest_holder_commitment_txn`] and broadcast the resulting
-       /// transactions once you've safely ensured no further channel updates can be generated by your
-       /// [`ChannelManager`].
+       /// Currently returning this variant will cause LDK to immediately panic to encourage immediate
+       /// shutdown. In the future this may be updated to disconnect peers and refuse to continue
+       /// normal operation without a panic.
        ///
-       /// Note that at least one final [`ChannelMonitorUpdate`] may still be provided, which must
-       /// still be processed by a running [`ChannelMonitor`]. This final update will mark the
-       /// [`ChannelMonitor`] as finalized, ensuring no further updates (e.g. revocation of the latest
-       /// commitment transaction) are allowed.
+       /// Applications which wish to perform an orderly shutdown after failure should consider
+       /// returning [`InProgress`] instead and simply shut down without ever marking the update
+       /// complete.
        ///
-       /// Note that even if you return a [`PermanentFailure`] due to unavailability of secondary
-       /// [`ChannelMonitor`] copies, you should still make an attempt to store the update where
-       /// possible to ensure you can claim HTLC outputs on the latest commitment transaction
-       /// broadcasted later.
-       ///
-       /// In case of distributed watchtowers deployment, the new version must be written to disk, as
-       /// state may have been stored but rejected due to a block forcing a commitment broadcast. This
-       /// storage is used to claim outputs of rejected state confirmed onchain by another watchtower,
-       /// lagging behind on block processing.
-       ///
-       /// [`PermanentFailure`]: ChannelMonitorUpdateStatus::PermanentFailure
-       /// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
-       PermanentFailure,
+       /// [`InProgress`]: ChannelMonitorUpdateStatus::InProgress
+       UnrecoverableError,
 }
 
 /// The `Watch` trait defines behavior for watching on-chain activity pertaining to channels as
 /// blocks are connected and disconnected.
 ///
 /// Each channel is associated with a [`ChannelMonitor`]. Implementations of this trait are
-/// responsible for maintaining a set of monitors such that they can be updated accordingly as
-/// channel state changes and HTLCs are resolved. See method documentation for specific
-/// requirements.
-///
-/// Implementations **must** ensure that updates are successfully applied and persisted upon method
-/// completion. If an update fails with a [`PermanentFailure`], then it must immediately shut down
-/// without taking any further action such as persisting the current state.
+/// responsible for maintaining a set of monitors such that they can be updated as channel state
+/// changes. On each update, *all copies* of a [`ChannelMonitor`] must be updated and the update
+/// persisted to disk to ensure that the latest [`ChannelMonitor`] state can be reloaded if the
+/// application crashes.
 ///
-/// If an implementation maintains multiple instances of a channel's monitor (e.g., by storing
-/// backup copies), then it must ensure that updates are applied across all instances. Otherwise, it
-/// could result in a revoked transaction being broadcast, allowing the counterparty to claim all
-/// funds in the channel. See [`ChannelMonitorUpdateStatus`] for more details about how to handle
-/// multiple instances.
-///
-/// [`PermanentFailure`]: ChannelMonitorUpdateStatus::PermanentFailure
+/// See method documentation and [`ChannelMonitorUpdateStatus`] for specific requirements.
 pub trait Watch<ChannelSigner: WriteableEcdsaChannelSigner> {
        /// Watches a channel identified by `funding_txo` using `monitor`.
        ///
@@ -279,20 +260,32 @@ pub trait Watch<ChannelSigner: WriteableEcdsaChannelSigner> {
        /// with any spends of outputs returned by [`get_outputs_to_watch`]. In practice, this means
        /// calling [`block_connected`] and [`block_disconnected`] on the monitor.
        ///
-       /// Note: this interface MUST error with [`ChannelMonitorUpdateStatus::PermanentFailure`] if
-       /// the given `funding_txo` has previously been registered via `watch_channel`.
+       /// A return of `Err(())` indicates that the channel should immediately be force-closed without
+       /// broadcasting the funding transaction.
+       ///
+       /// If the given `funding_txo` has previously been registered via `watch_channel`, `Err(())`
+       /// must be returned.
        ///
        /// [`get_outputs_to_watch`]: channelmonitor::ChannelMonitor::get_outputs_to_watch
        /// [`block_connected`]: channelmonitor::ChannelMonitor::block_connected
        /// [`block_disconnected`]: channelmonitor::ChannelMonitor::block_disconnected
-       fn watch_channel(&self, funding_txo: OutPoint, monitor: ChannelMonitor<ChannelSigner>) -> ChannelMonitorUpdateStatus;
+       fn watch_channel(&self, funding_txo: OutPoint, monitor: ChannelMonitor<ChannelSigner>) -> Result<ChannelMonitorUpdateStatus, ()>;
 
        /// Updates a channel identified by `funding_txo` by applying `update` to its monitor.
        ///
-       /// Implementations must call [`update_monitor`] with the given update. See
-       /// [`ChannelMonitorUpdateStatus`] for invariants around returning an error.
+       /// Implementations must call [`ChannelMonitor::update_monitor`] with the given update. This
+       /// may fail (returning an `Err(())`), in which case this should return
+       /// [`ChannelMonitorUpdateStatus::InProgress`] (and the update should never complete). This
+       /// generally implies the channel has been closed (either by the funding outpoint being spent
+       /// on-chain or the [`ChannelMonitor`] having decided to do so and broadcasted a transaction),
+       /// and the [`ChannelManager`] state will be updated once it sees the funding spend on-chain.
        ///
-       /// [`update_monitor`]: channelmonitor::ChannelMonitor::update_monitor
+       /// In general, persistence failures should be retried after returning
+       /// [`ChannelMonitorUpdateStatus::InProgress`] and eventually complete. If a failure truly
+       /// cannot be retried, the node should shut down immediately after returning
+       /// [`ChannelMonitorUpdateStatus::UnrecoverableError`], see its documentation for more info.
+       ///
+       /// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
        fn update_channel(&self, funding_txo: OutPoint, update: &ChannelMonitorUpdate) -> ChannelMonitorUpdateStatus;
 
        /// Returns any monitor events since the last call. Subsequent calls must only return new
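For custom `Watch` implementations, the updated `update_channel` contract amounts to a small mapping: an update that cannot be applied pauses the channel indefinitely via `InProgress`, and persistence outcomes map to `Completed` or `InProgress` just as with `Persist`. A rough standalone sketch, where `Status` and `apply_update` stand in for `ChannelMonitorUpdateStatus` and `ChannelMonitor::update_monitor`:

    #[derive(Debug)]
    enum Status { Completed, InProgress, UnrecoverableError }

    // Stand-in for ChannelMonitor::update_monitor, which returns Err(()) if the
    // update could not be applied (e.g. the monitor already force-closed the channel).
    fn apply_update(update_applies: bool) -> Result<(), ()> {
        if update_applies { Ok(()) } else { Err(()) }
    }

    fn update_channel_status(update_applies: bool, persisted: bool) -> Status {
        match apply_update(update_applies) {
            // Never mark this update complete; the channel resolves once the funding
            // spend is seen on-chain.
            Err(()) => Status::InProgress,
            Ok(()) if persisted => Status::Completed,
            // Persistence still pending or being retried in the background.
            Ok(()) => Status::InProgress,
        }
    }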
index 33f4bbc8c26346a75af2b77880d2d7f8d0e06fa7..dc9bbd3e4048c902116b875a1a995e5186302266 100644 (file)
@@ -37,43 +37,6 @@ use bitcoin::hashes::Hash;
 use crate::prelude::*;
 use crate::sync::{Arc, Mutex};
 
-#[test]
-fn test_simple_monitor_permanent_update_fail() {
-       // Test that we handle a simple permanent monitor update failure
-       let chanmon_cfgs = create_chanmon_cfgs(2);
-       let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
-       let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
-       let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
-       create_announced_chan_between_nodes(&nodes, 0, 1);
-
-       let (route, payment_hash_1, _, payment_secret_1) = get_route_and_payment_hash!(&nodes[0], nodes[1], 1000000);
-       chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::PermanentFailure);
-       unwrap_send_err!(nodes[0].node.send_payment_with_route(&route, payment_hash_1,
-                       RecipientOnionFields::secret_only(payment_secret_1), PaymentId(payment_hash_1.0)
-               ), true, APIError::ChannelUnavailable {..}, {});
-       check_added_monitors!(nodes[0], 2);
-
-       let events_1 = nodes[0].node.get_and_clear_pending_msg_events();
-       assert_eq!(events_1.len(), 2);
-       match events_1[0] {
-               MessageSendEvent::BroadcastChannelUpdate { .. } => {},
-               _ => panic!("Unexpected event"),
-       };
-       match events_1[1] {
-               MessageSendEvent::HandleError { node_id, .. } => assert_eq!(node_id, nodes[1].node.get_our_node_id()),
-               _ => panic!("Unexpected event"),
-       };
-
-       assert!(nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().is_empty());
-
-       // TODO: Once we hit the chain with the failure transaction we should check that we get a
-       // PaymentPathFailed event
-
-       assert_eq!(nodes[0].node.list_channels().len(), 0);
-       check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError { err: "ChannelMonitor storage failure".to_string() },
-               [nodes[1].node.get_our_node_id()], 100000);
-}
-
 #[test]
 fn test_monitor_and_persister_update_fail() {
        // Test that if both updating the `ChannelMonitor` and persisting the updated
@@ -117,14 +80,11 @@ fn test_monitor_and_persister_update_fail() {
                        new_monitor
                };
                let chain_mon = test_utils::TestChainMonitor::new(Some(&chain_source), &tx_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
-               assert_eq!(chain_mon.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
+               assert_eq!(chain_mon.watch_channel(outpoint, new_monitor), Ok(ChannelMonitorUpdateStatus::Completed));
                chain_mon
        };
        chain_mon.chain_monitor.block_connected(&create_dummy_block(BlockHash::all_zeros(), 42, Vec::new()), 200);
 
-       // Set the persister's return value to be a InProgress.
-       persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
-
        // Try to update ChannelMonitor
        nodes[1].node.claim_funds(preimage);
        expect_payment_claimed!(nodes[1], payment_hash, 9_000_000);
@@ -133,17 +93,21 @@ fn test_monitor_and_persister_update_fail() {
        let updates = get_htlc_update_msgs!(nodes[1], nodes[0].node.get_our_node_id());
        assert_eq!(updates.update_fulfill_htlcs.len(), 1);
        nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &updates.update_fulfill_htlcs[0]);
+
        {
                let mut node_0_per_peer_lock;
                let mut node_0_peer_state_lock;
                if let ChannelPhase::Funded(ref mut channel) = get_channel_ref!(nodes[0], nodes[1], node_0_per_peer_lock, node_0_peer_state_lock, chan.2) {
                        if let Ok(Some(update)) = channel.commitment_signed(&updates.commitment_signed, &node_cfgs[0].logger) {
-                               // Check that even though the persister is returning a InProgress,
-                               // because the update is bogus, ultimately the error that's returned
-                               // should be a PermanentFailure.
-                               if let ChannelMonitorUpdateStatus::PermanentFailure = chain_mon.chain_monitor.update_channel(outpoint, &update) {} else { panic!("Expected monitor error to be permanent"); }
-                               logger.assert_log_regex("lightning::chain::chainmonitor", regex::Regex::new("Persistence of ChannelMonitorUpdate for channel [0-9a-f]* in progress").unwrap(), 1);
-                               assert_eq!(nodes[0].chain_monitor.update_channel(outpoint, &update), ChannelMonitorUpdateStatus::Completed);
+                               // Check that the persister returns InProgress (and will never actually complete)
+                               // as the monitor update errors.
+                               if let ChannelMonitorUpdateStatus::InProgress = chain_mon.chain_monitor.update_channel(outpoint, &update) {} else { panic!("Expected monitor paused"); }
+                               logger.assert_log_regex("lightning::chain::chainmonitor", regex::Regex::new("Failed to update ChannelMonitor for channel [0-9a-f]*.").unwrap(), 1);
+
+                               // Apply the monitor update to the original ChainMonitor, ensuring the
+                               // ChannelManager and ChannelMonitor aren't out of sync.
+                               assert_eq!(nodes[0].chain_monitor.update_channel(outpoint, &update),
+                                       ChannelMonitorUpdateStatus::Completed);
                        } else { assert!(false); }
                } else {
                        assert!(false);
@@ -151,8 +115,7 @@ fn test_monitor_and_persister_update_fail() {
        }
 
        check_added_monitors!(nodes[0], 1);
-       let events = nodes[0].node.get_and_clear_pending_events();
-       assert_eq!(events.len(), 1);
+       expect_payment_sent(&nodes[0], preimage, None, false, false);
 }
 
 fn do_test_simple_monitor_temporary_update_fail(disconnect: bool) {
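The rewritten test above relies on `InProgress` never spuriously failing a channel. For an asynchronous persister, the background side of that contract might look like the following sketch; the function and its closures are illustrative only (in LDK, completion would be signalled via `ChainMonitor::channel_monitor_updated` for the relevant funding outpoint and update id).

// Sketch only: keep retrying a monitor write that initially returned InProgress,
// and notify completion once it is durable.
fn retry_until_persisted<E>(
    mut try_write: impl FnMut() -> Result<(), E>,
    notify_completed: impl FnOnce(),
) {
    loop {
        if try_write().is_ok() {
            // Only after the write is durable do we report completion, which releases
            // any channel messages held while the update was in flight.
            notify_completed();
            return;
        }
        std::thread::sleep(std::time::Duration::from_secs(1));
    }
}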
@@ -2675,68 +2638,6 @@ fn test_temporary_error_during_shutdown() {
        check_closed_event!(nodes[0], 1, ClosureReason::CooperativeClosure, [nodes[1].node.get_our_node_id()], 100000);
 }
 
-#[test]
-fn test_permanent_error_during_sending_shutdown() {
-       // Test that permanent failures when updating the monitor's shutdown script result in a force
-       // close when initiating a cooperative close.
-       let mut config = test_default_channel_config();
-       config.channel_handshake_config.commit_upfront_shutdown_pubkey = false;
-
-       let chanmon_cfgs = create_chanmon_cfgs(2);
-       let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
-       let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[Some(config), None]);
-       let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
-
-       let channel_id = create_announced_chan_between_nodes(&nodes, 0, 1).2;
-       chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::PermanentFailure);
-
-       assert!(nodes[0].node.close_channel(&channel_id, &nodes[1].node.get_our_node_id()).is_ok());
-
-       // We always send the `shutdown` response when initiating a shutdown, even if we immediately
-       // close the channel thereafter.
-       let msg_events = nodes[0].node.get_and_clear_pending_msg_events();
-       assert_eq!(msg_events.len(), 3);
-       if let MessageSendEvent::SendShutdown { .. } = msg_events[0] {} else { panic!(); }
-       if let MessageSendEvent::BroadcastChannelUpdate { .. } = msg_events[1] {} else { panic!(); }
-       if let MessageSendEvent::HandleError { .. } =  msg_events[2] {} else { panic!(); }
-
-       check_added_monitors!(nodes[0], 2);
-       check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError { err: "ChannelMonitor storage failure".to_string() },
-               [nodes[1].node.get_our_node_id()], 100000);
-}
-
-#[test]
-fn test_permanent_error_during_handling_shutdown() {
-       // Test that permanent failures when updating the monitor's shutdown script result in a force
-       // close when handling a cooperative close.
-       let mut config = test_default_channel_config();
-       config.channel_handshake_config.commit_upfront_shutdown_pubkey = false;
-
-       let chanmon_cfgs = create_chanmon_cfgs(2);
-       let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
-       let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, Some(config)]);
-       let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
-
-       let channel_id = create_announced_chan_between_nodes(&nodes, 0, 1).2;
-       chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::PermanentFailure);
-
-       assert!(nodes[0].node.close_channel(&channel_id, &nodes[1].node.get_our_node_id()).is_ok());
-       let shutdown = get_event_msg!(nodes[0], MessageSendEvent::SendShutdown, nodes[1].node.get_our_node_id());
-       nodes[1].node.handle_shutdown(&nodes[0].node.get_our_node_id(), &shutdown);
-
-       // We always send the `shutdown` response when receiving a shutdown, even if we immediately
-       // close the channel thereafter.
-       let msg_events = nodes[1].node.get_and_clear_pending_msg_events();
-       assert_eq!(msg_events.len(), 3);
-       if let MessageSendEvent::SendShutdown { .. } = msg_events[0] {} else { panic!(); }
-       if let MessageSendEvent::BroadcastChannelUpdate { .. } = msg_events[1] {} else { panic!(); }
-       if let MessageSendEvent::HandleError { .. } =  msg_events[2] {} else { panic!(); }
-
-       check_added_monitors!(nodes[1], 2);
-       check_closed_event!(nodes[1], 1, ClosureReason::ProcessingError { err: "ChannelMonitor storage failure".to_string() },
-               [nodes[0].node.get_our_node_id()], 100000);
-}
-
 #[test]
 fn double_temp_error() {
        // Test that it's OK to have multiple `ChainMonitor::update_channel` calls fail in a row.
index 0cadbd41a29d21ccb99de1e0d7bf84319e33971c..8b524d8f3f4d81647f57853b618124960fd8ff84 100644 (file)
@@ -923,12 +923,14 @@ where
 /// called [`funding_transaction_generated`] for outbound channels) being closed.
 ///
 /// Note that you can be a bit lazier about writing out `ChannelManager` than you can be with
-/// [`ChannelMonitor`]. With [`ChannelMonitor`] you MUST write each monitor update out to disk before
-/// returning from [`chain::Watch::watch_channel`]/[`update_channel`], with ChannelManagers, writing updates
-/// happens out-of-band (and will prevent any other `ChannelManager` operations from occurring during
-/// the serialization process). If the deserialized version is out-of-date compared to the
-/// [`ChannelMonitor`] passed by reference to [`read`], those channels will be force-closed based on the
-/// `ChannelMonitor` state and no funds will be lost (mod on-chain transaction fees).
+/// [`ChannelMonitor`]. With [`ChannelMonitor`]s you MUST durably write each
+/// [`ChannelMonitorUpdate`] before returning from
+/// [`chain::Watch::watch_channel`]/[`update_channel`], or, when persisting asynchronously, before
+/// indicating that the write has completed. With
+/// `ChannelManager`s, writing updates happens out-of-band (and will prevent any other
+/// `ChannelManager` operations from occurring during the serialization process). If the
+/// deserialized version is out-of-date compared to the [`ChannelMonitor`] passed by reference to
+/// [`read`], those channels will be force-closed based on the `ChannelMonitor` state and no funds
+/// will be lost (modulo on-chain transaction fees).
 ///
 /// Note that the deserializer is only implemented for `(`[`BlockHash`]`, `[`ChannelManager`]`)`, which
 /// tells you the last block hash which was connected. You should get the best block tip before using the manager.
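To make the ordering described above concrete, here is a minimal sketch assuming one flat file per object (file names and layout are illustrative, not LDK's persistence format): the monitor bytes must be fsync'd before the update is reported complete, while the manager bytes may be written later at the cost of a force-close on restart if they turn out stale.

use std::fs::File;
use std::io::Write;

// Minimal sketch of the required ordering, not a real persistence backend.
fn persist_monitor_then_manager(monitor_bytes: &[u8], manager_bytes: &[u8]) -> std::io::Result<()> {
    let mut mon_file = File::create("channel_monitor.bin")?;
    mon_file.write_all(monitor_bytes)?;
    // Durability first: only after this fsync may the monitor update be treated as completed.
    mon_file.sync_all()?;

    // The ChannelManager write may lag; a stale copy only costs a force-close on reload.
    let mut mgr_file = File::create("channel_manager.bin")?;
    mgr_file.write_all(manager_bytes)?;
    mgr_file.sync_all()?;
    Ok(())
}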
@@ -2040,56 +2042,30 @@ macro_rules! handle_monitor_update_completion {
 }
 
 macro_rules! handle_new_monitor_update {
-       ($self: ident, $update_res: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr, _internal, $remove: expr, $completed: expr) => { {
-               // update_maps_on_chan_removal needs to be able to take id_to_peer, so make sure we can in
-               // any case so that it won't deadlock.
-               debug_assert_ne!($self.id_to_peer.held_by_thread(), LockHeldState::HeldByThread);
+       ($self: ident, $update_res: expr, $chan: expr, _internal, $completed: expr) => { {
                debug_assert!($self.background_events_processed_since_startup.load(Ordering::Acquire));
                match $update_res {
+                       ChannelMonitorUpdateStatus::UnrecoverableError => {
+                               let err_str = "ChannelMonitor[Update] persistence failed unrecoverably. This indicates we cannot continue normal operation and must shut down.";
+                               log_error!($self.logger, "{}", err_str);
+                               panic!("{}", err_str);
+                       },
                        ChannelMonitorUpdateStatus::InProgress => {
                                log_debug!($self.logger, "ChannelMonitor update for {} in flight, holding messages until the update completes.",
                                        &$chan.context.channel_id());
-                               Ok(false)
-                       },
-                       ChannelMonitorUpdateStatus::PermanentFailure => {
-                               log_error!($self.logger, "Closing channel {} due to monitor update ChannelMonitorUpdateStatus::PermanentFailure",
-                                       &$chan.context.channel_id());
-                               update_maps_on_chan_removal!($self, &$chan.context);
-                               let res = Err(MsgHandleErrInternal::from_finish_shutdown(
-                                       "ChannelMonitor storage failure".to_owned(), $chan.context.channel_id(),
-                                       $chan.context.get_user_id(), $chan.context.force_shutdown(false),
-                                       $self.get_channel_update_for_broadcast(&$chan).ok(), $chan.context.get_value_satoshis()));
-                               $remove;
-                               res
+                               false
                        },
                        ChannelMonitorUpdateStatus::Completed => {
                                $completed;
-                               Ok(true)
+                               true
                        },
                }
        } };
-       ($self: ident, $update_res: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr, MANUALLY_REMOVING_INITIAL_MONITOR, $remove: expr) => {
-               handle_new_monitor_update!($self, $update_res, $peer_state_lock, $peer_state,
-                       $per_peer_state_lock, $chan, _internal, $remove,
+       ($self: ident, $update_res: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr, INITIAL_MONITOR) => {
+               handle_new_monitor_update!($self, $update_res, $chan, _internal,
                        handle_monitor_update_completion!($self, $peer_state_lock, $peer_state, $per_peer_state_lock, $chan))
        };
-       ($self: ident, $update_res: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan_entry: expr, INITIAL_MONITOR) => {
-               if let ChannelPhase::Funded(chan) = $chan_entry.get_mut() {
-                       handle_new_monitor_update!($self, $update_res, $peer_state_lock, $peer_state,
-                               $per_peer_state_lock, chan, MANUALLY_REMOVING_INITIAL_MONITOR, { $chan_entry.remove() })
-               } else {
-                       // We're not supposed to handle monitor updates for unfunded channels (they have no monitors to
-                       // update).
-                       debug_assert!(false);
-                       let channel_id = *$chan_entry.key();
-                       let (_, err) = convert_chan_phase_err!($self, ChannelError::Close(
-                               "Cannot update monitor for unfunded channels as they don't have monitors yet".into()),
-                               $chan_entry.get_mut(), &channel_id);
-                       $chan_entry.remove();
-                       Err(err)
-               }
-       };
-       ($self: ident, $funding_txo: expr, $update: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr, MANUALLY_REMOVING, $remove: expr) => { {
+       ($self: ident, $funding_txo: expr, $update: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr) => { {
                let in_flight_updates = $peer_state.in_flight_monitor_updates.entry($funding_txo)
                        .or_insert_with(Vec::new);
                // During startup, we push monitor updates as background events through to here in
@@ -2101,8 +2077,7 @@ macro_rules! handle_new_monitor_update {
                                in_flight_updates.len() - 1
                        });
                let update_res = $self.chain_monitor.update_channel($funding_txo, &in_flight_updates[idx]);
-               handle_new_monitor_update!($self, update_res, $peer_state_lock, $peer_state,
-                       $per_peer_state_lock, $chan, _internal, $remove,
+               handle_new_monitor_update!($self, update_res, $chan, _internal,
                        {
                                let _ = in_flight_updates.remove(idx);
                                if in_flight_updates.is_empty() && $chan.blocked_monitor_updates_pending() == 0 {
@@ -2110,22 +2085,6 @@ macro_rules! handle_new_monitor_update {
                                }
                        })
        } };
-       ($self: ident, $funding_txo: expr, $update: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan_entry: expr) => {
-               if let ChannelPhase::Funded(chan) = $chan_entry.get_mut() {
-                       handle_new_monitor_update!($self, $funding_txo, $update, $peer_state_lock, $peer_state,
-                               $per_peer_state_lock, chan, MANUALLY_REMOVING, { $chan_entry.remove() })
-               } else {
-                       // We're not supposed to handle monitor updates for unfunded channels (they have no monitors to
-                       // update).
-                       debug_assert!(false);
-                       let channel_id = *$chan_entry.key();
-                       let (_, err) = convert_chan_phase_err!($self, ChannelError::Close(
-                               "Cannot update monitor for unfunded channels as they don't have monitors yet".into()),
-                               $chan_entry.get_mut(), &channel_id);
-                       $chan_entry.remove();
-                       Err(err)
-               }
-       }
 }
 
 macro_rules! process_events_body {
@@ -2538,61 +2497,64 @@ where
                let _persistence_guard = PersistenceNotifierGuard::notify_on_drop(self);
 
                let mut failed_htlcs: Vec<(HTLCSource, PaymentHash)>;
-               let result: Result<(), _> = loop {
-                       {
-                               let per_peer_state = self.per_peer_state.read().unwrap();
+               loop {
+                       let per_peer_state = self.per_peer_state.read().unwrap();
 
-                               let peer_state_mutex = per_peer_state.get(counterparty_node_id)
-                                       .ok_or_else(|| APIError::ChannelUnavailable { err: format!("Can't find a peer matching the passed counterparty node_id {}", counterparty_node_id) })?;
+                       let peer_state_mutex = per_peer_state.get(counterparty_node_id)
+                               .ok_or_else(|| APIError::ChannelUnavailable { err: format!("Can't find a peer matching the passed counterparty node_id {}", counterparty_node_id) })?;
 
-                               let mut peer_state_lock = peer_state_mutex.lock().unwrap();
-                               let peer_state = &mut *peer_state_lock;
+                       let mut peer_state_lock = peer_state_mutex.lock().unwrap();
+                       let peer_state = &mut *peer_state_lock;
 
-                               match peer_state.channel_by_id.entry(channel_id.clone()) {
-                                       hash_map::Entry::Occupied(mut chan_phase_entry) => {
-                                               if let ChannelPhase::Funded(chan) = chan_phase_entry.get_mut() {
-                                                       let funding_txo_opt = chan.context.get_funding_txo();
-                                                       let their_features = &peer_state.latest_features;
-                                                       let (shutdown_msg, mut monitor_update_opt, htlcs) =
-                                                               chan.get_shutdown(&self.signer_provider, their_features, target_feerate_sats_per_1000_weight, override_shutdown_script)?;
-                                                       failed_htlcs = htlcs;
+                       match peer_state.channel_by_id.entry(channel_id.clone()) {
+                               hash_map::Entry::Occupied(mut chan_phase_entry) => {
+                                       if let ChannelPhase::Funded(chan) = chan_phase_entry.get_mut() {
+                                               let funding_txo_opt = chan.context.get_funding_txo();
+                                               let their_features = &peer_state.latest_features;
+                                               let (shutdown_msg, mut monitor_update_opt, htlcs) =
+                                                       chan.get_shutdown(&self.signer_provider, their_features, target_feerate_sats_per_1000_weight, override_shutdown_script)?;
+                                               failed_htlcs = htlcs;
+
+                                               // We can send the `shutdown` message before updating the `ChannelMonitor`
+                                               // here as we don't need the monitor update to complete until we send a
+                                               // `closing_signed`, which we'll delay if we're pending a monitor update.
+                                               peer_state.pending_msg_events.push(events::MessageSendEvent::SendShutdown {
+                                                       node_id: *counterparty_node_id,
+                                                       msg: shutdown_msg,
+                                               });
 
-                                                       // We can send the `shutdown` message before updating the `ChannelMonitor`
-                                                       // here as we don't need the monitor update to complete until we send a
-                                                       // `shutdown_signed`, which we'll delay if we're pending a monitor update.
-                                                       peer_state.pending_msg_events.push(events::MessageSendEvent::SendShutdown {
-                                                               node_id: *counterparty_node_id,
-                                                               msg: shutdown_msg,
-                                                       });
+                                               debug_assert!(monitor_update_opt.is_none() || !chan.is_shutdown(),
+                                                       "We can't both complete shutdown and generate a monitor update");
 
-                                                       // Update the monitor with the shutdown script if necessary.
-                                                       if let Some(monitor_update) = monitor_update_opt.take() {
-                                                               break handle_new_monitor_update!(self, funding_txo_opt.unwrap(), monitor_update,
-                                                                       peer_state_lock, peer_state, per_peer_state, chan_phase_entry).map(|_| ());
-                                                       }
+                                               // Update the monitor with the shutdown script if necessary.
+                                               if let Some(monitor_update) = monitor_update_opt.take() {
+                                                       handle_new_monitor_update!(self, funding_txo_opt.unwrap(), monitor_update,
+                                                               peer_state_lock, peer_state, per_peer_state, chan);
+                                                       break;
+                                               }
 
-                                                       if chan.is_shutdown() {
-                                                               if let ChannelPhase::Funded(chan) = remove_channel_phase!(self, chan_phase_entry) {
-                                                                       if let Ok(channel_update) = self.get_channel_update_for_broadcast(&chan) {
-                                                                               peer_state.pending_msg_events.push(events::MessageSendEvent::BroadcastChannelUpdate {
-                                                                                       msg: channel_update
-                                                                               });
-                                                                       }
-                                                                       self.issue_channel_close_events(&chan.context, ClosureReason::HolderForceClosed);
+                                               if chan.is_shutdown() {
+                                                       if let ChannelPhase::Funded(chan) = remove_channel_phase!(self, chan_phase_entry) {
+                                                               if let Ok(channel_update) = self.get_channel_update_for_broadcast(&chan) {
+                                                                       peer_state.pending_msg_events.push(events::MessageSendEvent::BroadcastChannelUpdate {
+                                                                               msg: channel_update
+                                                                       });
                                                                }
+                                                               self.issue_channel_close_events(&chan.context, ClosureReason::HolderForceClosed);
                                                        }
-                                                       break Ok(());
                                                }
-                                       },
-                                       hash_map::Entry::Vacant(_) => (),
-                               }
+                                               break;
+                                       }
+                               },
+                               hash_map::Entry::Vacant(_) => {
+                                       // If we reach this point, it means that the channel_id either refers to an unfunded channel or
+                                       // it does not exist for this peer. Either way, we can attempt to force-close it.
+                                       //
+                                       // An appropriate error will be returned for non-existence of the channel if that's the case.
+                                       return self.force_close_channel_with_peer(&channel_id, counterparty_node_id, None, false).map(|_| ())
+                               },
                        }
-                       // If we reach this point, it means that the channel_id either refers to an unfunded channel or
-                       // it does not exist for this peer. Either way, we can attempt to force-close it.
-                       //
-                       // An appropriate error will be returned for non-existence of the channel if that's the case.
-                       return self.force_close_channel_with_peer(&channel_id, counterparty_node_id, None, false).map(|_| ())
-               };
+               }
 
                for htlc_source in failed_htlcs.drain(..) {
                        let reason = HTLCFailReason::from_failure_code(0x4000 | 8);
@@ -2600,7 +2562,6 @@ where
                        self.fail_htlc_backwards_internal(&htlc_source.0, &htlc_source.1, &reason, receiver);
                }
 
-               let _ = handle_error!(self, result, *counterparty_node_id);
                Ok(())
        }
 
@@ -3342,9 +3303,8 @@ where
                                                        }, onion_packet, None, &self.fee_estimator, &self.logger);
                                                match break_chan_phase_entry!(self, send_res, chan_phase_entry) {
                                                        Some(monitor_update) => {
-                                                               match handle_new_monitor_update!(self, funding_txo, monitor_update, peer_state_lock, peer_state, per_peer_state, chan_phase_entry) {
-                                                                       Err(e) => break Err(e),
-                                                                       Ok(false) => {
+                                                               match handle_new_monitor_update!(self, funding_txo, monitor_update, peer_state_lock, peer_state, per_peer_state, chan) {
+                                                                       false => {
                                                                                // Note that MonitorUpdateInProgress here indicates (per function
                                                                                // docs) that we will resend the commitment update once monitor
                                                                                // updating completes. Therefore, we must return an error
@@ -3353,7 +3313,7 @@ where
                                                                                // MonitorUpdateInProgress, below.
                                                                                return Err(APIError::MonitorUpdateInProgress);
                                                                        },
-                                                                       Ok(true) => {},
+                                                                       true => {},
                                                                }
                                                        },
                                                        None => {},
@@ -4535,33 +4495,29 @@ where
                                },
                                BackgroundEvent::MonitorUpdateRegeneratedOnStartup { counterparty_node_id, funding_txo, update } => {
                                        let mut updated_chan = false;
-                                       let res = {
+                                       {
                                                let per_peer_state = self.per_peer_state.read().unwrap();
                                                if let Some(peer_state_mutex) = per_peer_state.get(&counterparty_node_id) {
                                                        let mut peer_state_lock = peer_state_mutex.lock().unwrap();
                                                        let peer_state = &mut *peer_state_lock;
                                                        match peer_state.channel_by_id.entry(funding_txo.to_channel_id()) {
                                                                hash_map::Entry::Occupied(mut chan_phase) => {
-                                                                       updated_chan = true;
-                                                                       handle_new_monitor_update!(self, funding_txo, update.clone(),
-                                                                               peer_state_lock, peer_state, per_peer_state, chan_phase).map(|_| ())
+                                                                       if let ChannelPhase::Funded(chan) = chan_phase.get_mut() {
+                                                                               updated_chan = true;
+                                                                               handle_new_monitor_update!(self, funding_txo, update.clone(),
+                                                                                       peer_state_lock, peer_state, per_peer_state, chan);
+                                                                       } else {
+                                                                               debug_assert!(false, "We shouldn't have an update for a non-funded channel");
+                                                                       }
                                                                },
-                                                               hash_map::Entry::Vacant(_) => Ok(()),
+                                                               hash_map::Entry::Vacant(_) => {},
                                                        }
-                                               } else { Ok(()) }
-                                       };
+                                               }
+                                       }
                                        if !updated_chan {
                                                // TODO: Track this as in-flight even though the channel is closed.
                                                let _ = self.chain_monitor.update_channel(funding_txo, &update);
                                        }
-                                       // TODO: If this channel has since closed, we're likely providing a payment
-                                       // preimage update, which we must ensure is durable! We currently don't,
-                                       // however, ensure that.
-                                       if res.is_err() {
-                                               log_error!(self.logger,
-                                                       "Failed to provide ChannelMonitorUpdate to closed channel! This likely lost us a payment preimage!");
-                                       }
-                                       let _ = handle_error!(self, res, counterparty_node_id);
                                },
                                BackgroundEvent::MonitorUpdatesComplete { counterparty_node_id, channel_id } => {
                                        let per_peer_state = self.per_peer_state.read().unwrap();
@@ -5284,15 +5240,8 @@ where
                                                                peer_state.monitor_update_blocked_actions.entry(chan_id).or_insert(Vec::new()).push(action);
                                                        }
                                                        if !during_init {
-                                                               let res = handle_new_monitor_update!(self, prev_hop.outpoint, monitor_update, peer_state_lock,
-                                                                       peer_state, per_peer_state, chan_phase_entry);
-                                                               if let Err(e) = res {
-                                                                       // TODO: This is a *critical* error - we probably updated the outbound edge
-                                                                       // of the HTLC's monitor with a preimage. We should retry this monitor
-                                                                       // update over and over again until morale improves.
-                                                                       log_error!(self.logger, "Failed to update channel monitor with preimage {:?}", payment_preimage);
-                                                                       return Err((counterparty_node_id, e));
-                                                               }
+                                                               handle_new_monitor_update!(self, prev_hop.outpoint, monitor_update, peer_state_lock,
+                                                                       peer_state, per_peer_state, chan);
                                                        } else {
                                                                // If we're running during init we cannot update a monitor directly -
                                                                // they probably haven't actually been loaded yet. Instead, push the
@@ -5919,47 +5868,42 @@ where
                                Err(MsgHandleErrInternal::send_err_msg_no_close("Already had channel with the new channel_id".to_owned(), funding_msg.channel_id))
                        },
                        hash_map::Entry::Vacant(e) => {
-                               match self.id_to_peer.lock().unwrap().entry(chan.context.channel_id()) {
+                               let mut id_to_peer_lock = self.id_to_peer.lock().unwrap();
+                               match id_to_peer_lock.entry(chan.context.channel_id()) {
                                        hash_map::Entry::Occupied(_) => {
                                                return Err(MsgHandleErrInternal::send_err_msg_no_close(
                                                        "The funding_created message had the same funding_txid as an existing channel - funding is not possible".to_owned(),
                                                        funding_msg.channel_id))
                                        },
                                        hash_map::Entry::Vacant(i_e) => {
-                                               i_e.insert(chan.context.get_counterparty_node_id());
-                                       }
-                               }
-
-                               // There's no problem signing a counterparty's funding transaction if our monitor
-                               // hasn't persisted to disk yet - we can't lose money on a transaction that we haven't
-                               // accepted payment from yet. We do, however, need to wait to send our channel_ready
-                               // until we have persisted our monitor.
-                               let new_channel_id = funding_msg.channel_id;
-                               peer_state.pending_msg_events.push(events::MessageSendEvent::SendFundingSigned {
-                                       node_id: counterparty_node_id.clone(),
-                                       msg: funding_msg,
-                               });
+                                               let monitor_res = self.chain_monitor.watch_channel(monitor.get_funding_txo().0, monitor);
+                                               if let Ok(persist_state) = monitor_res {
+                                                       i_e.insert(chan.context.get_counterparty_node_id());
+                                                       mem::drop(id_to_peer_lock);
+
+                                                       // There's no problem signing a counterparty's funding transaction if our monitor
+                                                       // hasn't persisted to disk yet - we can't lose money on a transaction that we haven't
+                                                       // accepted payment from yet. We do, however, need to wait to send our channel_ready
+                                                       // until we have persisted our monitor.
+                                                       peer_state.pending_msg_events.push(events::MessageSendEvent::SendFundingSigned {
+                                                               node_id: counterparty_node_id.clone(),
+                                                               msg: funding_msg,
+                                                       });
 
-                               let monitor_res = self.chain_monitor.watch_channel(monitor.get_funding_txo().0, monitor);
-
-                               if let ChannelPhase::Funded(chan) = e.insert(ChannelPhase::Funded(chan)) {
-                                       let mut res = handle_new_monitor_update!(self, monitor_res, peer_state_lock, peer_state,
-                                               per_peer_state, chan, MANUALLY_REMOVING_INITIAL_MONITOR,
-                                               { peer_state.channel_by_id.remove(&new_channel_id) });
-
-                                       // Note that we reply with the new channel_id in error messages if we gave up on the
-                                       // channel, not the temporary_channel_id. This is compatible with ourselves, but the
-                                       // spec is somewhat ambiguous here. Not a huge deal since we'll send error messages for
-                                       // any messages referencing a previously-closed channel anyway.
-                                       // We do not propagate the monitor update to the user as it would be for a monitor
-                                       // that we didn't manage to store (and that we don't care about - we don't respond
-                                       // with the funding_signed so the channel can never go on chain).
-                                       if let Err(MsgHandleErrInternal { shutdown_finish: Some((res, _)), .. }) = &mut res {
-                                               res.0 = None;
+                                                       if let ChannelPhase::Funded(chan) = e.insert(ChannelPhase::Funded(chan)) {
+                                                               handle_new_monitor_update!(self, persist_state, peer_state_lock, peer_state,
+                                                                       per_peer_state, chan, INITIAL_MONITOR);
+                                                       } else {
+                                                               unreachable!("This must be a funded channel as we just inserted it.");
+                                                       }
+                                                       Ok(())
+                                               } else {
+                                                       log_error!(self.logger, "Persisting initial ChannelMonitor failed, implying the funding outpoint was duplicated");
+                                                       return Err(MsgHandleErrInternal::send_err_msg_no_close(
+                                                               "The funding_created message had the same funding_txid as an existing channel - funding is not possible".to_owned(),
+                                                               funding_msg.channel_id));
+                                               }
                                        }
-                                       res.map(|_| ())
-                               } else {
-                                       unreachable!("This must be a funded channel as we just inserted it.");
                                }
                        }
                }
@@ -5982,17 +5926,12 @@ where
                                        ChannelPhase::Funded(ref mut chan) => {
                                                let monitor = try_chan_phase_entry!(self,
                                                        chan.funding_signed(&msg, best_block, &self.signer_provider, &self.logger), chan_phase_entry);
-                                               let update_res = self.chain_monitor.watch_channel(chan.context.get_funding_txo().unwrap(), monitor);
-                                               let mut res = handle_new_monitor_update!(self, update_res, peer_state_lock, peer_state, per_peer_state, chan_phase_entry, INITIAL_MONITOR);
-                                               if let Err(MsgHandleErrInternal { ref mut shutdown_finish, .. }) = res {
-                                                       // We weren't able to watch the channel to begin with, so no updates should be made on
-                                                       // it. Previously, full_stack_target found an (unreachable) panic when the
-                                                       // monitor update contained within `shutdown_finish` was applied.
-                                                       if let Some((ref mut shutdown_finish, _)) = shutdown_finish {
-                                                               shutdown_finish.0.take();
-                                                       }
+                                               if let Ok(persist_status) = self.chain_monitor.watch_channel(chan.context.get_funding_txo().unwrap(), monitor) {
+                                                       handle_new_monitor_update!(self, persist_status, peer_state_lock, peer_state, per_peer_state, chan, INITIAL_MONITOR);
+                                                       Ok(())
+                                               } else {
+                                                       try_chan_phase_entry!(self, Err(ChannelError::Close("Channel funding outpoint was a duplicate".to_owned())), chan_phase_entry)
                                                }
-                                               res.map(|_| ())
                                        },
                                        _ => {
                                                return Err(MsgHandleErrInternal::send_err_msg_no_close("Failed to find corresponding channel".to_owned(), msg.channel_id));
@@ -6059,7 +5998,7 @@ where
 
        fn internal_shutdown(&self, counterparty_node_id: &PublicKey, msg: &msgs::Shutdown) -> Result<(), MsgHandleErrInternal> {
                let mut dropped_htlcs: Vec<(HTLCSource, PaymentHash)>;
-               let result: Result<(), _> = loop {
+               {
                        let per_peer_state = self.per_peer_state.read().unwrap();
                        let peer_state_mutex = per_peer_state.get(counterparty_node_id)
                                .ok_or_else(|| {
@@ -6094,10 +6033,9 @@ where
                                                }
                                                // Update the monitor with the shutdown script if necessary.
                                                if let Some(monitor_update) = monitor_update_opt {
-                                                       break handle_new_monitor_update!(self, funding_txo_opt.unwrap(), monitor_update,
-                                                               peer_state_lock, peer_state, per_peer_state, chan_phase_entry).map(|_| ());
+                                                       handle_new_monitor_update!(self, funding_txo_opt.unwrap(), monitor_update,
+                                                               peer_state_lock, peer_state, per_peer_state, chan);
                                                }
-                                               break Ok(());
                                        },
                                        ChannelPhase::UnfundedInboundV1(_) | ChannelPhase::UnfundedOutboundV1(_) => {
                                                let context = phase.context_mut();
@@ -6111,14 +6049,14 @@ where
                        } else {
                                return Err(MsgHandleErrInternal::send_err_msg_no_close(format!("Got a message for a channel from the wrong node! No such channel for the passed counterparty_node_id {}", counterparty_node_id), msg.channel_id))
                        }
-               };
+               }
                for htlc_source in dropped_htlcs.drain(..) {
                        let receiver = HTLCDestination::NextHopChannel { node_id: Some(counterparty_node_id.clone()), channel_id: msg.channel_id };
                        let reason = HTLCFailReason::from_failure_code(0x4000 | 8);
                        self.fail_htlc_backwards_internal(&htlc_source.0, &htlc_source.1, &reason, receiver);
                }
 
-               result
+               Ok(())
        }
 
        fn internal_closing_signed(&self, counterparty_node_id: &PublicKey, msg: &msgs::ClosingSigned) -> Result<(), MsgHandleErrInternal> {
@@ -6348,8 +6286,9 @@ where
                                        let monitor_update_opt = try_chan_phase_entry!(self, chan.commitment_signed(&msg, &self.logger), chan_phase_entry);
                                        if let Some(monitor_update) = monitor_update_opt {
                                                handle_new_monitor_update!(self, funding_txo.unwrap(), monitor_update, peer_state_lock,
-                                                       peer_state, per_peer_state, chan_phase_entry).map(|_| ())
-                                       } else { Ok(()) }
+                                                       peer_state, per_peer_state, chan);
+                                       }
+                                       Ok(())
                                } else {
                                        return try_chan_phase_entry!(self, Err(ChannelError::Close(
                                                "Got a commitment_signed message for an unfunded channel!".into())), chan_phase_entry);
@@ -6498,7 +6437,7 @@ where
        }
 
        fn internal_revoke_and_ack(&self, counterparty_node_id: &PublicKey, msg: &msgs::RevokeAndACK) -> Result<(), MsgHandleErrInternal> {
-               let (htlcs_to_fail, res) = {
+               let htlcs_to_fail = {
                        let per_peer_state = self.per_peer_state.read().unwrap();
                        let mut peer_state_lock = per_peer_state.get(counterparty_node_id)
                                .ok_or_else(|| {
@@ -6517,13 +6456,13 @@ where
                                                } else { false };
                                                let (htlcs_to_fail, monitor_update_opt) = try_chan_phase_entry!(self,
                                                        chan.revoke_and_ack(&msg, &self.fee_estimator, &self.logger, mon_update_blocked), chan_phase_entry);
-                                               let res = if let Some(monitor_update) = monitor_update_opt {
+                                               if let Some(monitor_update) = monitor_update_opt {
                                                        let funding_txo = funding_txo_opt
                                                                .expect("Funding outpoint must have been set for RAA handling to succeed");
                                                        handle_new_monitor_update!(self, funding_txo, monitor_update,
-                                                               peer_state_lock, peer_state, per_peer_state, chan_phase_entry).map(|_| ())
-                                               } else { Ok(()) };
-                                               (htlcs_to_fail, res)
+                                                               peer_state_lock, peer_state, per_peer_state, chan);
+                                               }
+                                               htlcs_to_fail
                                        } else {
                                                return try_chan_phase_entry!(self, Err(ChannelError::Close(
                                                        "Got a revoke_and_ack message for an unfunded channel!".into())), chan_phase_entry);
@@ -6533,7 +6472,7 @@ where
                        }
                };
                self.fail_holding_cell_htlcs(htlcs_to_fail, msg.channel_id, counterparty_node_id);
-               res
+               Ok(())
        }
 
        fn internal_update_fee(&self, counterparty_node_id: &PublicKey, msg: &msgs::UpdateFee) -> Result<(), MsgHandleErrInternal> {
@@ -6729,8 +6668,7 @@ where
                                                        self.fail_htlc_backwards_internal(&htlc_update.source, &htlc_update.payment_hash, &reason, receiver);
                                                }
                                        },
-                                       MonitorEvent::CommitmentTxConfirmed(funding_outpoint) |
-                                       MonitorEvent::UpdateFailed(funding_outpoint) => {
+                                       MonitorEvent::HolderForceClosed(funding_outpoint) => {
                                                let counterparty_node_id_opt = match counterparty_node_id {
                                                        Some(cp_id) => Some(cp_id),
                                                        None => {
@@ -6754,12 +6692,7 @@ where
                                                                                                msg: update
                                                                                        });
                                                                                }
-                                                                               let reason = if let MonitorEvent::UpdateFailed(_) = monitor_event {
-                                                                                       ClosureReason::ProcessingError { err: "Failed to persist ChannelMonitor update during chain sync".to_string() }
-                                                                               } else {
-                                                                                       ClosureReason::CommitmentTxConfirmed
-                                                                               };
-                                                                               self.issue_channel_close_events(&chan.context, reason);
+                                                                               self.issue_channel_close_events(&chan.context, ClosureReason::HolderForceClosed);
                                                                                pending_msg_events.push(events::MessageSendEvent::HandleError {
                                                                                        node_id: chan.context.get_counterparty_node_id(),
                                                                                        action: msgs::ErrorAction::SendErrorMessage {
@@ -6800,7 +6733,6 @@ where
        fn check_free_holding_cells(&self) -> bool {
                let mut has_monitor_update = false;
                let mut failed_htlcs = Vec::new();
-               let mut handle_errors = Vec::new();
 
                // Walk our list of channels and find any that need to update. Note that when we do find an
                // update, if it includes actions that must be taken afterwards, we have to drop the
@@ -6825,13 +6757,8 @@ where
                                                if let Some(monitor_update) = monitor_opt {
                                                        has_monitor_update = true;
 
-                                                       let channel_id: ChannelId = *channel_id;
-                                                       let res = handle_new_monitor_update!(self, funding_txo.unwrap(), monitor_update,
-                                                               peer_state_lock, peer_state, per_peer_state, chan, MANUALLY_REMOVING,
-                                                               peer_state.channel_by_id.remove(&channel_id));
-                                                       if res.is_err() {
-                                                               handle_errors.push((counterparty_node_id, res));
-                                                       }
+                                                       handle_new_monitor_update!(self, funding_txo.unwrap(), monitor_update,
+                                                               peer_state_lock, peer_state, per_peer_state, chan);
                                                        continue 'peer_loop;
                                                }
                                        }
@@ -6841,15 +6768,11 @@ where
                        break 'peer_loop;
                }
 
-               let has_update = has_monitor_update || !failed_htlcs.is_empty() || !handle_errors.is_empty();
+               let has_update = has_monitor_update || !failed_htlcs.is_empty();
                for (failures, channel_id, counterparty_node_id) in failed_htlcs.drain(..) {
                        self.fail_holding_cell_htlcs(failures, channel_id, &counterparty_node_id);
                }
 
-               for (counterparty_node_id, err) in handle_errors.drain(..) {
-                       let _ = handle_error!(self, err, counterparty_node_id);
-               }
-
                has_update
        }
 
@@ -7144,7 +7067,6 @@ where
        /// operation. It will double-check that nothing *else* is also blocking the same channel from
        /// making progress and then let any blocked [`ChannelMonitorUpdate`]s fly.
        fn handle_monitor_update_release(&self, counterparty_node_id: PublicKey, channel_funding_outpoint: OutPoint, mut completed_blocker: Option<RAAMonitorUpdateBlockingAction>) {
-               let mut errors = Vec::new();
                loop {
                        let per_peer_state = self.per_peer_state.read().unwrap();
                        if let Some(peer_state_mtx) = per_peer_state.get(&counterparty_node_id) {
@@ -7176,11 +7098,8 @@ where
                                                if let Some((monitor_update, further_update_exists)) = chan.unblock_next_blocked_monitor_update() {
                                                        log_debug!(self.logger, "Unlocking monitor updating for channel {} and updating monitor",
                                                                channel_funding_outpoint.to_channel_id());
-                                                       if let Err(e) = handle_new_monitor_update!(self, channel_funding_outpoint, monitor_update,
-                                                               peer_state_lck, peer_state, per_peer_state, chan_phase_entry)
-                                                       {
-                                                               errors.push((e, counterparty_node_id));
-                                                       }
+                                                       handle_new_monitor_update!(self, channel_funding_outpoint, monitor_update,
+                                                               peer_state_lck, peer_state, per_peer_state, chan);
                                                        if further_update_exists {
                                                                // If there are more `ChannelMonitorUpdate`s to process, restart at the
                                                                // top of the loop.
@@ -7199,10 +7118,6 @@ where
                        }
                        break;
                }
-               for (err, counterparty_node_id) in errors {
-                       let res = Err::<(), _>(err);
-                       let _ = handle_error!(self, res, counterparty_node_id);
-               }
        }
 
        fn handle_post_event_actions(&self, actions: Vec<EventCompletionAction>) {
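For context on the channelmanager.rs hunks above: with the permanent-failure path gone, handle_new_monitor_update! no longer yields an error to collect, so the handle_errors/errors vectors and their post-loop drains are dropped. A self-contained sketch of the surviving control flow in handle_monitor_update_release, using stand-in types rather than the real LDK channel structures:

// Stand-in model of the release loop in the hunk above: pop the next blocked
// monitor update for a channel, apply it, and restart from the top if further
// blocked updates remain (each iteration of the real loop re-takes the locks).
struct Channel {
	blocked_updates: Vec<u64>,
}

impl Channel {
	// Mirrors unblock_next_blocked_monitor_update(): returns the next update and
	// whether more updates are still queued behind it.
	fn unblock_next_blocked_monitor_update(&mut self) -> Option<(u64, bool)> {
		if self.blocked_updates.is_empty() { return None; }
		let update = self.blocked_updates.remove(0);
		Some((update, !self.blocked_updates.is_empty()))
	}
}

fn apply_monitor_update(_update: u64) {
	// With the permanent-failure path removed this cannot fail synchronously,
	// which is why the error-collection vector disappears from the real code.
}

fn handle_monitor_update_release(chan: &mut Channel) {
	loop {
		if let Some((update, further_update_exists)) = chan.unblock_next_blocked_monitor_update() {
			apply_monitor_update(update);
			if further_update_exists {
				// More blocked updates to process: restart at the top of the loop.
				continue;
			}
		}
		break;
	}
}

fn main() {
	let mut chan = Channel { blocked_updates: vec![1, 2, 3] };
	handle_monitor_update_release(&mut chan);
	assert!(chan.blocked_updates.is_empty());
}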
index f6684485dba0d69964686b145b9fd3b383c854cc..5a1a21e2999863b9f6af7b75da6c467b4228e7f3 100644 (file)
@@ -422,6 +422,10 @@ pub struct Node<'chan_man, 'node_cfg: 'chan_man, 'chan_mon_cfg: 'node_cfg> {
                &'chan_mon_cfg test_utils::TestLogger,
        >,
 }
+#[cfg(feature = "std")]
+impl<'a, 'b, 'c> std::panic::UnwindSafe for Node<'a, 'b, 'c> {}
+#[cfg(feature = "std")]
+impl<'a, 'b, 'c> std::panic::RefUnwindSafe for Node<'a, 'b, 'c> {}
 impl<'a, 'b, 'c> Node<'a, 'b, 'c> {
        pub fn best_block_hash(&self) -> BlockHash {
                self.blocks.lock().unwrap().last().unwrap().0.block_hash()
@@ -578,7 +582,7 @@ impl<'a, 'b, 'c> Drop for Node<'a, 'b, 'c> {
                        let chain_source = test_utils::TestChainSource::new(Network::Testnet);
                        let chain_monitor = test_utils::TestChainMonitor::new(Some(&chain_source), &broadcaster, &self.logger, &feeest, &persister, &self.keys_manager);
                        for deserialized_monitor in deserialized_monitors.drain(..) {
-                               if chain_monitor.watch_channel(deserialized_monitor.get_funding_txo().0, deserialized_monitor) != ChannelMonitorUpdateStatus::Completed {
+                               if chain_monitor.watch_channel(deserialized_monitor.get_funding_txo().0, deserialized_monitor) != Ok(ChannelMonitorUpdateStatus::Completed) {
                                        panic!();
                                }
                        }
@@ -977,7 +981,7 @@ pub fn _reload_node<'a, 'b, 'c>(node: &'a Node<'a, 'b, 'c>, default_config: User
 
        for monitor in monitors_read.drain(..) {
                assert_eq!(node.chain_monitor.watch_channel(monitor.get_funding_txo().0, monitor),
-                       ChannelMonitorUpdateStatus::Completed);
+                       Ok(ChannelMonitorUpdateStatus::Completed));
                check_added_monitors!(node, 1);
        }
 
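The functional_test_utils.rs changes above track the new Watch::watch_channel signature, which returns Result<ChannelMonitorUpdateStatus, ()> rather than a bare status, so the tests now assert on Ok(ChannelMonitorUpdateStatus::Completed). A minimal sketch of how a caller distinguishes the outcomes, using stand-in types rather than the real lightning API (exactly when an implementation returns Err(()) is implementation-defined; a duplicate funding outpoint is one plausible case):

#[derive(Debug, PartialEq)]
enum UpdateStatus { Completed, InProgress }

// Stand-in for the Result now returned by chain::Watch::watch_channel.
fn monitor_accepted(res: Result<UpdateStatus, ()>) -> bool {
	match res {
		// The monitor was accepted and persisted synchronously.
		Ok(UpdateStatus::Completed) => true,
		// The monitor was accepted but persistence is still pending; the channel
		// stays paused until the persister signals completion.
		Ok(UpdateStatus::InProgress) => true,
		// The monitor was rejected outright (e.g. one for the same funding
		// outpoint may already be watched); the channel cannot be used.
		Err(()) => false,
	}
}

fn main() {
	assert!(monitor_accepted(Ok(UpdateStatus::Completed)));
	assert!(monitor_accepted(Ok(UpdateStatus::InProgress)));
	assert!(!monitor_accepted(Err(())));
}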
index b252a352c48df981c2a2d102ba188c9943942143..fa766f0ab552abf624d8a25cc2903fcf9e6952ff 100644 (file)
@@ -2408,6 +2408,7 @@ fn channel_monitor_network_test() {
                }
                check_added_monitors!(nodes[4], 1);
                test_txn_broadcast(&nodes[4], &chan_4, None, HTLCType::SUCCESS);
+               check_closed_event!(nodes[4], 1, ClosureReason::HolderForceClosed, [nodes[3].node.get_our_node_id()], 100000);
 
                mine_transaction(&nodes[4], &node_txn[0]);
                check_preimage_claim(&nodes[4], &node_txn);
@@ -2419,9 +2420,8 @@ fn channel_monitor_network_test() {
        assert_eq!(nodes[4].node.list_channels().len(), 0);
 
        assert_eq!(nodes[3].chain_monitor.chain_monitor.watch_channel(OutPoint { txid: chan_3.3.txid(), index: 0 }, chan_3_mon),
-               ChannelMonitorUpdateStatus::Completed);
-       check_closed_event!(nodes[3], 1, ClosureReason::CommitmentTxConfirmed, [nodes[4].node.get_our_node_id()], 100000);
-       check_closed_event!(nodes[4], 1, ClosureReason::CommitmentTxConfirmed, [nodes[3].node.get_our_node_id()], 100000);
+               Ok(ChannelMonitorUpdateStatus::Completed));
+       check_closed_event!(nodes[3], 1, ClosureReason::HolderForceClosed, [nodes[4].node.get_our_node_id()], 100000);
 }
 
 #[test]
@@ -5660,7 +5660,7 @@ fn do_htlc_claim_local_commitment_only(use_dust: bool) {
        test_txn_broadcast(&nodes[1], &chan, None, if use_dust { HTLCType::NONE } else { HTLCType::SUCCESS });
        check_closed_broadcast!(nodes[1], true);
        check_added_monitors!(nodes[1], 1);
-       check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed, [nodes[0].node.get_our_node_id()], 100000);
+       check_closed_event!(nodes[1], 1, ClosureReason::HolderForceClosed, [nodes[0].node.get_our_node_id()], 100000);
 }
 
 fn do_htlc_claim_current_remote_commitment_only(use_dust: bool) {
@@ -5691,7 +5691,7 @@ fn do_htlc_claim_current_remote_commitment_only(use_dust: bool) {
        test_txn_broadcast(&nodes[0], &chan, None, HTLCType::NONE);
        check_closed_broadcast!(nodes[0], true);
        check_added_monitors!(nodes[0], 1);
-       check_closed_event!(nodes[0], 1, ClosureReason::CommitmentTxConfirmed, [nodes[1].node.get_our_node_id()], 100000);
+       check_closed_event!(nodes[0], 1, ClosureReason::HolderForceClosed, [nodes[1].node.get_our_node_id()], 100000);
 }
 
 fn do_htlc_claim_previous_remote_commitment_only(use_dust: bool, check_revoke_no_close: bool) {
@@ -5737,7 +5737,7 @@ fn do_htlc_claim_previous_remote_commitment_only(use_dust: bool, check_revoke_no
                test_txn_broadcast(&nodes[0], &chan, None, HTLCType::NONE);
                check_closed_broadcast!(nodes[0], true);
                check_added_monitors!(nodes[0], 1);
-               check_closed_event!(nodes[0], 1, ClosureReason::CommitmentTxConfirmed, [nodes[1].node.get_our_node_id()], 100000);
+               check_closed_event!(nodes[0], 1, ClosureReason::HolderForceClosed, [nodes[1].node.get_our_node_id()], 100000);
        } else {
                expect_payment_failed!(nodes[0], our_payment_hash, true);
        }
@@ -8453,7 +8453,7 @@ fn test_update_err_monitor_lockdown() {
                        new_monitor
                };
                let watchtower = test_utils::TestChainMonitor::new(Some(&chain_source), &chanmon_cfgs[0].tx_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
-               assert_eq!(watchtower.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
+               assert_eq!(watchtower.watch_channel(outpoint, new_monitor), Ok(ChannelMonitorUpdateStatus::Completed));
                watchtower
        };
        let block = create_dummy_block(BlockHash::all_zeros(), 42, Vec::new());
@@ -8475,7 +8475,7 @@ fn test_update_err_monitor_lockdown() {
                let mut node_0_peer_state_lock;
                if let ChannelPhase::Funded(ref mut channel) = get_channel_ref!(nodes[0], nodes[1], node_0_per_peer_lock, node_0_peer_state_lock, chan_1.2) {
                        if let Ok(Some(update)) = channel.commitment_signed(&updates.commitment_signed, &node_cfgs[0].logger) {
-                               assert_eq!(watchtower.chain_monitor.update_channel(outpoint, &update), ChannelMonitorUpdateStatus::PermanentFailure);
+                               assert_eq!(watchtower.chain_monitor.update_channel(outpoint, &update), ChannelMonitorUpdateStatus::InProgress);
                                assert_eq!(nodes[0].chain_monitor.update_channel(outpoint, &update), ChannelMonitorUpdateStatus::Completed);
                        } else { assert!(false); }
                } else {
@@ -8526,7 +8526,7 @@ fn test_concurrent_monitor_claim() {
                        new_monitor
                };
                let watchtower = test_utils::TestChainMonitor::new(Some(&chain_source), &alice_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
-               assert_eq!(watchtower.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
+               assert_eq!(watchtower.watch_channel(outpoint, new_monitor), Ok(ChannelMonitorUpdateStatus::Completed));
                watchtower
        };
        let block = create_dummy_block(BlockHash::all_zeros(), 42, Vec::new());
@@ -8557,7 +8557,7 @@ fn test_concurrent_monitor_claim() {
                        new_monitor
                };
                let watchtower = test_utils::TestChainMonitor::new(Some(&chain_source), &bob_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
-               assert_eq!(watchtower.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
+               assert_eq!(watchtower.watch_channel(outpoint, new_monitor), Ok(ChannelMonitorUpdateStatus::Completed));
                watchtower
        };
        watchtower_bob.chain_monitor.block_connected(&create_dummy_block(BlockHash::all_zeros(), 42, Vec::new()), HTLC_TIMEOUT_BROADCAST - 1);
@@ -8577,7 +8577,7 @@ fn test_concurrent_monitor_claim() {
                if let ChannelPhase::Funded(ref mut channel) = get_channel_ref!(nodes[0], nodes[1], node_0_per_peer_lock, node_0_peer_state_lock, chan_1.2) {
                        if let Ok(Some(update)) = channel.commitment_signed(&updates.commitment_signed, &node_cfgs[0].logger) {
                                // Watchtower Alice should already have seen the block and reject the update
-                               assert_eq!(watchtower_alice.chain_monitor.update_channel(outpoint, &update), ChannelMonitorUpdateStatus::PermanentFailure);
+                               assert_eq!(watchtower_alice.chain_monitor.update_channel(outpoint, &update), ChannelMonitorUpdateStatus::InProgress);
                                assert_eq!(watchtower_bob.chain_monitor.update_channel(outpoint, &update), ChannelMonitorUpdateStatus::Completed);
                                assert_eq!(nodes[0].chain_monitor.update_channel(outpoint, &update), ChannelMonitorUpdateStatus::Completed);
                        } else { assert!(false); }
@@ -8603,7 +8603,7 @@ fn test_concurrent_monitor_claim() {
        let height = HTLC_TIMEOUT_BROADCAST + 1;
        connect_blocks(&nodes[0], height - nodes[0].best_block_info().1);
        check_closed_broadcast(&nodes[0], 1, true);
-       check_closed_event!(&nodes[0], 1, ClosureReason::CommitmentTxConfirmed, false,
+       check_closed_event!(&nodes[0], 1, ClosureReason::HolderForceClosed, false,
                [nodes[1].node.get_our_node_id()], 100000);
        watchtower_alice.chain_monitor.block_connected(&create_dummy_block(BlockHash::all_zeros(), 42, vec![bob_state_y.clone()]), height);
        check_added_monitors(&nodes[0], 1);
index ece24794f63f593ecba133660fa73a466fc547a7..cb78cda714f83d4edec2b64f44364fc88f55864d 100644 (file)
@@ -1046,14 +1046,14 @@ fn do_test_revoked_counterparty_commitment_balances(confirm_htlc_spend_first: bo
        assert!(failed_payments.is_empty());
        if let Event::PendingHTLCsForwardable { .. } = events[0] {} else { panic!(); }
        match &events[1] {
-               Event::ChannelClosed { reason: ClosureReason::CommitmentTxConfirmed, .. } => {},
+               Event::ChannelClosed { reason: ClosureReason::HolderForceClosed, .. } => {},
                _ => panic!(),
        }
 
        connect_blocks(&nodes[1], htlc_cltv_timeout + 1 - 10);
        check_closed_broadcast!(nodes[1], true);
        check_added_monitors!(nodes[1], 1);
-       check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed, [nodes[0].node.get_our_node_id()], 1000000);
+       check_closed_event!(nodes[1], 1, ClosureReason::HolderForceClosed, [nodes[0].node.get_our_node_id()], 1000000);
 
        // Prior to channel closure, B considers the preimage HTLC as its own, and otherwise only
        // lists the two on-chain timeout-able HTLCs as claimable balances.
index 3def4e3629bf2fb98d227d9a95e5b1faf6493bc9..e5af136656f8ab8328e47f796bb6e8c29139f5ac 100644 (file)
@@ -2258,12 +2258,14 @@ fn auto_retry_partial_failure() {
        let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
        let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
 
+       // Open three channels: the first has plenty of liquidity, while the second and third have ~no
+       // available liquidity, causing any outbound payments routed over them to fail immediately.
        let chan_1_id = create_announced_chan_between_nodes(&nodes, 0, 1).0.contents.short_channel_id;
-       let chan_2_id = create_announced_chan_between_nodes(&nodes, 0, 1).0.contents.short_channel_id;
-       let chan_3_id = create_announced_chan_between_nodes(&nodes, 0, 1).0.contents.short_channel_id;
+       let chan_2_id = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 989_000_000).0.contents.short_channel_id;
+       let chan_3_id = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 989_000_000).0.contents.short_channel_id;
 
        // Marshall data to send the payment
-       let amt_msat = 20_000;
+       let amt_msat = 10_000_000;
        let (_, payment_hash, payment_preimage, payment_secret) = get_route_and_payment_hash!(&nodes[0], nodes[1], amt_msat);
        #[cfg(feature = "std")]
        let payment_expiry_secs = SystemTime::UNIX_EPOCH.elapsed().unwrap().as_secs() + 60 * 60;
@@ -2278,16 +2280,6 @@ fn auto_retry_partial_failure() {
                .with_bolt11_features(invoice_features).unwrap();
        let route_params = RouteParameters::from_payment_params_and_value(payment_params, amt_msat);
 
-       // Ensure the first monitor update (for the initial send path1 over chan_1) succeeds, but the
-       // second (for the initial send path2 over chan_2) fails.
-       chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::Completed);
-       chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::PermanentFailure);
-       // Ensure third monitor update (for the retry1's path1 over chan_1) succeeds, but the fourth (for
-       // the retry1's path2 over chan_3) fails, and monitor updates succeed after that.
-       chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::Completed);
-       chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::PermanentFailure);
-       chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::Completed);
-
        // Configure the initial send, retry1 and retry2's paths.
        let send_route = Route {
                paths: vec![
@@ -2364,32 +2356,23 @@ fn auto_retry_partial_failure() {
        // Send a payment that will partially fail on send, then partially fail on retry, then succeed.
        nodes[0].node.send_payment(payment_hash, RecipientOnionFields::secret_only(payment_secret),
                PaymentId(payment_hash.0), route_params, Retry::Attempts(3)).unwrap();
-       let closed_chan_events = nodes[0].node.get_and_clear_pending_events();
-       assert_eq!(closed_chan_events.len(), 4);
-       match closed_chan_events[0] {
-               Event::ChannelClosed { .. } => {},
-               _ => panic!("Unexpected event"),
-       }
-       match closed_chan_events[1] {
+       let payment_failed_events = nodes[0].node.get_and_clear_pending_events();
+       assert_eq!(payment_failed_events.len(), 2);
+       match payment_failed_events[0] {
                Event::PaymentPathFailed { .. } => {},
                _ => panic!("Unexpected event"),
        }
-       match closed_chan_events[2] {
-               Event::ChannelClosed { .. } => {},
-               _ => panic!("Unexpected event"),
-       }
-       match closed_chan_events[3] {
+       match payment_failed_events[1] {
                Event::PaymentPathFailed { .. } => {},
                _ => panic!("Unexpected event"),
        }
 
        // Pass the first part of the payment along the path.
-       check_added_monitors!(nodes[0], 5); // three outbound channel updates succeeded, two permanently failed
+       check_added_monitors!(nodes[0], 1); // only one HTLC actually made it out
        let mut msg_events = nodes[0].node.get_and_clear_pending_msg_events();
 
-       // First message is the first update_add, remaining messages are broadcasting channel updates and
-       // errors for the permfailed channels
-       assert_eq!(msg_events.len(), 5);
+       // Only one HTLC/channel update actually made it out
+       assert_eq!(msg_events.len(), 1);
        let mut payment_event = SendEvent::from_event(msg_events.remove(0));
 
        nodes[1].node.handle_update_add_htlc(&nodes[0].node.get_our_node_id(), &payment_event.msgs[0]);
@@ -2478,12 +2461,13 @@ fn auto_retry_zero_attempts_send_error() {
        let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
        let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
 
-       create_announced_chan_between_nodes(&nodes, 0, 1).0.contents.short_channel_id;
-       create_announced_chan_between_nodes(&nodes, 0, 1).0.contents.short_channel_id;
+       // Open a single channel that does not have sufficient liquidity for the payment we want to
+       // send.
+       let chan_id = create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 1_000_000, 989_000_000).0.contents.short_channel_id;
 
        // Marshall data to send the payment
-       let amt_msat = 20_000;
-       let (_, payment_hash, _, payment_secret) = get_route_and_payment_hash!(&nodes[0], nodes[1], amt_msat);
+       let amt_msat = 10_000_000;
+       let (_, payment_hash, payment_secret) = get_payment_preimage_hash(&nodes[1], Some(amt_msat), None);
        #[cfg(feature = "std")]
        let payment_expiry_secs = SystemTime::UNIX_EPOCH.elapsed().unwrap().as_secs() + 60 * 60;
        #[cfg(not(feature = "std"))]
@@ -2497,16 +2481,31 @@ fn auto_retry_zero_attempts_send_error() {
                .with_bolt11_features(invoice_features).unwrap();
        let route_params = RouteParameters::from_payment_params_and_value(payment_params, amt_msat);
 
-       chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::PermanentFailure);
+       // Override the route search to return a route, rather than failing at the route-finding step.
+       let send_route = Route {
+               paths: vec![
+                       Path { hops: vec![RouteHop {
+                               pubkey: nodes[1].node.get_our_node_id(),
+                               node_features: nodes[1].node.node_features(),
+                               short_channel_id: chan_id,
+                               channel_features: nodes[1].node.channel_features(),
+                               fee_msat: amt_msat,
+                               cltv_expiry_delta: 100,
+                               maybe_announced_channel: true,
+                       }], blinded_tail: None },
+               ],
+               route_params: Some(route_params.clone()),
+       };
+       nodes[0].router.expect_find_route(route_params.clone(), Ok(send_route));
+
        nodes[0].node.send_payment(payment_hash, RecipientOnionFields::secret_only(payment_secret),
                PaymentId(payment_hash.0), route_params, Retry::Attempts(0)).unwrap();
-       assert_eq!(nodes[0].node.get_and_clear_pending_msg_events().len(), 2); // channel close messages
+       assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
        let events = nodes[0].node.get_and_clear_pending_events();
-       assert_eq!(events.len(), 3);
-       if let Event::ChannelClosed { .. } = events[0] { } else { panic!(); }
-       if let Event::PaymentPathFailed { .. } = events[1] { } else { panic!(); }
-       if let Event::PaymentFailed { .. } = events[2] { } else { panic!(); }
-       check_added_monitors!(nodes[0], 2);
+       assert_eq!(events.len(), 2);
+       if let Event::PaymentPathFailed { .. } = events[0] { } else { panic!(); }
+       if let Event::PaymentFailed { .. } = events[1] { } else { panic!(); }
+       check_added_monitors!(nodes[0], 0);
 }
 
 #[test]
index c3a428ae0147d85ceb8ddf2bef58cfc75a9ff34c..fa08fba99fabda640739f714dd9eb6818d2ab374 100644 (file)
@@ -448,7 +448,7 @@ fn test_manager_serialize_deserialize_inconsistent_monitor() {
 
        for monitor in node_0_monitors.drain(..) {
                assert_eq!(nodes[0].chain_monitor.watch_channel(monitor.get_funding_txo().0, monitor),
-                       ChannelMonitorUpdateStatus::Completed);
+                       Ok(ChannelMonitorUpdateStatus::Completed));
                check_added_monitors!(nodes[0], 1);
        }
        nodes[0].node = &nodes_0_deserialized;
index 7c57b553068ada64d561f805a53013569254d68b..cb3471763d4b7e2075761ed466b718cd64d6797d 100644 (file)
@@ -462,7 +462,7 @@ fn test_set_outpoints_partial_claiming() {
        // Connect blocks on node B
        connect_blocks(&nodes[1], TEST_FINAL_CLTV + LATENCY_GRACE_PERIOD_BLOCKS + 1);
        check_closed_broadcast!(nodes[1], true);
-       check_closed_event!(nodes[1], 1, ClosureReason::CommitmentTxConfirmed, [nodes[0].node.get_our_node_id()], 1000000);
+       check_closed_event!(nodes[1], 1, ClosureReason::HolderForceClosed, [nodes[0].node.get_our_node_id()], 1000000);
        check_added_monitors!(nodes[1], 1);
        // Verify node B broadcast 2 HTLC-timeout txn
        let partial_claim_tx = {
index 372a094a931bed6dcca159e4bab310cdd7863a4b..431c62c9fb83de88830319e6b87b2f7370ac7f90 100644 (file)
@@ -174,8 +174,8 @@ impl<'a, A: KVStore, M: Deref, T: Deref, ES: Deref, NS: Deref, SP: Deref, F: Der
 impl<ChannelSigner: WriteableEcdsaChannelSigner, K: KVStore> Persist<ChannelSigner> for K {
        // TODO: We really need a way for the persister to inform the user that its time to crash/shut
        // down once these start returning failure.
-       // A PermanentFailure implies we should probably just shut down the node since we're
-       // force-closing channels without even broadcasting!
+       // Once such a mechanism exists, we should return InProgress rather than UnrecoverableError here;
+       // the node should still probably be shut down, since we're not retrying persistence!
 
        fn persist_new_channel(&self, funding_txo: OutPoint, monitor: &ChannelMonitor<ChannelSigner>, _update_id: MonitorUpdateId) -> chain::ChannelMonitorUpdateStatus {
                let key = format!("{}_{}", funding_txo.txid.to_hex(), funding_txo.index);
@@ -185,7 +185,7 @@ impl<ChannelSigner: WriteableEcdsaChannelSigner, K: KVStore> Persist<ChannelSign
                        &key, &monitor.encode())
                {
                        Ok(()) => chain::ChannelMonitorUpdateStatus::Completed,
-                       Err(_) => chain::ChannelMonitorUpdateStatus::PermanentFailure,
+                       Err(_) => chain::ChannelMonitorUpdateStatus::UnrecoverableError,
                }
        }
 
@@ -197,7 +197,7 @@ impl<ChannelSigner: WriteableEcdsaChannelSigner, K: KVStore> Persist<ChannelSign
                        &key, &monitor.encode())
                {
                        Ok(()) => chain::ChannelMonitorUpdateStatus::Completed,
-                       Err(_) => chain::ChannelMonitorUpdateStatus::PermanentFailure,
+                       Err(_) => chain::ChannelMonitorUpdateStatus::UnrecoverableError,
                }
        }
 }
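The blanket KVStore impl above treats any write error as unrecoverable because it never retries persistence (the rewritten TODO notes that, ideally, it would instead return InProgress plus a signal telling the user to shut down). One way a custom persister can avoid the unrecoverable path altogether is to queue failed writes and report the update as InProgress until a retry succeeds; a self-contained sketch of that shape, using stand-in types rather than the real Persist trait (in LDK the persister would later call ChainMonitor::channel_monitor_updated to signal completion):

use std::collections::VecDeque;
use std::sync::Mutex;

#[derive(Debug, PartialEq)]
enum UpdateStatus { Completed, InProgress }

// Stand-in for a KVStore-backed persister that queues failed writes for a
// background retry instead of giving up; the blanket impl in the hunk above
// does not retry and therefore returns UnrecoverableError on any write error.
struct RetryingPersister {
	pending: Mutex<VecDeque<(String, Vec<u8>)>>,
}

impl RetryingPersister {
	fn persist(&self, key: &str, encoded_monitor: &[u8]) -> UpdateStatus {
		match std::fs::write(key, encoded_monitor) {
			Ok(()) => UpdateStatus::Completed,
			Err(_) => {
				// Queue the write and report the update as still in flight; once a
				// background task flushes it, the caller is told the update completed
				// (in LDK, via ChainMonitor::channel_monitor_updated).
				self.pending.lock().unwrap().push_back((key.to_owned(), encoded_monitor.to_vec()));
				UpdateStatus::InProgress
			},
		}
	}
}

fn main() {
	let persister = RetryingPersister { pending: Mutex::new(VecDeque::new()) };
	let _ = persister.persist("monitor_0", b"encoded monitor bytes");
}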
index 785b40befd472ba10b30fb618bd75eb9ab20c15a..d53cd39b119fac08d8ebb151204d9c4e53a2a4a3 100644 (file)
@@ -225,7 +225,7 @@ impl<'a> TestChainMonitor<'a> {
        }
 }
 impl<'a> chain::Watch<TestChannelSigner> for TestChainMonitor<'a> {
-       fn watch_channel(&self, funding_txo: OutPoint, monitor: channelmonitor::ChannelMonitor<TestChannelSigner>) -> chain::ChannelMonitorUpdateStatus {
+       fn watch_channel(&self, funding_txo: OutPoint, monitor: channelmonitor::ChannelMonitor<TestChannelSigner>) -> Result<chain::ChannelMonitorUpdateStatus, ()> {
                // At every point where we get a monitor update, we should be able to send a useful monitor
                // to a watchtower and disk...
                let mut w = TestVecWriter(Vec::new());