Do not broadcast commitment txn on Permanent mon update failure

author Matt Corallo <git@bluematt.me>

Tue, 5 Oct 2021 04:32:49 +0000 (04:32 +0000)

committer Matt Corallo <git@bluematt.me>

Thu, 15 Sep 2022 18:18:06 +0000 (18:18 +0000)
author Matt Corallo <git@bluematt.me>
Tue, 5 Oct 2021 04:32:49 +0000 (04:32 +0000)
committer Matt Corallo <git@bluematt.me>
Thu, 15 Sep 2022 18:18:06 +0000 (18:18 +0000)
diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs

index 748adbd035983b574bccd4836e2f6aecd48a9323..69cdf824a61db429725ba7808aa7820712fbb68d 100644 (file)
--- a/lightning/src/chain/channelmonitor.rs
+++ b/lightning/src/chain/channelmonitor.rs
@@ -76,12 +76,14 @@ pub struct ChannelMonitorUpdate {
         /// increasing and increase by one for each new update, with one exception specified below.
         ///
         /// This sequence number is also used to track up to which points updates which returned
-       /// ChannelMonitorUpdateErr::TemporaryFailure have been applied to all copies of a given
+       /// [`ChannelMonitorUpdateErr::TemporaryFailure`] have been applied to all copies of a given
         /// ChannelMonitor when ChannelManager::channel_monitor_updated is called.
         ///
         /// The only instance where update_id values are not strictly increasing is the case where we
         /// allow post-force-close updates with a special update ID of [`CLOSED_CHANNEL_UPDATE_ID`]. See
         /// its docs for more details.
+       ///
+       /// [`ChannelMonitorUpdateErr::TemporaryFailure`]: super::ChannelMonitorUpdateErr::TemporaryFailure
         pub update_id: u64,
  }
  
@@ -1314,14 +1316,20 @@ impl<Signer: Sign> ChannelMonitor<Signer> {
         }
  
         /// Used by ChannelManager deserialization to broadcast the latest holder state if its copy of
-       /// the Channel was out-of-date. You may use it to get a broadcastable holder toxic tx in case of
-       /// fallen-behind, i.e when receiving a channel_reestablish with a proof that our counterparty side knows
-       /// a higher revocation secret than the holder commitment number we are aware of. Broadcasting these
-       /// transactions are UNSAFE, as they allow counterparty side to punish you. Nevertheless you may want to
-       /// broadcast them if counterparty don't close channel with his higher commitment transaction after a
-       /// substantial amount of time (a month or even a year) to get back funds. Best may be to contact
-       /// out-of-band the other node operator to coordinate with him if option is available to you.
-       /// In any-case, choice is up to the user.
+       /// the Channel was out-of-date.
+       ///
+       /// You may also use this to broadcast the latest local commitment transaction, either because
+       /// a monitor update failed with [`ChannelMonitorUpdateErr::PermanentFailure`] or because we've
+       /// fallen behind (i.e. we've received proof that our counterparty side knows a revocation
+       /// secret we gave them that they shouldn't know).
+       ///
+       /// Broadcasting these transactions in the second case is UNSAFE, as they allow the
+       /// counterparty to punish you. Nevertheless, you may want to broadcast them if the
+       /// counterparty doesn't close the channel with their commitment transaction after a
+       /// substantial amount of time. It may be best to contact the other node operator out-of-band
+       /// to coordinate other options available to you. In any case, the choice is up to you.
+       ///
+       /// [`ChannelMonitorUpdateErr::PermanentFailure`]: super::ChannelMonitorUpdateErr::PermanentFailure
         pub fn get_latest_holder_commitment_txn<L: Deref>(&self, logger: &L) -> Vec<Transaction>
         where L::Target: Logger {
                 self.inner.lock().unwrap().get_latest_holder_commitment_txn(logger)
@@ -2248,7 +2256,9 @@ impl<Signer: Sign> ChannelMonitorImpl<Signer> {
                                         if *should_broadcast {
                                                 self.broadcast_latest_holder_commitment_txn(broadcaster, logger);
                                         } else if !self.holder_tx_signed {
-                                               log_error!(logger, "You have a toxic holder commitment transaction avaible in channel monitor, read comment in ChannelMonitor::get_latest_holder_commitment_txn to be informed of manual action to take");
+                                               log_error!(logger, "WARNING: You have a potentially-unsafe holder commitment transaction available to broadcast");
+                                               log_error!(logger, "    in channel monitor for channel {}!", log_bytes!(self.funding_info.0.to_channel_id()));
+                                               log_error!(logger, "    Read the docs for ChannelMonitor::get_latest_holder_commitment_txn and take manual action!");
                                         } else {
                                                 // If we generated a MonitorEvent::CommitmentTxConfirmed, the ChannelManager
                                                 // will still give us a ChannelForceClosed event with !should_broadcast, but we
diff --git a/lightning/src/chain/mod.rs b/lightning/src/chain/mod.rs

index f0544679817db3434a679662901240b99d68da54..66491d7282e4556f8121c88b07c493a54d9e0941 100644 (file)
--- a/lightning/src/chain/mod.rs
+++ b/lightning/src/chain/mod.rs
@@ -194,61 +194,67 @@ pub enum ChannelMonitorUpdateErr {
         /// our state failed, but is expected to succeed at some point in the future).
         ///
         /// Such a failure will "freeze" a channel, preventing us from revoking old states or
-       /// submitting new commitment transactions to the counterparty. Once the update(s) that failed
-       /// have been successfully applied, a [`MonitorEvent::UpdateCompleted`] event should be returned
-       /// via [`Watch::release_pending_monitor_events`] which will then restore the channel to an
-       /// operational state.
+       /// submitting new commitment transactions to the counterparty. Once the update(s) which failed
+       /// have been successfully applied, a [`MonitorEvent::UpdateCompleted`] can be used to restore
+       /// the channel to an operational state.
         ///
-       /// Note that a given ChannelManager will *never* re-generate a given ChannelMonitorUpdate. If
-       /// you return a TemporaryFailure you must ensure that it is written to disk safely before
-       /// writing out the latest ChannelManager state.
+       /// Note that a given [`ChannelManager`] will *never* re-generate a [`ChannelMonitorUpdate`].
+       /// If you return this error you must ensure the update is written to disk safely before writing
+       /// the latest [`ChannelManager`] state, or you should return [`PermanentFailure`] instead.
         ///
-       /// Even when a channel has been "frozen" updates to the ChannelMonitor can continue to occur
-       /// (eg if an inbound HTLC which we forwarded was claimed upstream resulting in us attempting
-       /// to claim it on this channel) and those updates must be applied wherever they can be. At
-       /// least one such updated ChannelMonitor must be persisted otherwise PermanentFailure should
-       /// be returned to get things on-chain ASAP using only the in-memory copy. Obviously updates to
-       /// the channel which would invalidate previous ChannelMonitors are not made when a channel has
-       /// been "frozen".
+       /// Even when a channel has been "frozen", updates to the [`ChannelMonitor`] can continue to
+       /// occur (e.g. if an inbound HTLC which we forwarded was claimed upstream, resulting in us
+       /// attempting to claim it on this channel) and those updates must still be persisted.
         ///
-       /// Note that even if updates made after TemporaryFailure succeed you must still provide a
-       /// [`MonitorEvent::UpdateCompleted`] to ensure you have the latest monitor and re-enable
-       /// normal channel operation. Note that this is normally generated through a call to
-       /// [`ChainMonitor::channel_monitor_updated`].
-       ///
-       /// Note that the update being processed here will not be replayed for you when you return a
-       /// [`MonitorEvent::UpdateCompleted`] event via [`Watch::release_pending_monitor_events`], so
-       /// you must store the update itself on your own local disk prior to returning a
-       /// TemporaryFailure. You may, of course, employ a journaling approach, storing only the
-       /// ChannelMonitorUpdate on disk without updating the monitor itself, replaying the journal at
-       /// reload-time.
+       /// No updates to the channel will be made which could invalidate other [`ChannelMonitor`]s
+       /// until a [`MonitorEvent::UpdateCompleted`] is provided, even if you return no error on a
+       /// later monitor update for the same channel.
         ///
         /// For deployments where a copy of ChannelMonitors and other local state are backed up in a
         /// remote location (with local copies persisted immediately), it is anticipated that all
         /// updates will return TemporaryFailure until the remote copies could be updated.
         ///
-       /// [`ChainMonitor::channel_monitor_updated`]: chainmonitor::ChainMonitor::channel_monitor_updated
+       /// [`PermanentFailure`]: ChannelMonitorUpdateErr::PermanentFailure
+       /// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
         TemporaryFailure,
-       /// Used to indicate no further channel monitor updates will be allowed (eg we've moved on to a
-       /// different watchtower and cannot update with all watchtowers that were previously informed
-       /// of this channel).
+       /// Used to indicate no further channel monitor updates will be allowed (likely due to a disk
+       /// failure, or a remote copy of this [`ChannelMonitor`] being unreachable and not updatable).
+       ///
+       /// When this is returned, [`ChannelManager`] will force-close the channel but *not* broadcast
+       /// our current commitment transaction. This avoids a dangerous case where a local disk failure
+       /// (e.g. the Linux-default remounting of the disk as read-only) causes [`PermanentFailure`]s
+       /// for all monitor updates. If we were to broadcast our latest commitment transaction and then
+       /// restart, we could end up reading a previous [`ChannelMonitor`] and [`ChannelManager`],
+       /// revoking our now-broadcasted state before seeing it confirm and losing all our funds.
         ///
-       /// At reception of this error, ChannelManager will force-close the channel and return at
-       /// least a final ChannelMonitorUpdate::ChannelForceClosed which must be delivered to at
-       /// least one ChannelMonitor copy. Revocation secret MUST NOT be released and offchain channel
-       /// update must be rejected.
+       /// Note that this is somewhat of a tradeoff - if the disk is really gone and we may have lost
+       /// the data permanently, we really should broadcast immediately. If the data can be recovered
+       /// with manual intervention, we'd rather close the channel, rejecting future updates to it,
+       /// and broadcast the latest state only if we have HTLCs to claim which are timing out (which
+       /// we do as long as blocks are connected).
         ///
-       /// This failure may also signal a failure to update the local persisted copy of one of
-       /// the channel monitor instance.
+       /// In order to broadcast the latest local commitment transaction, you'll need to call
+       /// [`ChannelMonitor::get_latest_holder_commitment_txn`] and broadcast the resulting
+       /// transactions once you've safely ensured no further channel updates can be generated by your
+       /// [`ChannelManager`].
         ///
-       /// Note that even when you fail a holder commitment transaction update, you must store the
-       /// update to ensure you can claim from it in case of a duplicate copy of this ChannelMonitor
-       /// broadcasts it (e.g distributed channel-monitor deployment)
+       /// Note that at least one final [`ChannelMonitorUpdate`] may still be provided, which must
+       /// still be processed by a running [`ChannelMonitor`]. This final update will mark the
+       /// [`ChannelMonitor`] as finalized, ensuring no further updates (e.g. revocation of the latest
+       /// commitment transaction) are allowed.
+       ///
+       /// Note that even if you return a [`PermanentFailure`] due to unavailability of secondary
+       /// [`ChannelMonitor`] copies, you should still make an attempt to store the update where
+       /// possible to ensure you can claim HTLC outputs on the latest commitment transaction
+       /// broadcasted later.
         ///
         /// In case of distributed watchtowers deployment, the new version must be written to disk, as
         /// state may have been stored but rejected due to a block forcing a commitment broadcast. This
         /// storage is used to claim outputs of rejected state confirmed onchain by another watchtower,
         /// lagging behind on block processing.
+       ///
+       /// [`PermanentFailure`]: ChannelMonitorUpdateErr::PermanentFailure
+       /// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
         PermanentFailure,
  }
  
@@ -278,7 +284,7 @@ pub trait Watch<ChannelSigner: Sign> {
         /// with any spends of outputs returned by [`get_outputs_to_watch`]. In practice, this means
         /// calling [`block_connected`] and [`block_disconnected`] on the monitor.
         ///
-       /// Note: this interface MUST error with `ChannelMonitorUpdateErr::PermanentFailure` if
+       /// Note: this interface MUST error with [`ChannelMonitorUpdateErr::PermanentFailure`] if
         /// the given `funding_txo` has previously been registered via `watch_channel`.
         ///
         /// [`get_outputs_to_watch`]: channelmonitor::ChannelMonitor::get_outputs_to_watch
diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs

index ced4ceee95577007e51da30e8f703942f8619e04..aca5503521e75c7b16cb3444d545226d09be057a 100644 (file)
--- a/lightning/src/ln/chanmon_update_fail_tests.rs
+++ b/lightning/src/ln/chanmon_update_fail_tests.rs
@@ -65,6 +65,8 @@ fn test_simple_monitor_permanent_update_fail() {
                 _ => panic!("Unexpected event"),
         };
  
+       assert!(nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().is_empty());
+
         // TODO: Once we hit the chain with the failure transaction we should check that we get a
         // PaymentPathFailed event
  
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs

index 4b9d67b889be28c9c9c0ab0ad45d10d44f11bc88..8872a905ef347d00ca0a84b76267a995b1c571ff 100644 (file)
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -1171,8 +1171,8 @@ pub enum PaymentSendFailure {
         ///
         /// Any entries which contain Err(APIError::MonitorUpdateFailed) or Ok(()) MUST NOT be retried
         /// as they will result in over-/re-payment. These HTLCs all either successfully sent (in the
-       /// case of Ok(())) or will send once channel_monitor_updated is called on the next-hop channel
-       /// with the latest update_id.
+       /// case of Ok(())) or will send once a [`MonitorEvent::UpdateCompleted`] is provided for the
+       /// next-hop channel with the latest update_id.
         PartialFailure {
                 /// The errors themselves, in the same order as the route hops.
                 results: Vec<Result<(), APIError>>,
@@ -1345,7 +1345,7 @@ macro_rules! handle_monitor_err {
                                 // given up the preimage yet, so might as well just wait until the payment is
                                 // retried, avoiding the on-chain fees.
                                 let res: Result<(), _> = Err(MsgHandleErrInternal::from_finish_shutdown("ChannelMonitor storage failure".to_owned(), *$chan_id, $chan.get_user_id(),
-                                               $chan.force_shutdown(true), $self.get_channel_update_for_broadcast(&$chan).ok() ));
+                                               $chan.force_shutdown(false), $self.get_channel_update_for_broadcast(&$chan).ok() ));
                                 (res, true)
                         },
                         ChannelMonitorUpdateErr::TemporaryFailure => {
@@ -4492,7 +4492,7 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
                                         // We do not do a force-close here as that would generate a monitor update for
                                         // a monitor that we didn't manage to store (and that we don't care about - we
                                         // don't respond with the funding_signed so the channel can never go on chain).
-                                       let (_monitor_update, failed_htlcs) = chan.force_shutdown(true);
+                                       let (_monitor_update, failed_htlcs) = chan.force_shutdown(false);
                                         assert!(failed_htlcs.is_empty());
                                         return Err(MsgHandleErrInternal::send_err_msg_no_close("ChannelMonitor storage failure".to_owned(), funding_msg.channel_id));
                                 },
author	Matt Corallo <git@bluematt.me>
	Tue, 5 Oct 2021 04:32:49 +0000 (04:32 +0000)
committer	Matt Corallo <git@bluematt.me>
	Thu, 15 Sep 2022 18:18:06 +0000 (18:18 +0000)
lightning/src/chain/channelmonitor.rs		patch \| blob \| history
lightning/src/chain/mod.rs		patch \| blob \| history
lightning/src/ln/chanmon_update_fail_tests.rs		patch \| blob \| history
lightning/src/ln/channelmanager.rs		patch \| blob \| history