Handling for sign_counterparty_commitment failing during normal op
authorMatt Corallo <git@bluematt.me>
Tue, 5 Sep 2023 20:46:28 +0000 (20:46 +0000)
committerChris Waterson <waterson@gmail.com>
Wed, 1 Nov 2023 21:29:59 +0000 (14:29 -0700)
If sign_counterparty_commitment fails (i.e. because the signer is
temporarily disconnected), this really indicates that we should
retry the message sending later, rather than force-closing the
channel (which probably won't even work if the signer is missing).

Here we add initial handling of sign_counterparty_commitment
failing during normal channel operation, setting a new flag in
`ChannelContext` which indicates we should retry sending the
commitment update later. We don't yet add any ability to do that
retry.

lightning/src/ln/channel.rs

index 33a46084abb4d1bba4bf3de72168c4d1b3933b7d..9ab9e7e07b060a0395a825ba3407075812bf9e4c 100644 (file)
@@ -749,6 +749,14 @@ pub(super) struct ChannelContext<SP: Deref> where SP::Target: SignerProvider {
        monitor_pending_failures: Vec<(HTLCSource, PaymentHash, HTLCFailReason)>,
        monitor_pending_finalized_fulfills: Vec<HTLCSource>,
 
+       /// If we went to send a commitment update (ie some messages then [`msgs::CommitmentSigned`])
+       /// but our signer (initially) refused to give us a signature, we should retry at some point in
+       /// the future when the signer indicates it may have a signature for us.
+       ///
+       /// This flag is set in such a case. Note that we don't need to persist this as we'll end up
+       /// setting it again as a side-effect of [`Channel::channel_reestablish`].
+       signer_pending_commitment_update: bool,
+
        // pending_update_fee is filled when sending and receiving update_fee.
        //
        // Because it follows the same commitment flow as HTLCs, `FeeUpdateState` is either `Outbound`
@@ -3166,8 +3174,8 @@ impl<SP: Deref> Channel<SP> where
                        self.context.monitor_pending_revoke_and_ack = true;
                        if need_commitment && (self.context.channel_state & (ChannelState::AwaitingRemoteRevoke as u32)) == 0 {
                                // If we were going to send a commitment_signed after the RAA, go ahead and do all
-                               // the corresponding HTLC status updates so that get_last_commitment_update
-                               // includes the right HTLCs.
+                               // the corresponding HTLC status updates so that
+                               // get_last_commitment_update_for_send includes the right HTLCs.
                                self.context.monitor_pending_commitment_signed = true;
                                let mut additional_update = self.build_commitment_no_status_check(logger);
                                // build_commitment_no_status_check may bump latest_monitor_id but we want them to be
@@ -3541,9 +3549,10 @@ impl<SP: Deref> Channel<SP> where
                        // cells) while we can't update the monitor, so we just return what we have.
                        if require_commitment {
                                self.context.monitor_pending_commitment_signed = true;
-                               // When the monitor updating is restored we'll call get_last_commitment_update(),
-                               // which does not update state, but we're definitely now awaiting a remote revoke
-                               // before we can step forward any more, so set it here.
+                               // When the monitor updating is restored we'll call
+                               // get_last_commitment_update_for_send(), which does not update state, but we're
+                               // definitely now awaiting a remote revoke before we can step forward any more, so
+                               // set it here.
                                let mut additional_update = self.build_commitment_no_status_check(logger);
                                // build_commitment_no_status_check may bump latest_monitor_id but we want them to be
                                // strictly increasing by one, so decrement it here.
@@ -3846,9 +3855,11 @@ impl<SP: Deref> Channel<SP> where
                        Some(self.get_last_revoke_and_ack())
                } else { None };
                let commitment_update = if self.context.monitor_pending_commitment_signed {
-                       self.mark_awaiting_response();
-                       Some(self.get_last_commitment_update(logger))
+                       self.get_last_commitment_update_for_send(logger).ok()
                } else { None };
+               if commitment_update.is_some() {
+                       self.mark_awaiting_response();
+               }
 
                self.context.monitor_pending_revoke_and_ack = false;
                self.context.monitor_pending_commitment_signed = false;
@@ -3909,7 +3920,8 @@ impl<SP: Deref> Channel<SP> where
                }
        }
 
-       fn get_last_commitment_update<L: Deref>(&self, logger: &L) -> msgs::CommitmentUpdate where L::Target: Logger {
+       /// Gets the last commitment update for immediate sending to our peer.
+       fn get_last_commitment_update_for_send<L: Deref>(&mut self, logger: &L) -> Result<msgs::CommitmentUpdate, ()> where L::Target: Logger {
                let mut update_add_htlcs = Vec::new();
                let mut update_fulfill_htlcs = Vec::new();
                let mut update_fail_htlcs = Vec::new();
@@ -3965,13 +3977,26 @@ impl<SP: Deref> Channel<SP> where
                        })
                } else { None };
 
-               log_trace!(logger, "Regenerated latest commitment update in channel {} with{} {} update_adds, {} update_fulfills, {} update_fails, and {} update_fail_malformeds",
+               log_trace!(logger, "Regenerating latest commitment update in channel {} with{} {} update_adds, {} update_fulfills, {} update_fails, and {} update_fail_malformeds",
                                &self.context.channel_id(), if update_fee.is_some() { " update_fee," } else { "" },
                                update_add_htlcs.len(), update_fulfill_htlcs.len(), update_fail_htlcs.len(), update_fail_malformed_htlcs.len());
-               msgs::CommitmentUpdate {
+               let commitment_signed = if let Ok(update) = self.send_commitment_no_state_update(logger).map(|(cu, _)| cu) {
+                       if self.context.signer_pending_commitment_update {
+                               log_trace!(logger, "Commitment update generated: clearing signer_pending_commitment_update");
+                               self.context.signer_pending_commitment_update = false;
+                       }
+                       update
+               } else {
+                       if !self.context.signer_pending_commitment_update {
+                               log_trace!(logger, "Commitment update awaiting signer: setting signer_pending_commitment_update");
+                               self.context.signer_pending_commitment_update = true;
+                       }
+                       return Err(());
+               };
+               Ok(msgs::CommitmentUpdate {
                        update_add_htlcs, update_fulfill_htlcs, update_fail_htlcs, update_fail_malformed_htlcs, update_fee,
-                       commitment_signed: self.send_commitment_no_state_update(logger).expect("It looks like we failed to re-generate a commitment_signed we had previously sent?").0,
-               }
+                       commitment_signed,
+               })
        }
 
        /// Gets the `Shutdown` message we should send our peer on reconnect, if any.
@@ -4151,7 +4176,7 @@ impl<SP: Deref> Channel<SP> where
                                Ok(ReestablishResponses {
                                        channel_ready, shutdown_msg, announcement_sigs,
                                        raa: required_revoke,
-                                       commitment_update: Some(self.get_last_commitment_update(logger)),
+                                       commitment_update: self.get_last_commitment_update_for_send(logger).ok(),
                                        order: self.context.resend_order.clone(),
                                })
                        }
@@ -5525,7 +5550,7 @@ impl<SP: Deref> Channel<SP> where
                                        }
 
                                        let res = ecdsa.sign_counterparty_commitment(&commitment_stats.tx, commitment_stats.preimages, &self.context.secp_ctx)
-                                               .map_err(|_| ChannelError::Close("Failed to get signatures for new commitment_signed".to_owned()))?;
+                                               .map_err(|_| ChannelError::Ignore("Failed to get signatures for new commitment_signed".to_owned()))?;
                                        signature = res.0;
                                        htlc_signatures = res.1;
 
@@ -5848,6 +5873,8 @@ impl<SP: Deref> OutboundV1Channel<SP> where SP::Target: SignerProvider {
                                monitor_pending_failures: Vec::new(),
                                monitor_pending_finalized_fulfills: Vec::new(),
 
+                               signer_pending_commitment_update: false,
+
                                #[cfg(debug_assertions)]
                                holder_max_commitment_tx_output: Mutex::new((channel_value_satoshis * 1000 - push_msat, push_msat)),
                                #[cfg(debug_assertions)]
@@ -6502,6 +6529,8 @@ impl<SP: Deref> InboundV1Channel<SP> where SP::Target: SignerProvider {
                                monitor_pending_failures: Vec::new(),
                                monitor_pending_finalized_fulfills: Vec::new(),
 
+                               signer_pending_commitment_update: false,
+
                                #[cfg(debug_assertions)]
                                holder_max_commitment_tx_output: Mutex::new((msg.push_msat, msg.funding_satoshis * 1000 - msg.push_msat)),
                                #[cfg(debug_assertions)]
@@ -7593,6 +7622,8 @@ impl<'a, 'b, 'c, ES: Deref, SP: Deref> ReadableArgs<(&'a ES, &'b SP, u32, &'c Ch
                                monitor_pending_failures,
                                monitor_pending_finalized_fulfills: monitor_pending_finalized_fulfills.unwrap(),
 
+                               signer_pending_commitment_update: false,
+
                                pending_update_fee,
                                holding_cell_update_fee,
                                next_holder_htlc_id,