Stop failing back HTLCs on peer disconnection
author    Matt Corallo <git@bluematt.me>
          Fri, 20 Nov 2020 20:49:53 +0000 (15:49 -0500)
committer Matt Corallo <git@bluematt.me>
          Fri, 21 May 2021 15:10:45 +0000 (15:10 +0000)
Previously, if we got disconnected from a peer while there were
HTLCs pending forwarding in the holding cell, we'd clear them and
fail them all backwards. This is largely fine, but since we now
have support for handling such HTLCs on reconnect, we might as
well not fail them, instead relying on our timeout logic to fail
them backwards if it takes too long to forward them.
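
The gist of the change, as a minimal sketch with simplified
stand-in types (not LDK's actual HTLCUpdateAwaitingACK/Channel
machinery):

    // Simplified stand-ins; illustrative only.
    enum HtlcUpdate {
        Add { payment_hash: [u8; 32] },
        Claim { payment_preimage: [u8; 32] },
        Fail { htlc_id: u64 },
    }

    struct HoldingCell { updates: Vec<HtlcUpdate> }

    impl HoldingCell {
        // Old behavior: on disconnect, drop any pending update_adds and
        // hand their payment hashes back so the caller can fail them
        // backwards immediately.
        fn on_peer_disconnect_old(&mut self) -> Vec<[u8; 32]> {
            let mut dropped = Vec::new();
            self.updates.retain(|upd| match upd {
                HtlcUpdate::Add { payment_hash } => {
                    dropped.push(*payment_hash);
                    false
                }
                _ => true,
            });
            dropped
        }

        // New behavior: keep the cell intact across the disconnect; the
        // adds are retried once the cell is freed after channel_reestablish
        // and only failed backwards if forwarding still isn't possible.
        fn on_peer_disconnect_new(&mut self) { /* nothing dropped */ }
    }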

lightning/src/ln/channel.rs
lightning/src/ln/channelmanager.rs

diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs
index 56d170df551b6b8353f03528d9814e3c9e70aaa0..8db346a9e3fa46782c54cb5becbb531d8fa75657 100644
--- a/lightning/src/ln/channel.rs
+++ b/lightning/src/ln/channel.rs
@@ -2695,19 +2695,16 @@ impl<Signer: Sign> Channel<Signer> {
                }
        }
 
-       /// Removes any uncommitted HTLCs, to be used on peer disconnection, including any pending
-       /// HTLCs that we intended to add but haven't as we were waiting on a remote revoke.
-       /// Returns the set of PendingHTLCStatuses from remote uncommitted HTLCs (which we're
-       /// implicitly dropping) and the payment_hashes of HTLCs we tried to add but are dropping.
+       /// Removes any uncommitted inbound HTLCs and resets the state of uncommitted outbound HTLC
+       /// updates, to be used on peer disconnection. After this, update_*_htlc messages need to be
+       /// resent.
        /// No further message handling calls may be made until a channel_reestablish dance has
        /// completed.
-       pub fn remove_uncommitted_htlcs_and_mark_paused<L: Deref>(&mut self, logger: &L) -> Vec<(HTLCSource, PaymentHash)> where L::Target: Logger {
-               let mut outbound_drops = Vec::new();
-
+       pub fn remove_uncommitted_htlcs_and_mark_paused<L: Deref>(&mut self, logger: &L) where L::Target: Logger {
                assert_eq!(self.channel_state & ChannelState::ShutdownComplete as u32, 0);
                if self.channel_state < ChannelState::FundingSent as u32 {
                        self.channel_state = ChannelState::ShutdownComplete as u32;
-                       return outbound_drops;
+                       return;
                }
                // Upon reconnect we have to start the closing_signed dance over, but shutdown messages
                // will be retransmitted.
@@ -2750,23 +2747,8 @@ impl<Signer: Sign> Channel<Signer> {
                        }
                }
 
-               self.holding_cell_htlc_updates.retain(|htlc_update| {
-                       match htlc_update {
-                               // Note that currently on channel reestablish we assert that there are
-                               // no holding cell HTLC update_adds, so if in the future we stop
-                               // dropping added HTLCs here and failing them backwards, then there will
-                               // need to be corresponding changes made in the Channel's re-establish
-                               // logic.
-                               &HTLCUpdateAwaitingACK::AddHTLC { ref payment_hash, ref source, .. } => {
-                                       outbound_drops.push((source.clone(), payment_hash.clone()));
-                                       false
-                               },
-                               &HTLCUpdateAwaitingACK::ClaimHTLC {..} | &HTLCUpdateAwaitingACK::FailHTLC {..} => true,
-                       }
-               });
                self.channel_state |= ChannelState::PeerDisconnected as u32;
-               log_debug!(logger, "Peer disconnection resulted in {} remote-announced HTLC drops and {} waiting-to-locally-announced HTLC drops on channel {}", outbound_drops.len(), inbound_drop_count, log_bytes!(self.channel_id()));
-               outbound_drops
+               log_debug!(logger, "Peer disconnection resulted in {} remote-announced HTLC drops on channel {}", inbound_drop_count, log_bytes!(self.channel_id()));
        }
 
        /// Indicates that a ChannelMonitor update failed to be stored by the client and further
@@ -2925,7 +2907,7 @@ impl<Signer: Sign> Channel<Signer> {
 
        /// May panic if some calls other than message-handling calls (which will all Err immediately)
        /// have been called between remove_uncommitted_htlcs_and_mark_paused and this call.
-       pub fn channel_reestablish<L: Deref>(&mut self, msg: &msgs::ChannelReestablish, logger: &L) -> Result<(Option<msgs::FundingLocked>, Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, Option<ChannelMonitorUpdate>, RAACommitmentOrder, Option<msgs::Shutdown>), ChannelError> where L::Target: Logger {
+       pub fn channel_reestablish<L: Deref>(&mut self, msg: &msgs::ChannelReestablish, logger: &L) -> Result<(Option<msgs::FundingLocked>, Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, Option<ChannelMonitorUpdate>, RAACommitmentOrder, Vec<(HTLCSource, PaymentHash)>, Option<msgs::Shutdown>), ChannelError> where L::Target: Logger {
                if self.channel_state & (ChannelState::PeerDisconnected as u32) == 0 {
                        // While BOLT 2 doesn't indicate explicitly we should error this channel here, it
                        // almost certainly indicates we are going to end up out-of-sync in some way, so we
@@ -2976,7 +2958,7 @@ impl<Signer: Sign> Channel<Signer> {
                                        return Err(ChannelError::Close("Peer claimed they saw a revoke_and_ack but we haven't sent funding_locked yet".to_owned()));
                                }
                                // Short circuit the whole handler as there is nothing we can resend them
-                               return Ok((None, None, None, None, RAACommitmentOrder::CommitmentFirst, shutdown_msg));
+                               return Ok((None, None, None, None, RAACommitmentOrder::CommitmentFirst, Vec::new(), shutdown_msg));
                        }
 
                        // We have OurFundingLocked set!
@@ -2984,7 +2966,7 @@ impl<Signer: Sign> Channel<Signer> {
                        return Ok((Some(msgs::FundingLocked {
                                channel_id: self.channel_id(),
                                next_per_commitment_point,
-                       }), None, None, None, RAACommitmentOrder::CommitmentFirst, shutdown_msg));
+                       }), None, None, None, RAACommitmentOrder::CommitmentFirst, Vec::new(), shutdown_msg));
                }
 
                let required_revoke = if msg.next_remote_commitment_number + 1 == INITIAL_COMMITMENT_NUMBER - self.cur_holder_commitment_transaction_number {
@@ -3025,14 +3007,6 @@ impl<Signer: Sign> Channel<Signer> {
                        }
 
                        if (self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32 | ChannelState::MonitorUpdateFailed as u32)) == 0 {
-                               // Note that if in the future we no longer drop holding cell update_adds on peer
-                               // disconnect, this logic will need to be updated.
-                               for htlc_update in self.holding_cell_htlc_updates.iter() {
-                                       if let &HTLCUpdateAwaitingACK::AddHTLC { .. } = htlc_update {
-                                               debug_assert!(false, "There shouldn't be any add-HTLCs in the holding cell now because they should have been dropped on peer disconnect. Panic here because said HTLCs won't be handled correctly.");
-                                       }
-                               }
-
                                // We're up-to-date and not waiting on a remote revoke (if we are our
                                // channel_reestablish should result in them sending a revoke_and_ack), but we may
                                // have received some updates while we were disconnected. Free the holding cell
@@ -3041,20 +3015,14 @@ impl<Signer: Sign> Channel<Signer> {
                                        Err(ChannelError::Close(msg)) => return Err(ChannelError::Close(msg)),
                                        Err(ChannelError::Ignore(_)) | Err(ChannelError::CloseDelayBroadcast(_)) => panic!("Got non-channel-failing result from free_holding_cell_htlcs"),
                                        Ok((Some((commitment_update, monitor_update)), htlcs_to_fail)) => {
-                                               // If in the future we no longer drop holding cell update_adds on peer
-                                               // disconnect, we may be handed some HTLCs to fail backwards here.
-                                               assert!(htlcs_to_fail.is_empty());
-                                               return Ok((resend_funding_locked, required_revoke, Some(commitment_update), Some(monitor_update), self.resend_order.clone(), shutdown_msg));
+                                               return Ok((resend_funding_locked, required_revoke, Some(commitment_update), Some(monitor_update), self.resend_order.clone(), htlcs_to_fail, shutdown_msg));
                                        },
                                        Ok((None, htlcs_to_fail)) => {
-                                               // If in the future we no longer drop holding cell update_adds on peer
-                                               // disconnect, we may be handed some HTLCs to fail backwards here.
-                                               assert!(htlcs_to_fail.is_empty());
-                                               return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), shutdown_msg));
+                                               return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), htlcs_to_fail, shutdown_msg));
                                        },
                                }
                        } else {
-                               return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), shutdown_msg));
+                               return Ok((resend_funding_locked, required_revoke, None, None, self.resend_order.clone(), Vec::new(), shutdown_msg));
                        }
                } else if msg.next_local_commitment_number == next_counterparty_commitment_number - 1 {
                        if required_revoke.is_some() {
@@ -3065,10 +3033,10 @@ impl<Signer: Sign> Channel<Signer> {
 
                        if self.channel_state & (ChannelState::MonitorUpdateFailed as u32) != 0 {
                                self.monitor_pending_commitment_signed = true;
-                               return Ok((resend_funding_locked, None, None, None, self.resend_order.clone(), shutdown_msg));
+                               return Ok((resend_funding_locked, None, None, None, self.resend_order.clone(), Vec::new(), shutdown_msg));
                        }
 
-                       return Ok((resend_funding_locked, required_revoke, Some(self.get_last_commitment_update(logger)), None, self.resend_order.clone(), shutdown_msg));
+                       return Ok((resend_funding_locked, required_revoke, Some(self.get_last_commitment_update(logger)), None, self.resend_order.clone(), Vec::new(), shutdown_msg));
                } else {
                        return Err(ChannelError::Close("Peer attempted to reestablish channel with a very old remote commitment transaction".to_owned()));
                }
@@ -4404,7 +4372,7 @@ impl Readable for ChannelUpdateStatus {
 impl<Signer: Sign> Writeable for Channel<Signer> {
        fn write<W: Writer>(&self, writer: &mut W) -> Result<(), ::std::io::Error> {
                // Note that we write out as if remove_uncommitted_htlcs_and_mark_paused had just been
-               // called but include holding cell updates (and obviously we don't modify self).
+               // called.
 
                writer.write_all(&[SERIALIZATION_VERSION; 1])?;
                writer.write_all(&[MIN_SERIALIZATION_VERSION; 1])?;
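
With Channel no longer failing holding-cell adds internally, the
channel_reestablish return tuple above now threads any HTLCs that
were freed but could not be forwarded out to the caller. A rough
sketch of the caller-side flow (simplified stand-in types;
fail_htlc_backwards is a hypothetical helper standing in for
ChannelManager's internals):

    struct HtlcSource;
    struct PaymentHash([u8; 32]);

    fn handle_reestablish(htlcs_to_fail: Vec<(HtlcSource, PaymentHash)>) {
        // Finish reestablish handling (queue messages, persist the monitor
        // update) while the channel-state lock is held; then, with the lock
        // released, fail the unforwardable HTLCs backwards. This mirrors the
        // fail_holding_cell_htlcs call added in the ChannelManager diff
        // below.
        for (_source, payment_hash) in htlcs_to_fail {
            fail_htlc_backwards(&payment_hash);
        }
    }

    fn fail_htlc_backwards(_payment_hash: &PaymentHash) {
        // Sketch only. The removed disconnect-time code failed these with
        // temporary_channel_failure (0x1000 | 7) plus a channel_update; see
        // the peer_disconnected hunk below.
    }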
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index d1b810ddd772894656c0101140accd8849a840c0..9445f518c54a7d82e6d83e09b6c45f19cc3c37fb 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -3363,7 +3363,7 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
        }
 
        fn internal_channel_reestablish(&self, counterparty_node_id: &PublicKey, msg: &msgs::ChannelReestablish) -> Result<(), MsgHandleErrInternal> {
-               let chan_restoration_res = {
+               let (htlcs_failed_forward, chan_restoration_res) = {
                        let mut channel_state_lock = self.channel_state.lock().unwrap();
                        let channel_state = &mut *channel_state_lock;
 
@@ -3376,7 +3376,4 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
-                                       // disconnect, so Channel's reestablish will never hand us any holding cell
-                                       // freed HTLCs to fail backwards. If in the future we no longer drop pending
-                                       // add-HTLCs on disconnect, we may be handed HTLCs to fail backwards here.
-                                       let (funding_locked, revoke_and_ack, commitment_update, monitor_update_opt, order, shutdown) =
+                                       let (funding_locked, revoke_and_ack, commitment_update, monitor_update_opt, order, htlcs_failed_forward, shutdown) =
                                                try_chan_entry!(self, chan.get_mut().channel_reestablish(msg, &self.logger), channel_state, chan);
                                        if let Some(msg) = shutdown {
                                                channel_state.pending_msg_events.push(events::MessageSendEvent::SendShutdown {
@@ -3384,12 +3381,13 @@ impl<Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelMana
                                                        msg,
                                                });
                                        }
-                                       handle_chan_restoration_locked!(self, channel_state_lock, channel_state, chan, revoke_and_ack, commitment_update, order, monitor_update_opt, Vec::new(), None, funding_locked)
+                                       (htlcs_failed_forward, handle_chan_restoration_locked!(self, channel_state_lock, channel_state, chan, revoke_and_ack, commitment_update, order, monitor_update_opt, Vec::new(), None, funding_locked))
                                },
                                hash_map::Entry::Vacant(_) => return Err(MsgHandleErrInternal::send_err_msg_no_close("Failed to find corresponding channel".to_owned(), msg.channel_id))
                        }
                };
                post_handle_chan_restoration!(self, chan_restoration_res);
+               self.fail_holding_cell_htlcs(htlcs_failed_forward, msg.channel_id);
                Ok(())
        }
 
@@ -4052,7 +4050,6 @@ impl<Signer: Sign, M: Deref , T: Deref , K: Deref , F: Deref , L: Deref >
        fn peer_disconnected(&self, counterparty_node_id: &PublicKey, no_connection_possible: bool) {
                let _persistence_guard = PersistenceNotifierGuard::notify_on_drop(&self.total_consistency_lock, &self.persistence_notifier);
                let mut failed_channels = Vec::new();
-               let mut failed_payments = Vec::new();
                let mut no_channels_remain = true;
                {
                        let mut channel_state_lock = self.channel_state.lock().unwrap();
@@ -4081,15 +4078,7 @@
                                log_debug!(self.logger, "Marking channels with {} disconnected and generating channel_updates", log_pubkey!(counterparty_node_id));
                                channel_state.by_id.retain(|_, chan| {
                                        if chan.get_counterparty_node_id() == *counterparty_node_id {
-                                               // Note that currently on channel reestablish we assert that there are no
-                                               // holding cell add-HTLCs, so if in the future we stop removing uncommitted HTLCs
-                                               // on peer disconnect here, there will need to be corresponding changes in
-                                               // reestablish logic.
-                                               let failed_adds = chan.remove_uncommitted_htlcs_and_mark_paused(&self.logger);
-                                               if !failed_adds.is_empty() {
-                                                       let chan_update = self.get_channel_update(&chan).map(|u| u.encode_with_len()).unwrap(); // Cannot add/recv HTLCs before we have a short_id so unwrap is safe
-                                                       failed_payments.push((chan_update, failed_adds));
-                                               }
+                                               chan.remove_uncommitted_htlcs_and_mark_paused(&self.logger);
                                                if chan.is_shutdown() {
                                                        if let Some(short_id) = chan.get_short_channel_id() {
                                                                short_to_id.remove(&short_id);
@@ -4133,11 +4122,6 @@
                for failure in failed_channels.drain(..) {
                        self.finish_force_close_channel(failure);
                }
-               for (chan_update, mut htlc_sources) in failed_payments {
-                       for (htlc_source, payment_hash) in htlc_sources.drain(..) {
-                               self.fail_htlc_backwards_internal(self.channel_state.lock().unwrap(), htlc_source, &payment_hash, HTLCFailReason::Reason { failure_code: 0x1000 | 7, data: chan_update.clone() });
-                       }
-               }
        }
 
        fn peer_connected(&self, counterparty_node_id: &PublicKey, init_msg: &msgs::Init) {
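
The timeout logic the commit message leans on as the backstop is,
roughly, CLTV-based: an add that lingers unforwarded in the holding
cell gets failed backwards once the chain tip comes too close to its
expiry. A hedged sketch of that idea (the constant and names are
illustrative, not LDK's):

    struct PendingForward {
        payment_hash: [u8; 32],
        cltv_expiry: u32,
    }

    // Illustrative grace period, not an LDK constant.
    const GRACE_BLOCKS: u32 = 6;

    // Returns the payment hashes which must now be failed backwards.
    fn fail_expired_forwards(
        pending: &mut Vec<PendingForward>,
        best_block_height: u32,
    ) -> Vec<[u8; 32]> {
        let mut failed = Vec::new();
        pending.retain(|fwd| {
            if best_block_height + GRACE_BLOCKS >= fwd.cltv_expiry {
                failed.push(fwd.payment_hash);
                false // too close to expiry: fail it backwards now
            } else {
                true // keep waiting; it is retransmitted on reconnect
            }
        });
        failed
    }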