Don't send initial `closing_signed` too early after final HTLC removal (branch: 2023-08-shutdown-remove-early-sign)
author    Matt Corallo <git@bluematt.me>
Sat, 26 Aug 2023 19:59:21 +0000 (19:59 +0000)
committer Matt Corallo <git@bluematt.me>
Sat, 11 Nov 2023 20:24:58 +0000 (20:24 +0000)
If we remove an HTLC (or fee update), commit, and receive our
counterparty's `revoke_and_ack`, we remove all knowledge of said
HTLC (or fee update). However, the latest local commitment
transaction that we can broadcast still contains the HTLC (or old
fee) until we receive a further `commitment_signed`. Thus, if we're
shutting down, we are not yet eligible to initiate the
`closing_signed` negotiation; rather, we should expect a
counterparty `commitment_signed` imminently.

Because the `Channel` does not track these pending updates at all
(only the `ChannelMonitor` is aware that the HTLC is still in our
latest local commitment transaction), we'd previously send a
`closing_signed` too early, causing LDK<->LDK channels with an HTLC
pending towards the channel initiator at the time of `shutdown` to
always fail to cooperatively close.

To fix this race, we add an additional unpersisted bool to
`Channel` and use that to gate sending the initial `closing_signed`.
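
In essence, the change boils down to the gating pattern sketched below. This is a compressed illustration rather than the actual diff that follows: the field name matches the patch, but the surrounding `Channel` machinery is elided and the helper method names here are illustrative only.

struct ChannelContext {
    // Set while processing the counterparty's `revoke_and_ack` whenever an
    // update we drop there (a removed HTLC, a newly-committed outbound HTLC,
    // or a fee update) is still reflected in our latest broadcastable local
    // commitment transaction. Deliberately not persisted.
    expecting_peer_commitment_signed: bool,
}

impl ChannelContext {
    // `revoke_and_ack` handling: we just forgot an update that our latest
    // local commitment transaction still contains, so remember that a
    // counterparty `commitment_signed` must arrive before we may close.
    fn update_cleared_by_counterparty_raa(&mut self) {
        self.expecting_peer_commitment_signed = true;
    }

    // `commitment_signed` handling: our local commitment is up to date
    // again, so the gate can be lifted.
    fn counterparty_commitment_signed_received(&mut self) {
        self.expecting_peer_commitment_signed = false;
    }

    // Checked before proposing `closing_signed`: bail out while we still
    // expect a counterparty `commitment_signed`.
    fn may_propose_closing_signed(&self) -> bool {
        !self.expecting_peer_commitment_signed
    }
}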

lightning/src/ln/channel.rs
lightning/src/ln/shutdown_tests.rs

diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs
index 52db68c1323c771a45807fe5d21002b887d9e8d8..f367774673e69200a23cf69136e5a8a393905512 100644
@@ -815,6 +815,19 @@ pub(super) struct ChannelContext<SP: Deref> where SP::Target: SignerProvider {
        #[cfg(not(test))]
        closing_fee_limits: Option<(u64, u64)>,
 
+       /// If we remove an HTLC (or fee update), commit, and receive our counterparty's
+       /// `revoke_and_ack`, we remove all knowledge of said HTLC (or fee update). However, the latest
+       /// local commitment transaction that we can broadcast still contains the HTLC (or old fee)
+       /// until we receive a further `commitment_signed`. Thus we are not eligible for initiating the
+       /// `closing_signed` negotiation if we're expecting a counterparty `commitment_signed`.
+       ///
+       /// To ensure we don't send a `closing_signed` too early, we track this state here, waiting
+       /// until we see a `commitment_signed` before doing so.
+       ///
+       /// We don't bother to persist this - we anticipate this state won't last longer than a few
+       /// milliseconds, so any accidental force-closes here should be exceedingly rare.
+       expecting_peer_commitment_signed: bool,
+
        /// The hash of the block in which the funding transaction was included.
        funding_tx_confirmed_in: Option<BlockHash>,
        funding_tx_confirmation_height: u32,
@@ -3248,6 +3261,7 @@ impl<SP: Deref> Channel<SP> where
                };
 
                self.context.cur_holder_commitment_transaction_number -= 1;
+               self.context.expecting_peer_commitment_signed = false;
                // Note that if we need_commitment & !AwaitingRemoteRevoke we'll call
                // build_commitment_no_status_check() next which will reset this to RAAFirst.
                self.context.resend_order = RAACommitmentOrder::CommitmentFirst;
@@ -3513,6 +3527,7 @@ impl<SP: Deref> Channel<SP> where
                        // Take references explicitly so that we can hold multiple references to self.context.
                        let pending_inbound_htlcs: &mut Vec<_> = &mut self.context.pending_inbound_htlcs;
                        let pending_outbound_htlcs: &mut Vec<_> = &mut self.context.pending_outbound_htlcs;
+                       let expecting_peer_commitment_signed = &mut self.context.expecting_peer_commitment_signed;
 
                        // We really shouldnt have two passes here, but retain gives a non-mutable ref (Rust bug)
                        pending_inbound_htlcs.retain(|htlc| {
@@ -3521,6 +3536,7 @@ impl<SP: Deref> Channel<SP> where
                                        if let &InboundHTLCRemovalReason::Fulfill(_) = reason {
                                                value_to_self_msat_diff += htlc.amount_msat as i64;
                                        }
+                                       *expecting_peer_commitment_signed = true;
                                        false
                                } else { true }
                        });
@@ -3580,6 +3596,7 @@ impl<SP: Deref> Channel<SP> where
                                if let OutboundHTLCState::LocalAnnounced(_) = htlc.state {
                                        log_trace!(logger, " ...promoting outbound LocalAnnounced {} to Committed", &htlc.payment_hash);
                                        htlc.state = OutboundHTLCState::Committed;
+                                       *expecting_peer_commitment_signed = true;
                                }
                                if let &mut OutboundHTLCState::AwaitingRemoteRevokeToRemove(ref mut outcome) = &mut htlc.state {
                                        log_trace!(logger, " ...promoting outbound AwaitingRemoteRevokeToRemove {} to AwaitingRemovedRemoteRevoke", &htlc.payment_hash);
@@ -3600,6 +3617,7 @@ impl<SP: Deref> Channel<SP> where
                                        log_trace!(logger, " ...promoting outbound fee update {} to Committed", feerate);
                                        self.context.feerate_per_kw = feerate;
                                        self.context.pending_update_fee = None;
+                                       self.context.expecting_peer_commitment_signed = true;
                                },
                                FeeUpdateState::RemoteAnnounced => { debug_assert!(!self.context.is_outbound()); },
                                FeeUpdateState::AwaitingRemoteRevokeToAnnounce => {
@@ -4380,6 +4398,10 @@ impl<SP: Deref> Channel<SP> where
                -> Result<(Option<msgs::ClosingSigned>, Option<Transaction>, Option<ShutdownResult>), ChannelError>
                where F::Target: FeeEstimator, L::Target: Logger
        {
+               // If we're waiting on a monitor persistence, that implies we're also waiting to send some
+               // message to our counterparty (probably a `revoke_and_ack`). In such a case, we shouldn't
+               // initiate `closing_signed` negotiation until we're clear of all pending messages. Note
+               // that closing_negotiation_ready checks this case (as well as a few others).
                if self.context.last_sent_closing_fee.is_some() || !self.closing_negotiation_ready() {
                        return Ok((None, None, None));
                }
@@ -4391,6 +4413,12 @@ impl<SP: Deref> Channel<SP> where
                        return Ok((None, None, None));
                }
 
+               // If we're waiting on a counterparty `commitment_signed` to clear some updates from our
+               // local commitment transaction, we can't yet initiate `closing_signed` negotiation.
+               if self.context.expecting_peer_commitment_signed {
+                       return Ok((None, None, None));
+               }
+
                let (our_min_fee, our_max_fee) = self.calculate_closing_fee_limits(fee_estimator);
 
                assert!(self.context.shutdown_scriptpubkey.is_some());
@@ -6004,6 +6032,7 @@ impl<SP: Deref> OutboundV1Channel<SP> where SP::Target: SignerProvider {
 
                                last_sent_closing_fee: None,
                                pending_counterparty_closing_signed: None,
+                               expecting_peer_commitment_signed: false,
                                closing_fee_limits: None,
                                target_closing_feerate_sats_per_kw: None,
 
@@ -6636,6 +6665,7 @@ impl<SP: Deref> InboundV1Channel<SP> where SP::Target: SignerProvider {
 
                                last_sent_closing_fee: None,
                                pending_counterparty_closing_signed: None,
+                               expecting_peer_commitment_signed: false,
                                closing_fee_limits: None,
                                target_closing_feerate_sats_per_kw: None,
 
@@ -7715,6 +7745,7 @@ impl<'a, 'b, 'c, ES: Deref, SP: Deref> ReadableArgs<(&'a ES, &'b SP, u32, &'c Ch
 
                                last_sent_closing_fee: None,
                                pending_counterparty_closing_signed: None,
+                               expecting_peer_commitment_signed: false,
                                closing_fee_limits: None,
                                target_closing_feerate_sats_per_kw,
 
diff --git a/lightning/src/ln/shutdown_tests.rs b/lightning/src/ln/shutdown_tests.rs
index 0d02e89b8bbdcaaf01d1268eeba8523529f8fc0a..bbfcf4f283cc057102c64b95b29fd4b4d6e83682 100644
@@ -10,8 +10,9 @@
 //! Tests of our shutdown and closing_signed negotiation logic.
 
 use crate::sign::{EntropySource, SignerProvider};
+use crate::chain::ChannelMonitorUpdateStatus;
 use crate::chain::transaction::OutPoint;
-use crate::events::{MessageSendEvent, MessageSendEventsProvider, ClosureReason};
+use crate::events::{MessageSendEvent, HTLCDestination, MessageSendEventsProvider, ClosureReason};
 use crate::ln::channelmanager::{self, PaymentSendFailure, PaymentId, RecipientOnionFields, ChannelShutdownState, ChannelDetails};
 use crate::routing::router::{PaymentParameters, get_route, RouteParameters};
 use crate::ln::msgs;
@@ -1237,3 +1238,102 @@ fn simple_target_feerate_shutdown() {
        check_closed_event!(nodes[0], 1, ClosureReason::CooperativeClosure, [nodes[1].node.get_our_node_id()], 100000);
        check_closed_event!(nodes[1], 1, ClosureReason::CooperativeClosure, [nodes[0].node.get_our_node_id()], 100000);
 }
+
+fn do_outbound_update_no_early_closing_signed(use_htlc: bool) {
+       // Previously, if we have a pending inbound HTLC (or fee update) on a channel which has
+       // initiated shutdown, we'd send our initial closing_signed immediately after receiving the
+       // peer's last RAA to remove the HTLC/fee update, but before receiving their final
+       // commitment_signed for a commitment without the HTLC/with the new fee. This caused at least
+       // LDK peers to force-close as we initiated closing_signed prior to the channel actually being
+       // fully empty of pending updates/HTLCs.
+       let chanmon_cfgs = create_chanmon_cfgs(2);
+       let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+       let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+       let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+
+       let chan_id = create_announced_chan_between_nodes(&nodes, 0, 1).2;
+
+       send_payment(&nodes[0], &[&nodes[1]], 1_000_000);
+       let payment_hash_opt = if use_htlc {
+               Some(route_payment(&nodes[1], &[&nodes[0]], 10_000).1)
+       } else {
+               None
+       };
+
+       if use_htlc {
+               nodes[0].node.fail_htlc_backwards(&payment_hash_opt.unwrap());
+               expect_pending_htlcs_forwardable_and_htlc_handling_failed!(nodes[0],
+                       [HTLCDestination::FailedPayment { payment_hash: payment_hash_opt.unwrap() }]);
+       } else {
+               *chanmon_cfgs[0].fee_estimator.sat_per_kw.lock().unwrap() *= 10;
+               nodes[0].node.timer_tick_occurred();
+       }
+       let updates = get_htlc_update_msgs(&nodes[0], &nodes[1].node.get_our_node_id());
+       check_added_monitors(&nodes[0], 1);
+
+       nodes[1].node.close_channel(&chan_id, &nodes[0].node.get_our_node_id()).unwrap();
+       let node_0_shutdown = get_event_msg!(nodes[1], MessageSendEvent::SendShutdown, nodes[0].node.get_our_node_id());
+       nodes[0].node.close_channel(&chan_id, &nodes[1].node.get_our_node_id()).unwrap();
+       let node_1_shutdown = get_event_msg!(nodes[0], MessageSendEvent::SendShutdown, nodes[1].node.get_our_node_id());
+
+       nodes[0].node.handle_shutdown(&nodes[1].node.get_our_node_id(), &node_0_shutdown);
+       nodes[1].node.handle_shutdown(&nodes[0].node.get_our_node_id(), &node_1_shutdown);
+
+       if use_htlc {
+               nodes[1].node.handle_update_fail_htlc(&nodes[0].node.get_our_node_id(), &updates.update_fail_htlcs[0]);
+       } else {
+               nodes[1].node.handle_update_fee(&nodes[0].node.get_our_node_id(), &updates.update_fee.unwrap());
+       }
+       nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &updates.commitment_signed);
+       check_added_monitors(&nodes[1], 1);
+       let (bs_raa, bs_cs) = get_revoke_commit_msgs(&nodes[1], &nodes[0].node.get_our_node_id());
+
+       nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_raa);
+       check_added_monitors(&nodes[0], 1);
+
+       // At this point the Channel on nodes[0] has no record of any HTLCs but the latest
+       // broadcastable commitment does contain the HTLC (but only the ChannelMonitor knows this).
+       // Thus, the channel should not yet initiate closing_signed negotiation (but previously did).
+       assert_eq!(nodes[0].node.get_and_clear_pending_msg_events(), Vec::new());
+
+       chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+       nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_cs);
+       check_added_monitors(&nodes[0], 1);
+       assert_eq!(nodes[0].node.get_and_clear_pending_msg_events(), Vec::new());
+
+       expect_channel_shutdown_state!(nodes[0], chan_id, ChannelShutdownState::ResolvingHTLCs);
+       assert_eq!(nodes[0].node.get_and_clear_pending_msg_events(), Vec::new());
+       let (outpoint, latest_update, _) = nodes[0].chain_monitor.latest_monitor_update_id.lock().unwrap().get(&chan_id).unwrap().clone();
+       nodes[0].chain_monitor.chain_monitor.force_channel_monitor_updated(outpoint, latest_update);
+
+       let as_raa_closing_signed = nodes[0].node.get_and_clear_pending_msg_events();
+       assert_eq!(as_raa_closing_signed.len(), 2);
+
+       if let MessageSendEvent::SendRevokeAndACK { msg, .. } = &as_raa_closing_signed[0] {
+               nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &msg);
+               check_added_monitors(&nodes[1], 1);
+               if use_htlc {
+                       expect_payment_failed!(nodes[1], payment_hash_opt.unwrap(), true);
+               }
+       } else { panic!("Unexpected message {:?}", as_raa_closing_signed[0]); }
+
+       if let MessageSendEvent::SendClosingSigned { msg, .. } = &as_raa_closing_signed[1] {
+               nodes[1].node.handle_closing_signed(&nodes[0].node.get_our_node_id(), &msg);
+       } else { panic!("Unexpected message {:?}", as_raa_closing_signed[1]); }
+
+       let bs_closing_signed = get_event_msg!(nodes[1], MessageSendEvent::SendClosingSigned, nodes[0].node.get_our_node_id());
+       nodes[0].node.handle_closing_signed(&nodes[1].node.get_our_node_id(), &bs_closing_signed);
+       let (_, as_2nd_closing_signed) = get_closing_signed_broadcast!(nodes[0].node, nodes[1].node.get_our_node_id());
+       nodes[1].node.handle_closing_signed(&nodes[0].node.get_our_node_id(), &as_2nd_closing_signed.unwrap());
+       let (_, node_1_none) = get_closing_signed_broadcast!(nodes[1].node, nodes[0].node.get_our_node_id());
+       assert!(node_1_none.is_none());
+
+       check_closed_event!(nodes[0], 1, ClosureReason::CooperativeClosure, [nodes[1].node.get_our_node_id()], 100000);
+       check_closed_event!(nodes[1], 1, ClosureReason::CooperativeClosure, [nodes[0].node.get_our_node_id()], 100000);
+}
+
+#[test]
+fn outbound_update_no_early_closing_signed() {
+       do_outbound_update_no_early_closing_signed(true);
+       do_outbound_update_no_early_closing_signed(false);
+}
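
The regression test above can be run on its own with a filter, e.g. `cargo test -p lightning outbound_update_no_early_closing_signed` from the repository root (assuming the standard rust-lightning workspace layout where the `lightning` crate lives in lightning/).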