git.bitcoin.ninja Git - rust-lightning/commitdiff
Fix bug breaking CS-RAA resend order on pending commitment signatures
author Alec Chen <alecchendev@gmail.com>
Sun, 30 Jun 2024 20:55:18 +0000 (13:55 -0700)
committer Alec Chen <alecchendev@gmail.com>
Tue, 9 Jul 2024 00:35:25 +0000 (17:35 -0700)
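
When a channel has to resend both a commitment_signed and a revoke_and_ack with resend_order == CommitmentFirst, but the asynchronous signer has not yet produced the counterparty commitment signature, the channel could previously hand out the revoke_and_ack on its own, violating the required CS-then-RAA order. The change below withholds the RAA in that situation, records it via signer_pending_revoke_and_ack, and leaves it to the signer-unblocked path to release both messages in order. Roughly, as a standalone sketch of the guard added in channel.rs (the context struct and message types here are simplified stand-ins for illustration, not LDK's real types):

#[derive(Clone, PartialEq, Debug)]
enum RAACommitmentOrder {
    CommitmentFirst,
    RevokeAndACKFirst,
}

struct RevokeAndACK;     // stand-in for msgs::RevokeAndACK
struct CommitmentUpdate; // stand-in for msgs::CommitmentUpdate

struct ChannelCtx {
    resend_order: RAACommitmentOrder,
    signer_pending_commitment_update: bool,
    signer_pending_revoke_and_ack: bool,
}

// If we must resend commitment_signed before revoke_and_ack, but the signer has
// not yet produced the commitment signature, hold the RAA back and remember that
// it still needs to go out once the signer is unblocked.
fn gate_raa_on_pending_commitment(
    ctx: &mut ChannelCtx,
    mut raa: Option<RevokeAndACK>,
    commitment_update: Option<CommitmentUpdate>,
) -> (Option<RevokeAndACK>, Option<CommitmentUpdate>) {
    if ctx.resend_order == RAACommitmentOrder::CommitmentFirst
        && ctx.signer_pending_commitment_update
        && raa.is_some()
    {
        ctx.signer_pending_revoke_and_ack = true;
        raa = None;
    }
    (raa, commitment_update)
}

fn main() {
    let mut ctx = ChannelCtx {
        resend_order: RAACommitmentOrder::CommitmentFirst,
        signer_pending_commitment_update: true,
        signer_pending_revoke_and_ack: false,
    };
    // The signer is still pending the commitment signature: nothing is released,
    // and the withheld RAA is flagged for the signer-unblocked path.
    let (raa, cs) = gate_raa_on_pending_commitment(&mut ctx, Some(RevokeAndACK), None);
    assert!(raa.is_none() && cs.is_none() && ctx.signer_pending_revoke_and_ack);
}

The same guard is applied both when a pending monitor update completes and when building the channel_reestablish response, which is what the new parameterized tests in async_signer_tests.rs exercise.
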
lightning/src/ln/async_signer_tests.rs
lightning/src/ln/channel.rs
lightning/src/ln/channelmanager.rs

diff --git a/lightning/src/ln/async_signer_tests.rs b/lightning/src/ln/async_signer_tests.rs
index b6e5a222d272c9d576f92956e87154ce9d03033b..533be389fd505b112a97e8046b4ca9772885eeaf 100644 (file)
--- a/lightning/src/ln/async_signer_tests.rs
+++ b/lightning/src/ln/async_signer_tests.rs
@@ -15,11 +15,12 @@ use bitcoin::blockdata::locktime::absolute::LockTime;
 use bitcoin::transaction::Version;
 
 use crate::chain::channelmonitor::LATENCY_GRACE_PERIOD_BLOCKS;
+use crate::chain::ChannelMonitorUpdateStatus;
 use crate::events::bump_transaction::WalletSource;
-use crate::events::{Event, MessageSendEvent, MessageSendEventsProvider, ClosureReason};
-use crate::ln::functional_test_utils::*;
+use crate::events::{ClosureReason, Event, MessageSendEvent, MessageSendEventsProvider, PaymentPurpose};
+use crate::ln::{functional_test_utils::*, msgs};
 use crate::ln::msgs::ChannelMessageHandler;
-use crate::ln::channelmanager::{PaymentId, RecipientOnionFields};
+use crate::ln::channelmanager::{PaymentId, RAACommitmentOrder, RecipientOnionFields};
 use crate::util::test_channel_signer::SignerOp;
 
 #[test]
@@ -272,7 +273,28 @@ fn test_async_commitment_signature_for_funding_signed_0conf() {
 }
 
 #[test]
-fn test_async_commitment_signature_for_peer_disconnect() {
+fn test_async_commitment_signature_peer_disconnect() {
+       do_test_async_commitment_signature_peer_disconnect(0);
+}
+
+#[test]
+fn test_async_commitment_signature_peer_disconnect_signer_restored_before_monitor_completion() {
+       // This tests that if a monitor update was pending completion across a disconnect,
+       // and we needed to send a CS, and our signer becomes available before the monitor
+       // update completes, then we don't send duplicate messages upon calling `signer_unblocked`
+       // after the monitor update completes.
+       do_test_async_commitment_signature_peer_disconnect(1);
+}
+
+#[test]
+fn test_async_commitment_signature_peer_disconnect_signer_restored_before_reestablish() {
+       // This tests that if we tried to send a commitment_signed but our signer was blocked, and
+       // we then disconnect, reconnect, the signer becomes available, and we handle channel_reestablish,
+       // we don't send duplicate messages upon calling `signer_unblocked`.
+       do_test_async_commitment_signature_peer_disconnect(2);
+}
+
+fn do_test_async_commitment_signature_peer_disconnect(test_case: u8) {
        let chanmon_cfgs = create_chanmon_cfgs(2);
        let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
        let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
@@ -298,35 +320,236 @@ fn test_async_commitment_signature_for_peer_disconnect() {
 
        dst.node.handle_update_add_htlc(&src.node.get_our_node_id(), &payment_event.msgs[0]);
 
+       if test_case == 1 {
+               // Fail to persist the monitor update when handling the commitment_signed.
+               chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+       }
+
        // Mark dst's signer as unavailable and handle src's commitment_signed: while dst won't yet have a
        // `commitment_signed` of its own to offer, it should publish a `revoke_and_ack`.
        dst.disable_channel_signer_op(&src.node.get_our_node_id(), &chan_id, SignerOp::SignCounterpartyCommitment);
        dst.node.handle_commitment_signed(&src.node.get_our_node_id(), &payment_event.commitment_msg);
        check_added_monitors(dst, 1);
 
-       get_event_msg!(dst, MessageSendEvent::SendRevokeAndACK, src.node.get_our_node_id());
+       if test_case != 1 {
+               get_event_msg!(dst, MessageSendEvent::SendRevokeAndACK, src.node.get_our_node_id());
+       }
 
        // Now disconnect and reconnect the peers.
        src.node.peer_disconnected(&dst.node.get_our_node_id());
        dst.node.peer_disconnected(&src.node.get_our_node_id());
-       let mut reconnect_args = ReconnectArgs::new(&nodes[0], &nodes[1]);
-       reconnect_args.send_channel_ready = (false, false);
-       reconnect_args.pending_raa = (true, false);
-       reconnect_nodes(reconnect_args);
+
+       // Perform the reestablish handshake manually so we can drive each message ourselves.
+       src.node.peer_connected(&dst.node.get_our_node_id(), &msgs::Init {
+               features: dst.node.init_features(), networks: None, remote_network_address: None
+       }, true).unwrap();
+       let reestablish_1 = get_chan_reestablish_msgs!(src, dst);
+       assert_eq!(reestablish_1.len(), 1);
+       dst.node.peer_connected(&src.node.get_our_node_id(), &msgs::Init {
+               features: src.node.init_features(), networks: None, remote_network_address: None
+       }, false).unwrap();
+       let reestablish_2 = get_chan_reestablish_msgs!(dst, src);
+       assert_eq!(reestablish_2.len(), 1);
+
+       if test_case == 2 {
+               // Reenable the signer before the reestablish.
+               dst.enable_channel_signer_op(&src.node.get_our_node_id(), &chan_id, SignerOp::SignCounterpartyCommitment);
+       }
+
+       dst.node.handle_channel_reestablish(&src.node.get_our_node_id(), &reestablish_1[0]);
+
+       if test_case == 1 {
+               dst.enable_channel_signer_op(&src.node.get_our_node_id(), &chan_id, SignerOp::SignCounterpartyCommitment);
+               chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::Completed);
+               let (outpoint, latest_update, _) = dst.chain_monitor.latest_monitor_update_id.lock().unwrap().get(&chan_id).unwrap().clone();
+               dst.chain_monitor.chain_monitor.force_channel_monitor_updated(outpoint, latest_update);
+               check_added_monitors!(dst, 0);
+       }
+
+       // Expect the RAA
+       let (_, revoke_and_ack, commitment_signed, _) = handle_chan_reestablish_msgs!(dst, src);
+       assert!(revoke_and_ack.is_some());
+       if test_case == 0 {
+               assert!(commitment_signed.is_none());
+       } else {
+               assert!(commitment_signed.is_some());
+       }
 
        // Mark dst's signer as available and retry: we now expect to see dst's `commitment_signed`.
        dst.enable_channel_signer_op(&src.node.get_our_node_id(), &chan_id, SignerOp::SignCounterpartyCommitment);
        dst.node.signer_unblocked(Some((src.node.get_our_node_id(), chan_id)));
 
-       {
-               let events = dst.node.get_and_clear_pending_msg_events();
-               assert_eq!(events.len(), 1, "expected one message, got {}", events.len());
-               if let MessageSendEvent::UpdateHTLCs { ref node_id, .. } = events[0] {
-                       assert_eq!(node_id, &src.node.get_our_node_id());
-               } else {
-                       panic!("expected UpdateHTLCs message, not {:?}", events[0]);
-               };
+       if test_case == 0 {
+               let (_, _, commitment_signed, _) = handle_chan_reestablish_msgs!(dst, src);
+               assert!(commitment_signed.is_some());
+       } else {
+               // Make sure we don't double send the CS.
+               let (_, _, commitment_signed, _) = handle_chan_reestablish_msgs!(dst, src);
+               assert!(commitment_signed.is_none());
+       }
+}
+
+#[test]
+fn test_async_commitment_signature_ordering_reestablish() {
+       do_test_async_commitment_signature_ordering(false);
+}
+
+#[test]
+fn test_async_commitment_signature_ordering_monitor_restored() {
+       do_test_async_commitment_signature_ordering(true);
+}
+
+fn do_test_async_commitment_signature_ordering(monitor_update_failure: bool) {
+       // Across disconnects we may end up in a situation where we need to send a
+       // commitment_signed and then revoke_and_ack. We need to make sure that if
+       // the signer is pending for commitment_signed but not revoke_and_ack, we don't
+       // screw up the order by sending the revoke_and_ack first.
+       //
+       // We test this both for the case where we send messages after a channel
+       // reestablish, and for the case where we restore a channel after a
+       // monitor update completes.
+       //
+       // The set up for this test is based on
+       // `test_drop_messages_peer_disconnect_dual_htlc`.
+       let chanmon_cfgs = create_chanmon_cfgs(2);
+       let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+       let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+       let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+       let (_, _, chan_id, _) = create_announced_chan_between_nodes(&nodes, 0, 1);
+
+       let (payment_preimage_1, payment_hash_1, ..) = route_payment(&nodes[0], &[&nodes[1]], 1_000_000);
+
+       // Start sending the second payment's update_add_htlc + commitment_signed, but don't
+       // actually deliver them to the peer.
+       let (route, payment_hash_2, payment_preimage_2, payment_secret_2) = get_route_and_payment_hash!(nodes[0], nodes[1], 1000000);
+       nodes[0].node.send_payment_with_route(&route, payment_hash_2,
+               RecipientOnionFields::secret_only(payment_secret_2), PaymentId(payment_hash_2.0)).unwrap();
+       check_added_monitors!(nodes[0], 1);
+
+       get_htlc_update_msgs(&nodes[0], &nodes[1].node.get_our_node_id());
+
+       // Send back update_fulfill_htlc + commitment_signed for the first payment.
+       nodes[1].node.claim_funds(payment_preimage_1);
+       expect_payment_claimed!(nodes[1], payment_hash_1, 1_000_000);
+       check_added_monitors!(nodes[1], 1);
+
+       // Handle the update_fulfill_htlc, but fail to persist the monitor update when handling the
+       // commitment_signed.
+       let events_2 = nodes[1].node.get_and_clear_pending_msg_events();
+       assert_eq!(events_2.len(), 1);
+       match events_2[0] {
+               MessageSendEvent::UpdateHTLCs { node_id: _, updates: msgs::CommitmentUpdate { ref update_fulfill_htlcs, ref commitment_signed, .. } } => {
+                       nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &update_fulfill_htlcs[0]);
+                       expect_payment_sent(&nodes[0], payment_preimage_1, None, false, false);
+                       if monitor_update_failure {
+                               chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+                       }
+                       nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), commitment_signed);
+                       if monitor_update_failure {
+                               assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
+                       } else {
+                               let _ = get_event_msg!(nodes[0], MessageSendEvent::SendRevokeAndACK, nodes[1].node.get_our_node_id());
+                       }
+                       // No commitment_signed so get_event_msg's assert(len == 1) passes
+                       check_added_monitors!(nodes[0], 1);
+               },
+               _ => panic!("Unexpected event"),
+       }
+
+       // Disconnect and reconnect the peers so that nodes[0] will
+       // need to re-send the commitment update *and then* revoke_and_ack.
+       nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id());
+       nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id());
+
+       nodes[0].node.peer_connected(&nodes[1].node.get_our_node_id(), &msgs::Init {
+               features: nodes[1].node.init_features(), networks: None, remote_network_address: None
+       }, true).unwrap();
+       let reestablish_1 = get_chan_reestablish_msgs!(nodes[0], nodes[1]);
+       assert_eq!(reestablish_1.len(), 1);
+       nodes[1].node.peer_connected(&nodes[0].node.get_our_node_id(), &msgs::Init {
+               features: nodes[0].node.init_features(), networks: None, remote_network_address: None
+       }, false).unwrap();
+       let reestablish_2 = get_chan_reestablish_msgs!(nodes[1], nodes[0]);
+       assert_eq!(reestablish_2.len(), 1);
+
+       // With a fully working signer, here we would send a commitment_signed,
+       // and then revoke_and_ack. With commitment_signed disabled, since
+       // our ordering is CS then RAA, we should make sure we don't send the RAA.
+       nodes[0].disable_channel_signer_op(&nodes[1].node.get_our_node_id(), &chan_id, SignerOp::SignCounterpartyCommitment);
+       nodes[0].node.handle_channel_reestablish(&nodes[1].node.get_our_node_id(), &reestablish_2[0]);
+       let as_resp = handle_chan_reestablish_msgs!(nodes[0], nodes[1]);
+       assert!(as_resp.0.is_none());
+       assert!(as_resp.1.is_none());
+       assert!(as_resp.2.is_none());
+
+       if monitor_update_failure {
+               chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::Completed);
+               let (outpoint, latest_update, _) = nodes[0].chain_monitor.latest_monitor_update_id.lock().unwrap().get(&chan_id).unwrap().clone();
+               nodes[0].chain_monitor.chain_monitor.force_channel_monitor_updated(outpoint, latest_update);
+               check_added_monitors!(nodes[0], 0);
        }
+
+       // Make sure that on signer_unblocked we have the same behavior (even though the RAA is
+       // ready, we don't send it while the CS, which must go first, is still pending the signer).
+       nodes[0].node.signer_unblocked(Some((nodes[1].node.get_our_node_id(), chan_id)));
+       let as_resp = handle_chan_reestablish_msgs!(nodes[0], nodes[1]);
+       assert!(as_resp.0.is_none());
+       assert!(as_resp.1.is_none());
+       assert!(as_resp.2.is_none());
+
+       nodes[0].enable_channel_signer_op(&nodes[1].node.get_our_node_id(), &chan_id, SignerOp::SignCounterpartyCommitment);
+       nodes[0].node.signer_unblocked(Some((nodes[1].node.get_our_node_id(), chan_id)));
+
+       let as_resp = handle_chan_reestablish_msgs!(nodes[0], nodes[1]);
+       nodes[1].node.handle_channel_reestablish(&nodes[0].node.get_our_node_id(), &reestablish_1[0]);
+       let bs_resp = handle_chan_reestablish_msgs!(nodes[1], nodes[0]);
+
+       assert!(as_resp.0.is_none());
+       assert!(bs_resp.0.is_none());
+
+       assert!(bs_resp.1.is_none());
+       assert!(bs_resp.2.is_none());
+
+       assert!(as_resp.3 == RAACommitmentOrder::CommitmentFirst);
+
+       // Now that everything is restored, get the CS + RAA and handle them.
+       nodes[1].node.handle_update_add_htlc(&nodes[0].node.get_our_node_id(), &as_resp.2.as_ref().unwrap().update_add_htlcs[0]);
+       nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_resp.2.as_ref().unwrap().commitment_signed);
+       nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), as_resp.1.as_ref().unwrap());
+       let (bs_revoke_and_ack, bs_second_commitment_signed) = get_revoke_commit_msgs(&nodes[1], &nodes[0].node.get_our_node_id());
+       check_added_monitors!(nodes[1], 2);
+
+       // The rest of this is boilerplate for resolving the previous state.
+
+       nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_revoke_and_ack);
+       let as_commitment_signed = get_htlc_update_msgs!(nodes[0], nodes[1].node.get_our_node_id());
+       check_added_monitors!(nodes[0], 1);
+
+       nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_second_commitment_signed);
+       let as_revoke_and_ack = get_event_msg!(nodes[0], MessageSendEvent::SendRevokeAndACK, nodes[1].node.get_our_node_id());
+       // No commitment_signed so get_event_msg's assert(len == 1) passes
+       check_added_monitors!(nodes[0], 1);
+
+       nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_commitment_signed.commitment_signed);
+       let bs_second_revoke_and_ack = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, nodes[0].node.get_our_node_id());
+       // No commitment_signed so get_event_msg's assert(len == 1) passes
+       check_added_monitors!(nodes[1], 1);
+
+       nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_revoke_and_ack);
+       assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+       check_added_monitors!(nodes[1], 1);
+
+       nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_second_revoke_and_ack);
+       assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
+       check_added_monitors!(nodes[0], 1);
+
+       expect_pending_htlcs_forwardable!(nodes[1]);
+
+       let events_5 = nodes[1].node.get_and_clear_pending_events();
+       check_payment_claimable(&events_5[0], payment_hash_2, payment_secret_2, 1_000_000, None, nodes[1].node.get_our_node_id());
+
+       expect_payment_path_successful!(nodes[0]);
+       claim_payment(&nodes[0], &[&nodes[1]], payment_preimage_2);
 }
 
 fn do_test_async_holder_signatures(anchors: bool, remote_commitment: bool) {
diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs
index 8d83bf46025abb253b1f93d3bb69c4f7bb690542..7077932f465d46444ae0d1e3b345a373068ebf86 100644 (file)
--- a/lightning/src/ln/channel.rs
+++ b/lightning/src/ln/channel.rs
@@ -5313,12 +5313,18 @@ impl<SP: Deref> Channel<SP> where
                        };
                }
 
-               let raa = if self.context.monitor_pending_revoke_and_ack {
+               let mut raa = if self.context.monitor_pending_revoke_and_ack {
                        Some(self.get_last_revoke_and_ack())
                } else { None };
                let commitment_update = if self.context.monitor_pending_commitment_signed {
                        self.get_last_commitment_update_for_send(logger).ok()
                } else { None };
+               if self.context.resend_order == RAACommitmentOrder::CommitmentFirst
+                       && self.context.signer_pending_commitment_update && raa.is_some() {
+                       self.context.signer_pending_revoke_and_ack = true;
+                       raa = None;
+               }
+
                if commitment_update.is_some() {
                        self.mark_awaiting_response();
                }
@@ -5417,11 +5423,12 @@ impl<SP: Deref> Channel<SP> where
                        commitment_update = None;
                }
 
-               log_trace!(logger, "Signer unblocked with {} commitment_update, {} revoke_and_ack, {} funding_signed and {} channel_ready",
+               log_trace!(logger, "Signer unblocked with {} commitment_update, {} revoke_and_ack, {} funding_signed and {} channel_ready, with resend order {:?}",
                        if commitment_update.is_some() { "a" } else { "no" },
                        if revoke_and_ack.is_some() { "a" } else { "no" },
                        if funding_signed.is_some() { "a" } else { "no" },
-                       if channel_ready.is_some() { "a" } else { "no" });
+                       if channel_ready.is_some() { "a" } else { "no" },
+                       self.context.resend_order);
 
                SignerResumeUpdates {
                        commitment_update,
@@ -5704,10 +5711,18 @@ impl<SP: Deref> Channel<SP> where
                                        order: self.context.resend_order.clone(),
                                })
                        } else {
+                               let commitment_update = self.get_last_commitment_update_for_send(logger).ok();
+                               let raa = if self.context.resend_order == RAACommitmentOrder::CommitmentFirst
+                                       && self.context.signer_pending_commitment_update && required_revoke.is_some() {
+                                       log_trace!(logger, "Reconnected channel {} with lost outbound RAA and lost remote commitment tx, but unable to send due to resend order, waiting on signer for commitment update", &self.context.channel_id());
+                                       self.context.signer_pending_revoke_and_ack = true;
+                                       None
+                               } else {
+                                       required_revoke
+                               };
                                Ok(ReestablishResponses {
                                        channel_ready, shutdown_msg, announcement_sigs,
-                                       raa: required_revoke,
-                                       commitment_update: self.get_last_commitment_update_for_send(logger).ok(),
+                                       raa, commitment_update,
                                        order: self.context.resend_order.clone(),
                                })
                        }
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index 622cd10e6bc0bb1df649f6e23bbf21cb3d219cb1..b97509c8bad9fbb9354480c0751057d293643caa 100644 (file)
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -667,7 +667,7 @@ pub(super) const MIN_HTLC_RELAY_HOLDING_CELL_MILLIS: u64 = 100;
 /// be sent in the order they appear in the return value, however sometimes the order needs to be
 /// variable at runtime (eg Channel::channel_reestablish needs to re-send messages in the order
 /// they were originally sent). In those cases, this enum is also returned.
-#[derive(Clone, PartialEq)]
+#[derive(Clone, PartialEq, Debug)]
 pub(super) enum RAACommitmentOrder {
        /// Send the CommitmentUpdate messages first
        CommitmentFirst,
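
For completeness, the Debug derive added to RAACommitmentOrder above exists so the updated log_trace in channel.rs can print the channel's resend order with `{:?}`. A tiny standalone illustration (the printed string is a paraphrase of the new log line, not LDK's exact output):

// Standalone illustration of the new Debug derive: the updated trace log in
// channel.rs prints the channel's resend order via `{:?}`.
#[derive(Clone, PartialEq, Debug)]
enum RAACommitmentOrder { CommitmentFirst, RevokeAndACKFirst }

fn main() {
    let resend_order = RAACommitmentOrder::CommitmentFirst;
    // Mirrors the tail of the new log line: "... with resend order CommitmentFirst"
    println!("Signer unblocked with no commitment_update, a revoke_and_ack, no funding_signed and no channel_ready, with resend order {:?}", resend_order);
}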