From: Alec Chen Date: Sun, 30 Jun 2024 20:55:18 +0000 (-0700) Subject: Fix bug failing CS-RAA resend order on pending commitment signatures X-Git-Tag: v0.0.124-beta~58^2 X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=315193daec6f6b8d382d07e25351c6a9b55f8863;p=rust-lightning Fix bug failing CS-RAA resend order on pending commitment signatures --- diff --git a/lightning/src/ln/async_signer_tests.rs b/lightning/src/ln/async_signer_tests.rs index b6e5a222d..533be389f 100644 --- a/lightning/src/ln/async_signer_tests.rs +++ b/lightning/src/ln/async_signer_tests.rs @@ -15,11 +15,12 @@ use bitcoin::blockdata::locktime::absolute::LockTime; use bitcoin::transaction::Version; use crate::chain::channelmonitor::LATENCY_GRACE_PERIOD_BLOCKS; +use crate::chain::ChannelMonitorUpdateStatus; use crate::events::bump_transaction::WalletSource; -use crate::events::{Event, MessageSendEvent, MessageSendEventsProvider, ClosureReason}; -use crate::ln::functional_test_utils::*; +use crate::events::{ClosureReason, Event, MessageSendEvent, MessageSendEventsProvider, PaymentPurpose}; +use crate::ln::{functional_test_utils::*, msgs}; use crate::ln::msgs::ChannelMessageHandler; -use crate::ln::channelmanager::{PaymentId, RecipientOnionFields}; +use crate::ln::channelmanager::{PaymentId, RAACommitmentOrder, RecipientOnionFields}; use crate::util::test_channel_signer::SignerOp; #[test] @@ -272,7 +273,28 @@ fn test_async_commitment_signature_for_funding_signed_0conf() { } #[test] -fn test_async_commitment_signature_for_peer_disconnect() { +fn test_async_commitment_signature_peer_disconnect() { + do_test_async_commitment_signature_peer_disconnect(0); +} + +#[test] +fn test_async_commitment_signature_peer_disconnect_signer_restored_before_monitor_completion() { + // This tests that if we were pending a monitor update completion across a disconnect, + // and needed to send a CS, that if our signer becomes available before the monitor + // update completes, then we don't send duplicate messages upon calling `signer_unblocked` + // after the monitor update completes. + do_test_async_commitment_signature_peer_disconnect(1); +} + +#[test] +fn test_async_commitment_signature_peer_disconnect_signer_restored_before_reestablish() { + // This tests that if we tried to send a commitment_signed, but our signer was blocked, + // if we disconnect, reconnect, the signer becomes available, then handle channel_reestablish, + // that we don't send duplicate messages upon calling `signer_unblocked`. + do_test_async_commitment_signature_peer_disconnect(2); +} + +fn do_test_async_commitment_signature_peer_disconnect(test_case: u8) { let chanmon_cfgs = create_chanmon_cfgs(2); let node_cfgs = create_node_cfgs(2, &chanmon_cfgs); let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]); @@ -298,35 +320,236 @@ fn test_async_commitment_signature_for_peer_disconnect() { dst.node.handle_update_add_htlc(&src.node.get_our_node_id(), &payment_event.msgs[0]); + if test_case == 1 { + // Fail to persist the monitor update when handling the commitment_signed. + chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress); + } + // Mark dst's signer as unavailable and handle src's commitment_signed: while dst won't yet have a // `commitment_signed` of its own to offer, it should publish a `revoke_and_ack`. dst.disable_channel_signer_op(&src.node.get_our_node_id(), &chan_id, SignerOp::SignCounterpartyCommitment); dst.node.handle_commitment_signed(&src.node.get_our_node_id(), &payment_event.commitment_msg); check_added_monitors(dst, 1); - get_event_msg!(dst, MessageSendEvent::SendRevokeAndACK, src.node.get_our_node_id()); + if test_case != 1 { + get_event_msg!(dst, MessageSendEvent::SendRevokeAndACK, src.node.get_our_node_id()); + } // Now disconnect and reconnect the peers. src.node.peer_disconnected(&dst.node.get_our_node_id()); dst.node.peer_disconnected(&src.node.get_our_node_id()); - let mut reconnect_args = ReconnectArgs::new(&nodes[0], &nodes[1]); - reconnect_args.send_channel_ready = (false, false); - reconnect_args.pending_raa = (true, false); - reconnect_nodes(reconnect_args); + + // do reestablish stuff + src.node.peer_connected(&dst.node.get_our_node_id(), &msgs::Init { + features: dst.node.init_features(), networks: None, remote_network_address: None + }, true).unwrap(); + let reestablish_1 = get_chan_reestablish_msgs!(src, dst); + assert_eq!(reestablish_1.len(), 1); + dst.node.peer_connected(&src.node.get_our_node_id(), &msgs::Init { + features: src.node.init_features(), networks: None, remote_network_address: None + }, false).unwrap(); + let reestablish_2 = get_chan_reestablish_msgs!(dst, src); + assert_eq!(reestablish_2.len(), 1); + + if test_case == 2 { + // Reenable the signer before the reestablish. + dst.enable_channel_signer_op(&src.node.get_our_node_id(), &chan_id, SignerOp::SignCounterpartyCommitment); + } + + dst.node.handle_channel_reestablish(&src.node.get_our_node_id(), &reestablish_1[0]); + + if test_case == 1 { + dst.enable_channel_signer_op(&src.node.get_our_node_id(), &chan_id, SignerOp::SignCounterpartyCommitment); + chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::Completed); + let (outpoint, latest_update, _) = dst.chain_monitor.latest_monitor_update_id.lock().unwrap().get(&chan_id).unwrap().clone(); + dst.chain_monitor.chain_monitor.force_channel_monitor_updated(outpoint, latest_update); + check_added_monitors!(dst, 0); + } + + // Expect the RAA + let (_, revoke_and_ack, commitment_signed, _) = handle_chan_reestablish_msgs!(dst, src); + assert!(revoke_and_ack.is_some()); + if test_case == 0 { + assert!(commitment_signed.is_none()); + } else { + assert!(commitment_signed.is_some()); + } // Mark dst's signer as available and retry: we now expect to see dst's `commitment_signed`. dst.enable_channel_signer_op(&src.node.get_our_node_id(), &chan_id, SignerOp::SignCounterpartyCommitment); dst.node.signer_unblocked(Some((src.node.get_our_node_id(), chan_id))); - { - let events = dst.node.get_and_clear_pending_msg_events(); - assert_eq!(events.len(), 1, "expected one message, got {}", events.len()); - if let MessageSendEvent::UpdateHTLCs { ref node_id, .. } = events[0] { - assert_eq!(node_id, &src.node.get_our_node_id()); - } else { - panic!("expected UpdateHTLCs message, not {:?}", events[0]); - }; + if test_case == 0 { + let (_, _, commitment_signed, _) = handle_chan_reestablish_msgs!(dst, src); + assert!(commitment_signed.is_some()); + } else { + // Make sure we don't double send the CS. + let (_, _, commitment_signed, _) = handle_chan_reestablish_msgs!(dst, src); + assert!(commitment_signed.is_none()); + } +} + +#[test] +fn test_async_commitment_signature_ordering_reestablish() { + do_test_async_commitment_signature_ordering(false); +} + +#[test] +fn test_async_commitment_signature_ordering_monitor_restored() { + do_test_async_commitment_signature_ordering(true); +} + +fn do_test_async_commitment_signature_ordering(monitor_update_failure: bool) { + // Across disconnects we may end up in a situation where we need to send a + // commitment_signed and then revoke_and_ack. We need to make sure that if + // the signer is pending for commitment_signed but not revoke_and_ack, we don't + // screw up the order by sending the revoke_and_ack first. + // + // We test this for both the case where we send messages after a channel + // reestablish, as well as restoring a channel after persisting + // a monitor update. + // + // The set up for this test is based on + // `test_drop_messages_peer_disconnect_dual_htlc`. + let chanmon_cfgs = create_chanmon_cfgs(2); + let node_cfgs = create_node_cfgs(2, &chanmon_cfgs); + let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]); + let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs); + let (_, _, chan_id, _) = create_announced_chan_between_nodes(&nodes, 0, 1); + + let (payment_preimage_1, payment_hash_1, ..) = route_payment(&nodes[0], &[&nodes[1]], 1_000_000); + + // Start to send the second update_add_htlc + commitment_signed, but don't actually make it + // to the peer. + let (route, payment_hash_2, payment_preimage_2, payment_secret_2) = get_route_and_payment_hash!(nodes[0], nodes[1], 1000000); + nodes[0].node.send_payment_with_route(&route, payment_hash_2, + RecipientOnionFields::secret_only(payment_secret_2), PaymentId(payment_hash_2.0)).unwrap(); + check_added_monitors!(nodes[0], 1); + + get_htlc_update_msgs(&nodes[0], &nodes[1].node.get_our_node_id()); + + // Send back update_fulfill_htlc + commitment_signed for the first payment. + nodes[1].node.claim_funds(payment_preimage_1); + expect_payment_claimed!(nodes[1], payment_hash_1, 1_000_000); + check_added_monitors!(nodes[1], 1); + + // Handle the update_fulfill_htlc, but fail to persist the monitor update when handling the + // commitment_signed. + let events_2 = nodes[1].node.get_and_clear_pending_msg_events(); + assert_eq!(events_2.len(), 1); + match events_2[0] { + MessageSendEvent::UpdateHTLCs { node_id: _, updates: msgs::CommitmentUpdate { ref update_fulfill_htlcs, ref commitment_signed, .. } } => { + nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &update_fulfill_htlcs[0]); + expect_payment_sent(&nodes[0], payment_preimage_1, None, false, false); + if monitor_update_failure { + chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress); + } + nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), commitment_signed); + if monitor_update_failure { + assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty()); + } else { + let _ = get_event_msg!(nodes[0], MessageSendEvent::SendRevokeAndACK, nodes[1].node.get_our_node_id()); + } + // No commitment_signed so get_event_msg's assert(len == 1) passes + check_added_monitors!(nodes[0], 1); + }, + _ => panic!("Unexpected event"), + } + + // Disconnect and reconnect the peers so that nodes[0] will + // need to re-send the commitment update *and then* revoke_and_ack. + nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id()); + nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id()); + + nodes[0].node.peer_connected(&nodes[1].node.get_our_node_id(), &msgs::Init { + features: nodes[1].node.init_features(), networks: None, remote_network_address: None + }, true).unwrap(); + let reestablish_1 = get_chan_reestablish_msgs!(nodes[0], nodes[1]); + assert_eq!(reestablish_1.len(), 1); + nodes[1].node.peer_connected(&nodes[0].node.get_our_node_id(), &msgs::Init { + features: nodes[0].node.init_features(), networks: None, remote_network_address: None + }, false).unwrap(); + let reestablish_2 = get_chan_reestablish_msgs!(nodes[1], nodes[0]); + assert_eq!(reestablish_2.len(), 1); + + // With a fully working signer, here we would send a commitment_signed, + // and then revoke_and_ack. With commitment_signed disabled, since + // our ordering is CS then RAA, we should make sure we don't send the RAA. + nodes[0].disable_channel_signer_op(&nodes[1].node.get_our_node_id(), &chan_id, SignerOp::SignCounterpartyCommitment); + nodes[0].node.handle_channel_reestablish(&nodes[1].node.get_our_node_id(), &reestablish_2[0]); + let as_resp = handle_chan_reestablish_msgs!(nodes[0], nodes[1]); + assert!(as_resp.0.is_none()); + assert!(as_resp.1.is_none()); + assert!(as_resp.2.is_none()); + + if monitor_update_failure { + chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::Completed); + let (outpoint, latest_update, _) = nodes[0].chain_monitor.latest_monitor_update_id.lock().unwrap().get(&chan_id).unwrap().clone(); + nodes[0].chain_monitor.chain_monitor.force_channel_monitor_updated(outpoint, latest_update); + check_added_monitors!(nodes[0], 0); } + + // Make sure that on signer_unblocked we have the same behavior (even though RAA is ready, + // we don't send CS yet). + nodes[0].node.signer_unblocked(Some((nodes[1].node.get_our_node_id(), chan_id))); + let as_resp = handle_chan_reestablish_msgs!(nodes[0], nodes[1]); + assert!(as_resp.0.is_none()); + assert!(as_resp.1.is_none()); + assert!(as_resp.2.is_none()); + + nodes[0].enable_channel_signer_op(&nodes[1].node.get_our_node_id(), &chan_id, SignerOp::SignCounterpartyCommitment); + nodes[0].node.signer_unblocked(Some((nodes[1].node.get_our_node_id(), chan_id))); + + let as_resp = handle_chan_reestablish_msgs!(nodes[0], nodes[1]); + nodes[1].node.handle_channel_reestablish(&nodes[0].node.get_our_node_id(), &reestablish_1[0]); + let bs_resp = handle_chan_reestablish_msgs!(nodes[1], nodes[0]); + + assert!(as_resp.0.is_none()); + assert!(bs_resp.0.is_none()); + + assert!(bs_resp.1.is_none()); + assert!(bs_resp.2.is_none()); + + assert!(as_resp.3 == RAACommitmentOrder::CommitmentFirst); + + // Now that everything is restored, get the CS + RAA and handle them. + nodes[1].node.handle_update_add_htlc(&nodes[0].node.get_our_node_id(), &as_resp.2.as_ref().unwrap().update_add_htlcs[0]); + nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_resp.2.as_ref().unwrap().commitment_signed); + nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), as_resp.1.as_ref().unwrap()); + let (bs_revoke_and_ack, bs_second_commitment_signed) = get_revoke_commit_msgs(&nodes[1], &nodes[0].node.get_our_node_id()); + check_added_monitors!(nodes[1], 2); + + // The rest of this is boilerplate for resolving the previous state. + + nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_revoke_and_ack); + let as_commitment_signed = get_htlc_update_msgs!(nodes[0], nodes[1].node.get_our_node_id()); + check_added_monitors!(nodes[0], 1); + + nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_second_commitment_signed); + let as_revoke_and_ack = get_event_msg!(nodes[0], MessageSendEvent::SendRevokeAndACK, nodes[1].node.get_our_node_id()); + // No commitment_signed so get_event_msg's assert(len == 1) passes + check_added_monitors!(nodes[0], 1); + + nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_commitment_signed.commitment_signed); + let bs_second_revoke_and_ack = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, nodes[0].node.get_our_node_id()); + // No commitment_signed so get_event_msg's assert(len == 1) passes + check_added_monitors!(nodes[1], 1); + + nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_revoke_and_ack); + assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); + check_added_monitors!(nodes[1], 1); + + nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_second_revoke_and_ack); + assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty()); + check_added_monitors!(nodes[0], 1); + + expect_pending_htlcs_forwardable!(nodes[1]); + + let events_5 = nodes[1].node.get_and_clear_pending_events(); + check_payment_claimable(&events_5[0], payment_hash_2, payment_secret_2, 1_000_000, None, nodes[1].node.get_our_node_id()); + + expect_payment_path_successful!(nodes[0]); + claim_payment(&nodes[0], &[&nodes[1]], payment_preimage_2); } fn do_test_async_holder_signatures(anchors: bool, remote_commitment: bool) { diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index 8d83bf460..7077932f4 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -5313,12 +5313,18 @@ impl Channel where }; } - let raa = if self.context.monitor_pending_revoke_and_ack { + let mut raa = if self.context.monitor_pending_revoke_and_ack { Some(self.get_last_revoke_and_ack()) } else { None }; let commitment_update = if self.context.monitor_pending_commitment_signed { self.get_last_commitment_update_for_send(logger).ok() } else { None }; + if self.context.resend_order == RAACommitmentOrder::CommitmentFirst + && self.context.signer_pending_commitment_update && raa.is_some() { + self.context.signer_pending_revoke_and_ack = true; + raa = None; + } + if commitment_update.is_some() { self.mark_awaiting_response(); } @@ -5417,11 +5423,12 @@ impl Channel where commitment_update = None; } - log_trace!(logger, "Signer unblocked with {} commitment_update, {} revoke_and_ack, {} funding_signed and {} channel_ready", + log_trace!(logger, "Signer unblocked with {} commitment_update, {} revoke_and_ack, {} funding_signed and {} channel_ready, with resend order {:?}", if commitment_update.is_some() { "a" } else { "no" }, if revoke_and_ack.is_some() { "a" } else { "no" }, if funding_signed.is_some() { "a" } else { "no" }, - if channel_ready.is_some() { "a" } else { "no" }); + if channel_ready.is_some() { "a" } else { "no" }, + self.context.resend_order); SignerResumeUpdates { commitment_update, @@ -5704,10 +5711,18 @@ impl Channel where order: self.context.resend_order.clone(), }) } else { + let commitment_update = self.get_last_commitment_update_for_send(logger).ok(); + let raa = if self.context.resend_order == RAACommitmentOrder::CommitmentFirst + && self.context.signer_pending_commitment_update && required_revoke.is_some() { + log_trace!(logger, "Reconnected channel {} with lost outbound RAA and lost remote commitment tx, but unable to send due to resend order, waiting on signer for commitment update", &self.context.channel_id()); + self.context.signer_pending_revoke_and_ack = true; + None + } else { + required_revoke + }; Ok(ReestablishResponses { channel_ready, shutdown_msg, announcement_sigs, - raa: required_revoke, - commitment_update: self.get_last_commitment_update_for_send(logger).ok(), + raa, commitment_update, order: self.context.resend_order.clone(), }) } diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 622cd10e6..b97509c8b 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -667,7 +667,7 @@ pub(super) const MIN_HTLC_RELAY_HOLDING_CELL_MILLIS: u64 = 100; /// be sent in the order they appear in the return value, however sometimes the order needs to be /// variable at runtime (eg Channel::channel_reestablish needs to re-send messages in the order /// they were originally sent). In those cases, this enum is also returned. -#[derive(Clone, PartialEq)] +#[derive(Clone, PartialEq, Debug)] pub(super) enum RAACommitmentOrder { /// Send the CommitmentUpdate messages first CommitmentFirst,