From: Matt Corallo Date: Mon, 29 Jul 2019 17:45:35 +0000 (-0400) Subject: Handle monitor update failure during funding on the fundee side X-Git-Tag: v0.0.12~194^2 X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=8ba352952242421041cef4019d0bf2172e712986;p=rust-lightning Handle monitor update failure during funding on the fundee side This carries a surprising amount of complexity despite only being possible in the case where monitor updating failed during the processing of funding_generated. Specifically, this requires handling rebroadcasting funding_locked once we successfully persist our monitor again. As an alternative we could never send funding_signed when the monitor failed to persist, but this approach avoids needless delays during funding. --- diff --git a/src/ln/chanmon_update_fail_tests.rs b/src/ln/chanmon_update_fail_tests.rs index 4803b0054..137d98346 100644 --- a/src/ln/chanmon_update_fail_tests.rs +++ b/src/ln/chanmon_update_fail_tests.rs @@ -1557,7 +1557,9 @@ fn monitor_update_claim_fail_no_response() { // Note that restore_between_fails with !fail_on_generate is useless // Also note that !fail_on_generate && !fail_on_signed is useless // Finally, note that !fail_on_signed is not possible with fail_on_generate && !restore_between_fails -fn do_during_funding_monitor_fail(fail_on_generate: bool, restore_between_fails: bool, fail_on_signed: bool) { +// confirm_a_first and restore_b_before_conf are wholly unrelated to earlier bools and +// restore_b_before_conf has no meaning if !confirm_a_first +fn do_during_funding_monitor_fail(fail_on_generate: bool, restore_between_fails: bool, fail_on_signed: bool, confirm_a_first: bool, restore_b_before_conf: bool) { // Test that if the monitor update generated by funding_transaction_generated fails we continue // the channel setup happily after the update is restored. let mut nodes = create_network(2, &[None, None]); @@ -1574,6 +1576,7 @@ fn do_during_funding_monitor_fail(fail_on_generate: bool, restore_between_fails: nodes[0].node.funding_transaction_generated(&temporary_channel_id, funding_output); check_added_monitors!(nodes[0], 1); + *nodes[1].chan_monitor.update_ret.lock().unwrap() = Err(ChannelMonitorUpdateErr::TemporaryFailure); nodes[1].node.handle_funding_created(&nodes[0].node.get_our_node_id(), &get_event_msg!(nodes[0], MessageSendEvent::SendFundingCreated, nodes[1].node.get_our_node_id())).unwrap(); check_added_monitors!(nodes[1], 1); @@ -1623,8 +1626,45 @@ fn do_during_funding_monitor_fail(fail_on_generate: bool, restore_between_fails: _ => panic!("Unexpected event"), }; - let (funding_locked, channel_id) = create_chan_between_nodes_with_value_confirm(&nodes[0], &nodes[1], &funding_tx); - let (announcement, as_update, bs_update) = create_chan_between_nodes_with_value_b(&nodes[0], &nodes[1], &funding_locked); + if confirm_a_first { + confirm_transaction(&nodes[0].chain_monitor, &funding_tx, funding_tx.version); + nodes[1].node.handle_funding_locked(&nodes[0].node.get_our_node_id(), &get_event_msg!(nodes[0], MessageSendEvent::SendFundingLocked, nodes[1].node.get_our_node_id())).unwrap(); + } else { + assert!(!restore_b_before_conf); + confirm_transaction(&nodes[1].chain_monitor, &funding_tx, funding_tx.version); + assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); + } + + // Make sure nodes[1] isn't stupid enough to re-send the FundingLocked on reconnect + nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false); + nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false); + reconnect_nodes(&nodes[0], &nodes[1], (false, confirm_a_first), (0, 0), (0, 0), (0, 0), (0, 0), (false, false)); + assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty()); + assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); + + if !restore_b_before_conf { + confirm_transaction(&nodes[1].chain_monitor, &funding_tx, funding_tx.version); + assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty()); + assert!(nodes[1].node.get_and_clear_pending_events().is_empty()); + } + + *nodes[1].chan_monitor.update_ret.lock().unwrap() = Ok(()); + nodes[1].node.test_restore_channel_monitor(); + check_added_monitors!(nodes[1], 1); + + let (channel_id, (announcement, as_update, bs_update)) = if !confirm_a_first { + nodes[0].node.handle_funding_locked(&nodes[1].node.get_our_node_id(), &get_event_msg!(nodes[1], MessageSendEvent::SendFundingLocked, nodes[0].node.get_our_node_id())).unwrap(); + + confirm_transaction(&nodes[0].chain_monitor, &funding_tx, funding_tx.version); + let (funding_locked, channel_id) = create_chan_between_nodes_with_value_confirm_second(&nodes[1], &nodes[0]); + (channel_id, create_chan_between_nodes_with_value_b(&nodes[0], &nodes[1], &funding_locked)) + } else { + if restore_b_before_conf { + confirm_transaction(&nodes[1].chain_monitor, &funding_tx, funding_tx.version); + } + let (funding_locked, channel_id) = create_chan_between_nodes_with_value_confirm_second(&nodes[0], &nodes[1]); + (channel_id, create_chan_between_nodes_with_value_b(&nodes[1], &nodes[0], &funding_locked)) + }; for node in nodes.iter() { assert!(node.router.handle_channel_announcement(&announcement).unwrap()); node.router.handle_channel_update(&as_update).unwrap(); @@ -1637,8 +1677,8 @@ fn do_during_funding_monitor_fail(fail_on_generate: bool, restore_between_fails: #[test] fn during_funding_monitor_fail() { - do_during_funding_monitor_fail(false, false, true); - do_during_funding_monitor_fail(true, false, true); - do_during_funding_monitor_fail(true, true, true); - do_during_funding_monitor_fail(true, true, false); + do_during_funding_monitor_fail(false, false, true, true, true); + do_during_funding_monitor_fail(true, false, true, false, false); + do_during_funding_monitor_fail(true, true, true, true, false); + do_during_funding_monitor_fail(true, true, false, false, false); } diff --git a/src/ln/channel.rs b/src/ln/channel.rs index 6dd7e8c82..bcc91508f 100644 --- a/src/ln/channel.rs +++ b/src/ln/channel.rs @@ -181,9 +181,9 @@ enum ChannelState { /// "disconnected" and no updates are allowed until after we've done a channel_reestablish /// dance. PeerDisconnected = (1 << 7), - /// Flag which is set on ChannelFunded and FundingSent indicating the user has told us they - /// failed to update our ChannelMonitor somewhere and we should pause sending any outbound - /// messages until they've managed to do so. + /// Flag which is set on ChannelFunded, FundingCreated, and FundingSent indicating the user has + /// told us they failed to update our ChannelMonitor somewhere and we should pause sending any + /// outbound messages until they've managed to do so. MonitorUpdateFailed = (1 << 8), /// Flag which implies that we have sent a commitment_signed but are awaiting the responding /// revoke_and_ack message. During this time period, we can't generate new commitment_signed @@ -248,6 +248,7 @@ pub(super) struct Channel { /// send it first. resend_order: RAACommitmentOrder, + monitor_pending_funding_locked: bool, monitor_pending_revoke_and_ack: bool, monitor_pending_commitment_signed: bool, monitor_pending_forwards: Vec<(PendingForwardHTLCInfo, u64)>, @@ -457,6 +458,7 @@ impl Channel { resend_order: RAACommitmentOrder::CommitmentFirst, + monitor_pending_funding_locked: false, monitor_pending_revoke_and_ack: false, monitor_pending_commitment_signed: false, monitor_pending_forwards: Vec::new(), @@ -672,6 +674,7 @@ impl Channel { resend_order: RAACommitmentOrder::CommitmentFirst, + monitor_pending_funding_locked: false, monitor_pending_revoke_and_ack: false, monitor_pending_commitment_signed: false, monitor_pending_forwards: Vec::new(), @@ -2352,11 +2355,29 @@ impl Channel { /// Indicates that the latest ChannelMonitor update has been committed by the client /// successfully and we should restore normal operation. Returns messages which should be sent /// to the remote side. - pub fn monitor_updating_restored(&mut self) -> (Option, Option, RAACommitmentOrder, Vec<(PendingForwardHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, bool) { + pub fn monitor_updating_restored(&mut self) -> (Option, Option, RAACommitmentOrder, Vec<(PendingForwardHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, bool, Option) { assert_eq!(self.channel_state & ChannelState::MonitorUpdateFailed as u32, ChannelState::MonitorUpdateFailed as u32); self.channel_state &= !(ChannelState::MonitorUpdateFailed as u32); + let needs_broadcast_safe = self.channel_state & (ChannelState::FundingSent as u32) != 0 && self.channel_outbound; + // Because we will never generate a FundingBroadcastSafe event when we're in + // MonitorUpdateFailed, if we assume the user only broadcast the funding transaction when + // they received the FundingBroadcastSafe event, we can only ever hit + // monitor_pending_funding_locked when we're an inbound channel which failed to persist the + // monitor on funding_created, and we even got the funding transaction confirmed before the + // monitor was persisted. + let funding_locked = if self.monitor_pending_funding_locked { + assert!(!self.channel_outbound, "Funding transaction broadcast without FundingBroadcastSafe!"); + self.monitor_pending_funding_locked = false; + let next_per_commitment_secret = self.build_local_commitment_secret(self.cur_local_commitment_transaction_number); + let next_per_commitment_point = PublicKey::from_secret_key(&self.secp_ctx, &next_per_commitment_secret); + Some(msgs::FundingLocked { + channel_id: self.channel_id(), + next_per_commitment_point: next_per_commitment_point, + }) + } else { None }; + let mut forwards = Vec::new(); mem::swap(&mut forwards, &mut self.monitor_pending_forwards); let mut failures = Vec::new(); @@ -2365,7 +2386,7 @@ impl Channel { if self.channel_state & (ChannelState::PeerDisconnected as u32) != 0 { self.monitor_pending_revoke_and_ack = false; self.monitor_pending_commitment_signed = false; - return (None, None, RAACommitmentOrder::RevokeAndACKFirst, forwards, failures, needs_broadcast_safe); + return (None, None, RAACommitmentOrder::RevokeAndACKFirst, forwards, failures, needs_broadcast_safe, funding_locked); } let raa = if self.monitor_pending_revoke_and_ack { @@ -2383,7 +2404,7 @@ impl Channel { if commitment_update.is_some() { "a" } else { "no" }, if raa.is_some() { "an" } else { "no" }, match order { RAACommitmentOrder::CommitmentFirst => "commitment", RAACommitmentOrder::RevokeAndACKFirst => "RAA"}); - (raa, commitment_update, order, forwards, failures, needs_broadcast_safe) + (raa, commitment_update, order, forwards, failures, needs_broadcast_safe, funding_locked) } pub fn update_fee(&mut self, fee_estimator: &FeeEstimator, msg: &msgs::UpdateFee) -> Result<(), ChannelError> { @@ -2493,7 +2514,9 @@ impl Channel { } else { None }; if self.channel_state & (ChannelState::FundingSent as u32) == ChannelState::FundingSent as u32 { - if self.channel_state & ChannelState::OurFundingLocked as u32 == 0 { + // If we're waiting on a monitor update, we shouldn't re-send any funding_locked's. + if self.channel_state & (ChannelState::OurFundingLocked as u32) == 0 || + self.channel_state & (ChannelState::MonitorUpdateFailed as u32) != 0 { if msg.next_remote_commitment_number != 0 { return Err(ChannelError::Close("Peer claimed they saw a revoke_and_ack but we haven't sent funding_locked yet")); } @@ -2984,12 +3007,17 @@ impl Channel { //they can by sending two revoke_and_acks back-to-back, but not really). This appears to be //a protocol oversight, but I assume I'm just missing something. if need_commitment_update { - let next_per_commitment_secret = self.build_local_commitment_secret(self.cur_local_commitment_transaction_number); - let next_per_commitment_point = PublicKey::from_secret_key(&self.secp_ctx, &next_per_commitment_secret); - return Ok(Some(msgs::FundingLocked { - channel_id: self.channel_id, - next_per_commitment_point: next_per_commitment_point, - })); + if self.channel_state & (ChannelState::MonitorUpdateFailed as u32) == 0 { + let next_per_commitment_secret = self.build_local_commitment_secret(self.cur_local_commitment_transaction_number); + let next_per_commitment_point = PublicKey::from_secret_key(&self.secp_ctx, &next_per_commitment_secret); + return Ok(Some(msgs::FundingLocked { + channel_id: self.channel_id, + next_per_commitment_point: next_per_commitment_point, + })); + } else { + self.monitor_pending_funding_locked = true; + return Ok(None); + } } } } @@ -3708,6 +3736,7 @@ impl Writeable for Channel { RAACommitmentOrder::RevokeAndACKFirst => 1u8.write(writer)?, } + self.monitor_pending_funding_locked.write(writer)?; self.monitor_pending_revoke_and_ack.write(writer)?; self.monitor_pending_commitment_signed.write(writer)?; @@ -3875,6 +3904,7 @@ impl ReadableArgs> for Channel { _ => return Err(DecodeError::InvalidValue), }; + let monitor_pending_funding_locked = Readable::read(reader)?; let monitor_pending_revoke_and_ack = Readable::read(reader)?; let monitor_pending_commitment_signed = Readable::read(reader)?; @@ -3971,6 +4001,7 @@ impl ReadableArgs> for Channel { resend_order, + monitor_pending_funding_locked, monitor_pending_revoke_and_ack, monitor_pending_commitment_signed, monitor_pending_forwards, diff --git a/src/ln/channelmanager.rs b/src/ln/channelmanager.rs index 3336aa31d..afabcd7cc 100644 --- a/src/ln/channelmanager.rs +++ b/src/ln/channelmanager.rs @@ -1684,7 +1684,7 @@ impl ChannelManager { ChannelMonitorUpdateErr::TemporaryFailure => true, } } else { - let (raa, commitment_update, order, pending_forwards, mut pending_failures, needs_broadcast_safe) = channel.monitor_updating_restored(); + let (raa, commitment_update, order, pending_forwards, mut pending_failures, needs_broadcast_safe, funding_locked) = channel.monitor_updating_restored(); if !pending_forwards.is_empty() { htlc_forwards.push((channel.get_short_channel_id().expect("We can't have pending forwards before funding confirmation"), pending_forwards)); } @@ -1722,6 +1722,19 @@ impl ChannelManager { user_channel_id: channel.get_user_id(), }); } + if let Some(msg) = funding_locked { + pending_msg_events.push(events::MessageSendEvent::SendFundingLocked { + node_id: channel.get_their_node_id(), + msg, + }); + if let Some(announcement_sigs) = self.get_announcement_sigs(channel) { + pending_msg_events.push(events::MessageSendEvent::SendAnnouncementSignatures { + node_id: channel.get_their_node_id(), + msg: announcement_sigs, + }); + } + short_to_id.insert(channel.get_short_channel_id().unwrap(), channel.channel_id()); + } true } } else { true } @@ -1790,7 +1803,7 @@ impl ChannelManager { } fn internal_funding_created(&self, their_node_id: &PublicKey, msg: &msgs::FundingCreated) -> Result<(), MsgHandleErrInternal> { - let ((funding_msg, monitor_update), chan) = { + let ((funding_msg, monitor_update), mut chan) = { let mut channel_lock = self.channel_state.lock().unwrap(); let channel_state = channel_lock.borrow_parts(); match channel_state.by_id.entry(msg.temporary_channel_id.clone()) { @@ -1806,8 +1819,23 @@ impl ChannelManager { }; // Because we have exclusive ownership of the channel here we can release the channel_state // lock before add_update_monitor - if let Err(_e) = self.monitor.add_update_monitor(monitor_update.get_funding_txo().unwrap(), monitor_update) { - unimplemented!(); + if let Err(e) = self.monitor.add_update_monitor(monitor_update.get_funding_txo().unwrap(), monitor_update) { + match e { + ChannelMonitorUpdateErr::PermanentFailure => { + // Note that we reply with the new channel_id in error messages if we gave up on the + // channel, not the temporary_channel_id. This is compatible with ourselves, but the + // spec is somewhat ambiguous here. Not a huge deal since we'll send error messages for + // any messages referencing a previously-closed channel anyway. + return Err(MsgHandleErrInternal::from_finish_shutdown("ChannelMonitor storage failure", funding_msg.channel_id, chan.force_shutdown(), None)); + }, + ChannelMonitorUpdateErr::TemporaryFailure => { + // There's no problem signing a counterparty's funding transaction if our monitor + // hasn't persisted to disk yet - we can't lose money on a transaction that we haven't + // accepted payment from yet. We do, however, need to wait to send our funding_locked + // until we have persisted our monitor. + chan.monitor_update_failed(false, false, Vec::new(), Vec::new()); + }, + } } let mut channel_state_lock = self.channel_state.lock().unwrap(); let channel_state = channel_state_lock.borrow_parts(); diff --git a/src/ln/functional_test_utils.rs b/src/ln/functional_test_utils.rs index 5ea6961bd..001da7009 100644 --- a/src/ln/functional_test_utils.rs +++ b/src/ln/functional_test_utils.rs @@ -221,31 +221,37 @@ pub fn create_chan_between_nodes_with_value_init(node_a: &Node, node_b: &Node, c tx } -pub fn create_chan_between_nodes_with_value_confirm(node_a: &Node, node_b: &Node, tx: &Transaction) -> ((msgs::FundingLocked, msgs::AnnouncementSignatures), [u8; 32]) { - confirm_transaction(&node_b.chain_monitor, &tx, tx.version); - node_a.node.handle_funding_locked(&node_b.node.get_our_node_id(), &get_event_msg!(node_b, MessageSendEvent::SendFundingLocked, node_a.node.get_our_node_id())).unwrap(); +pub fn create_chan_between_nodes_with_value_confirm_first(node_recv: &Node, node_conf: &Node, tx: &Transaction) { + confirm_transaction(&node_conf.chain_monitor, &tx, tx.version); + node_recv.node.handle_funding_locked(&node_conf.node.get_our_node_id(), &get_event_msg!(node_conf, MessageSendEvent::SendFundingLocked, node_recv.node.get_our_node_id())).unwrap(); +} +pub fn create_chan_between_nodes_with_value_confirm_second(node_recv: &Node, node_conf: &Node) -> ((msgs::FundingLocked, msgs::AnnouncementSignatures), [u8; 32]) { let channel_id; - - confirm_transaction(&node_a.chain_monitor, &tx, tx.version); - let events_6 = node_a.node.get_and_clear_pending_msg_events(); + let events_6 = node_conf.node.get_and_clear_pending_msg_events(); assert_eq!(events_6.len(), 2); ((match events_6[0] { MessageSendEvent::SendFundingLocked { ref node_id, ref msg } => { channel_id = msg.channel_id.clone(); - assert_eq!(*node_id, node_b.node.get_our_node_id()); + assert_eq!(*node_id, node_recv.node.get_our_node_id()); msg.clone() }, _ => panic!("Unexpected event"), }, match events_6[1] { MessageSendEvent::SendAnnouncementSignatures { ref node_id, ref msg } => { - assert_eq!(*node_id, node_b.node.get_our_node_id()); + assert_eq!(*node_id, node_recv.node.get_our_node_id()); msg.clone() }, _ => panic!("Unexpected event"), }), channel_id) } +pub fn create_chan_between_nodes_with_value_confirm(node_a: &Node, node_b: &Node, tx: &Transaction) -> ((msgs::FundingLocked, msgs::AnnouncementSignatures), [u8; 32]) { + create_chan_between_nodes_with_value_confirm_first(node_a, node_b, tx); + confirm_transaction(&node_a.chain_monitor, &tx, tx.version); + create_chan_between_nodes_with_value_confirm_second(node_b, node_a) +} + pub fn create_chan_between_nodes_with_value_a(node_a: &Node, node_b: &Node, channel_value: u64, push_msat: u64, a_flags: LocalFeatures, b_flags: LocalFeatures) -> ((msgs::FundingLocked, msgs::AnnouncementSignatures), [u8; 32], Transaction) { let tx = create_chan_between_nodes_with_value_init(node_a, node_b, channel_value, push_msat, a_flags, b_flags); let (msgs, chan_id) = create_chan_between_nodes_with_value_confirm(node_a, node_b, &tx);