Handle monitor update failure during funding on the fundee side 2019-07-no-unimpl
authorMatt Corallo <git@bluematt.me>
Mon, 29 Jul 2019 17:45:35 +0000 (13:45 -0400)
committerMatt Corallo <git@bluematt.me>
Mon, 29 Jul 2019 17:45:35 +0000 (13:45 -0400)
This carries a surprising amount of complexity despite only being
possible in the case where monitor updating failed during the
processing of funding_generated. Specifically, this requires
handling rebroadcasting funding_locked once we successfully persist
our monitor again.

As an alternative we could never send funding_signed when the
monitor failed to persist, but this approach avoids needless
delays during funding.

src/ln/chanmon_update_fail_tests.rs
src/ln/channel.rs
src/ln/channelmanager.rs
src/ln/functional_test_utils.rs

index 4803b0054e5d466bc6aaea5c4f43489681bac9f9..137d983465e339e43c7401562db38ee0f9eff932 100644 (file)
@@ -1557,7 +1557,9 @@ fn monitor_update_claim_fail_no_response() {
 // Note that restore_between_fails with !fail_on_generate is useless
 // Also note that !fail_on_generate && !fail_on_signed is useless
 // Finally, note that !fail_on_signed is not possible with fail_on_generate && !restore_between_fails
-fn do_during_funding_monitor_fail(fail_on_generate: bool, restore_between_fails: bool, fail_on_signed: bool) {
+// confirm_a_first and restore_b_before_conf are wholly unrelated to earlier bools and
+// restore_b_before_conf has no meaning if !confirm_a_first
+fn do_during_funding_monitor_fail(fail_on_generate: bool, restore_between_fails: bool, fail_on_signed: bool, confirm_a_first: bool, restore_b_before_conf: bool) {
        // Test that if the monitor update generated by funding_transaction_generated fails we continue
        // the channel setup happily after the update is restored.
        let mut nodes = create_network(2, &[None, None]);
@@ -1574,6 +1576,7 @@ fn do_during_funding_monitor_fail(fail_on_generate: bool, restore_between_fails:
        nodes[0].node.funding_transaction_generated(&temporary_channel_id, funding_output);
        check_added_monitors!(nodes[0], 1);
 
+       *nodes[1].chan_monitor.update_ret.lock().unwrap() = Err(ChannelMonitorUpdateErr::TemporaryFailure);
        nodes[1].node.handle_funding_created(&nodes[0].node.get_our_node_id(), &get_event_msg!(nodes[0], MessageSendEvent::SendFundingCreated, nodes[1].node.get_our_node_id())).unwrap();
        check_added_monitors!(nodes[1], 1);
 
@@ -1623,8 +1626,45 @@ fn do_during_funding_monitor_fail(fail_on_generate: bool, restore_between_fails:
                _ => panic!("Unexpected event"),
        };
 
-       let (funding_locked, channel_id) = create_chan_between_nodes_with_value_confirm(&nodes[0], &nodes[1], &funding_tx);
-       let (announcement, as_update, bs_update) = create_chan_between_nodes_with_value_b(&nodes[0], &nodes[1], &funding_locked);
+       if confirm_a_first {
+               confirm_transaction(&nodes[0].chain_monitor, &funding_tx, funding_tx.version);
+               nodes[1].node.handle_funding_locked(&nodes[0].node.get_our_node_id(), &get_event_msg!(nodes[0], MessageSendEvent::SendFundingLocked, nodes[1].node.get_our_node_id())).unwrap();
+       } else {
+               assert!(!restore_b_before_conf);
+               confirm_transaction(&nodes[1].chain_monitor, &funding_tx, funding_tx.version);
+               assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+       }
+
+       // Make sure nodes[1] isn't stupid enough to re-send the FundingLocked on reconnect
+       nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), false);
+       nodes[1].node.peer_disconnected(&nodes[0].node.get_our_node_id(), false);
+       reconnect_nodes(&nodes[0], &nodes[1], (false, confirm_a_first), (0, 0), (0, 0), (0, 0), (0, 0), (false, false));
+       assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
+       assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+
+       if !restore_b_before_conf {
+               confirm_transaction(&nodes[1].chain_monitor, &funding_tx, funding_tx.version);
+               assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+               assert!(nodes[1].node.get_and_clear_pending_events().is_empty());
+       }
+
+       *nodes[1].chan_monitor.update_ret.lock().unwrap() = Ok(());
+       nodes[1].node.test_restore_channel_monitor();
+       check_added_monitors!(nodes[1], 1);
+
+       let (channel_id, (announcement, as_update, bs_update)) = if !confirm_a_first {
+               nodes[0].node.handle_funding_locked(&nodes[1].node.get_our_node_id(), &get_event_msg!(nodes[1], MessageSendEvent::SendFundingLocked, nodes[0].node.get_our_node_id())).unwrap();
+
+               confirm_transaction(&nodes[0].chain_monitor, &funding_tx, funding_tx.version);
+               let (funding_locked, channel_id) = create_chan_between_nodes_with_value_confirm_second(&nodes[1], &nodes[0]);
+               (channel_id, create_chan_between_nodes_with_value_b(&nodes[0], &nodes[1], &funding_locked))
+       } else {
+               if restore_b_before_conf {
+                       confirm_transaction(&nodes[1].chain_monitor, &funding_tx, funding_tx.version);
+               }
+               let (funding_locked, channel_id) = create_chan_between_nodes_with_value_confirm_second(&nodes[0], &nodes[1]);
+               (channel_id, create_chan_between_nodes_with_value_b(&nodes[1], &nodes[0], &funding_locked))
+       };
        for node in nodes.iter() {
                assert!(node.router.handle_channel_announcement(&announcement).unwrap());
                node.router.handle_channel_update(&as_update).unwrap();
@@ -1637,8 +1677,8 @@ fn do_during_funding_monitor_fail(fail_on_generate: bool, restore_between_fails:
 
 #[test]
 fn during_funding_monitor_fail() {
-       do_during_funding_monitor_fail(false, false, true);
-       do_during_funding_monitor_fail(true, false, true);
-       do_during_funding_monitor_fail(true, true, true);
-       do_during_funding_monitor_fail(true, true, false);
+       do_during_funding_monitor_fail(false, false, true, true, true);
+       do_during_funding_monitor_fail(true, false, true, false, false);
+       do_during_funding_monitor_fail(true, true, true, true, false);
+       do_during_funding_monitor_fail(true, true, false, false, false);
 }
index 6dd7e8c82ddb285748586635e424d6a01f6016df..bcc91508f9bfc6f444bec5e94594ccee671d7e1c 100644 (file)
@@ -181,9 +181,9 @@ enum ChannelState {
        /// "disconnected" and no updates are allowed until after we've done a channel_reestablish
        /// dance.
        PeerDisconnected = (1 << 7),
-       /// Flag which is set on ChannelFunded and FundingSent indicating the user has told us they
-       /// failed to update our ChannelMonitor somewhere and we should pause sending any outbound
-       /// messages until they've managed to do so.
+       /// Flag which is set on ChannelFunded, FundingCreated, and FundingSent indicating the user has
+       /// told us they failed to update our ChannelMonitor somewhere and we should pause sending any
+       /// outbound messages until they've managed to do so.
        MonitorUpdateFailed = (1 << 8),
        /// Flag which implies that we have sent a commitment_signed but are awaiting the responding
        /// revoke_and_ack message. During this time period, we can't generate new commitment_signed
@@ -248,6 +248,7 @@ pub(super) struct Channel {
        /// send it first.
        resend_order: RAACommitmentOrder,
 
+       monitor_pending_funding_locked: bool,
        monitor_pending_revoke_and_ack: bool,
        monitor_pending_commitment_signed: bool,
        monitor_pending_forwards: Vec<(PendingForwardHTLCInfo, u64)>,
@@ -457,6 +458,7 @@ impl Channel {
 
                        resend_order: RAACommitmentOrder::CommitmentFirst,
 
+                       monitor_pending_funding_locked: false,
                        monitor_pending_revoke_and_ack: false,
                        monitor_pending_commitment_signed: false,
                        monitor_pending_forwards: Vec::new(),
@@ -672,6 +674,7 @@ impl Channel {
 
                        resend_order: RAACommitmentOrder::CommitmentFirst,
 
+                       monitor_pending_funding_locked: false,
                        monitor_pending_revoke_and_ack: false,
                        monitor_pending_commitment_signed: false,
                        monitor_pending_forwards: Vec::new(),
@@ -2352,11 +2355,29 @@ impl Channel {
        /// Indicates that the latest ChannelMonitor update has been committed by the client
        /// successfully and we should restore normal operation. Returns messages which should be sent
        /// to the remote side.
-       pub fn monitor_updating_restored(&mut self) -> (Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, RAACommitmentOrder, Vec<(PendingForwardHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, bool) {
+       pub fn monitor_updating_restored(&mut self) -> (Option<msgs::RevokeAndACK>, Option<msgs::CommitmentUpdate>, RAACommitmentOrder, Vec<(PendingForwardHTLCInfo, u64)>, Vec<(HTLCSource, PaymentHash, HTLCFailReason)>, bool, Option<msgs::FundingLocked>) {
                assert_eq!(self.channel_state & ChannelState::MonitorUpdateFailed as u32, ChannelState::MonitorUpdateFailed as u32);
                self.channel_state &= !(ChannelState::MonitorUpdateFailed as u32);
+
                let needs_broadcast_safe = self.channel_state & (ChannelState::FundingSent as u32) != 0 && self.channel_outbound;
 
+               // Because we will never generate a FundingBroadcastSafe event when we're in
+               // MonitorUpdateFailed, if we assume the user only broadcast the funding transaction when
+               // they received the FundingBroadcastSafe event, we can only ever hit
+               // monitor_pending_funding_locked when we're an inbound channel which failed to persist the
+               // monitor on funding_created, and we even got the funding transaction confirmed before the
+               // monitor was persisted.
+               let funding_locked = if self.monitor_pending_funding_locked {
+                       assert!(!self.channel_outbound, "Funding transaction broadcast without FundingBroadcastSafe!");
+                       self.monitor_pending_funding_locked = false;
+                       let next_per_commitment_secret = self.build_local_commitment_secret(self.cur_local_commitment_transaction_number);
+                       let next_per_commitment_point = PublicKey::from_secret_key(&self.secp_ctx, &next_per_commitment_secret);
+                       Some(msgs::FundingLocked {
+                               channel_id: self.channel_id(),
+                               next_per_commitment_point: next_per_commitment_point,
+                       })
+               } else { None };
+
                let mut forwards = Vec::new();
                mem::swap(&mut forwards, &mut self.monitor_pending_forwards);
                let mut failures = Vec::new();
@@ -2365,7 +2386,7 @@ impl Channel {
                if self.channel_state & (ChannelState::PeerDisconnected as u32) != 0 {
                        self.monitor_pending_revoke_and_ack = false;
                        self.monitor_pending_commitment_signed = false;
-                       return (None, None, RAACommitmentOrder::RevokeAndACKFirst, forwards, failures, needs_broadcast_safe);
+                       return (None, None, RAACommitmentOrder::RevokeAndACKFirst, forwards, failures, needs_broadcast_safe, funding_locked);
                }
 
                let raa = if self.monitor_pending_revoke_and_ack {
@@ -2383,7 +2404,7 @@ impl Channel {
                        if commitment_update.is_some() { "a" } else { "no" },
                        if raa.is_some() { "an" } else { "no" },
                        match order { RAACommitmentOrder::CommitmentFirst => "commitment", RAACommitmentOrder::RevokeAndACKFirst => "RAA"});
-               (raa, commitment_update, order, forwards, failures, needs_broadcast_safe)
+               (raa, commitment_update, order, forwards, failures, needs_broadcast_safe, funding_locked)
        }
 
        pub fn update_fee(&mut self, fee_estimator: &FeeEstimator, msg: &msgs::UpdateFee) -> Result<(), ChannelError> {
@@ -2493,7 +2514,9 @@ impl Channel {
                } else { None };
 
                if self.channel_state & (ChannelState::FundingSent as u32) == ChannelState::FundingSent as u32 {
-                       if self.channel_state & ChannelState::OurFundingLocked as u32 == 0 {
+                       // If we're waiting on a monitor update, we shouldn't re-send any funding_locked's.
+                       if self.channel_state & (ChannelState::OurFundingLocked as u32) == 0 ||
+                                       self.channel_state & (ChannelState::MonitorUpdateFailed as u32) != 0 {
                                if msg.next_remote_commitment_number != 0 {
                                        return Err(ChannelError::Close("Peer claimed they saw a revoke_and_ack but we haven't sent funding_locked yet"));
                                }
@@ -2984,12 +3007,17 @@ impl Channel {
                                        //they can by sending two revoke_and_acks back-to-back, but not really). This appears to be
                                        //a protocol oversight, but I assume I'm just missing something.
                                        if need_commitment_update {
-                                               let next_per_commitment_secret = self.build_local_commitment_secret(self.cur_local_commitment_transaction_number);
-                                               let next_per_commitment_point = PublicKey::from_secret_key(&self.secp_ctx, &next_per_commitment_secret);
-                                               return Ok(Some(msgs::FundingLocked {
-                                                       channel_id: self.channel_id,
-                                                       next_per_commitment_point: next_per_commitment_point,
-                                               }));
+                                               if self.channel_state & (ChannelState::MonitorUpdateFailed as u32) == 0 {
+                                                       let next_per_commitment_secret = self.build_local_commitment_secret(self.cur_local_commitment_transaction_number);
+                                                       let next_per_commitment_point = PublicKey::from_secret_key(&self.secp_ctx, &next_per_commitment_secret);
+                                                       return Ok(Some(msgs::FundingLocked {
+                                                               channel_id: self.channel_id,
+                                                               next_per_commitment_point: next_per_commitment_point,
+                                                       }));
+                                               } else {
+                                                       self.monitor_pending_funding_locked = true;
+                                                       return Ok(None);
+                                               }
                                        }
                                }
                        }
@@ -3708,6 +3736,7 @@ impl Writeable for Channel {
                        RAACommitmentOrder::RevokeAndACKFirst => 1u8.write(writer)?,
                }
 
+               self.monitor_pending_funding_locked.write(writer)?;
                self.monitor_pending_revoke_and_ack.write(writer)?;
                self.monitor_pending_commitment_signed.write(writer)?;
 
@@ -3875,6 +3904,7 @@ impl<R : ::std::io::Read> ReadableArgs<R, Arc<Logger>> for Channel {
                        _ => return Err(DecodeError::InvalidValue),
                };
 
+               let monitor_pending_funding_locked = Readable::read(reader)?;
                let monitor_pending_revoke_and_ack = Readable::read(reader)?;
                let monitor_pending_commitment_signed = Readable::read(reader)?;
 
@@ -3971,6 +4001,7 @@ impl<R : ::std::io::Read> ReadableArgs<R, Arc<Logger>> for Channel {
 
                        resend_order,
 
+                       monitor_pending_funding_locked,
                        monitor_pending_revoke_and_ack,
                        monitor_pending_commitment_signed,
                        monitor_pending_forwards,
index 3336aa31d1b524df61d6d23f25d3d717cef43e50..afabcd7cc6bb30a294dcf8326d721ac718952e2d 100644 (file)
@@ -1684,7 +1684,7 @@ impl ChannelManager {
                                                        ChannelMonitorUpdateErr::TemporaryFailure => true,
                                                }
                                        } else {
-                                               let (raa, commitment_update, order, pending_forwards, mut pending_failures, needs_broadcast_safe) = channel.monitor_updating_restored();
+                                               let (raa, commitment_update, order, pending_forwards, mut pending_failures, needs_broadcast_safe, funding_locked) = channel.monitor_updating_restored();
                                                if !pending_forwards.is_empty() {
                                                        htlc_forwards.push((channel.get_short_channel_id().expect("We can't have pending forwards before funding confirmation"), pending_forwards));
                                                }
@@ -1722,6 +1722,19 @@ impl ChannelManager {
                                                                user_channel_id: channel.get_user_id(),
                                                        });
                                                }
+                                               if let Some(msg) = funding_locked {
+                                                       pending_msg_events.push(events::MessageSendEvent::SendFundingLocked {
+                                                               node_id: channel.get_their_node_id(),
+                                                               msg,
+                                                       });
+                                                       if let Some(announcement_sigs) = self.get_announcement_sigs(channel) {
+                                                               pending_msg_events.push(events::MessageSendEvent::SendAnnouncementSignatures {
+                                                                       node_id: channel.get_their_node_id(),
+                                                                       msg: announcement_sigs,
+                                                               });
+                                                       }
+                                                       short_to_id.insert(channel.get_short_channel_id().unwrap(), channel.channel_id());
+                                               }
                                                true
                                        }
                                } else { true }
@@ -1790,7 +1803,7 @@ impl ChannelManager {
        }
 
        fn internal_funding_created(&self, their_node_id: &PublicKey, msg: &msgs::FundingCreated) -> Result<(), MsgHandleErrInternal> {
-               let ((funding_msg, monitor_update), chan) = {
+               let ((funding_msg, monitor_update), mut chan) = {
                        let mut channel_lock = self.channel_state.lock().unwrap();
                        let channel_state = channel_lock.borrow_parts();
                        match channel_state.by_id.entry(msg.temporary_channel_id.clone()) {
@@ -1806,8 +1819,23 @@ impl ChannelManager {
                };
                // Because we have exclusive ownership of the channel here we can release the channel_state
                // lock before add_update_monitor
-               if let Err(_e) = self.monitor.add_update_monitor(monitor_update.get_funding_txo().unwrap(), monitor_update) {
-                       unimplemented!();
+               if let Err(e) = self.monitor.add_update_monitor(monitor_update.get_funding_txo().unwrap(), monitor_update) {
+                       match e {
+                               ChannelMonitorUpdateErr::PermanentFailure => {
+                                       // Note that we reply with the new channel_id in error messages if we gave up on the
+                                       // channel, not the temporary_channel_id. This is compatible with ourselves, but the
+                                       // spec is somewhat ambiguous here. Not a huge deal since we'll send error messages for
+                                       // any messages referencing a previously-closed channel anyway.
+                                       return Err(MsgHandleErrInternal::from_finish_shutdown("ChannelMonitor storage failure", funding_msg.channel_id, chan.force_shutdown(), None));
+                               },
+                               ChannelMonitorUpdateErr::TemporaryFailure => {
+                                       // There's no problem signing a counterparty's funding transaction if our monitor
+                                       // hasn't persisted to disk yet - we can't lose money on a transaction that we haven't
+                                       // accepted payment from yet. We do, however, need to wait to send our funding_locked
+                                       // until we have persisted our monitor.
+                                       chan.monitor_update_failed(false, false, Vec::new(), Vec::new());
+                               },
+                       }
                }
                let mut channel_state_lock = self.channel_state.lock().unwrap();
                let channel_state = channel_state_lock.borrow_parts();
index 5ea6961bdbf75547c6409318e58b1fb9b5c33222..001da7009b8a67e05ff22ae9719da7ce869434b9 100644 (file)
@@ -221,31 +221,37 @@ pub fn create_chan_between_nodes_with_value_init(node_a: &Node, node_b: &Node, c
        tx
 }
 
-pub fn create_chan_between_nodes_with_value_confirm(node_a: &Node, node_b: &Node, tx: &Transaction) -> ((msgs::FundingLocked, msgs::AnnouncementSignatures), [u8; 32]) {
-       confirm_transaction(&node_b.chain_monitor, &tx, tx.version);
-       node_a.node.handle_funding_locked(&node_b.node.get_our_node_id(), &get_event_msg!(node_b, MessageSendEvent::SendFundingLocked, node_a.node.get_our_node_id())).unwrap();
+pub fn create_chan_between_nodes_with_value_confirm_first(node_recv: &Node, node_conf: &Node, tx: &Transaction) {
+       confirm_transaction(&node_conf.chain_monitor, &tx, tx.version);
+       node_recv.node.handle_funding_locked(&node_conf.node.get_our_node_id(), &get_event_msg!(node_conf, MessageSendEvent::SendFundingLocked, node_recv.node.get_our_node_id())).unwrap();
+}
 
+pub fn create_chan_between_nodes_with_value_confirm_second(node_recv: &Node, node_conf: &Node) -> ((msgs::FundingLocked, msgs::AnnouncementSignatures), [u8; 32]) {
        let channel_id;
-
-       confirm_transaction(&node_a.chain_monitor, &tx, tx.version);
-       let events_6 = node_a.node.get_and_clear_pending_msg_events();
+       let events_6 = node_conf.node.get_and_clear_pending_msg_events();
        assert_eq!(events_6.len(), 2);
        ((match events_6[0] {
                MessageSendEvent::SendFundingLocked { ref node_id, ref msg } => {
                        channel_id = msg.channel_id.clone();
-                       assert_eq!(*node_id, node_b.node.get_our_node_id());
+                       assert_eq!(*node_id, node_recv.node.get_our_node_id());
                        msg.clone()
                },
                _ => panic!("Unexpected event"),
        }, match events_6[1] {
                MessageSendEvent::SendAnnouncementSignatures { ref node_id, ref msg } => {
-                       assert_eq!(*node_id, node_b.node.get_our_node_id());
+                       assert_eq!(*node_id, node_recv.node.get_our_node_id());
                        msg.clone()
                },
                _ => panic!("Unexpected event"),
        }), channel_id)
 }
 
+pub fn create_chan_between_nodes_with_value_confirm(node_a: &Node, node_b: &Node, tx: &Transaction) -> ((msgs::FundingLocked, msgs::AnnouncementSignatures), [u8; 32]) {
+       create_chan_between_nodes_with_value_confirm_first(node_a, node_b, tx);
+       confirm_transaction(&node_a.chain_monitor, &tx, tx.version);
+       create_chan_between_nodes_with_value_confirm_second(node_b, node_a)
+}
+
 pub fn create_chan_between_nodes_with_value_a(node_a: &Node, node_b: &Node, channel_value: u64, push_msat: u64, a_flags: LocalFeatures, b_flags: LocalFeatures) -> ((msgs::FundingLocked, msgs::AnnouncementSignatures), [u8; 32], Transaction) {
        let tx = create_chan_between_nodes_with_value_init(node_a, node_b, channel_value, push_msat, a_flags, b_flags);
        let (msgs, chan_id) = create_chan_between_nodes_with_value_confirm(node_a, node_b, &tx);