Handle async initial ChannelMonitor persistence failing on restart
Author:     Matt Corallo <git@bluematt.me>
AuthorDate: Sat, 20 Aug 2022 01:03:27 +0000 (01:03 +0000)
Commit:     Matt Corallo <git@bluematt.me>
CommitDate: Wed, 19 Oct 2022 14:41:30 +0000 (14:41 +0000)
If the initial ChannelMonitor persistence is done asynchronously
but does not complete before the node restarts (with a
ChannelManager persistence), we'll start back up with a channel
present but no corresponding ChannelMonitor.

Because the Channel is pending-monitor-update and has not yet
broadcasted its initial funding transaction or sent channel_ready,
this is not a violation of our API contract nor a safety violation.
However, the previous code would refuse to deserialize the
ChannelManager, treating it as an API contract violation.

The solution is to test for this case explicitly and drop the
channel entirely as if the peer disconnected before we received
the funding_signed for outbound channels or before we sent the
channel_ready for inbound channels.
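
For background, a minimal, hedged sketch of the asynchronous persistence flow that
can race a restart: the persister returns InProgress and finishes the write later, so
a crash in between leaves a persisted ChannelManager with no matching ChannelMonitor
on disk. The spawn_background_write helper below is hypothetical, not an LDK API.

    use lightning::chain::ChannelMonitorUpdateStatus;

    // Hypothetical helper: hands the serialized monitor to a background writer thread.
    fn spawn_background_write(_monitor_bytes: Vec<u8>) {}

    fn persist_new_channel_async(monitor_bytes: Vec<u8>) -> ChannelMonitorUpdateStatus {
        // Queue the write instead of fsync-ing inline.
        spawn_background_write(monitor_bytes);
        // Until the write completes (and ChainMonitor::channel_monitor_updated is
        // called), the channel stays pending-monitor-update: the funding transaction
        // is not broadcasted and channel_ready is not sent.
        ChannelMonitorUpdateStatus::InProgress
    }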

lightning/src/ln/chanmon_update_fail_tests.rs
lightning/src/ln/channel.rs
lightning/src/ln/channelmanager.rs
lightning/src/util/events.rs

diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs
index 15d46b04688930d03525b6347d6b714a28713b28..ef7efdd221f028114e780aba62b5ea6e56bd90e7 100644
@@ -2745,3 +2745,221 @@ fn double_temp_error() {
        commitment_signed_dance!(nodes[0], nodes[1], commitment_signed_b2, false);
        expect_payment_sent!(nodes[0], payment_preimage_2);
 }
+
+fn do_test_outbound_reload_without_init_mon(use_0conf: bool) {
+       // Test that if the monitor update generated in funding_signed is stored async and we
+       // restart with the latest ChannelManager but the ChannelMonitor persistence never
+       // completed, we happily drop the channel and move on.
+       let chanmon_cfgs = create_chanmon_cfgs(2);
+       let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+
+       let persister: test_utils::TestPersister;
+       let new_chain_monitor: test_utils::TestChainMonitor;
+       let nodes_0_deserialized: ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>;
+
+       let mut chan_config = test_default_channel_config();
+       chan_config.manually_accept_inbound_channels = true;
+       chan_config.channel_handshake_limits.trust_own_funding_0conf = true;
+
+       let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[Some(chan_config), Some(chan_config)]);
+       let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+
+       nodes[0].node.create_channel(nodes[1].node.get_our_node_id(), 100000, 10001, 43, None).unwrap();
+       nodes[1].node.handle_open_channel(&nodes[0].node.get_our_node_id(), channelmanager::provided_init_features(), &get_event_msg!(nodes[0], MessageSendEvent::SendOpenChannel, nodes[1].node.get_our_node_id()));
+
+       let events = nodes[1].node.get_and_clear_pending_events();
+       assert_eq!(events.len(), 1);
+       match events[0] {
+               Event::OpenChannelRequest { temporary_channel_id, .. } => {
+                       if use_0conf {
+                               nodes[1].node.accept_inbound_channel_from_trusted_peer_0conf(&temporary_channel_id, &nodes[0].node.get_our_node_id(), 0).unwrap();
+                       } else {
+                               nodes[1].node.accept_inbound_channel(&temporary_channel_id, &nodes[0].node.get_our_node_id(), 0).unwrap();
+                       }
+               },
+               _ => panic!("Unexpected event"),
+       };
+
+       nodes[0].node.handle_accept_channel(&nodes[1].node.get_our_node_id(), channelmanager::provided_init_features(), &get_event_msg!(nodes[1], MessageSendEvent::SendAcceptChannel, nodes[0].node.get_our_node_id()));
+
+       let (temporary_channel_id, funding_tx, ..) = create_funding_transaction(&nodes[0], &nodes[1].node.get_our_node_id(), 100000, 43);
+
+       nodes[0].node.funding_transaction_generated(&temporary_channel_id, &nodes[1].node.get_our_node_id(), funding_tx.clone()).unwrap();
+       check_added_monitors!(nodes[0], 0);
+
+       let funding_created_msg = get_event_msg!(nodes[0], MessageSendEvent::SendFundingCreated, nodes[1].node.get_our_node_id());
+       nodes[1].node.handle_funding_created(&nodes[0].node.get_our_node_id(), &funding_created_msg);
+       check_added_monitors!(nodes[1], 1);
+
+       let bs_signed_locked = nodes[1].node.get_and_clear_pending_msg_events();
+       assert_eq!(bs_signed_locked.len(), if use_0conf { 2 } else { 1 });
+       match &bs_signed_locked[0] {
+               MessageSendEvent::SendFundingSigned { msg, .. } => {
+                       chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+
+                       nodes[0].node.handle_funding_signed(&nodes[1].node.get_our_node_id(), &msg);
+                       check_added_monitors!(nodes[0], 1);
+               }
+               _ => panic!("Unexpected event"),
+       }
+       if use_0conf {
+               match &bs_signed_locked[1] {
+                       MessageSendEvent::SendChannelReady { msg, .. } => {
+                               nodes[0].node.handle_channel_ready(&nodes[1].node.get_our_node_id(), &msg);
+                       }
+                       _ => panic!("Unexpected event"),
+               }
+       }
+
+       assert!(nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().is_empty());
+       assert!(nodes[0].node.get_and_clear_pending_msg_events().is_empty());
+       assert!(nodes[0].node.get_and_clear_pending_events().is_empty());
+
+       // nodes[0] is now waiting on the first ChannelMonitor persistence to complete in order to
+       // broadcast the funding transaction. If nodes[0] restarts at this point with the
+       // ChannelMonitor lost, we should simply discard the channel.
+
+       // The test framework checks that watched_txn/outputs match the monitor set, which they will
+       // not, so we have to clear them here.
+       nodes[0].chain_source.watched_txn.lock().unwrap().clear();
+       nodes[0].chain_source.watched_outputs.lock().unwrap().clear();
+
+       let nodes_0_serialized = nodes[0].node.encode();
+       persister = test_utils::TestPersister::new();
+       let keys_manager = &chanmon_cfgs[0].keys_manager;
+       new_chain_monitor = test_utils::TestChainMonitor::new(Some(nodes[0].chain_source), nodes[0].tx_broadcaster.clone(), nodes[0].logger, node_cfgs[0].fee_estimator, &persister, keys_manager);
+       nodes[0].chain_monitor = &new_chain_monitor;
+
+       let mut nodes_0_read = &nodes_0_serialized[..];
+       let config = UserConfig::default();
+       nodes_0_deserialized = {
+               <(BlockHash, ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>)>::read(&mut nodes_0_read, ChannelManagerReadArgs {
+                       default_config: config,
+                       keys_manager,
+                       fee_estimator: node_cfgs[0].fee_estimator,
+                       chain_monitor: nodes[0].chain_monitor,
+                       tx_broadcaster: nodes[0].tx_broadcaster.clone(),
+                       logger: nodes[0].logger,
+                       channel_monitors: HashMap::new(),
+               }).unwrap().1
+       };
+       nodes[0].node = &nodes_0_deserialized;
+       assert!(nodes_0_read.is_empty());
+
+       check_closed_event!(nodes[0], 1, ClosureReason::DisconnectedPeer);
+       assert!(nodes[0].node.list_channels().is_empty());
+}
+
+#[test]
+fn test_outbound_reload_without_init_mon() {
+       do_test_outbound_reload_without_init_mon(true);
+       do_test_outbound_reload_without_init_mon(false);
+}
+
+fn do_test_inbound_reload_without_init_mon(use_0conf: bool, lock_commitment: bool) {
+       // Test that if the monitor update generated in funding_created is stored async and we
+       // restart with the latest ChannelManager but the ChannelMonitor persistence never
+       // completed, we happily drop the channel and move on.
+       let chanmon_cfgs = create_chanmon_cfgs(2);
+       let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+
+       let persister: test_utils::TestPersister;
+       let new_chain_monitor: test_utils::TestChainMonitor;
+       let nodes_1_deserialized: ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>;
+
+       let mut chan_config = test_default_channel_config();
+       chan_config.manually_accept_inbound_channels = true;
+       chan_config.channel_handshake_limits.trust_own_funding_0conf = true;
+
+       let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[Some(chan_config), Some(chan_config)]);
+       let mut nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+
+       nodes[0].node.create_channel(nodes[1].node.get_our_node_id(), 100000, 10001, 43, None).unwrap();
+       nodes[1].node.handle_open_channel(&nodes[0].node.get_our_node_id(), channelmanager::provided_init_features(), &get_event_msg!(nodes[0], MessageSendEvent::SendOpenChannel, nodes[1].node.get_our_node_id()));
+
+       let events = nodes[1].node.get_and_clear_pending_events();
+       assert_eq!(events.len(), 1);
+       match events[0] {
+               Event::OpenChannelRequest { temporary_channel_id, .. } => {
+                       if use_0conf {
+                               nodes[1].node.accept_inbound_channel_from_trusted_peer_0conf(&temporary_channel_id, &nodes[0].node.get_our_node_id(), 0).unwrap();
+                       } else {
+                               nodes[1].node.accept_inbound_channel(&temporary_channel_id, &nodes[0].node.get_our_node_id(), 0).unwrap();
+                       }
+               },
+               _ => panic!("Unexpected event"),
+       };
+
+       nodes[0].node.handle_accept_channel(&nodes[1].node.get_our_node_id(), channelmanager::provided_init_features(), &get_event_msg!(nodes[1], MessageSendEvent::SendAcceptChannel, nodes[0].node.get_our_node_id()));
+
+       let (temporary_channel_id, funding_tx, ..) = create_funding_transaction(&nodes[0], &nodes[1].node.get_our_node_id(), 100000, 43);
+
+       nodes[0].node.funding_transaction_generated(&temporary_channel_id, &nodes[1].node.get_our_node_id(), funding_tx.clone()).unwrap();
+       check_added_monitors!(nodes[0], 0);
+
+       let funding_created_msg = get_event_msg!(nodes[0], MessageSendEvent::SendFundingCreated, nodes[1].node.get_our_node_id());
+       chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+       nodes[1].node.handle_funding_created(&nodes[0].node.get_our_node_id(), &funding_created_msg);
+       check_added_monitors!(nodes[1], 1);
+
+       // nodes[1] happily sends its funding_signed even though it's awaiting the persistence of
+       // the initial ChannelMonitor, but it will decline to send its channel_ready even if the
+       // funding transaction is confirmed.
+       let funding_signed_msg = get_event_msg!(nodes[1], MessageSendEvent::SendFundingSigned, nodes[0].node.get_our_node_id());
+
+       nodes[0].node.handle_funding_signed(&nodes[1].node.get_our_node_id(), &funding_signed_msg);
+       check_added_monitors!(nodes[0], 1);
+
+       let as_funding_tx = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
+       if lock_commitment {
+               confirm_transaction(&nodes[0], &as_funding_tx[0]);
+               confirm_transaction(&nodes[1], &as_funding_tx[0]);
+       }
+       if use_0conf || lock_commitment {
+               let as_ready = get_event_msg!(nodes[0], MessageSendEvent::SendChannelReady, nodes[1].node.get_our_node_id());
+               nodes[1].node.handle_channel_ready(&nodes[0].node.get_our_node_id(), &as_ready);
+       }
+       assert!(nodes[1].node.get_and_clear_pending_msg_events().is_empty());
+
+       // nodes[1] is now waiting on the first ChannelMonitor persistence to complete in order to
+       // move the channel to ready (or is waiting on the funding transaction to confirm). If nodes[1]
+       // restarts at this point with the ChannelMonitor lost, we should simply discard the channel.
+
+       // The test framework checks that watched_txn/outputs match the monitor set, which they will
+       // not, so we have to clear them here.
+       nodes[1].chain_source.watched_txn.lock().unwrap().clear();
+       nodes[1].chain_source.watched_outputs.lock().unwrap().clear();
+
+       let nodes_1_serialized = nodes[1].node.encode();
+       persister = test_utils::TestPersister::new();
+       let keys_manager = &chanmon_cfgs[1].keys_manager;
+       new_chain_monitor = test_utils::TestChainMonitor::new(Some(nodes[1].chain_source), nodes[1].tx_broadcaster.clone(), nodes[1].logger, node_cfgs[1].fee_estimator, &persister, keys_manager);
+       nodes[1].chain_monitor = &new_chain_monitor;
+
+       let mut nodes_1_read = &nodes_1_serialized[..];
+       let config = UserConfig::default();
+       nodes_1_deserialized = {
+               <(BlockHash, ChannelManager<EnforcingSigner, &test_utils::TestChainMonitor, &test_utils::TestBroadcaster, &test_utils::TestKeysInterface, &test_utils::TestFeeEstimator, &test_utils::TestLogger>)>::read(&mut nodes_1_read, ChannelManagerReadArgs {
+                       default_config: config,
+                       keys_manager,
+                       fee_estimator: node_cfgs[1].fee_estimator,
+                       chain_monitor: nodes[1].chain_monitor,
+                       tx_broadcaster: nodes[1].tx_broadcaster.clone(),
+                       logger: nodes[1].logger,
+                       channel_monitors: HashMap::new(),
+               }).unwrap().1
+       };
+       nodes[1].node = &nodes_1_deserialized;
+       assert!(nodes_1_read.is_empty());
+
+       check_closed_event!(nodes[1], 1, ClosureReason::DisconnectedPeer);
+       assert!(nodes[1].node.list_channels().is_empty());
+}
+
+#[test]
+fn test_inbound_reload_without_init_mon() {
+       do_test_inbound_reload_without_init_mon(true, true);
+       do_test_inbound_reload_without_init_mon(true, false);
+       do_test_inbound_reload_without_init_mon(false, true);
+       do_test_inbound_reload_without_init_mon(false, false);
+}
diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs
index 48a701e3b04fb712772a3a9a293536e839ee6de4..55732d704febfcdd6c125c7af18411eb5ce07a3b 100644
@@ -4788,6 +4788,42 @@ impl<Signer: Sign> Channel<Signer> {
                self.channel_state >= ChannelState::FundingSent as u32
        }
 
+       /// Returns true if the channel is awaiting the persistence of the initial ChannelMonitor.
+       /// If the channel is outbound, this implies we have not yet broadcasted the funding
+       /// transaction. If the channel is inbound, this implies simply that the channel has not
+       /// advanced state.
+       pub fn is_awaiting_initial_mon_persist(&self) -> bool {
+               if !self.is_awaiting_monitor_update() { return false; }
+               if self.channel_state &
+                       !(ChannelState::TheirChannelReady as u32 | ChannelState::PeerDisconnected as u32 | ChannelState::MonitorUpdateFailed as u32)
+                               == ChannelState::FundingSent as u32 {
+                       // If we're not a 0conf channel, we'll be waiting on a monitor update with only
+                       // FundingSent set, though our peer could have sent their channel_ready.
+                       debug_assert!(self.minimum_depth.unwrap_or(1) > 0);
+                       return true;
+               }
+               if self.cur_holder_commitment_transaction_number == INITIAL_COMMITMENT_NUMBER - 1 &&
+                       self.cur_counterparty_commitment_transaction_number == INITIAL_COMMITMENT_NUMBER - 1 {
+                       // If we're a 0-conf channel, we'll move beyond FundingSent immediately even while
+                       // waiting for the initial monitor persistence. Thus, we check if our commitment
+                       // transaction numbers have both been iterated only exactly once (for the
+                       // funding_signed), and we're awaiting monitor update.
+                       //
+                       // If we got here, we shouldn't have yet broadcasted the funding transaction (as the
+                       // only way to get an awaiting-monitor-update state during initial funding is if the
+                       // initial monitor persistence is still pending).
+                       //
+                       // Because deciding we're awaiting initial broadcast spuriously could result in
+                       // funds-loss (as we don't have a monitor, but have the funding transaction confirmed),
+                       // we hard-assert here, even in production builds.
+                       if self.is_outbound() { assert!(self.funding_transaction.is_some()); }
+                       assert!(self.monitor_pending_channel_ready);
+                       assert_eq!(self.latest_monitor_update_id, 0);
+                       return true;
+               }
+               false
+       }
+
        /// Returns true if our channel_ready has been sent
        pub fn is_our_channel_ready(&self) -> bool {
                (self.channel_state & ChannelState::OurChannelReady as u32) != 0 || self.channel_state >= ChannelState::ChannelFunded as u32
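
For intuition, the first branch above masks off the state flags a peer can
legitimately flip while we sit waiting on the persist (TheirChannelReady,
PeerDisconnected, MonitorUpdateFailed) and then requires that exactly FundingSent
remain. A self-contained sketch of that mask logic, using hypothetical flag values
rather than the real ChannelState bits:

    // Hypothetical stand-ins for ChannelState bits; values are illustrative only.
    const FUNDING_SENT: u32 = 1 << 3;
    const THEIR_CHANNEL_READY: u32 = 1 << 4;
    const OUR_CHANNEL_READY: u32 = 1 << 5;
    const PEER_DISCONNECTED: u32 = 1 << 7;
    const MONITOR_UPDATE_FAILED: u32 = 1 << 8;

    fn only_funding_sent_remains(channel_state: u32) -> bool {
        // Ignore flags that may be set while the initial persist is pending, then
        // insist nothing beyond FundingSent is left.
        channel_state & !(THEIR_CHANNEL_READY | PEER_DISCONNECTED | MONITOR_UPDATE_FAILED)
            == FUNDING_SENT
    }

    fn main() {
        // The peer sent channel_ready while we await the persist: still matches.
        assert!(only_funding_sent_remains(FUNDING_SENT | THEIR_CHANNEL_READY));
        // We advanced past FundingSent (e.g. sent our own channel_ready): no match,
        // so the separate 0conf commitment-number check takes over.
        assert!(!only_funding_sent_remains(FUNDING_SENT | OUR_CHANNEL_READY));
    }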
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index f234ad9f91f555075804d87db115d98193f09619..8f0f3811ba3b2bca0b6ac55167fea2fdfd890fcb 100644
@@ -6921,6 +6921,16 @@ impl<'a, Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
                                        }
                                        by_id.insert(channel.channel_id(), channel);
                                }
+                       } else if channel.is_awaiting_initial_mon_persist() {
+                               // If we were persisted and shut down while the initial ChannelMonitor persistence
+                               // was in-progress, we never broadcasted the funding transaction and can still
+                               // safely discard the channel.
+                               let _ = channel.force_shutdown(false);
+                               channel_closures.push(events::Event::ChannelClosed {
+                                       channel_id: channel.channel_id(),
+                                       user_channel_id: channel.get_user_id(),
+                                       reason: ClosureReason::DisconnectedPeer,
+                               });
                        } else {
                                log_error!(args.logger, "Missing ChannelMonitor for channel {} needed by ChannelManager.", log_bytes!(channel.channel_id()));
                                log_error!(args.logger, " The chain::Watch API *requires* that monitors are persisted durably before returning,");
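
From the caller's perspective, ChannelManager::read now succeeds in this situation
and reports the dropped channel as an event instead of returning an error. A hedged
sketch of scanning for it after deserialization (the event list would come from the
ChannelManager's usual event delivery; the function itself is illustrative):

    use lightning::util::events::{ClosureReason, Event};

    fn log_dropped_unfunded_channels(events: &[Event]) {
        for event in events {
            // Surfaced at read-time when the initial monitor persistence never
            // completed before the restart.
            if let Event::ChannelClosed {
                channel_id, reason: ClosureReason::DisconnectedPeer, ..
            } = event {
                // The funding transaction was never broadcasted, so no funds are at
                // risk; the channel can simply be reopened when the peer reconnects.
                println!("dropped never-funded channel {:?}", channel_id);
            }
        }
    }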
diff --git a/lightning/src/util/events.rs b/lightning/src/util/events.rs
index 8ddd762e97036bac77fee08c2be7819ec707388e..d2aa076bd21193b1c0d5526af51b2843de2ae88c 100644
@@ -111,11 +111,19 @@ pub enum ClosureReason {
        /// The peer disconnected prior to funding completing. In this case the spec mandates that we
        /// forget the channel entirely - we can attempt again if the peer reconnects.
        ///
+       /// This includes cases where we restarted prior to funding completion, including prior to the
+       /// initial [`ChannelMonitor`] persistence completing.
+       ///
        /// In LDK versions prior to 0.0.107 this could also occur if we were unable to connect to the
        /// peer because of mutual incompatibility between us and our channel counterparty.
+       ///
+       /// [`ChannelMonitor`]: crate::chain::channelmonitor::ChannelMonitor
        DisconnectedPeer,
-       /// Closure generated from `ChannelManager::read` if the ChannelMonitor is newer than
-       /// the ChannelManager deserialized.
+       /// Closure generated from `ChannelManager::read` if the [`ChannelMonitor`] is newer than
+       /// the [`ChannelManager`] deserialized.
+       ///
+       /// [`ChannelMonitor`]: crate::chain::channelmonitor::ChannelMonitor
+       /// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager
        OutdatedChannelManager
 }
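
The two read-time closure reasons documented above call for different handling, so
here is a short, hedged sketch of one plausible policy (illustrative, not prescribed
by LDK):

    use lightning::util::events::ClosureReason;

    fn reopen_is_safe(reason: &ClosureReason) -> bool {
        match reason {
            // Funding never completed; the spec says to forget the channel, and it
            // may be re-attempted once the peer reconnects.
            ClosureReason::DisconnectedPeer => true,
            // The on-disk ChannelManager was older than a ChannelMonitor; the
            // monitor is authoritative and may need to claim funds on-chain first.
            ClosureReason::OutdatedChannelManager => false,
            // Other variants (force-closes, processing errors, ...) need their own
            // handling and are out of scope for this sketch.
            _ => false,
        }
    }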