Unblock channels awaiting monitor update based on `ChanMan` queue
diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs
index faa49f0f480cfd3ef9f12871ad1c1e861fdbc2ac..5b821bfb868df77523a3ba8ec32a55ab9db5a680 100644
@@ -207,7 +207,8 @@ fn do_test_simple_monitor_temporary_update_fail(disconnect: bool) {
        }
 
        // ...and make sure we can force-close a frozen channel
-       nodes[0].node.force_close_broadcasting_latest_txn(&channel_id, &nodes[1].node.get_our_node_id()).unwrap();
+       let error_message = "Channel force-closed";
+       nodes[0].node.force_close_broadcasting_latest_txn(&channel_id, &nodes[1].node.get_our_node_id(), error_message.to_string()).unwrap();
        check_added_monitors!(nodes[0], 1);
        check_closed_broadcast!(nodes[0], true);
 
@@ -2025,7 +2026,9 @@ fn test_path_paused_mpp() {
        assert_eq!(events.len(), 1);
        pass_along_path(&nodes[0], &[&nodes[2], &nodes[3]], 200_000, payment_hash.clone(), Some(payment_secret), events.pop().unwrap(), true, None);
 
-       claim_payment_along_route(&nodes[0], &[&[&nodes[1], &nodes[3]], &[&nodes[2], &nodes[3]]], false, payment_preimage);
+       claim_payment_along_route(
+               ClaimAlongRouteArgs::new(&nodes[0], &[&[&nodes[1], &nodes[3]], &[&nodes[2], &nodes[3]]], payment_preimage)
+       );
 }
 
 #[test]
@@ -3222,17 +3225,18 @@ fn do_test_durable_preimages_on_closed_channel(close_chans_before_reload: bool,
        let _ = get_revoke_commit_msgs!(nodes[1], nodes[2].node.get_our_node_id());
 
        let mon_bc = get_monitor!(nodes[1], chan_id_bc).encode();
+       let error_message = "Channel force-closed";
 
        if close_chans_before_reload {
                if !close_only_a {
                        chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
-                       nodes[1].node.force_close_broadcasting_latest_txn(&chan_id_bc, &nodes[2].node.get_our_node_id()).unwrap();
+                       nodes[1].node.force_close_broadcasting_latest_txn(&chan_id_bc, &nodes[2].node.get_our_node_id(), error_message.to_string()).unwrap();
                        check_closed_broadcast(&nodes[1], 1, true);
                        check_closed_event(&nodes[1], 1, ClosureReason::HolderForceClosed, false, &[nodes[2].node.get_our_node_id()], 100000);
                }
 
                chanmon_cfgs[1].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
-               nodes[1].node.force_close_broadcasting_latest_txn(&chan_id_ab, &nodes[0].node.get_our_node_id()).unwrap();
+               nodes[1].node.force_close_broadcasting_latest_txn(&chan_id_ab, &nodes[0].node.get_our_node_id(), error_message.to_string()).unwrap();
                check_closed_broadcast(&nodes[1], 1, true);
                check_closed_event(&nodes[1], 1, ClosureReason::HolderForceClosed, false, &[nodes[0].node.get_our_node_id()], 100000);
        }
@@ -3253,8 +3257,9 @@ fn do_test_durable_preimages_on_closed_channel(close_chans_before_reload: bool,
                        assert_eq!(bs_close_txn.len(), 3);
                }
        }
+       let error_message = "Channel force-closed";
 
-       nodes[0].node.force_close_broadcasting_latest_txn(&chan_id_ab, &nodes[1].node.get_our_node_id()).unwrap();
+       nodes[0].node.force_close_broadcasting_latest_txn(&chan_id_ab, &nodes[1].node.get_our_node_id(), error_message.to_string()).unwrap();
        check_closed_event(&nodes[0], 1, ClosureReason::HolderForceClosed, false, &[nodes[1].node.get_our_node_id()], 100000);
        let as_closing_tx = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0);
        assert_eq!(as_closing_tx.len(), 1);
@@ -3332,6 +3337,93 @@ fn test_durable_preimages_on_closed_channel() {
        do_test_durable_preimages_on_closed_channel(false, false, false);
 }
 
+#[test]
+fn test_sync_async_persist_doesnt_hang() {
+       // Previously, we checked whether a channel was a candidate for making forward progress based
+       // on whether the `MonitorEvent::Completed` matched the channel's latest monitor update id.
+       // However, this could lead to a rare race when `ChannelMonitor`s were being persisted both
+       // synchronously and asynchronously, leading to channel hangs.
+       //
+       // To hit this case, we need to generate a `MonitorEvent::Completed` prior to a new channel
+       // update, but which is only processed after the channel update.
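+       // Concretely, the `MonitorEvent::Completed` below is generated for the update id of the
+       // asynchronously-persisted update, but by the time the `ChannelManager` processes it a
+       // further (synchronously-persisted) update has bumped the channel's latest update id, so an
+       // exact-id match would never fire and the channel would stay blocked.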
+       let chanmon_cfgs = create_chanmon_cfgs(2);
+       let node_cfgs = create_node_cfgs(2, &chanmon_cfgs);
+       let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]);
+       let nodes = create_network(2, &node_cfgs, &node_chanmgrs);
+
+       let chan_id_ab = create_announced_chan_between_nodes(&nodes, 0, 1).2;
+
+       // Send two payments from A to B, then claim the first, marking the very last
+       // ChannelMonitorUpdate as InProgress...
+       let (payment_preimage_1, payment_hash_1, ..) = route_payment(&nodes[0], &[&nodes[1]], 1_000_000);
+       let (payment_preimage_2, payment_hash_2, ..) = route_payment(&nodes[0], &[&nodes[1]], 1_000_000);
+
+       nodes[1].node.claim_funds(payment_preimage_1);
+       check_added_monitors(&nodes[1], 1);
+       expect_payment_claimed!(nodes[1], payment_hash_1, 1_000_000);
+
+       let bs_updates = get_htlc_update_msgs(&nodes[1], &nodes[0].node.get_our_node_id());
+       nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_updates.update_fulfill_htlcs[0]);
+       expect_payment_sent(&nodes[0], payment_preimage_1, None, false, false);
+       nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_updates.commitment_signed);
+       check_added_monitors(&nodes[0], 1);
+       let (as_raa, as_cs) = get_revoke_commit_msgs!(nodes[0], nodes[1].node.get_our_node_id());
+
+       nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_raa);
+       check_added_monitors(&nodes[1], 1);
+       nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_cs);
+       check_added_monitors(&nodes[1], 1);
+
+       let bs_final_raa = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, nodes[0].node.get_our_node_id());
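+       // Queue a single `InProgress` status so node A's next `ChannelMonitorUpdate` (for the RAA
+       // below) is persisted asynchronously; later updates will again complete synchronously.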
+       chanmon_cfgs[0].persister.set_update_ret(ChannelMonitorUpdateStatus::InProgress);
+       nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_final_raa);
+       check_added_monitors(&nodes[0], 1);
+
+       // Immediately complete the monitor update, but before the ChannelManager has a chance to see
+       // the MonitorEvent::Completed, create a channel update by receiving a claim on the second
+       // payment.
+       let (outpoint, _, ab_update_id) = nodes[0].chain_monitor.latest_monitor_update_id.lock().unwrap().get(&chan_id_ab).unwrap().clone();
+       nodes[0].chain_monitor.chain_monitor.channel_monitor_updated(outpoint, ab_update_id).unwrap();
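+       // This completes the async update in node A's `ChainMonitor`, queueing a
+       // `MonitorEvent::Completed` which the `ChannelManager` won't see until it next processes
+       // monitor events.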
+
+       nodes[1].node.claim_funds(payment_preimage_2);
+       check_added_monitors(&nodes[1], 1);
+       expect_payment_claimed!(nodes[1], payment_hash_2, 1_000_000);
+
+       let bs_updates = get_htlc_update_msgs(&nodes[1], &nodes[0].node.get_our_node_id());
+       nodes[0].node.handle_update_fulfill_htlc(&nodes[1].node.get_our_node_id(), &bs_updates.update_fulfill_htlcs[0]);
+       nodes[0].node.handle_commitment_signed(&nodes[1].node.get_our_node_id(), &bs_updates.commitment_signed);
+       check_added_monitors(&nodes[0], 1);
+
+       // At this point, we have completed an extra `ChannelMonitorUpdate` but the `ChannelManager`
+       // hasn't yet seen our `MonitorEvent::Completed`. When we call
+       // `get_and_clear_pending_msg_events` here, the `ChannelManager` finally sees that event and
+       // should return the channel to normal operation.
+       let (as_raa, as_cs) = get_revoke_commit_msgs!(nodes[0], nodes[1].node.get_our_node_id());
+
+       // Now that we've completed our test, process the events we have queued up (which we were not
+       // able to check until now as they would have caused the `ChannelManager` to look at the
+       // pending `MonitorEvent`s).
+       let pending_events = nodes[0].node.get_and_clear_pending_events();
+       assert_eq!(pending_events.len(), 2);
+       if let Event::PaymentPathSuccessful { ref payment_hash, ..} = pending_events[1] {
+               assert_eq!(payment_hash.unwrap(), payment_hash_1);
+       } else { panic!(); }
+       if let Event::PaymentSent { ref payment_hash, ..} = pending_events[0] {
+               assert_eq!(*payment_hash, payment_hash_2);
+       } else { panic!(); }
+
+       // Finally, complete the claiming of the second payment
+       nodes[1].node.handle_revoke_and_ack(&nodes[0].node.get_our_node_id(), &as_raa);
+       check_added_monitors(&nodes[1], 1);
+       nodes[1].node.handle_commitment_signed(&nodes[0].node.get_our_node_id(), &as_cs);
+       check_added_monitors(&nodes[1], 1);
+
+       let bs_final_raa = get_event_msg!(nodes[1], MessageSendEvent::SendRevokeAndACK, nodes[0].node.get_our_node_id());
+       nodes[0].node.handle_revoke_and_ack(&nodes[1].node.get_our_node_id(), &bs_final_raa);
+       check_added_monitors(&nodes[0], 1);
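+       // The monitor update for this RAA persists synchronously, so the `PaymentPathSuccessful`
+       // event for the second payment is available immediately.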
+       expect_payment_path_successful!(nodes[0]);
+}
+
 fn do_test_reload_mon_update_completion_actions(close_during_reload: bool) {
        // Test that if a `ChannelMonitorUpdate` completes but a `ChannelManager` isn't serialized
        // before restart we run the monitor update completion action on startup.
@@ -3391,10 +3483,11 @@ fn do_test_reload_mon_update_completion_actions(close_during_reload: bool) {
        let manager_b = nodes[1].node.encode();
        reload_node!(nodes[1], &manager_b, &[&mon_ab, &mon_bc], persister, new_chain_monitor, nodes_1_deserialized);
 
+       let error_message = "Channel force-closed";
        if close_during_reload {
                // Test that we still free the B<->C channel if the A<->B channel closed while we reloaded
                // (as learned about during the on-reload block connection).
-               nodes[0].node.force_close_broadcasting_latest_txn(&chan_id_ab, &nodes[1].node.get_our_node_id()).unwrap();
+               nodes[0].node.force_close_broadcasting_latest_txn(&chan_id_ab, &nodes[1].node.get_our_node_id(), error_message.to_string()).unwrap();
                check_added_monitors!(nodes[0], 1);
                check_closed_broadcast!(nodes[0], true);
                check_closed_event(&nodes[0], 1, ClosureReason::HolderForceClosed, false, &[nodes[1].node.get_our_node_id()], 100_000);