From be123f7d228197a0a9f9e48c4e4e31fa2ecbeca9 Mon Sep 17 00:00:00 2001
From: Matt Corallo
Date: Thu, 21 Oct 2021 22:33:42 +0000
Subject: [PATCH] Give peers one timer tick to finish handshake before
 disconnecting

This ensures we don't let a hung connection stick around forever if the
peer never completes the initial handshake.

This also resolves a race where, on receiving a second connection from
a peer, we may reset their_node_id to None to prevent sending messages
even though the `channel_encryptor` `is_ready_for_encryption()`.
Sending pings only checks the `channel_encryptor` status, not
`their_node_id`, resulting in an `unwrap` on `None` in
`enqueue_message`.
---
 lightning/src/ln/peer_handler.rs | 70 +++++++++++++++++++++++++-------
 1 file changed, 56 insertions(+), 14 deletions(-)

diff --git a/lightning/src/ln/peer_handler.rs b/lightning/src/ln/peer_handler.rs
index 0dbad0a31..e1da618ec 100644
--- a/lightning/src/ln/peer_handler.rs
+++ b/lightning/src/ln/peer_handler.rs
@@ -860,6 +860,7 @@ impl P
 					let features = InitFeatures::known();
 					let resp = msgs::Init { features };
 					self.enqueue_message(peer, &resp);
+					peer.awaiting_pong_timer_tick_intervals = 0;
 				},
 				NextNoiseStep::ActThree => {
 					let their_node_id = try_potential_handleerror!(peer.channel_encryptor.process_act_three(&peer.pending_read_buffer[..]));
@@ -870,6 +871,7 @@ impl P
 					let features = InitFeatures::known();
 					let resp = msgs::Init { features };
 					self.enqueue_message(peer, &resp);
+					peer.awaiting_pong_timer_tick_intervals = 0;
 				},
 				NextNoiseStep::NoiseComplete => {
 					if peer.pending_read_is_header {
@@ -1530,12 +1532,29 @@ impl P
 			let peer_count = peers.len();

 			peers.retain(|descriptor, peer| {
-				if !peer.channel_encryptor.is_ready_for_encryption() {
-					// The peer needs to complete its handshake before we can exchange messages
+				let mut do_disconnect_peer = false;
+				if !peer.channel_encryptor.is_ready_for_encryption() || peer.their_node_id.is_none() {
+					// The peer needs to complete its handshake before we can exchange messages. We
+					// give peers one timer tick to complete handshake, reusing
+					// `awaiting_pong_timer_tick_intervals` to track number of timer ticks taken
+					// for handshake completion.
+					if peer.awaiting_pong_timer_tick_intervals != 0 {
+						do_disconnect_peer = true;
+					} else {
+						peer.awaiting_pong_timer_tick_intervals = 1;
+						return true;
+					}
+				}
+
+				if peer.awaiting_pong_timer_tick_intervals == -1 {
+					// Magic value set in `maybe_send_extra_ping`.
+					peer.awaiting_pong_timer_tick_intervals = 1;
+					peer.received_message_since_timer_tick = false;
 					return true;
 				}

-				if (peer.awaiting_pong_timer_tick_intervals > 0 && !peer.received_message_since_timer_tick)
+				if do_disconnect_peer
+					|| (peer.awaiting_pong_timer_tick_intervals > 0 && !peer.received_message_since_timer_tick)
 					|| peer.awaiting_pong_timer_tick_intervals as u64 >
 						MAX_BUFFER_DRAIN_TICK_INTERVALS_PER_PEER as u64 * peer_count as u64
 				{
@@ -1546,21 +1565,11 @@ impl P
 							node_id_to_descriptor.remove(&node_id);
 							self.message_handler.chan_handler.peer_disconnected(&node_id, false);
 						}
-						None => {
-							// This can't actually happen as we should have hit
-							// is_ready_for_encryption() previously on this same peer.
-							unreachable!();
-						},
+						None => {},
 					}
 					return false;
 				}
-
 				peer.received_message_since_timer_tick = false;
-				if peer.awaiting_pong_timer_tick_intervals == -1 {
-					// Magic value set in `maybe_send_extra_ping`.
-					peer.awaiting_pong_timer_tick_intervals = 1;
-					return true;
-				}

 				if peer.awaiting_pong_timer_tick_intervals > 0 {
 					peer.awaiting_pong_timer_tick_intervals += 1;
@@ -1758,4 +1767,37 @@ mod tests {
 		assert_eq!(cfgs[1].routing_handler.chan_upds_recvd.load(Ordering::Acquire), 100);
 		assert_eq!(cfgs[1].routing_handler.chan_anns_recvd.load(Ordering::Acquire), 50);
 	}
+
+	#[test]
+	fn test_handshake_timeout() {
+		// Tests that we time out a peer still waiting on handshake completion after a full timer
+		// tick.
+		let cfgs = create_peermgr_cfgs(2);
+		cfgs[0].routing_handler.request_full_sync.store(true, Ordering::Release);
+		cfgs[1].routing_handler.request_full_sync.store(true, Ordering::Release);
+		let peers = create_network(2, &cfgs);
+
+		let secp_ctx = Secp256k1::new();
+		let a_id = PublicKey::from_secret_key(&secp_ctx, &peers[0].our_node_secret);
+		let mut fd_a = FileDescriptor { fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())) };
+		let mut fd_b = FileDescriptor { fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())) };
+		let initial_data = peers[1].new_outbound_connection(a_id, fd_b.clone()).unwrap();
+		peers[0].new_inbound_connection(fd_a.clone()).unwrap();
+
+		// If we get a single timer tick before completion, that's fine
+		assert_eq!(peers[0].peers.lock().unwrap().peers.len(), 1);
+		peers[0].timer_tick_occurred();
+		assert_eq!(peers[0].peers.lock().unwrap().peers.len(), 1);
+
+		assert_eq!(peers[0].read_event(&mut fd_a, &initial_data).unwrap(), false);
+		peers[0].process_events();
+		assert_eq!(peers[1].read_event(&mut fd_b, &fd_a.outbound_data.lock().unwrap().split_off(0)).unwrap(), false);
+		peers[1].process_events();
+
+		// ...but if we get a second timer tick, we should disconnect the peer
+		peers[0].timer_tick_occurred();
+		assert_eq!(peers[0].peers.lock().unwrap().peers.len(), 0);

+		assert!(peers[0].read_event(&mut fd_a, &fd_b.outbound_data.lock().unwrap().split_off(0)).is_err());
+	}
 }
-- 
2.39.5
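
For readers who want to experiment with the tick-counter logic outside the
full `PeerManager`, the following is a minimal, self-contained model of the
`peers.retain(...)` closure this patch installs. `Peer`, `on_timer_tick`,
`handshake_complete`, and `have_node_id` are hypothetical stand-ins, not LDK
API; the buffer-drain disconnect condition and the actual ping message are
elided.

// Standalone model of the patched retain closure (simplified, not LDK API).
struct Peer {
	// Stands in for `channel_encryptor.is_ready_for_encryption()`.
	handshake_complete: bool,
	// Stands in for `their_node_id.is_some()`.
	have_node_id: bool,
	// 0 after (re)connect and handshake completion; -1 is the
	// `maybe_send_extra_ping` magic value; > 0 counts ticks spent awaiting a
	// pong, or, pre-handshake, ticks spent waiting on handshake completion.
	awaiting_pong_timer_tick_intervals: i8,
	received_message_since_timer_tick: bool,
}

/// Returns `true` to keep the peer, `false` to disconnect it.
fn on_timer_tick(peer: &mut Peer) -> bool {
	let mut do_disconnect_peer = false;
	if !peer.handshake_complete || !peer.have_node_id {
		// Give the peer exactly one full timer tick to finish the handshake.
		if peer.awaiting_pong_timer_tick_intervals != 0 {
			do_disconnect_peer = true;
		} else {
			peer.awaiting_pong_timer_tick_intervals = 1;
			return true;
		}
	}

	if peer.awaiting_pong_timer_tick_intervals == -1 {
		// Magic value set in `maybe_send_extra_ping`.
		peer.awaiting_pong_timer_tick_intervals = 1;
		peer.received_message_since_timer_tick = false;
		return true;
	}

	if do_disconnect_peer
		|| (peer.awaiting_pong_timer_tick_intervals > 0 && !peer.received_message_since_timer_tick)
	{
		return false;
	}
	peer.received_message_since_timer_tick = false;

	if peer.awaiting_pong_timer_tick_intervals > 0 {
		peer.awaiting_pong_timer_tick_intervals += 1;
	} else {
		// LDK would enqueue a ping to the peer here.
		peer.awaiting_pong_timer_tick_intervals = 1;
	}
	true
}

fn main() {
	let mut peer = Peer {
		handshake_complete: false,
		have_node_id: false,
		awaiting_pong_timer_tick_intervals: 0,
		received_message_since_timer_tick: false,
	};
	// First tick while the handshake is pending: kept, counter becomes 1.
	assert!(on_timer_tick(&mut peer));
	// Second tick with the handshake still pending: disconnected.
	assert!(!on_timer_tick(&mut peer));
}

Reusing `awaiting_pong_timer_tick_intervals` avoids adding a per-peer field,
and is safe because the first two hunks reset the counter to 0 as soon as the
handshake completes, so a leftover handshake tick is never misread as a
missed pong.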