X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=lightning%2Fsrc%2Fln%2Fpeer_handler.rs;h=8ec9d72955d093aabda322e896a1998bafb27e3a;hb=d5b05e54c32cd53e0534849b57242c65747738b1;hp=37867adff32d164caa1f3fc3e78f65f49ff1e8d3;hpb=90bb3f9075f4584dac97f984ab8b248a55c72e65;p=rust-lightning diff --git a/lightning/src/ln/peer_handler.rs b/lightning/src/ln/peer_handler.rs index 37867adf..8ec9d729 100644 --- a/lightning/src/ln/peer_handler.rs +++ b/lightning/src/ln/peer_handler.rs @@ -18,6 +18,7 @@ use bitcoin::secp256k1::{self, Secp256k1, SecretKey, PublicKey}; use crate::chain::keysinterface::{KeysManager, NodeSigner, Recipient}; +use crate::events::{MessageSendEvent, MessageSendEventsProvider, OnionMessageProvider}; use crate::ln::features::{InitFeatures, NodeFeatures}; use crate::ln::msgs; use crate::ln::msgs::{ChannelMessageHandler, LightningError, NetAddress, OnionMessageHandler, RoutingMessageHandler}; @@ -27,9 +28,8 @@ use crate::ln::peer_channel_encryptor::{PeerChannelEncryptor,NextNoiseStep}; use crate::ln::wire; use crate::ln::wire::Encode; use crate::onion_message::{CustomOnionMessageContents, CustomOnionMessageHandler, SimpleArcOnionMessenger, SimpleRefOnionMessenger}; -use crate::routing::gossip::{NetworkGraph, P2PGossipSync, NodeId}; +use crate::routing::gossip::{NetworkGraph, P2PGossipSync, NodeId, NodeAlias}; use crate::util::atomic_counter::AtomicCounter; -use crate::util::events::{MessageSendEvent, MessageSendEventsProvider, OnionMessageProvider}; use crate::util::logger::Logger; use crate::prelude::*; @@ -46,16 +46,23 @@ use bitcoin::hashes::sha256::Hash as Sha256; use bitcoin::hashes::sha256::HashEngine as Sha256Engine; use bitcoin::hashes::{HashEngine, Hash}; -/// Handler for BOLT1-compliant messages. +/// A handler provided to [`PeerManager`] for reading and handling custom messages. +/// +/// [BOLT 1] specifies a custom message type range for use with experimental or application-specific +/// messages. `CustomMessageHandler` allows for user-defined handling of such types. See the +/// [`lightning_custom_message`] crate for tools useful in composing more than one custom handler. +/// +/// [BOLT 1]: https://github.com/lightning/bolts/blob/master/01-messaging.md +/// [`lightning_custom_message`]: https://docs.rs/lightning_custom_message/latest/lightning_custom_message pub trait CustomMessageHandler: wire::CustomMessageReader { - /// Called with the message type that was received and the buffer to be read. - /// Can return a `MessageHandlingError` if the message could not be handled. + /// Handles the given message sent from `sender_node_id`, possibly producing messages for + /// [`CustomMessageHandler::get_and_clear_pending_msg`] to return and thus for [`PeerManager`] + /// to send. fn handle_custom_message(&self, msg: Self::CustomMessage, sender_node_id: &PublicKey) -> Result<(), LightningError>; - /// Gets the list of pending messages which were generated by the custom message - /// handler, clearing the list in the process. The first tuple element must - /// correspond to the intended recipients node ids. If no connection to one of the - /// specified node does not exist, the message is simply not sent to it. + /// Returns the list of pending messages that were generated by the handler, clearing the list + /// in the process. Each message is paired with the node id of the intended recipient. If no + /// connection to the node exists, then the message is simply not sent. fn get_and_clear_pending_msg(&self) -> Vec<(PublicKey, Self::CustomMessage)>; } @@ -72,7 +79,7 @@ impl RoutingMessageHandler for IgnoringMessageHandler { fn get_next_channel_announcement(&self, _starting_point: u64) -> Option<(msgs::ChannelAnnouncement, Option, Option)> { None } fn get_next_node_announcement(&self, _starting_point: Option<&NodeId>) -> Option { None } - fn peer_connected(&self, _their_node_id: &PublicKey, _init: &msgs::Init) -> Result<(), ()> { Ok(()) } + fn peer_connected(&self, _their_node_id: &PublicKey, _init: &msgs::Init, _inbound: bool) -> Result<(), ()> { Ok(()) } fn handle_reply_channel_range(&self, _their_node_id: &PublicKey, _msg: msgs::ReplyChannelRange) -> Result<(), LightningError> { Ok(()) } fn handle_reply_short_channel_ids_end(&self, _their_node_id: &PublicKey, _msg: msgs::ReplyShortChannelIdsEnd) -> Result<(), LightningError> { Ok(()) } fn handle_query_channel_range(&self, _their_node_id: &PublicKey, _msg: msgs::QueryChannelRange) -> Result<(), LightningError> { Ok(()) } @@ -81,14 +88,15 @@ impl RoutingMessageHandler for IgnoringMessageHandler { fn provided_init_features(&self, _their_node_id: &PublicKey) -> InitFeatures { InitFeatures::empty() } + fn processing_queue_high(&self) -> bool { false } } impl OnionMessageProvider for IgnoringMessageHandler { fn next_onion_message_for_peer(&self, _peer_node_id: PublicKey) -> Option { None } } impl OnionMessageHandler for IgnoringMessageHandler { fn handle_onion_message(&self, _their_node_id: &PublicKey, _msg: &msgs::OnionMessage) {} - fn peer_connected(&self, _their_node_id: &PublicKey, _init: &msgs::Init) -> Result<(), ()> { Ok(()) } - fn peer_disconnected(&self, _their_node_id: &PublicKey, _no_connection_possible: bool) {} + fn peer_connected(&self, _their_node_id: &PublicKey, _init: &msgs::Init, _inbound: bool) -> Result<(), ()> { Ok(()) } + fn peer_disconnected(&self, _their_node_id: &PublicKey) {} fn provided_node_features(&self) -> NodeFeatures { NodeFeatures::empty() } fn provided_init_features(&self, _their_node_id: &PublicKey) -> InitFeatures { InitFeatures::empty() @@ -222,8 +230,8 @@ impl ChannelMessageHandler for ErroringMessageHandler { } // msgs::ChannelUpdate does not contain the channel_id field, so we just drop them. fn handle_channel_update(&self, _their_node_id: &PublicKey, _msg: &msgs::ChannelUpdate) {} - fn peer_disconnected(&self, _their_node_id: &PublicKey, _no_connection_possible: bool) {} - fn peer_connected(&self, _their_node_id: &PublicKey, _init: &msgs::Init) -> Result<(), ()> { Ok(()) } + fn peer_disconnected(&self, _their_node_id: &PublicKey) {} + fn peer_connected(&self, _their_node_id: &PublicKey, _init: &msgs::Init, _inbound: bool) -> Result<(), ()> { Ok(()) } fn handle_error(&self, _their_node_id: &PublicKey, _msg: &msgs::ErrorMessage) {} fn provided_node_features(&self) -> NodeFeatures { NodeFeatures::empty() } fn provided_init_features(&self, _their_node_id: &PublicKey) -> InitFeatures { @@ -276,7 +284,7 @@ pub struct MessageHandler where /// to a remote host. You will need to be able to generate multiple of these which meet Eq and /// implement Hash to meet the PeerManager API. /// -/// For efficiency, Clone should be relatively cheap for this type. +/// For efficiency, [`Clone`] should be relatively cheap for this type. /// /// Two descriptors may compare equal (by [`cmp::Eq`] and [`hash::Hash`]) as long as the original /// has been disconnected, the [`PeerManager`] has been informed of the disconnection (either by it @@ -314,16 +322,7 @@ pub trait SocketDescriptor : cmp::Eq + hash::Hash + Clone { /// generate no further read_event/write_buffer_space_avail/socket_disconnected calls for the /// descriptor. #[derive(Clone)] -pub struct PeerHandleError { - /// Used to indicate that we probably can't make any future connections to this peer (e.g. - /// because we required features that our peer was missing, or vice versa). - /// - /// While LDK's [`ChannelManager`] will not do it automatically, you likely wish to force-close - /// any channels with this peer or check for new versions of LDK. - /// - /// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager - pub no_connection_possible: bool, -} +pub struct PeerHandleError { } impl fmt::Debug for PeerHandleError { fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> { formatter.write_str("Peer Sent Invalid Data") @@ -392,6 +391,12 @@ struct Peer { /// We cache a `NodeId` here to avoid serializing peers' keys every time we forward gossip /// messages in `PeerManager`. Use `Peer::set_their_node_id` to modify this field. their_node_id: Option<(PublicKey, NodeId)>, + /// The features provided in the peer's [`msgs::Init`] message. + /// + /// This is set only after we've processed the [`msgs::Init`] message and called relevant + /// `peer_connected` handler methods. Thus, this field is set *iff* we've finished our + /// handshake and can talk to this peer normally (though use [`Peer::handshake_complete`] to + /// check this. their_features: Option, their_net_address: Option, @@ -414,9 +419,24 @@ struct Peer { awaiting_pong_timer_tick_intervals: i8, received_message_since_timer_tick: bool, sent_gossip_timestamp_filter: bool, + + /// Indicates we've received a `channel_announcement` since the last time we had + /// [`PeerManager::gossip_processing_backlogged`] set (or, really, that we've received a + /// `channel_announcement` at all - we set this unconditionally but unset it every time we + /// check if we're gossip-processing-backlogged). + received_channel_announce_since_backlogged: bool, + + inbound_connection: bool, } impl Peer { + /// True after we've processed the [`msgs::Init`] message and called relevant `peer_connected` + /// handler methods. Thus, this implies we've finished our handshake and can talk to this peer + /// normally. + fn handshake_complete(&self) -> bool { + self.their_features.is_some() + } + /// Returns true if the channel announcements/updates for the given channel should be /// forwarded to this peer. /// If we are sending our routing table to this peer and we have not yet sent channel @@ -424,6 +444,7 @@ impl Peer { /// point and we shouldn't send it yet to avoid sending duplicate updates. If we've already /// sent the old versions, we should send the update, and so return true here. fn should_forward_channel_announcement(&self, channel_id: u64) -> bool { + if !self.handshake_complete() { return false; } if self.their_features.as_ref().unwrap().supports_gossip_queries() && !self.sent_gossip_timestamp_filter { return false; @@ -437,6 +458,7 @@ impl Peer { /// Similar to the above, but for node announcements indexed by node_id. fn should_forward_node_announcement(&self, node_id: NodeId) -> bool { + if !self.handshake_complete() { return false; } if self.their_features.as_ref().unwrap().supports_gossip_queries() && !self.sent_gossip_timestamp_filter { return false; @@ -450,8 +472,12 @@ impl Peer { /// Returns whether we should be reading bytes from this peer, based on whether its outbound /// buffer still has space and we don't need to pause reads to get some writes out. - fn should_read(&self) -> bool { - self.pending_outbound_buffer.len() < OUTBOUND_BUFFER_LIMIT_READ_PAUSE + fn should_read(&mut self, gossip_processing_backlogged: bool) -> bool { + if !gossip_processing_backlogged { + self.received_channel_announce_since_backlogged = false; + } + self.pending_outbound_buffer.len() < OUTBOUND_BUFFER_LIMIT_READ_PAUSE && + (!gossip_processing_backlogged || !self.received_channel_announce_since_backlogged) } /// Determines if we should push additional gossip background sync (aka "backfill") onto a peer's @@ -459,19 +485,20 @@ impl Peer { fn should_buffer_gossip_backfill(&self) -> bool { self.pending_outbound_buffer.is_empty() && self.gossip_broadcast_buffer.is_empty() && self.msgs_sent_since_pong < BUFFER_DRAIN_MSGS_PER_TICK + && self.handshake_complete() } /// Determines if we should push an onion message onto a peer's outbound buffer. This is checked /// every time the peer's buffer may have been drained. fn should_buffer_onion_message(&self) -> bool { - self.pending_outbound_buffer.is_empty() + self.pending_outbound_buffer.is_empty() && self.handshake_complete() && self.msgs_sent_since_pong < BUFFER_DRAIN_MSGS_PER_TICK } /// Determines if we should push additional gossip broadcast messages onto a peer's outbound /// buffer. This is checked every time the peer's buffer may have been drained. fn should_buffer_gossip_broadcast(&self) -> bool { - self.pending_outbound_buffer.is_empty() + self.pending_outbound_buffer.is_empty() && self.handshake_complete() && self.msgs_sent_since_pong < BUFFER_DRAIN_MSGS_PER_TICK } @@ -495,7 +522,7 @@ impl Peer { /// SimpleRefPeerManager is the more appropriate type. Defining these type aliases prevents /// issues such as overly long function definitions. /// -/// (C-not exported) as `Arc`s don't make sense in bindings. +/// This is not exported to bindings users as `Arc`s don't make sense in bindings. pub type SimpleArcPeerManager = PeerManager>, Arc>>, Arc, Arc>>, Arc>, Arc, IgnoringMessageHandler, Arc>; /// SimpleRefPeerManager is a type alias for a PeerManager reference, and is the reference @@ -505,7 +532,7 @@ pub type SimpleArcPeerManager = PeerManager = PeerManager, &'f P2PGossipSync<&'g NetworkGraph<&'f L>, &'h C, &'f L>, &'i SimpleRefOnionMessenger<'j, 'k, L>, &'f L, IgnoringMessageHandler, &'c KeysManager>; /// A PeerManager manages a set of peers, described by their [`SocketDescriptor`] and marshalls @@ -520,10 +547,10 @@ pub type SimpleRefPeerManager<'a, 'b, 'c, 'd, 'e, 'f, 'g, 'h, 'i, 'j, 'k, 'l, 'm /// [`PeerManager`] functions related to the same connection must occur only in serial, making new /// calls only after previous ones have returned. /// -/// Rather than using a plain PeerManager, it is preferable to use either a SimpleArcPeerManager -/// a SimpleRefPeerManager, for conciseness. See their documentation for more details, but -/// essentially you should default to using a SimpleRefPeerManager, and use a -/// SimpleArcPeerManager when you require a PeerManager with a static lifetime, such as when +/// Rather than using a plain [`PeerManager`], it is preferable to use either a [`SimpleArcPeerManager`] +/// a [`SimpleRefPeerManager`], for conciseness. See their documentation for more details, but +/// essentially you should default to using a [`SimpleRefPeerManager`], and use a +/// [`SimpleArcPeerManager`] when you require a `PeerManager` with a static lifetime, such as when /// you're using lightning-net-tokio. /// /// [`read_event`]: PeerManager::read_event @@ -568,6 +595,9 @@ pub struct PeerManager Pe /// `OnionMessageHandler`. No routing message handler is used and network graph messages are /// ignored. /// - /// ephemeral_random_data is used to derive per-connection ephemeral keys and must be + /// `ephemeral_random_data` is used to derive per-connection ephemeral keys and must be /// cryptographically secure random bytes. /// /// `current_time` is used as an always-increasing counter that survives across restarts and is @@ -616,7 +646,7 @@ impl Pe /// timestamp, however if it is not available a persistent counter that increases once per /// minute should suffice. /// - /// (C-not exported) as we can't export a PeerManager with a dummy route handler + /// This is not exported to bindings users as we can't export a PeerManager with a dummy route handler pub fn new_channel_only(channel_message_handler: CM, onion_message_handler: OM, current_time: u32, ephemeral_random_data: &[u8; 32], logger: L, node_signer: NS) -> Self { Self::new(MessageHandler { chan_handler: channel_message_handler, @@ -640,10 +670,10 @@ impl PeerManager Self { Self::new(MessageHandler { chan_handler: ErroringMessageHandler::new(), @@ -702,8 +732,9 @@ impl) -> Result, PeerHandleError> { let mut peer_encryptor = PeerChannelEncryptor::new_outbound(their_node_id.clone(), self.get_ephemeral_key()); let res = peer_encryptor.get_act_one(&self.secp_ctx).to_vec(); let pending_read_buffer = [0; 50].to_vec(); // Noise act two is 50 bytes let mut peers = self.peers.write().unwrap(); - if peers.insert(descriptor, Mutex::new(Peer { - channel_encryptor: peer_encryptor, - their_node_id: None, - their_features: None, - their_net_address: remote_network_address, - - pending_outbound_buffer: LinkedList::new(), - pending_outbound_buffer_first_msg_offset: 0, - gossip_broadcast_buffer: LinkedList::new(), - awaiting_write_event: false, - - pending_read_buffer, - pending_read_buffer_pos: 0, - pending_read_is_header: false, - - sync_status: InitSyncTracker::NoSyncRequested, - - msgs_sent_since_pong: 0, - awaiting_pong_timer_tick_intervals: 0, - received_message_since_timer_tick: false, - sent_gossip_timestamp_filter: false, - })).is_some() { - panic!("PeerManager driver duplicated descriptors!"); - }; - Ok(res) + match peers.entry(descriptor) { + hash_map::Entry::Occupied(_) => { + debug_assert!(false, "PeerManager driver duplicated descriptors!"); + Err(PeerHandleError {}) + }, + hash_map::Entry::Vacant(e) => { + e.insert(Mutex::new(Peer { + channel_encryptor: peer_encryptor, + their_node_id: None, + their_features: None, + their_net_address: remote_network_address, + + pending_outbound_buffer: LinkedList::new(), + pending_outbound_buffer_first_msg_offset: 0, + gossip_broadcast_buffer: LinkedList::new(), + awaiting_write_event: false, + + pending_read_buffer, + pending_read_buffer_pos: 0, + pending_read_is_header: false, + + sync_status: InitSyncTracker::NoSyncRequested, + + msgs_sent_since_pong: 0, + awaiting_pong_timer_tick_intervals: 0, + received_message_since_timer_tick: false, + sent_gossip_timestamp_filter: false, + + received_channel_announce_since_backlogged: false, + inbound_connection: false, + })); + Ok(res) + } + } } /// Indicates a new inbound connection has been established to a node with an optional remote @@ -823,42 +864,64 @@ impl) -> Result<(), PeerHandleError> { let peer_encryptor = PeerChannelEncryptor::new_inbound(&self.node_signer); let pending_read_buffer = [0; 50].to_vec(); // Noise act one is 50 bytes let mut peers = self.peers.write().unwrap(); - if peers.insert(descriptor, Mutex::new(Peer { - channel_encryptor: peer_encryptor, - their_node_id: None, - their_features: None, - their_net_address: remote_network_address, - - pending_outbound_buffer: LinkedList::new(), - pending_outbound_buffer_first_msg_offset: 0, - gossip_broadcast_buffer: LinkedList::new(), - awaiting_write_event: false, - - pending_read_buffer, - pending_read_buffer_pos: 0, - pending_read_is_header: false, - - sync_status: InitSyncTracker::NoSyncRequested, - - msgs_sent_since_pong: 0, - awaiting_pong_timer_tick_intervals: 0, - received_message_since_timer_tick: false, - sent_gossip_timestamp_filter: false, - })).is_some() { - panic!("PeerManager driver duplicated descriptors!"); - }; - Ok(()) + match peers.entry(descriptor) { + hash_map::Entry::Occupied(_) => { + debug_assert!(false, "PeerManager driver duplicated descriptors!"); + Err(PeerHandleError {}) + }, + hash_map::Entry::Vacant(e) => { + e.insert(Mutex::new(Peer { + channel_encryptor: peer_encryptor, + their_node_id: None, + their_features: None, + their_net_address: remote_network_address, + + pending_outbound_buffer: LinkedList::new(), + pending_outbound_buffer_first_msg_offset: 0, + gossip_broadcast_buffer: LinkedList::new(), + awaiting_write_event: false, + + pending_read_buffer, + pending_read_buffer_pos: 0, + pending_read_is_header: false, + + sync_status: InitSyncTracker::NoSyncRequested, + + msgs_sent_since_pong: 0, + awaiting_pong_timer_tick_intervals: 0, + received_message_since_timer_tick: false, + sent_gossip_timestamp_filter: false, + + received_channel_announce_since_backlogged: false, + inbound_connection: true, + })); + Ok(()) + } + } + } + + fn peer_should_read(&self, peer: &mut Peer) -> bool { + peer.should_read(self.gossip_processing_backlogged.load(Ordering::Relaxed)) } - fn do_attempt_write_data(&self, descriptor: &mut Descriptor, peer: &mut Peer) { + fn update_gossip_backlogged(&self) { + let new_state = self.message_handler.route_handler.processing_queue_high(); + let prev_state = self.gossip_processing_backlogged.swap(new_state, Ordering::Relaxed); + if prev_state && !new_state { + self.gossip_processing_backlog_lifted.store(true, Ordering::Relaxed); + } + } + + fn do_attempt_write_data(&self, descriptor: &mut Descriptor, peer: &mut Peer, force_one_write: bool) { + let mut have_written = false; while !peer.awaiting_write_event { if peer.should_buffer_onion_message() { if let Some((peer_node_id, _)) = peer.their_node_id { @@ -915,13 +978,23 @@ impl return, + None => { + if force_one_write && !have_written { + if should_read { + let data_sent = descriptor.send_data(&[], should_read); + debug_assert_eq!(data_sent, 0, "Can't write more than no data"); + } + } + return + }, Some(buff) => buff, }; let pending = &next_buff[peer.pending_outbound_buffer_first_msg_offset..]; - let data_sent = descriptor.send_data(pending, peer.should_read()); + let data_sent = descriptor.send_data(pending, should_read); + have_written = true; peer.pending_outbound_buffer_first_msg_offset += data_sent; if peer.pending_outbound_buffer_first_msg_offset == next_buff.len() { peer.pending_outbound_buffer_first_msg_offset = 0; @@ -939,7 +1012,7 @@ impl { let mut peer = peer_mutex.lock().unwrap(); peer.awaiting_write_event = false; - self.do_attempt_write_data(descriptor, &mut peer); + self.do_attempt_write_data(descriptor, &mut peer, false); } }; Ok(()) @@ -974,6 +1047,9 @@ impl Result { @@ -981,7 +1057,7 @@ impl Ok(res), Err(e) => { log_trace!(self.logger, "Peer sent invalid data or we decided to disconnect due to a protocol error"); - self.disconnect_event_internal(peer_descriptor, e.no_connection_possible); + self.disconnect_event_internal(peer_descriptor); Err(e) } } @@ -1014,7 +1090,7 @@ impl { let mut read_pos = 0; @@ -1028,7 +1104,7 @@ impl { //TODO: Try to push msg log_debug!(self.logger, "Error handling message{}; disconnecting peer with: {}", OptionalFromDebugger(&peer_node_id), e.err); - return Err(PeerHandleError{ no_connection_possible: false }); + return Err(PeerHandleError { }); }, msgs::ErrorAction::IgnoreAndLog(level) => { log_given_level!(self.logger, level, "Error handling message{}; ignoring: {}", OptionalFromDebugger(&peer_node_id), e.err); @@ -1078,10 +1154,14 @@ impl { match self.node_id_to_descriptor.lock().unwrap().entry(peer.their_node_id.unwrap().0) { - hash_map::Entry::Occupied(_) => { + hash_map::Entry::Occupied(e) => { log_trace!(self.logger, "Got second connection with {}, closing", log_pubkey!(peer.their_node_id.unwrap().0)); peer.their_node_id = None; // Unset so that we don't generate a peer_disconnected event - return Err(PeerHandleError{ no_connection_possible: false }) + // Check that the peers map is consistent with the + // node_id_to_descriptor map, as this has been broken + // before. + debug_assert!(peers.get(e.get()).is_some()); + return Err(PeerHandleError { }) }, hash_map::Entry::Vacant(entry) => { log_debug!(self.logger, "Finished noise handshake for connection with {}", log_pubkey!(peer.their_node_id.unwrap().0)); @@ -1138,7 +1218,7 @@ impl 8192 { peer.pending_read_buffer = Vec::new(); } peer.pending_read_buffer.resize(msg_len as usize + 16, 0); if msg_len < 2 { // Need at least the message type tag - return Err(PeerHandleError{ no_connection_possible: false }); + return Err(PeerHandleError { }); } peer.pending_read_is_header = false; } else { @@ -1181,19 +1261,19 @@ impl { log_gossip!(self.logger, "Received a message with an unknown required feature flag or TLV, you may want to update!"); self.enqueue_message(peer, &msgs::WarningMessage { channel_id: [0; 32], data: format!("Received an unknown required feature/TLV in message type {:?}", ty) }); - return Err(PeerHandleError { no_connection_possible: false }); + return Err(PeerHandleError { }); } - (msgs::DecodeError::UnknownVersion, _) => return Err(PeerHandleError { no_connection_possible: false }), + (msgs::DecodeError::UnknownVersion, _) => return Err(PeerHandleError { }), (msgs::DecodeError::InvalidValue, _) => { log_debug!(self.logger, "Got an invalid value while deserializing message"); - return Err(PeerHandleError { no_connection_possible: false }); + return Err(PeerHandleError { }); } (msgs::DecodeError::ShortRead, _) => { log_debug!(self.logger, "Deserialization failed due to shortness of message"); - return Err(PeerHandleError { no_connection_possible: false }); + return Err(PeerHandleError { }); } - (msgs::DecodeError::BadLengthDescriptor, _) => return Err(PeerHandleError { no_connection_possible: false }), - (msgs::DecodeError::Io(_), _) => return Err(PeerHandleError { no_connection_possible: false }), + (msgs::DecodeError::BadLengthDescriptor, _) => return Err(PeerHandleError { }), + (msgs::DecodeError::Io(_), _) => return Err(PeerHandleError { }), } } }; @@ -1203,7 +1283,7 @@ impl { @@ -1415,12 +1499,14 @@ impl MessageHandlingError { e.into() })? { should_forward = Some(wire::Message::ChannelAnnouncement(msg)); } + self.update_gossip_backlogged(); }, wire::Message::NodeAnnouncement(msg) => { if self.message_handler.route_handler.handle_node_announcement(&msg) .map_err(|e| -> MessageHandlingError { e.into() })? { should_forward = Some(wire::Message::NodeAnnouncement(msg)); } + self.update_gossip_backlogged(); }, wire::Message::ChannelUpdate(msg) => { self.message_handler.chan_handler.handle_channel_update(&their_node_id, &msg); @@ -1428,6 +1514,7 @@ impl MessageHandlingError { e.into() })? { should_forward = Some(wire::Message::ChannelUpdate(msg)); } + self.update_gossip_backlogged(); }, wire::Message::QueryShortChannelIds(msg) => { self.message_handler.route_handler.handle_query_short_channel_ids(&their_node_id, msg)?; @@ -1450,8 +1537,7 @@ impl { log_debug!(self.logger, "Received unknown even message of type {}, disconnecting peer!", type_id); - // Fail the channel if message is an even, unknown type as per BOLT #1. - return Err(PeerHandleError{ no_connection_possible: true }.into()); + return Err(PeerHandleError { }.into()); }, wire::Message::Unknown(type_id) => { log_trace!(self.logger, "Received unknown odd message of type {}, ignoring", type_id); @@ -1471,10 +1557,12 @@ impl match peers.get(&descriptor) { Some(peer_mutex) => { let peer_lock = peer_mutex.lock().unwrap(); - if peer_lock.their_features.is_none() { + if !peer_lock.handshake_complete() { continue; } peer_lock @@ -1722,10 +1817,12 @@ impl {}, } - match self.message_handler.route_handler.handle_channel_update(&update_msg) { - Ok(_) | Err(LightningError { action: msgs::ErrorAction::IgnoreDuplicateGossip, .. }) => - self.forward_broadcast_msg(peers, &wire::Message::ChannelUpdate(update_msg), None), - _ => {}, + if let Some(msg) = update_msg { + match self.message_handler.route_handler.handle_channel_update(&msg) { + Ok(_) | Err(LightningError { action: msgs::ErrorAction::IgnoreDuplicateGossip, .. }) => + self.forward_broadcast_msg(peers, &wire::Message::ChannelUpdate(msg), None), + _ => {}, + } } }, MessageSendEvent::BroadcastChannelUpdate { msg } => { @@ -1736,6 +1833,14 @@ impl {}, } }, + MessageSendEvent::BroadcastNodeAnnouncement { msg } => { + log_debug!(self.logger, "Handling BroadcastNodeAnnouncement event in peer_handler for node {}", msg.contents.node_id); + match self.message_handler.route_handler.handle_node_announcement(&msg) { + Ok(_) | Err(LightningError { action: msgs::ErrorAction::IgnoreDuplicateGossip, .. }) => + self.forward_broadcast_msg(peers, &wire::Message::NodeAnnouncement(msg), None), + _ => {}, + } + }, MessageSendEvent::SendChannelUpdate { ref node_id, ref msg } => { log_trace!(self.logger, "Handling SendChannelUpdate event in peer_handler for node {} for channel {}", log_pubkey!(node_id), msg.contents.short_channel_id); @@ -1797,7 +1902,9 @@ impl { let peer = peer_lock.lock().unwrap(); if let Some((node_id, _)) = peer.their_node_id { - log_trace!(self.logger, - "Handling disconnection of peer {}, with {}future connection to the peer possible.", - log_pubkey!(node_id), if no_connection_possible { "no " } else { "" }); - self.node_id_to_descriptor.lock().unwrap().remove(&node_id); - self.message_handler.chan_handler.peer_disconnected(&node_id, no_connection_possible); - self.message_handler.onion_message_handler.peer_disconnected(&node_id, no_connection_possible); + log_trace!(self.logger, "Handling disconnection of peer {}", log_pubkey!(node_id)); + let removed = self.node_id_to_descriptor.lock().unwrap().remove(&node_id); + debug_assert!(removed.is_some(), "descriptor maps should be consistent"); + if !peer.handshake_complete() { return; } + self.message_handler.chan_handler.peer_disconnected(&node_id); + self.message_handler.onion_message_handler.peer_disconnected(&node_id); } } }; @@ -1862,21 +1982,17 @@ impl 0 && !peer.received_message_since_timer_tick) + || peer.awaiting_pong_timer_tick_intervals as u64 > + MAX_BUFFER_DRAIN_TICK_INTERVALS_PER_PEER as u64 * peers_lock.len() as u64 + { + descriptors_needing_disconnect.push(descriptor.clone()); + break; + } peer.received_message_since_timer_tick = false; - continue; - } - if (peer.awaiting_pong_timer_tick_intervals > 0 && !peer.received_message_since_timer_tick) - || peer.awaiting_pong_timer_tick_intervals as u64 > - MAX_BUFFER_DRAIN_TICK_INTERVALS_PER_PEER as u64 * peers_lock.len() as u64 - { - descriptors_needing_disconnect.push(descriptor.clone()); - continue; - } - peer.received_message_since_timer_tick = false; + if peer.awaiting_pong_timer_tick_intervals > 0 { + peer.awaiting_pong_timer_tick_intervals += 1; + break; + } - if peer.awaiting_pong_timer_tick_intervals > 0 { - peer.awaiting_pong_timer_tick_intervals += 1; - continue; + peer.awaiting_pong_timer_tick_intervals = 1; + let ping = msgs::Ping { + ponglen: 0, + byteslen: 64, + }; + self.enqueue_message(&mut *peer, &ping); + break; } - - peer.awaiting_pong_timer_tick_intervals = 1; - let ping = msgs::Ping { - ponglen: 0, - byteslen: 64, - }; - self.enqueue_message(&mut *peer, &ping); - self.do_attempt_write_data(&mut (descriptor.clone()), &mut *peer); + self.do_attempt_write_data(&mut (descriptor.clone()), &mut *peer, flush_read_disabled); } } if !descriptors_needing_disconnect.is_empty() { { let mut peers_lock = self.peers.write().unwrap(); - for descriptor in descriptors_needing_disconnect.iter() { - if let Some(peer) = peers_lock.remove(descriptor) { - if let Some((node_id, _)) = peer.lock().unwrap().their_node_id { - log_trace!(self.logger, "Disconnecting peer with id {} due to ping timeout", node_id); + for descriptor in descriptors_needing_disconnect { + if let Some(peer_mutex) = peers_lock.remove(&descriptor) { + let peer = peer_mutex.lock().unwrap(); + if let Some((node_id, _)) = peer.their_node_id { self.node_id_to_descriptor.lock().unwrap().remove(&node_id); - self.message_handler.chan_handler.peer_disconnected(&node_id, false); - self.message_handler.onion_message_handler.peer_disconnected(&node_id, false); } + self.do_disconnect(descriptor, &*peer, "ping timeout"); } } } - - for mut descriptor in descriptors_needing_disconnect.drain(..) { - descriptor.disconnect_socket(); - } } } @@ -2037,7 +2153,9 @@ impl bool { #[cfg(test)] mod tests { use crate::chain::keysinterface::{NodeSigner, Recipient}; + use crate::events; + use crate::ln::peer_channel_encryptor::PeerChannelEncryptor; use crate::ln::peer_handler::{PeerManager, MessageHandler, SocketDescriptor, IgnoringMessageHandler, filter_addresses}; use crate::ln::{msgs, wire}; use crate::ln::msgs::NetAddress; - use crate::util::events; use crate::util::test_utils; use bitcoin::secp256k1::SecretKey; use crate::prelude::*; use crate::sync::{Arc, Mutex}; - use core::sync::atomic::Ordering; + use core::sync::atomic::{AtomicBool, Ordering}; #[derive(Clone)] struct FileDescriptor { fd: u16, outbound_data: Arc>>, + disconnect: Arc, } impl PartialEq for FileDescriptor { fn eq(&self, other: &Self) -> bool { @@ -2113,7 +2233,7 @@ mod tests { data.len() } - fn disconnect_socket(&mut self) {} + fn disconnect_socket(&mut self) { self.disconnect.store(true, Ordering::Release); } } struct PeerManagerCfg { @@ -2154,10 +2274,16 @@ mod tests { fn establish_connection<'a>(peer_a: &PeerManager, peer_b: &PeerManager) -> (FileDescriptor, FileDescriptor) { let id_a = peer_a.node_signer.get_node_id(Recipient::Node).unwrap(); - let mut fd_a = FileDescriptor { fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())) }; + let mut fd_a = FileDescriptor { + fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())), + disconnect: Arc::new(AtomicBool::new(false)), + }; let addr_a = NetAddress::IPv4{addr: [127, 0, 0, 1], port: 1000}; let id_b = peer_b.node_signer.get_node_id(Recipient::Node).unwrap(); - let mut fd_b = FileDescriptor { fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())) }; + let mut fd_b = FileDescriptor { + fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())), + disconnect: Arc::new(AtomicBool::new(false)), + }; let addr_b = NetAddress::IPv4{addr: [127, 0, 0, 1], port: 1001}; let initial_data = peer_b.new_outbound_connection(id_a, fd_b.clone(), Some(addr_a.clone())).unwrap(); peer_a.new_inbound_connection(fd_a.clone(), Some(addr_b.clone())).unwrap(); @@ -2181,24 +2307,100 @@ mod tests { (fd_a.clone(), fd_b.clone()) } + #[test] + #[cfg(feature = "std")] + fn fuzz_threaded_connections() { + // Spawn two threads which repeatedly connect two peers together, leading to "got second + // connection with peer" disconnections and rapid reconnect. This previously found an issue + // with our internal map consistency, and is a generally good smoke test of disconnection. + let cfgs = Arc::new(create_peermgr_cfgs(2)); + // Until we have std::thread::scoped we have to unsafe { turn off the borrow checker }. + let peers = Arc::new(create_network(2, unsafe { &*(&*cfgs as *const _) as &'static _ })); + + let start_time = std::time::Instant::now(); + macro_rules! spawn_thread { ($id: expr) => { { + let peers = Arc::clone(&peers); + let cfgs = Arc::clone(&cfgs); + std::thread::spawn(move || { + let mut ctr = 0; + while start_time.elapsed() < std::time::Duration::from_secs(1) { + let id_a = peers[0].node_signer.get_node_id(Recipient::Node).unwrap(); + let mut fd_a = FileDescriptor { + fd: $id + ctr * 3, outbound_data: Arc::new(Mutex::new(Vec::new())), + disconnect: Arc::new(AtomicBool::new(false)), + }; + let addr_a = NetAddress::IPv4{addr: [127, 0, 0, 1], port: 1000}; + let mut fd_b = FileDescriptor { + fd: $id + ctr * 3, outbound_data: Arc::new(Mutex::new(Vec::new())), + disconnect: Arc::new(AtomicBool::new(false)), + }; + let addr_b = NetAddress::IPv4{addr: [127, 0, 0, 1], port: 1001}; + let initial_data = peers[1].new_outbound_connection(id_a, fd_b.clone(), Some(addr_a.clone())).unwrap(); + peers[0].new_inbound_connection(fd_a.clone(), Some(addr_b.clone())).unwrap(); + if peers[0].read_event(&mut fd_a, &initial_data).is_err() { break; } + + while start_time.elapsed() < std::time::Duration::from_secs(1) { + peers[0].process_events(); + if fd_a.disconnect.load(Ordering::Acquire) { break; } + let a_data = fd_a.outbound_data.lock().unwrap().split_off(0); + if peers[1].read_event(&mut fd_b, &a_data).is_err() { break; } + + peers[1].process_events(); + if fd_b.disconnect.load(Ordering::Acquire) { break; } + let b_data = fd_b.outbound_data.lock().unwrap().split_off(0); + if peers[0].read_event(&mut fd_a, &b_data).is_err() { break; } + + cfgs[0].chan_handler.pending_events.lock().unwrap() + .push(crate::events::MessageSendEvent::SendShutdown { + node_id: peers[1].node_signer.get_node_id(Recipient::Node).unwrap(), + msg: msgs::Shutdown { + channel_id: [0; 32], + scriptpubkey: bitcoin::Script::new(), + }, + }); + cfgs[1].chan_handler.pending_events.lock().unwrap() + .push(crate::events::MessageSendEvent::SendShutdown { + node_id: peers[0].node_signer.get_node_id(Recipient::Node).unwrap(), + msg: msgs::Shutdown { + channel_id: [0; 32], + scriptpubkey: bitcoin::Script::new(), + }, + }); + + if ctr % 2 == 0 { + peers[0].timer_tick_occurred(); + peers[1].timer_tick_occurred(); + } + } + + peers[0].socket_disconnected(&fd_a); + peers[1].socket_disconnected(&fd_b); + ctr += 1; + std::thread::sleep(std::time::Duration::from_micros(1)); + } + }) + } } } + let thrd_a = spawn_thread!(1); + let thrd_b = spawn_thread!(2); + + thrd_a.join().unwrap(); + thrd_b.join().unwrap(); + } + #[test] fn test_disconnect_peer() { // Simple test which builds a network of PeerManager, connects and brings them to NoiseState::Finished and // push a DisconnectPeer event to remove the node flagged by id let cfgs = create_peermgr_cfgs(2); - let chan_handler = test_utils::TestChannelMessageHandler::new(); - let mut peers = create_network(2, &cfgs); + let peers = create_network(2, &cfgs); establish_connection(&peers[0], &peers[1]); assert_eq!(peers[0].peers.read().unwrap().len(), 1); let their_id = peers[1].node_signer.get_node_id(Recipient::Node).unwrap(); - - chan_handler.pending_events.lock().unwrap().push(events::MessageSendEvent::HandleError { + cfgs[0].chan_handler.pending_events.lock().unwrap().push(events::MessageSendEvent::HandleError { node_id: their_id, action: msgs::ErrorAction::DisconnectPeer { msg: None }, }); - assert_eq!(chan_handler.pending_events.lock().unwrap().len(), 1); - peers[0].message_handler.chan_handler = &chan_handler; peers[0].process_events(); assert_eq!(peers[0].peers.read().unwrap().len(), 0); @@ -2232,6 +2434,38 @@ mod tests { assert_eq!(peers[1].read_event(&mut fd_b, &a_data).unwrap(), false); } + #[test] + fn test_non_init_first_msg() { + // Simple test of the first message received over a connection being something other than + // Init. This results in an immediate disconnection, which previously included a spurious + // peer_disconnected event handed to event handlers (which would panic in + // `TestChannelMessageHandler` here). + let cfgs = create_peermgr_cfgs(2); + let peers = create_network(2, &cfgs); + + let mut fd_dup = FileDescriptor { + fd: 3, outbound_data: Arc::new(Mutex::new(Vec::new())), + disconnect: Arc::new(AtomicBool::new(false)), + }; + let addr_dup = NetAddress::IPv4{addr: [127, 0, 0, 1], port: 1003}; + let id_a = cfgs[0].node_signer.get_node_id(Recipient::Node).unwrap(); + peers[0].new_inbound_connection(fd_dup.clone(), Some(addr_dup.clone())).unwrap(); + + let mut dup_encryptor = PeerChannelEncryptor::new_outbound(id_a, SecretKey::from_slice(&[42; 32]).unwrap()); + let initial_data = dup_encryptor.get_act_one(&peers[1].secp_ctx); + assert_eq!(peers[0].read_event(&mut fd_dup, &initial_data).unwrap(), false); + peers[0].process_events(); + + let a_data = fd_dup.outbound_data.lock().unwrap().split_off(0); + let (act_three, _) = + dup_encryptor.process_act_two(&a_data[..], &&cfgs[1].node_signer).unwrap(); + assert_eq!(peers[0].read_event(&mut fd_dup, &act_three).unwrap(), false); + + let not_init_msg = msgs::Ping { ponglen: 4, byteslen: 0 }; + let msg_bytes = dup_encryptor.encrypt_message(¬_init_msg); + assert!(peers[0].read_event(&mut fd_dup, &msg_bytes).is_err()); + } + #[test] fn test_disconnect_all_peer() { // Simple test which builds a network of PeerManager, connects and brings them to NoiseState::Finished and @@ -2316,8 +2550,14 @@ mod tests { let peers = create_network(2, &cfgs); let a_id = peers[0].node_signer.get_node_id(Recipient::Node).unwrap(); - let mut fd_a = FileDescriptor { fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())) }; - let mut fd_b = FileDescriptor { fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())) }; + let mut fd_a = FileDescriptor { + fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())), + disconnect: Arc::new(AtomicBool::new(false)), + }; + let mut fd_b = FileDescriptor { + fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())), + disconnect: Arc::new(AtomicBool::new(false)), + }; let initial_data = peers[1].new_outbound_connection(a_id, fd_b.clone(), None).unwrap(); peers[0].new_inbound_connection(fd_a.clone(), None).unwrap();