X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=lightning%2Fsrc%2Fln%2Fpeer_handler.rs;h=edc9c68a5068c25b419a5e21dc16a4cb7d850ac9;hb=2d213a47bffe2f3d9b744fe64e0001d90a4bf8fa;hp=f593e23c8c6c0fd5054955a38db6dd13ec7effcb;hpb=1fd95496d1038352535a74065d2b06559e2192fb;p=rust-lightning diff --git a/lightning/src/ln/peer_handler.rs b/lightning/src/ln/peer_handler.rs index f593e23c..edc9c68a 100644 --- a/lightning/src/ln/peer_handler.rs +++ b/lightning/src/ln/peer_handler.rs @@ -46,16 +46,23 @@ use bitcoin::hashes::sha256::Hash as Sha256; use bitcoin::hashes::sha256::HashEngine as Sha256Engine; use bitcoin::hashes::{HashEngine, Hash}; -/// Handler for BOLT1-compliant messages. +/// A handler provided to [`PeerManager`] for reading and handling custom messages. +/// +/// [BOLT 1] specifies a custom message type range for use with experimental or application-specific +/// messages. `CustomMessageHandler` allows for user-defined handling of such types. See the +/// [`lightning_custom_message`] crate for tools useful in composing more than one custom handler. +/// +/// [BOLT 1]: https://github.com/lightning/bolts/blob/master/01-messaging.md +/// [`lightning_custom_message`]: https://docs.rs/lightning_custom_message/latest/lightning_custom_message pub trait CustomMessageHandler: wire::CustomMessageReader { - /// Called with the message type that was received and the buffer to be read. - /// Can return a `MessageHandlingError` if the message could not be handled. + /// Handles the given message sent from `sender_node_id`, possibly producing messages for + /// [`CustomMessageHandler::get_and_clear_pending_msg`] to return and thus for [`PeerManager`] + /// to send. fn handle_custom_message(&self, msg: Self::CustomMessage, sender_node_id: &PublicKey) -> Result<(), LightningError>; - /// Gets the list of pending messages which were generated by the custom message - /// handler, clearing the list in the process. The first tuple element must - /// correspond to the intended recipients node ids. If no connection to one of the - /// specified node does not exist, the message is simply not sent to it. + /// Returns the list of pending messages that were generated by the handler, clearing the list + /// in the process. Each message is paired with the node id of the intended recipient. If no + /// connection to the node exists, then the message is simply not sent. fn get_and_clear_pending_msg(&self) -> Vec<(PublicKey, Self::CustomMessage)>; } @@ -71,8 +78,8 @@ impl RoutingMessageHandler for IgnoringMessageHandler { fn handle_channel_update(&self, _msg: &msgs::ChannelUpdate) -> Result { Ok(false) } fn get_next_channel_announcement(&self, _starting_point: u64) -> Option<(msgs::ChannelAnnouncement, Option, Option)> { None } - fn get_next_node_announcement(&self, _starting_point: Option<&PublicKey>) -> Option { None } - fn peer_connected(&self, _their_node_id: &PublicKey, _init: &msgs::Init) -> Result<(), ()> { Ok(()) } + fn get_next_node_announcement(&self, _starting_point: Option<&NodeId>) -> Option { None } + fn peer_connected(&self, _their_node_id: &PublicKey, _init: &msgs::Init, _inbound: bool) -> Result<(), ()> { Ok(()) } fn handle_reply_channel_range(&self, _their_node_id: &PublicKey, _msg: msgs::ReplyChannelRange) -> Result<(), LightningError> { Ok(()) } fn handle_reply_short_channel_ids_end(&self, _their_node_id: &PublicKey, _msg: msgs::ReplyShortChannelIdsEnd) -> Result<(), LightningError> { Ok(()) } fn handle_query_channel_range(&self, _their_node_id: &PublicKey, _msg: msgs::QueryChannelRange) -> Result<(), LightningError> { Ok(()) } @@ -81,14 +88,15 @@ impl RoutingMessageHandler for IgnoringMessageHandler { fn provided_init_features(&self, _their_node_id: &PublicKey) -> InitFeatures { InitFeatures::empty() } + fn processing_queue_high(&self) -> bool { false } } impl OnionMessageProvider for IgnoringMessageHandler { fn next_onion_message_for_peer(&self, _peer_node_id: PublicKey) -> Option { None } } impl OnionMessageHandler for IgnoringMessageHandler { fn handle_onion_message(&self, _their_node_id: &PublicKey, _msg: &msgs::OnionMessage) {} - fn peer_connected(&self, _their_node_id: &PublicKey, _init: &msgs::Init) -> Result<(), ()> { Ok(()) } - fn peer_disconnected(&self, _their_node_id: &PublicKey, _no_connection_possible: bool) {} + fn peer_connected(&self, _their_node_id: &PublicKey, _init: &msgs::Init, _inbound: bool) -> Result<(), ()> { Ok(()) } + fn peer_disconnected(&self, _their_node_id: &PublicKey) {} fn provided_node_features(&self) -> NodeFeatures { NodeFeatures::empty() } fn provided_init_features(&self, _their_node_id: &PublicKey) -> InitFeatures { InitFeatures::empty() @@ -222,8 +230,8 @@ impl ChannelMessageHandler for ErroringMessageHandler { } // msgs::ChannelUpdate does not contain the channel_id field, so we just drop them. fn handle_channel_update(&self, _their_node_id: &PublicKey, _msg: &msgs::ChannelUpdate) {} - fn peer_disconnected(&self, _their_node_id: &PublicKey, _no_connection_possible: bool) {} - fn peer_connected(&self, _their_node_id: &PublicKey, _init: &msgs::Init) -> Result<(), ()> { Ok(()) } + fn peer_disconnected(&self, _their_node_id: &PublicKey) {} + fn peer_connected(&self, _their_node_id: &PublicKey, _init: &msgs::Init, _inbound: bool) -> Result<(), ()> { Ok(()) } fn handle_error(&self, _their_node_id: &PublicKey, _msg: &msgs::ErrorMessage) {} fn provided_node_features(&self) -> NodeFeatures { NodeFeatures::empty() } fn provided_init_features(&self, _their_node_id: &PublicKey) -> InitFeatures { @@ -314,16 +322,7 @@ pub trait SocketDescriptor : cmp::Eq + hash::Hash + Clone { /// generate no further read_event/write_buffer_space_avail/socket_disconnected calls for the /// descriptor. #[derive(Clone)] -pub struct PeerHandleError { - /// Used to indicate that we probably can't make any future connections to this peer (e.g. - /// because we required features that our peer was missing, or vice versa). - /// - /// While LDK's [`ChannelManager`] will not do it automatically, you likely wish to force-close - /// any channels with this peer or check for new versions of LDK. - /// - /// [`ChannelManager`]: crate::ln::channelmanager::ChannelManager - pub no_connection_possible: bool, -} +pub struct PeerHandleError { } impl fmt::Debug for PeerHandleError { fn fmt(&self, formatter: &mut fmt::Formatter) -> Result<(), fmt::Error> { formatter.write_str("Peer Sent Invalid Data") @@ -345,7 +344,7 @@ impl error::Error for PeerHandleError { enum InitSyncTracker{ NoSyncRequested, ChannelsSyncing(u64), - NodesSyncing(PublicKey), + NodesSyncing(NodeId), } /// The ratio between buffer sizes at which we stop sending initial sync messages vs when we stop @@ -389,7 +388,15 @@ const BUFFER_DRAIN_MSGS_PER_TICK: usize = 32; struct Peer { channel_encryptor: PeerChannelEncryptor, - their_node_id: Option, + /// We cache a `NodeId` here to avoid serializing peers' keys every time we forward gossip + /// messages in `PeerManager`. Use `Peer::set_their_node_id` to modify this field. + their_node_id: Option<(PublicKey, NodeId)>, + /// The features provided in the peer's [`msgs::Init`] message. + /// + /// This is set only after we've processed the [`msgs::Init`] message and called relevant + /// `peer_connected` handler methods. Thus, this field is set *iff* we've finished our + /// handshake and can talk to this peer normally (though use [`Peer::handshake_complete`] to + /// check this. their_features: Option, their_net_address: Option, @@ -412,9 +419,24 @@ struct Peer { awaiting_pong_timer_tick_intervals: i8, received_message_since_timer_tick: bool, sent_gossip_timestamp_filter: bool, + + /// Indicates we've received a `channel_announcement` since the last time we had + /// [`PeerManager::gossip_processing_backlogged`] set (or, really, that we've received a + /// `channel_announcement` at all - we set this unconditionally but unset it every time we + /// check if we're gossip-processing-backlogged). + received_channel_announce_since_backlogged: bool, + + inbound_connection: bool, } impl Peer { + /// True after we've processed the [`msgs::Init`] message and called relevant `peer_connected` + /// handler methods. Thus, this implies we've finished our handshake and can talk to this peer + /// normally. + fn handshake_complete(&self) -> bool { + self.their_features.is_some() + } + /// Returns true if the channel announcements/updates for the given channel should be /// forwarded to this peer. /// If we are sending our routing table to this peer and we have not yet sent channel @@ -422,6 +444,7 @@ impl Peer { /// point and we shouldn't send it yet to avoid sending duplicate updates. If we've already /// sent the old versions, we should send the update, and so return true here. fn should_forward_channel_announcement(&self, channel_id: u64) -> bool { + if !self.handshake_complete() { return false; } if self.their_features.as_ref().unwrap().supports_gossip_queries() && !self.sent_gossip_timestamp_filter { return false; @@ -434,7 +457,8 @@ impl Peer { } /// Similar to the above, but for node announcements indexed by node_id. - fn should_forward_node_announcement(&self, node_id: PublicKey) -> bool { + fn should_forward_node_announcement(&self, node_id: NodeId) -> bool { + if !self.handshake_complete() { return false; } if self.their_features.as_ref().unwrap().supports_gossip_queries() && !self.sent_gossip_timestamp_filter { return false; @@ -442,14 +466,18 @@ impl Peer { match self.sync_status { InitSyncTracker::NoSyncRequested => true, InitSyncTracker::ChannelsSyncing(_) => false, - InitSyncTracker::NodesSyncing(pk) => pk < node_id, + InitSyncTracker::NodesSyncing(sync_node_id) => sync_node_id.as_slice() < node_id.as_slice(), } } /// Returns whether we should be reading bytes from this peer, based on whether its outbound /// buffer still has space and we don't need to pause reads to get some writes out. - fn should_read(&self) -> bool { - self.pending_outbound_buffer.len() < OUTBOUND_BUFFER_LIMIT_READ_PAUSE + fn should_read(&mut self, gossip_processing_backlogged: bool) -> bool { + if !gossip_processing_backlogged { + self.received_channel_announce_since_backlogged = false; + } + self.pending_outbound_buffer.len() < OUTBOUND_BUFFER_LIMIT_READ_PAUSE && + (!gossip_processing_backlogged || !self.received_channel_announce_since_backlogged) } /// Determines if we should push additional gossip background sync (aka "backfill") onto a peer's @@ -457,19 +485,20 @@ impl Peer { fn should_buffer_gossip_backfill(&self) -> bool { self.pending_outbound_buffer.is_empty() && self.gossip_broadcast_buffer.is_empty() && self.msgs_sent_since_pong < BUFFER_DRAIN_MSGS_PER_TICK + && self.handshake_complete() } /// Determines if we should push an onion message onto a peer's outbound buffer. This is checked /// every time the peer's buffer may have been drained. fn should_buffer_onion_message(&self) -> bool { - self.pending_outbound_buffer.is_empty() + self.pending_outbound_buffer.is_empty() && self.handshake_complete() && self.msgs_sent_since_pong < BUFFER_DRAIN_MSGS_PER_TICK } /// Determines if we should push additional gossip broadcast messages onto a peer's outbound /// buffer. This is checked every time the peer's buffer may have been drained. fn should_buffer_gossip_broadcast(&self) -> bool { - self.pending_outbound_buffer.is_empty() + self.pending_outbound_buffer.is_empty() && self.handshake_complete() && self.msgs_sent_since_pong < BUFFER_DRAIN_MSGS_PER_TICK } @@ -481,6 +510,10 @@ impl Peer { total_outbound_buffered > OUTBOUND_BUFFER_LIMIT_DROP_GOSSIP || self.msgs_sent_since_pong > BUFFER_DRAIN_MSGS_PER_TICK * FORWARD_INIT_SYNC_BUFFER_LIMIT_RATIO } + + fn set_their_node_id(&mut self, node_id: PublicKey) { + self.their_node_id = Some((node_id, NodeId::from_pubkey(&node_id))); + } } /// SimpleArcPeerManager is useful when you need a PeerManager with a static lifetime, e.g. @@ -562,6 +595,9 @@ pub struct PeerManager PeerManager(&'a Option); +struct OptionalFromDebugger<'a>(&'a Option<(PublicKey, NodeId)>); impl core::fmt::Display for OptionalFromDebugger<'_> { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> { - if let Some(node_id) = self.0 { write!(f, " from {}", log_pubkey!(node_id)) } else { Ok(()) } + if let Some((node_id, _)) = self.0 { write!(f, " from {}", log_pubkey!(node_id)) } else { Ok(()) } } } @@ -720,6 +756,8 @@ impl Vec { + /// The returned `Option`s will only be `Some` if an address had been previously given via + /// [`Self::new_outbound_connection`] or [`Self::new_inbound_connection`]. + pub fn get_peer_node_ids(&self) -> Vec<(PublicKey, Option)> { let peers = self.peers.read().unwrap(); peers.values().filter_map(|peer_mutex| { let p = peer_mutex.lock().unwrap(); - if !p.channel_encryptor.is_ready_for_encryption() || p.their_features.is_none() { + if !p.handshake_complete() { return None; } - p.their_node_id + Some((p.their_node_id.unwrap().0, p.their_net_address.clone())) }).collect() } @@ -751,7 +794,7 @@ impl { + debug_assert!(false, "PeerManager driver duplicated descriptors!"); + Err(PeerHandleError {}) + }, + hash_map::Entry::Vacant(e) => { + e.insert(Mutex::new(Peer { + channel_encryptor: peer_encryptor, + their_node_id: None, + their_features: None, + their_net_address: remote_network_address, + + pending_outbound_buffer: LinkedList::new(), + pending_outbound_buffer_first_msg_offset: 0, + gossip_broadcast_buffer: LinkedList::new(), + awaiting_write_event: false, + + pending_read_buffer, + pending_read_buffer_pos: 0, + pending_read_is_header: false, + + sync_status: InitSyncTracker::NoSyncRequested, + + msgs_sent_since_pong: 0, + awaiting_pong_timer_tick_intervals: 0, + received_message_since_timer_tick: false, + sent_gossip_timestamp_filter: false, + + received_channel_announce_since_backlogged: false, + inbound_connection: false, + })); + Ok(res) + } + } } /// Indicates a new inbound connection has been established to a node with an optional remote @@ -819,37 +871,59 @@ impl { + debug_assert!(false, "PeerManager driver duplicated descriptors!"); + Err(PeerHandleError {}) + }, + hash_map::Entry::Vacant(e) => { + e.insert(Mutex::new(Peer { + channel_encryptor: peer_encryptor, + their_node_id: None, + their_features: None, + their_net_address: remote_network_address, + + pending_outbound_buffer: LinkedList::new(), + pending_outbound_buffer_first_msg_offset: 0, + gossip_broadcast_buffer: LinkedList::new(), + awaiting_write_event: false, + + pending_read_buffer, + pending_read_buffer_pos: 0, + pending_read_is_header: false, + + sync_status: InitSyncTracker::NoSyncRequested, + + msgs_sent_since_pong: 0, + awaiting_pong_timer_tick_intervals: 0, + received_message_since_timer_tick: false, + sent_gossip_timestamp_filter: false, + + received_channel_announce_since_backlogged: false, + inbound_connection: true, + })); + Ok(()) + } + } + } + + fn peer_should_read(&self, peer: &mut Peer) -> bool { + peer.should_read(self.gossip_processing_backlogged.load(Ordering::Relaxed)) + } + + fn update_gossip_backlogged(&self) { + let new_state = self.message_handler.route_handler.processing_queue_high(); + let prev_state = self.gossip_processing_backlogged.swap(new_state, Ordering::Relaxed); + if prev_state && !new_state { + self.gossip_processing_backlog_lifted.store(true, Ordering::Relaxed); + } } - fn do_attempt_write_data(&self, descriptor: &mut Descriptor, peer: &mut Peer) { + fn do_attempt_write_data(&self, descriptor: &mut Descriptor, peer: &mut Peer, force_one_write: bool) { + let mut have_written = false; while !peer.awaiting_write_event { if peer.should_buffer_onion_message() { - if let Some(peer_node_id) = peer.their_node_id { + if let Some((peer_node_id, _)) = peer.their_node_id { if let Some(next_onion_message) = self.message_handler.onion_message_handler.next_onion_message_for_peer(peer_node_id) { self.enqueue_message(peer, &next_onion_message); @@ -889,8 +963,8 @@ impl unreachable!(), - InitSyncTracker::NodesSyncing(key) => { - if let Some(msg) = self.message_handler.route_handler.get_next_node_announcement(Some(&key)) { + InitSyncTracker::NodesSyncing(sync_node_id) => { + if let Some(msg) = self.message_handler.route_handler.get_next_node_announcement(Some(&sync_node_id)) { self.enqueue_message(peer, &msg); peer.sync_status = InitSyncTracker::NodesSyncing(msg.contents.node_id); } else { @@ -903,13 +977,23 @@ impl return, + None => { + if force_one_write && !have_written { + if should_read { + let data_sent = descriptor.send_data(&[], should_read); + debug_assert_eq!(data_sent, 0, "Can't write more than no data"); + } + } + return + }, Some(buff) => buff, }; let pending = &next_buff[peer.pending_outbound_buffer_first_msg_offset..]; - let data_sent = descriptor.send_data(pending, peer.should_read()); + let data_sent = descriptor.send_data(pending, should_read); + have_written = true; peer.pending_outbound_buffer_first_msg_offset += data_sent; if peer.pending_outbound_buffer_first_msg_offset == next_buff.len() { peer.pending_outbound_buffer_first_msg_offset = 0; @@ -939,12 +1023,12 @@ impl { let mut peer = peer_mutex.lock().unwrap(); peer.awaiting_write_event = false; - self.do_attempt_write_data(descriptor, &mut peer); + self.do_attempt_write_data(descriptor, &mut peer, false); } }; Ok(()) @@ -962,6 +1046,9 @@ impl Result { @@ -969,7 +1056,7 @@ impl Ok(res), Err(e) => { log_trace!(self.logger, "Peer sent invalid data or we decided to disconnect due to a protocol error"); - self.disconnect_event_internal(peer_descriptor, e.no_connection_possible); + self.disconnect_event_internal(peer_descriptor); Err(e) } } @@ -978,9 +1065,9 @@ impl(&self, peer: &mut Peer, message: &M) { if is_gossip_msg(message.type_id()) { - log_gossip!(self.logger, "Enqueueing message {:?} to {}", message, log_pubkey!(peer.their_node_id.unwrap())); + log_gossip!(self.logger, "Enqueueing message {:?} to {}", message, log_pubkey!(peer.their_node_id.unwrap().0)); } else { - log_trace!(self.logger, "Enqueueing message {:?} to {}", message, log_pubkey!(peer.their_node_id.unwrap())) + log_trace!(self.logger, "Enqueueing message {:?} to {}", message, log_pubkey!(peer.their_node_id.unwrap().0)) } peer.msgs_sent_since_pong += 1; peer.pending_outbound_buffer.push_back(peer.channel_encryptor.encrypt_message(message)); @@ -1002,7 +1089,7 @@ impl { let mut read_pos = 0; @@ -1016,7 +1103,7 @@ impl { //TODO: Try to push msg log_debug!(self.logger, "Error handling message{}; disconnecting peer with: {}", OptionalFromDebugger(&peer_node_id), e.err); - return Err(PeerHandleError{ no_connection_possible: false }); + return Err(PeerHandleError { }); }, msgs::ErrorAction::IgnoreAndLog(level) => { log_given_level!(self.logger, level, "Error handling message{}; ignoring: {}", OptionalFromDebugger(&peer_node_id), e.err); @@ -1065,14 +1152,18 @@ impl { - match self.node_id_to_descriptor.lock().unwrap().entry(peer.their_node_id.unwrap()) { - hash_map::Entry::Occupied(_) => { - log_trace!(self.logger, "Got second connection with {}, closing", log_pubkey!(peer.their_node_id.unwrap())); + match self.node_id_to_descriptor.lock().unwrap().entry(peer.their_node_id.unwrap().0) { + hash_map::Entry::Occupied(e) => { + log_trace!(self.logger, "Got second connection with {}, closing", log_pubkey!(peer.their_node_id.unwrap().0)); peer.their_node_id = None; // Unset so that we don't generate a peer_disconnected event - return Err(PeerHandleError{ no_connection_possible: false }) + // Check that the peers map is consistent with the + // node_id_to_descriptor map, as this has been broken + // before. + debug_assert!(peers.get(e.get()).is_some()); + return Err(PeerHandleError { }) }, hash_map::Entry::Vacant(entry) => { - log_debug!(self.logger, "Finished noise handshake for connection with {}", log_pubkey!(peer.their_node_id.unwrap())); + log_debug!(self.logger, "Finished noise handshake for connection with {}", log_pubkey!(peer.their_node_id.unwrap().0)); entry.insert(peer_descriptor.clone()) }, }; @@ -1096,7 +1187,7 @@ impl 8192 { peer.pending_read_buffer = Vec::new(); } peer.pending_read_buffer.resize(msg_len as usize + 16, 0); if msg_len < 2 { // Need at least the message type tag - return Err(PeerHandleError{ no_connection_possible: false }); + return Err(PeerHandleError { }); } peer.pending_read_is_header = false; } else { @@ -1169,19 +1260,19 @@ impl { log_gossip!(self.logger, "Received a message with an unknown required feature flag or TLV, you may want to update!"); self.enqueue_message(peer, &msgs::WarningMessage { channel_id: [0; 32], data: format!("Received an unknown required feature/TLV in message type {:?}", ty) }); - return Err(PeerHandleError { no_connection_possible: false }); + return Err(PeerHandleError { }); } - (msgs::DecodeError::UnknownVersion, _) => return Err(PeerHandleError { no_connection_possible: false }), + (msgs::DecodeError::UnknownVersion, _) => return Err(PeerHandleError { }), (msgs::DecodeError::InvalidValue, _) => { log_debug!(self.logger, "Got an invalid value while deserializing message"); - return Err(PeerHandleError { no_connection_possible: false }); + return Err(PeerHandleError { }); } (msgs::DecodeError::ShortRead, _) => { log_debug!(self.logger, "Deserialization failed due to shortness of message"); - return Err(PeerHandleError { no_connection_possible: false }); + return Err(PeerHandleError { }); } - (msgs::DecodeError::BadLengthDescriptor, _) => return Err(PeerHandleError { no_connection_possible: false }), - (msgs::DecodeError::Io(_), _) => return Err(PeerHandleError { no_connection_possible: false }), + (msgs::DecodeError::BadLengthDescriptor, _) => return Err(PeerHandleError { }), + (msgs::DecodeError::Io(_), _) => return Err(PeerHandleError { }), } } }; @@ -1191,7 +1282,7 @@ impl, message: wire::Message<<::Target as wire::CustomMessageReader>::CustomMessage> ) -> Result::Target as wire::CustomMessageReader>::CustomMessage>>, MessageHandlingError> { - let their_node_id = peer_lock.their_node_id.clone().expect("We know the peer's public key by the time we receive messages"); + let their_node_id = peer_lock.their_node_id.clone().expect("We know the peer's public key by the time we receive messages").0; peer_lock.received_message_since_timer_tick = true; // Need an Init as first message if let wire::Message::Init(msg) = message { if msg.features.requires_unknown_bits() { log_debug!(self.logger, "Peer features required unknown version bits"); - return Err(PeerHandleError{ no_connection_possible: true }.into()); + return Err(PeerHandleError { }.into()); } if peer_lock.their_features.is_some() { - return Err(PeerHandleError{ no_connection_possible: false }.into()); + return Err(PeerHandleError { }.into()); } log_info!(self.logger, "Received peer Init message from {}: {}", log_pubkey!(their_node_id), msg.features); @@ -1246,24 +1337,24 @@ impl { @@ -1403,12 +1498,14 @@ impl MessageHandlingError { e.into() })? { should_forward = Some(wire::Message::ChannelAnnouncement(msg)); } + self.update_gossip_backlogged(); }, wire::Message::NodeAnnouncement(msg) => { if self.message_handler.route_handler.handle_node_announcement(&msg) .map_err(|e| -> MessageHandlingError { e.into() })? { should_forward = Some(wire::Message::NodeAnnouncement(msg)); } + self.update_gossip_backlogged(); }, wire::Message::ChannelUpdate(msg) => { self.message_handler.chan_handler.handle_channel_update(&their_node_id, &msg); @@ -1416,6 +1513,7 @@ impl MessageHandlingError { e.into() })? { should_forward = Some(wire::Message::ChannelUpdate(msg)); } + self.update_gossip_backlogged(); }, wire::Message::QueryShortChannelIds(msg) => { self.message_handler.route_handler.handle_query_short_channel_ids(&their_node_id, msg)?; @@ -1438,8 +1536,7 @@ impl { log_debug!(self.logger, "Received unknown even message of type {}, disconnecting peer!", type_id); - // Fail the channel if message is an even, unknown type as per BOLT #1. - return Err(PeerHandleError{ no_connection_possible: true }.into()); + return Err(PeerHandleError { }.into()); }, wire::Message::Unknown(type_id) => { log_trace!(self.logger, "Received unknown odd message of type {}, ignoring", type_id); @@ -1459,21 +1556,22 @@ impl match peers.get(&descriptor) { Some(peer_mutex) => { let peer_lock = peer_mutex.lock().unwrap(); - if peer_lock.their_features.is_none() { + if !peer_lock.handshake_complete() { continue; } peer_lock @@ -1709,10 +1816,12 @@ impl {}, } - match self.message_handler.route_handler.handle_channel_update(&update_msg) { - Ok(_) | Err(LightningError { action: msgs::ErrorAction::IgnoreDuplicateGossip, .. }) => - self.forward_broadcast_msg(peers, &wire::Message::ChannelUpdate(update_msg), None), - _ => {}, + if let Some(msg) = update_msg { + match self.message_handler.route_handler.handle_channel_update(&msg) { + Ok(_) | Err(LightningError { action: msgs::ErrorAction::IgnoreDuplicateGossip, .. }) => + self.forward_broadcast_msg(peers, &wire::Message::ChannelUpdate(msg), None), + _ => {}, + } } }, MessageSendEvent::BroadcastChannelUpdate { msg } => { @@ -1723,6 +1832,14 @@ impl {}, } }, + MessageSendEvent::BroadcastNodeAnnouncement { msg } => { + log_debug!(self.logger, "Handling BroadcastNodeAnnouncement event in peer_handler for node {}", msg.contents.node_id); + match self.message_handler.route_handler.handle_node_announcement(&msg) { + Ok(_) | Err(LightningError { action: msgs::ErrorAction::IgnoreDuplicateGossip, .. }) => + self.forward_broadcast_msg(peers, &wire::Message::NodeAnnouncement(msg), None), + _ => {}, + } + }, MessageSendEvent::SendChannelUpdate { ref node_id, ref msg } => { log_trace!(self.logger, "Handling SendChannelUpdate event in peer_handler for node {} for channel {}", log_pubkey!(node_id), msg.contents.short_channel_id); @@ -1784,7 +1901,9 @@ impl { let peer = peer_lock.lock().unwrap(); - if let Some(node_id) = peer.their_node_id { - log_trace!(self.logger, - "Handling disconnection of peer {}, with {}future connection to the peer possible.", - log_pubkey!(node_id), if no_connection_possible { "no " } else { "" }); - self.node_id_to_descriptor.lock().unwrap().remove(&node_id); - self.message_handler.chan_handler.peer_disconnected(&node_id, no_connection_possible); - self.message_handler.onion_message_handler.peer_disconnected(&node_id, no_connection_possible); + if let Some((node_id, _)) = peer.their_node_id { + log_trace!(self.logger, "Handling disconnection of peer {}", log_pubkey!(node_id)); + let removed = self.node_id_to_descriptor.lock().unwrap().remove(&node_id); + debug_assert!(removed.is_some(), "descriptor maps should be consistent"); + if !peer.handshake_complete() { return; } + self.message_handler.chan_handler.peer_disconnected(&node_id); + self.message_handler.onion_message_handler.peer_disconnected(&node_id); } } }; @@ -1849,21 +1981,17 @@ impl 0 && !peer.received_message_since_timer_tick) + || peer.awaiting_pong_timer_tick_intervals as u64 > + MAX_BUFFER_DRAIN_TICK_INTERVALS_PER_PEER as u64 * peers_lock.len() as u64 + { + descriptors_needing_disconnect.push(descriptor.clone()); + break; + } peer.received_message_since_timer_tick = false; - continue; - } - if (peer.awaiting_pong_timer_tick_intervals > 0 && !peer.received_message_since_timer_tick) - || peer.awaiting_pong_timer_tick_intervals as u64 > - MAX_BUFFER_DRAIN_TICK_INTERVALS_PER_PEER as u64 * peers_lock.len() as u64 - { - descriptors_needing_disconnect.push(descriptor.clone()); - continue; - } - peer.received_message_since_timer_tick = false; + if peer.awaiting_pong_timer_tick_intervals > 0 { + peer.awaiting_pong_timer_tick_intervals += 1; + break; + } - if peer.awaiting_pong_timer_tick_intervals > 0 { - peer.awaiting_pong_timer_tick_intervals += 1; - continue; + peer.awaiting_pong_timer_tick_intervals = 1; + let ping = msgs::Ping { + ponglen: 0, + byteslen: 64, + }; + self.enqueue_message(&mut *peer, &ping); + break; } - - peer.awaiting_pong_timer_tick_intervals = 1; - let ping = msgs::Ping { - ponglen: 0, - byteslen: 64, - }; - self.enqueue_message(&mut *peer, &ping); - self.do_attempt_write_data(&mut (descriptor.clone()), &mut *peer); + self.do_attempt_write_data(&mut (descriptor.clone()), &mut *peer, flush_read_disabled); } } if !descriptors_needing_disconnect.is_empty() { { let mut peers_lock = self.peers.write().unwrap(); - for descriptor in descriptors_needing_disconnect.iter() { - if let Some(peer) = peers_lock.remove(descriptor) { - if let Some(node_id) = peer.lock().unwrap().their_node_id { - log_trace!(self.logger, "Disconnecting peer with id {} due to ping timeout", node_id); + for descriptor in descriptors_needing_disconnect { + if let Some(peer_mutex) = peers_lock.remove(&descriptor) { + let peer = peer_mutex.lock().unwrap(); + if let Some((node_id, _)) = peer.their_node_id { self.node_id_to_descriptor.lock().unwrap().remove(&node_id); - self.message_handler.chan_handler.peer_disconnected(&node_id, false); - self.message_handler.onion_message_handler.peer_disconnected(&node_id, false); } + self.do_disconnect(descriptor, &*peer, "ping timeout"); } } } - - for mut descriptor in descriptors_needing_disconnect.drain(..) { - descriptor.disconnect_socket(); - } } } @@ -2023,7 +2151,7 @@ impl bool { #[cfg(test)] mod tests { use crate::chain::keysinterface::{NodeSigner, Recipient}; + use crate::ln::peer_channel_encryptor::PeerChannelEncryptor; use crate::ln::peer_handler::{PeerManager, MessageHandler, SocketDescriptor, IgnoringMessageHandler, filter_addresses}; use crate::ln::{msgs, wire}; use crate::ln::msgs::NetAddress; @@ -2075,12 +2204,13 @@ mod tests { use crate::prelude::*; use crate::sync::{Arc, Mutex}; - use core::sync::atomic::Ordering; + use core::sync::atomic::{AtomicBool, Ordering}; #[derive(Clone)] struct FileDescriptor { fd: u16, outbound_data: Arc>>, + disconnect: Arc, } impl PartialEq for FileDescriptor { fn eq(&self, other: &Self) -> bool { @@ -2100,7 +2230,7 @@ mod tests { data.len() } - fn disconnect_socket(&mut self) {} + fn disconnect_socket(&mut self) { self.disconnect.store(true, Ordering::Release); } } struct PeerManagerCfg { @@ -2140,11 +2270,20 @@ mod tests { } fn establish_connection<'a>(peer_a: &PeerManager, peer_b: &PeerManager) -> (FileDescriptor, FileDescriptor) { - let a_id = peer_a.node_signer.get_node_id(Recipient::Node).unwrap(); - let mut fd_a = FileDescriptor { fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())) }; - let mut fd_b = FileDescriptor { fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())) }; - let initial_data = peer_b.new_outbound_connection(a_id, fd_b.clone(), None).unwrap(); - peer_a.new_inbound_connection(fd_a.clone(), None).unwrap(); + let id_a = peer_a.node_signer.get_node_id(Recipient::Node).unwrap(); + let mut fd_a = FileDescriptor { + fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())), + disconnect: Arc::new(AtomicBool::new(false)), + }; + let addr_a = NetAddress::IPv4{addr: [127, 0, 0, 1], port: 1000}; + let id_b = peer_b.node_signer.get_node_id(Recipient::Node).unwrap(); + let mut fd_b = FileDescriptor { + fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())), + disconnect: Arc::new(AtomicBool::new(false)), + }; + let addr_b = NetAddress::IPv4{addr: [127, 0, 0, 1], port: 1001}; + let initial_data = peer_b.new_outbound_connection(id_a, fd_b.clone(), Some(addr_a.clone())).unwrap(); + peer_a.new_inbound_connection(fd_a.clone(), Some(addr_b.clone())).unwrap(); assert_eq!(peer_a.read_event(&mut fd_a, &initial_data).unwrap(), false); peer_a.process_events(); @@ -2159,27 +2298,106 @@ mod tests { let a_data = fd_a.outbound_data.lock().unwrap().split_off(0); assert_eq!(peer_b.read_event(&mut fd_b, &a_data).unwrap(), false); + assert!(peer_a.get_peer_node_ids().contains(&(id_b, Some(addr_b)))); + assert!(peer_b.get_peer_node_ids().contains(&(id_a, Some(addr_a)))); + (fd_a.clone(), fd_b.clone()) } + #[test] + #[cfg(feature = "std")] + fn fuzz_threaded_connections() { + // Spawn two threads which repeatedly connect two peers together, leading to "got second + // connection with peer" disconnections and rapid reconnect. This previously found an issue + // with our internal map consistency, and is a generally good smoke test of disconnection. + let cfgs = Arc::new(create_peermgr_cfgs(2)); + // Until we have std::thread::scoped we have to unsafe { turn off the borrow checker }. + let peers = Arc::new(create_network(2, unsafe { &*(&*cfgs as *const _) as &'static _ })); + + let start_time = std::time::Instant::now(); + macro_rules! spawn_thread { ($id: expr) => { { + let peers = Arc::clone(&peers); + let cfgs = Arc::clone(&cfgs); + std::thread::spawn(move || { + let mut ctr = 0; + while start_time.elapsed() < std::time::Duration::from_secs(1) { + let id_a = peers[0].node_signer.get_node_id(Recipient::Node).unwrap(); + let mut fd_a = FileDescriptor { + fd: $id + ctr * 3, outbound_data: Arc::new(Mutex::new(Vec::new())), + disconnect: Arc::new(AtomicBool::new(false)), + }; + let addr_a = NetAddress::IPv4{addr: [127, 0, 0, 1], port: 1000}; + let mut fd_b = FileDescriptor { + fd: $id + ctr * 3, outbound_data: Arc::new(Mutex::new(Vec::new())), + disconnect: Arc::new(AtomicBool::new(false)), + }; + let addr_b = NetAddress::IPv4{addr: [127, 0, 0, 1], port: 1001}; + let initial_data = peers[1].new_outbound_connection(id_a, fd_b.clone(), Some(addr_a.clone())).unwrap(); + peers[0].new_inbound_connection(fd_a.clone(), Some(addr_b.clone())).unwrap(); + if peers[0].read_event(&mut fd_a, &initial_data).is_err() { break; } + + while start_time.elapsed() < std::time::Duration::from_secs(1) { + peers[0].process_events(); + if fd_a.disconnect.load(Ordering::Acquire) { break; } + let a_data = fd_a.outbound_data.lock().unwrap().split_off(0); + if peers[1].read_event(&mut fd_b, &a_data).is_err() { break; } + + peers[1].process_events(); + if fd_b.disconnect.load(Ordering::Acquire) { break; } + let b_data = fd_b.outbound_data.lock().unwrap().split_off(0); + if peers[0].read_event(&mut fd_a, &b_data).is_err() { break; } + + cfgs[0].chan_handler.pending_events.lock().unwrap() + .push(crate::util::events::MessageSendEvent::SendShutdown { + node_id: peers[1].node_signer.get_node_id(Recipient::Node).unwrap(), + msg: msgs::Shutdown { + channel_id: [0; 32], + scriptpubkey: bitcoin::Script::new(), + }, + }); + cfgs[1].chan_handler.pending_events.lock().unwrap() + .push(crate::util::events::MessageSendEvent::SendShutdown { + node_id: peers[0].node_signer.get_node_id(Recipient::Node).unwrap(), + msg: msgs::Shutdown { + channel_id: [0; 32], + scriptpubkey: bitcoin::Script::new(), + }, + }); + + if ctr % 2 == 0 { + peers[0].timer_tick_occurred(); + peers[1].timer_tick_occurred(); + } + } + + peers[0].socket_disconnected(&fd_a); + peers[1].socket_disconnected(&fd_b); + ctr += 1; + std::thread::sleep(std::time::Duration::from_micros(1)); + } + }) + } } } + let thrd_a = spawn_thread!(1); + let thrd_b = spawn_thread!(2); + + thrd_a.join().unwrap(); + thrd_b.join().unwrap(); + } + #[test] fn test_disconnect_peer() { // Simple test which builds a network of PeerManager, connects and brings them to NoiseState::Finished and // push a DisconnectPeer event to remove the node flagged by id let cfgs = create_peermgr_cfgs(2); - let chan_handler = test_utils::TestChannelMessageHandler::new(); - let mut peers = create_network(2, &cfgs); + let peers = create_network(2, &cfgs); establish_connection(&peers[0], &peers[1]); assert_eq!(peers[0].peers.read().unwrap().len(), 1); let their_id = peers[1].node_signer.get_node_id(Recipient::Node).unwrap(); - - chan_handler.pending_events.lock().unwrap().push(events::MessageSendEvent::HandleError { + cfgs[0].chan_handler.pending_events.lock().unwrap().push(events::MessageSendEvent::HandleError { node_id: their_id, action: msgs::ErrorAction::DisconnectPeer { msg: None }, }); - assert_eq!(chan_handler.pending_events.lock().unwrap().len(), 1); - peers[0].message_handler.chan_handler = &chan_handler; peers[0].process_events(); assert_eq!(peers[0].peers.read().unwrap().len(), 0); @@ -2213,6 +2431,38 @@ mod tests { assert_eq!(peers[1].read_event(&mut fd_b, &a_data).unwrap(), false); } + #[test] + fn test_non_init_first_msg() { + // Simple test of the first message received over a connection being something other than + // Init. This results in an immediate disconnection, which previously included a spurious + // peer_disconnected event handed to event handlers (which would panic in + // `TestChannelMessageHandler` here). + let cfgs = create_peermgr_cfgs(2); + let peers = create_network(2, &cfgs); + + let mut fd_dup = FileDescriptor { + fd: 3, outbound_data: Arc::new(Mutex::new(Vec::new())), + disconnect: Arc::new(AtomicBool::new(false)), + }; + let addr_dup = NetAddress::IPv4{addr: [127, 0, 0, 1], port: 1003}; + let id_a = cfgs[0].node_signer.get_node_id(Recipient::Node).unwrap(); + peers[0].new_inbound_connection(fd_dup.clone(), Some(addr_dup.clone())).unwrap(); + + let mut dup_encryptor = PeerChannelEncryptor::new_outbound(id_a, SecretKey::from_slice(&[42; 32]).unwrap()); + let initial_data = dup_encryptor.get_act_one(&peers[1].secp_ctx); + assert_eq!(peers[0].read_event(&mut fd_dup, &initial_data).unwrap(), false); + peers[0].process_events(); + + let a_data = fd_dup.outbound_data.lock().unwrap().split_off(0); + let (act_three, _) = + dup_encryptor.process_act_two(&a_data[..], &&cfgs[1].node_signer).unwrap(); + assert_eq!(peers[0].read_event(&mut fd_dup, &act_three).unwrap(), false); + + let not_init_msg = msgs::Ping { ponglen: 4, byteslen: 0 }; + let msg_bytes = dup_encryptor.encrypt_message(¬_init_msg); + assert!(peers[0].read_event(&mut fd_dup, &msg_bytes).is_err()); + } + #[test] fn test_disconnect_all_peer() { // Simple test which builds a network of PeerManager, connects and brings them to NoiseState::Finished and @@ -2297,8 +2547,14 @@ mod tests { let peers = create_network(2, &cfgs); let a_id = peers[0].node_signer.get_node_id(Recipient::Node).unwrap(); - let mut fd_a = FileDescriptor { fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())) }; - let mut fd_b = FileDescriptor { fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())) }; + let mut fd_a = FileDescriptor { + fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())), + disconnect: Arc::new(AtomicBool::new(false)), + }; + let mut fd_b = FileDescriptor { + fd: 1, outbound_data: Arc::new(Mutex::new(Vec::new())), + disconnect: Arc::new(AtomicBool::new(false)), + }; let initial_data = peers[1].new_outbound_connection(a_id, fd_b.clone(), None).unwrap(); peers[0].new_inbound_connection(fd_a.clone(), None).unwrap();