From: Matt Corallo Date: Thu, 17 Jun 2021 22:30:09 +0000 (+0000) Subject: Do not require that no calls are made post-disconnect_socket X-Git-Tag: v0.0.99~18^2~2 X-Git-Url: http://git.bitcoin.ninja/?a=commitdiff_plain;h=4703d4e72565ddfd150b9368ea036f4973fd7590;p=rust-lightning Do not require that no calls are made post-disconnect_socket The only practical way to meet this requirement is to block disconnect_socket until any pending events are fully processed, leading to this trivial deadlock: * Thread 1: select() woken up due to a read event * Thread 2: Event processing causes a disconnect_socket call to fire while the PeerManager lock is held. * Thread 2: disconnect_socket blocks until the read event in thread 1 completes. * Thread 1: bytes are read from the socket and PeerManager::read_event is called, waiting on the lock still held by thread 2. There isn't a trivial way to address this deadlock without simply making the final read_event call return immediately, which we do here. This also implies that users can freely call event methods after disconnect_socket, but only so far as the socket descriptor is different from any later socket descriptor (ie until the file descriptor is re-used). --- diff --git a/lightning/src/ln/peer_handler.rs b/lightning/src/ln/peer_handler.rs index 2eb9b2e78..a701200be 100644 --- a/lightning/src/ln/peer_handler.rs +++ b/lightning/src/ln/peer_handler.rs @@ -194,11 +194,8 @@ pub trait SocketDescriptor : cmp::Eq + hash::Hash + Clone { /// indicating that read events on this descriptor should resume. A resume_read of false does /// *not* imply that further read events should be paused. fn send_data(&mut self, data: &[u8], resume_read: bool) -> usize; - /// Disconnect the socket pointed to by this SocketDescriptor. Once this function returns, no - /// more calls to write_buffer_space_avail, read_event or socket_disconnected may be made with - /// this descriptor. No socket_disconnected call should be generated as a result of this call, - /// though races may occur whereby disconnect_socket is called after a call to - /// socket_disconnected but prior to socket_disconnected returning. + /// Disconnect the socket pointed to by this SocketDescriptor. + /// No [`PeerManager::socket_disconnected`] call need be generated as a result of this call. fn disconnect_socket(&mut self); } @@ -616,7 +613,12 @@ impl PeerManager Result<(), PeerHandleError> { let mut peers = self.peers.lock().unwrap(); match peers.peers.get_mut(descriptor) { - None => panic!("Descriptor for write_event is not already known to PeerManager"), + None => { + // This is most likely a simple race condition where the user found that the socket + // was writeable, then we told the user to `disconnect_socket()`, then they called + // this method. Return an error to make sure we get disconnected. + return Err(PeerHandleError { no_connection_possible: false }); + }, Some(peer) => { peer.awaiting_write_event = false; self.do_attempt_write_data(descriptor, peer); @@ -636,7 +638,6 @@ impl PeerManager Result { match self.do_read_event(peer_descriptor, data) { Ok(res) => Ok(res), @@ -664,7 +665,12 @@ impl PeerManager panic!("Descriptor for read_event is not already known to PeerManager"), + None => { + // This is most likely a simple race condition where the user read some bytes + // from the socket, then we told the user to `disconnect_socket()`, then they + // called this method. Return an error to make sure we get disconnected. + return Err(PeerHandleError { no_connection_possible: false }); + }, Some(peer) => { assert!(peer.pending_read_buffer.len() > 0); assert!(peer.pending_read_buffer.len() > peer.pending_read_buffer_pos); @@ -1292,12 +1298,9 @@ impl PeerManager PeerManager panic!("Descriptor for disconnect_event is not already known to PeerManager"), + None => { + // This is most likely a simple race condition where the user found that the socket + // was disconnected, then we told the user to `disconnect_socket()`, then they + // called this method. Either way we're disconnected, return. + }, Some(peer) => { match peer.their_node_id { Some(node_id) => {