X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=lightning-net-tokio%2Fsrc%2Flib.rs;h=98932c0eaa638d86a9fd9faefc271fc5b9ceb97c;hb=cd4cc203a2bbb716c8747bd45ac84e42b8f53b84;hp=e460df25e54d192e92548feb5b6053715798fd27;hpb=eff8af21103e43f763cb10ae6a75c1543a2d4068;p=rust-lightning diff --git a/lightning-net-tokio/src/lib.rs b/lightning-net-tokio/src/lib.rs index e460df25..98932c0e 100644 --- a/lightning-net-tokio/src/lib.rs +++ b/lightning-net-tokio/src/lib.rs @@ -1,93 +1,126 @@ +// This file is Copyright its original authors, visible in version control +// history. +// +// This file is licensed under the Apache License, Version 2.0 or the MIT license +// , at your option. +// You may not use this file except in accordance with one or both of these +// licenses. + //! A socket handling library for those running in Tokio environments who wish to use -//! rust-lightning with native TcpStreams. +//! rust-lightning with native [`TcpStream`]s. //! //! Designed to be as simple as possible, the high-level usage is almost as simple as "hand over a -//! TcpStream and a reference to a PeerManager and the rest is handled", except for the -//! [Event](../lightning/util/events/enum.Event.html) handlng mechanism, see below. -//! -//! The PeerHandler, due to the fire-and-forget nature of this logic, must be an Arc, and must use -//! the SocketDescriptor provided here as the PeerHandler's SocketDescriptor. -//! -//! Three methods are exposed to register a new connection for handling in tokio::spawn calls, see -//! their individual docs for more. All three take a -//! [mpsc::Sender<()>](../tokio/sync/mpsc/struct.Sender.html) which is sent into every time -//! something occurs which may result in lightning [Events](../lightning/util/events/enum.Event.html). -//! The call site should, thus, look something like this: -//! ``` -//! use tokio::sync::mpsc; -//! use tokio::net::TcpStream; -//! use bitcoin::secp256k1::key::PublicKey; -//! use lightning::util::events::EventsProvider; -//! use std::net::SocketAddr; -//! use std::sync::Arc; +//! [`TcpStream`] and a reference to a [`PeerManager`] and the rest is handled". //! -//! // Define concrete types for our high-level objects: -//! type TxBroadcaster = dyn lightning::chain::chaininterface::BroadcasterInterface; -//! type FeeEstimator = dyn lightning::chain::chaininterface::FeeEstimator; -//! type ChannelMonitor = lightning::ln::channelmonitor::SimpleManyChannelMonitor, Arc>; -//! type ChannelManager = lightning::ln::channelmanager::SimpleArcChannelManager; -//! type PeerManager = lightning::ln::peer_handler::SimpleArcPeerManager; +//! The [`PeerManager`], due to the fire-and-forget nature of this logic, must be a reference, +//! (e.g. an [`Arc`]) and must use the [`SocketDescriptor`] provided here as the [`PeerManager`]'s +//! `SocketDescriptor` implementation. //! -//! // Connect to node with pubkey their_node_id at addr: -//! async fn connect_to_node(peer_manager: PeerManager, channel_monitor: Arc, channel_manager: ChannelManager, their_node_id: PublicKey, addr: SocketAddr) { -//! let (sender, mut receiver) = mpsc::channel(2); -//! lightning_net_tokio::connect_outbound(peer_manager, sender, their_node_id, addr).await; -//! loop { -//! receiver.recv().await; -//! for _event in channel_manager.get_and_clear_pending_events().drain(..) { -//! // Handle the event! -//! } -//! for _event in channel_monitor.get_and_clear_pending_events().drain(..) { -//! // Handle the event! -//! } -//! } -//! } +//! 
Three methods are exposed to register a new connection for handling in [`tokio::spawn`] calls; +//! see their individual docs for details. //! -//! // Begin reading from a newly accepted socket and talk to the peer: -//! async fn accept_socket(peer_manager: PeerManager, channel_monitor: Arc, channel_manager: ChannelManager, socket: TcpStream) { -//! let (sender, mut receiver) = mpsc::channel(2); -//! lightning_net_tokio::setup_inbound(peer_manager, sender, socket); -//! loop { -//! receiver.recv().await; -//! for _event in channel_manager.get_and_clear_pending_events().drain(..) { -//! // Handle the event! -//! } -//! for _event in channel_monitor.get_and_clear_pending_events().drain(..) { -//! // Handle the event! -//! } -//! } -//! } -//! ``` - -use bitcoin::secp256k1::key::PublicKey; +//! [`PeerManager`]: lightning::ln::peer_handler::PeerManager + +#![deny(rustdoc::broken_intra_doc_links)] +#![deny(rustdoc::private_intra_doc_links)] + +#![deny(missing_docs)] +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + +use bitcoin::secp256k1::PublicKey; use tokio::net::TcpStream; -use tokio::{io, time}; +use tokio::time; use tokio::sync::mpsc; -use tokio::io::{AsyncReadExt, AsyncWrite, AsyncWriteExt}; use lightning::ln::peer_handler; use lightning::ln::peer_handler::SocketDescriptor as LnSocketTrait; -use lightning::ln::msgs::ChannelMessageHandler; +use lightning::ln::peer_handler::APeerManager; +use lightning::ln::msgs::SocketAddress; -use std::{task, thread}; +use std::ops::Deref; +use std::task::{self, Poll}; +use std::future::Future; use std::net::SocketAddr; -use std::sync::{Arc, Mutex, MutexGuard}; +use std::net::TcpStream as StdTcpStream; +use std::sync::{Arc, Mutex}; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::Duration; +use std::pin::Pin; use std::hash::Hash; static ID_COUNTER: AtomicU64 = AtomicU64::new(0); +// We only need to select over multiple futures in one place, and taking on the full `tokio/macros` +// dependency tree in order to do so (which has broken our MSRV before) is excessive. Instead, we +// define a trivial two- and three- select macro with the specific types we need and just use that. 
+ +pub(crate) enum SelectorOutput { + A(Option<()>), B(Option<()>), C(tokio::io::Result<()>), +} + +pub(crate) struct TwoSelector< + A: Future> + Unpin, B: Future> + Unpin +> { + pub a: A, + pub b: B, +} + +impl< + A: Future> + Unpin, B: Future> + Unpin +> Future for TwoSelector { + type Output = SelectorOutput; + fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { + match Pin::new(&mut self.a).poll(ctx) { + Poll::Ready(res) => { return Poll::Ready(SelectorOutput::A(res)); }, + Poll::Pending => {}, + } + match Pin::new(&mut self.b).poll(ctx) { + Poll::Ready(res) => { return Poll::Ready(SelectorOutput::B(res)); }, + Poll::Pending => {}, + } + Poll::Pending + } +} + +pub(crate) struct ThreeSelector< + A: Future> + Unpin, B: Future> + Unpin, C: Future> + Unpin +> { + pub a: A, + pub b: B, + pub c: C, +} + +impl< + A: Future> + Unpin, B: Future> + Unpin, C: Future> + Unpin +> Future for ThreeSelector { + type Output = SelectorOutput; + fn poll(mut self: Pin<&mut Self>, ctx: &mut task::Context<'_>) -> Poll { + match Pin::new(&mut self.a).poll(ctx) { + Poll::Ready(res) => { return Poll::Ready(SelectorOutput::A(res)); }, + Poll::Pending => {}, + } + match Pin::new(&mut self.b).poll(ctx) { + Poll::Ready(res) => { return Poll::Ready(SelectorOutput::B(res)); }, + Poll::Pending => {}, + } + match Pin::new(&mut self.c).poll(ctx) { + Poll::Ready(res) => { return Poll::Ready(SelectorOutput::C(res)); }, + Poll::Pending => {}, + } + Poll::Pending + } +} + /// Connection contains all our internal state for a connection - we hold a reference to the /// Connection object (in an Arc>) in each SocketDescriptor we create as well as in the /// read future (which is returned by schedule_read). struct Connection { - writer: Option>, - event_notify: mpsc::Sender<()>, + writer: Option>, // Because our PeerManager is templated by user-provided types, and we can't (as far as I can // tell) have a const RawWakerVTable built out of templated functions, we need some indirection - // between being woken up with write-ready and calling PeerManager::write_buffer_spce_avail. + // between being woken up with write-ready and calling PeerManager::write_buffer_space_avail. // This provides that indirection, with a Sender which gets handed to the PeerManager Arc on // the schedule_read stack. // @@ -101,30 +134,37 @@ struct Connection { // socket. To wake it up (without otherwise changing its state, we can push a value into this // Sender. read_waker: mpsc::Sender<()>, - // When we are told by rust-lightning to disconnect, we can't return to rust-lightning until we - // are sure we won't call any more read/write PeerManager functions with the same connection. - // This is set to true if we're in such a condition (with disconnect checked before with the - // top-level mutex held) and false when we can return. - block_disconnect_socket: bool, read_paused: bool, rl_requested_disconnect: bool, id: u64, } impl Connection { - fn event_trigger(us: &mut MutexGuard) { - match us.event_notify.try_send(()) { - Ok(_) => {}, - Err(mpsc::error::TrySendError::Full(_)) => { - // Ignore full errors as we just need the user to poll after this point, so if they - // haven't received the last send yet, it doesn't matter. 
- }, - _ => panic!() + async fn poll_event_process( + peer_manager: PM, + mut event_receiver: mpsc::Receiver<()>, + ) where PM::Target: APeerManager { + loop { + if event_receiver.recv().await.is_none() { + return; + } + peer_manager.as_ref().process_events(); } } - async fn schedule_read(peer_manager: Arc>>, us: Arc>, mut reader: io::ReadHalf, mut read_wake_receiver: mpsc::Receiver<()>, mut write_avail_receiver: mpsc::Receiver<()>) { - let peer_manager_ref = peer_manager.clone(); - // 8KB is nice and big but also should never cause any issues with stack overflowing. - let mut buf = [0; 8192]; + + async fn schedule_read( + peer_manager: PM, + us: Arc>, + reader: Arc, + mut read_wake_receiver: mpsc::Receiver<()>, + mut write_avail_receiver: mpsc::Receiver<()>, + ) where PM::Target: APeerManager { + // Create a waker to wake up poll_event_process, above + let (event_waker, event_receiver) = mpsc::channel(1); + tokio::spawn(Self::poll_event_process(peer_manager.clone(), event_receiver)); + + // 4KiB is nice and big without handling too many messages all at once, giving other peers + // a chance to do some work. + let mut buf = [0; 4096]; let mut our_descriptor = SocketDescriptor::new(us.clone()); // An enum describing why we did/are disconnecting: @@ -139,69 +179,82 @@ impl Connection { // In this case, we do need to call peer_manager.socket_disconnected() to inform // Rust-Lightning that the socket is gone. PeerDisconnected - }; + } let disconnect_type = loop { - macro_rules! shutdown_socket { - ($err: expr, $need_disconnect: expr) => { { - println!("Disconnecting peer due to {}!", $err); - break $need_disconnect; - } } - } - - macro_rules! prepare_read_write_call { - () => { { - let mut us_lock = us.lock().unwrap(); - if us_lock.rl_requested_disconnect { - shutdown_socket!("disconnect_socket() call from RL", Disconnect::CloseConnection); - } - us_lock.block_disconnect_socket = true; - } } - } - - let read_paused = us.lock().unwrap().read_paused; - tokio::select! { - v = write_avail_receiver.recv() => { + let read_paused = { + let us_lock = us.lock().unwrap(); + if us_lock.rl_requested_disconnect { + break Disconnect::CloseConnection; + } + us_lock.read_paused + }; + // TODO: Drop the Box'ing of the futures once Rust has pin-on-stack support. + let select_result = if read_paused { + TwoSelector { + a: Box::pin(write_avail_receiver.recv()), + b: Box::pin(read_wake_receiver.recv()), + }.await + } else { + ThreeSelector { + a: Box::pin(write_avail_receiver.recv()), + b: Box::pin(read_wake_receiver.recv()), + c: Box::pin(reader.readable()), + }.await + }; + match select_result { + SelectorOutput::A(v) => { assert!(v.is_some()); // We can't have dropped the sending end, its in the us Arc! 
- prepare_read_write_call!(); - if let Err(e) = peer_manager.write_buffer_space_avail(&mut our_descriptor) { - shutdown_socket!(e, Disconnect::CloseConnection); + if peer_manager.as_ref().write_buffer_space_avail(&mut our_descriptor).is_err() { + break Disconnect::CloseConnection; } - us.lock().unwrap().block_disconnect_socket = false; }, - _ = read_wake_receiver.recv() => {}, - read = reader.read(&mut buf), if !read_paused => match read { - Ok(0) => shutdown_socket!("Connection closed", Disconnect::PeerDisconnected), - Ok(len) => { - prepare_read_write_call!(); - let read_res = peer_manager.read_event(&mut our_descriptor, &buf[0..len]); - let mut us_lock = us.lock().unwrap(); - match read_res { - Ok(pause_read) => { - if pause_read { - us_lock.read_paused = true; - } - Self::event_trigger(&mut us_lock); - }, - Err(e) => shutdown_socket!(e, Disconnect::CloseConnection), - } - us_lock.block_disconnect_socket = false; - }, - Err(e) => shutdown_socket!(e, Disconnect::PeerDisconnected), + SelectorOutput::B(some) => { + // The mpsc Receiver should only return `None` if the write side has been + // dropped, but that shouldn't be possible since its referenced by the Self in + // `us`. + debug_assert!(some.is_some()); + }, + SelectorOutput::C(res) => { + if res.is_err() { break Disconnect::PeerDisconnected; } + match reader.try_read(&mut buf) { + Ok(0) => break Disconnect::PeerDisconnected, + Ok(len) => { + let read_res = peer_manager.as_ref().read_event(&mut our_descriptor, &buf[0..len]); + let mut us_lock = us.lock().unwrap(); + match read_res { + Ok(pause_read) => { + if pause_read { + us_lock.read_paused = true; + } + }, + Err(_) => break Disconnect::CloseConnection, + } + }, + Err(e) if e.kind() == std::io::ErrorKind::WouldBlock => { + // readable() is allowed to spuriously wake, so we have to handle + // WouldBlock here. + }, + Err(_) => break Disconnect::PeerDisconnected, + } }, } + let _ = event_waker.try_send(()); + + // At this point we've processed a message or two, and reset the ping timer for this + // peer, at least in the "are we still receiving messages" context, if we don't give up + // our timeslice to another task we may just spin on this peer, starving other peers + // and eventually disconnecting them for ping timeouts. Instead, we explicitly yield + // here. + let _ = tokio::task::yield_now().await; }; - let writer_option = us.lock().unwrap().writer.take(); - if let Some(mut writer) = writer_option { - // If the socket is already closed, shutdown() will fail, so just ignore it. - let _ = writer.shutdown().await; - } + us.lock().unwrap().writer.take(); if let Disconnect::PeerDisconnected = disconnect_type { - peer_manager_ref.socket_disconnected(&our_descriptor); - Self::event_trigger(&mut us.lock().unwrap()); + peer_manager.as_ref().socket_disconnected(&our_descriptor); + peer_manager.as_ref().process_events(); } } - fn new(event_notify: mpsc::Sender<()>, stream: TcpStream) -> (io::ReadHalf, mpsc::Receiver<()>, mpsc::Receiver<()>, Arc>) { + fn new(stream: StdTcpStream) -> (Arc, mpsc::Receiver<()>, mpsc::Receiver<()>, Arc>) { // We only ever need a channel of depth 1 here: if we returned a non-full write to the // PeerManager, we will eventually get notified that there is room in the socket to write // new bytes, which will generate an event. That event will be popped off the queue before @@ -212,31 +265,49 @@ impl Connection { // we shove a value into the channel which comes after we've reset the read_paused bool to // false. 
let (read_waker, read_receiver) = mpsc::channel(1); - let (reader, writer) = io::split(stream); + stream.set_nonblocking(true).unwrap(); + let tokio_stream = Arc::new(TcpStream::from_std(stream).unwrap()); - (reader, write_receiver, read_receiver, + (Arc::clone(&tokio_stream), write_receiver, read_receiver, Arc::new(Mutex::new(Self { - writer: Some(writer), event_notify, write_avail, read_waker, read_paused: false, - block_disconnect_socket: false, rl_requested_disconnect: false, + writer: Some(tokio_stream), write_avail, read_waker, read_paused: false, + rl_requested_disconnect: false, id: ID_COUNTER.fetch_add(1, Ordering::AcqRel) }))) } } +fn get_addr_from_stream(stream: &StdTcpStream) -> Option { + match stream.peer_addr() { + Ok(SocketAddr::V4(sockaddr)) => Some(SocketAddress::TcpIpV4 { + addr: sockaddr.ip().octets(), + port: sockaddr.port(), + }), + Ok(SocketAddr::V6(sockaddr)) => Some(SocketAddress::TcpIpV6 { + addr: sockaddr.ip().octets(), + port: sockaddr.port(), + }), + Err(_) => None, + } +} + /// Process incoming messages and feed outgoing messages on the provided socket generated by /// accepting an incoming connection. /// /// The returned future will complete when the peer is disconnected and associated handling /// futures are freed, though, because all processing futures are spawned with tokio::spawn, you do /// not need to poll the provided future in order to make progress. -/// -/// See the module-level documentation for how to handle the event_notify mpsc::Sender. -pub fn setup_inbound(peer_manager: Arc>>, event_notify: mpsc::Sender<()>, stream: TcpStream) -> impl std::future::Future { - let (reader, write_receiver, read_receiver, us) = Connection::new(event_notify, stream); - #[cfg(debug_assertions)] +pub fn setup_inbound( + peer_manager: PM, + stream: StdTcpStream, +) -> impl std::future::Future +where PM::Target: APeerManager { + let remote_addr = get_addr_from_stream(&stream); + let (reader, write_receiver, read_receiver, us) = Connection::new(stream); + #[cfg(test)] let last_us = Arc::clone(&us); - let handle_opt = if let Ok(_) = peer_manager.new_inbound_connection(SocketDescriptor::new(us.clone())) { + let handle_opt = if peer_manager.as_ref().new_inbound_connection(SocketDescriptor::new(us.clone()), remote_addr).is_ok() { Some(tokio::spawn(Connection::schedule_read(peer_manager, us, reader, read_receiver, write_receiver))) } else { // Note that we will skip socket_disconnected here, in accordance with the PeerManager @@ -254,8 +325,8 @@ pub fn setup_inbound(peer_manager: Arc(peer_manager: Arc(peer_manager: Arc>>, event_notify: mpsc::Sender<()>, their_node_id: PublicKey, stream: TcpStream) -> impl std::future::Future { - let (reader, mut write_receiver, read_receiver, us) = Connection::new(event_notify, stream); - #[cfg(debug_assertions)] +pub fn setup_outbound( + peer_manager: PM, + their_node_id: PublicKey, + stream: StdTcpStream, +) -> impl std::future::Future +where PM::Target: APeerManager { + let remote_addr = get_addr_from_stream(&stream); + let (reader, mut write_receiver, read_receiver, us) = Connection::new(stream); + #[cfg(test)] let last_us = Arc::clone(&us); - - let handle_opt = if let Ok(initial_send) = peer_manager.new_outbound_connection(their_node_id, SocketDescriptor::new(us.clone())) { + let handle_opt = if let Ok(initial_send) = peer_manager.as_ref().new_outbound_connection(their_node_id, SocketDescriptor::new(us.clone()), remote_addr) { Some(tokio::spawn(async move { // We should essentially always have enough room in a TCP socket buffer to 
send the // initial 10s of bytes. However, tokio running in single-threaded mode will always @@ -294,7 +368,7 @@ pub fn setup_outbound(peer_manager: Arc { eprintln!("Failed to write first full message to socket!"); - peer_manager.socket_disconnected(&SocketDescriptor::new(Arc::clone(&us))); + peer_manager.as_ref().socket_disconnected(&SocketDescriptor::new(Arc::clone(&us))); break Err(()); } } @@ -319,8 +393,8 @@ pub fn setup_outbound(peer_manager: Arc(peer_manager: Arc(peer_manager: Arc>>, event_notify: mpsc::Sender<()>, their_node_id: PublicKey, addr: SocketAddr) -> Option> { - if let Ok(Ok(stream)) = time::timeout(Duration::from_secs(10), TcpStream::connect(&addr)).await { - Some(setup_outbound(peer_manager, event_notify, their_node_id, stream)) +pub async fn connect_outbound( + peer_manager: PM, + their_node_id: PublicKey, + addr: SocketAddr, +) -> Option> +where PM::Target: APeerManager { + if let Ok(Ok(stream)) = time::timeout(Duration::from_secs(10), async { TcpStream::connect(&addr).await.map(|s| s.into_std().unwrap()) }).await { + Some(setup_outbound(peer_manager, their_node_id, stream)) } else { None } } @@ -349,7 +426,11 @@ const SOCK_WAKER_VTABLE: task::RawWakerVTable = task::RawWakerVTable::new(clone_socket_waker, wake_socket_waker, wake_socket_waker_by_ref, drop_socket_waker); fn clone_socket_waker(orig_ptr: *const ()) -> task::RawWaker { - write_avail_to_waker(orig_ptr as *const mpsc::Sender<()>) + let new_waker = unsafe { Arc::from_raw(orig_ptr as *const mpsc::Sender<()>) }; + let res = write_avail_to_waker(&new_waker); + // Don't decrement the refcount when dropping new_waker by turning it back `into_raw`. + let _ = Arc::into_raw(new_waker); + res } // When waking, an error should be fine. Most likely we got two send_datas in a row, both of which // failed to fully write, but we only need to call write_buffer_space_avail() once. Otherwise, the @@ -362,16 +443,15 @@ fn wake_socket_waker(orig_ptr: *const ()) { } fn wake_socket_waker_by_ref(orig_ptr: *const ()) { let sender_ptr = orig_ptr as *const mpsc::Sender<()>; - let mut sender = unsafe { (*sender_ptr).clone() }; + let sender = unsafe { &*sender_ptr }; let _ = sender.try_send(()); } fn drop_socket_waker(orig_ptr: *const ()) { - let _orig_box = unsafe { Box::from_raw(orig_ptr as *mut mpsc::Sender<()>) }; - // _orig_box is now dropped + let _orig_arc = unsafe { Arc::from_raw(orig_ptr as *mut mpsc::Sender<()>) }; + // _orig_arc is now dropped } -fn write_avail_to_waker(sender: *const mpsc::Sender<()>) -> task::RawWaker { - let new_box = Box::leak(Box::new(unsafe { (*sender).clone() })); - let new_ptr = new_box as *const mpsc::Sender<()>; +fn write_avail_to_waker(sender: &Arc>) -> task::RawWaker { + let new_ptr = Arc::into_raw(Arc::clone(&sender)); task::RawWaker::new(new_ptr as *const (), &SOCK_WAKER_VTABLE) } @@ -379,19 +459,27 @@ fn write_avail_to_waker(sender: *const mpsc::Sender<()>) -> task::RawWaker { /// type in the template of PeerHandler. pub struct SocketDescriptor { conn: Arc>, + // We store a copy of the mpsc::Sender to wake the read task in an Arc here. While we can + // simply clone the sender and store a copy in each waker, that would require allocating for + // each waker. Instead, we can simply `Arc::clone`, creating a new reference and store the + // pointer in the waker. 
+ write_avail_sender: Arc>, id: u64, } impl SocketDescriptor { fn new(conn: Arc>) -> Self { - let id = conn.lock().unwrap().id; - Self { conn, id } + let (id, write_avail_sender) = { + let us = conn.lock().unwrap(); + (us.id, Arc::new(us.write_avail.clone())) + }; + Self { conn, id, write_avail_sender } } } impl peer_handler::SocketDescriptor for SocketDescriptor { fn send_data(&mut self, data: &[u8], resume_read: bool) -> usize { - // To send data, we take a lock on our Connection to access the WriteHalf of the TcpStream, - // writing to it if there's room in the kernel buffer, or otherwise create a new Waker with - // a SocketDescriptor in it which can wake up the write_avail Sender, waking up the + // To send data, we take a lock on our Connection to access the TcpStream, writing to it if + // there's room in the kernel buffer, or otherwise create a new Waker with a + // SocketDescriptor in it which can wake up the write_avail Sender, waking up the // processing future which will call write_buffer_space_avail and we'll end up back here. let mut us = self.conn.lock().unwrap(); if us.writer.is_none() { @@ -407,33 +495,33 @@ impl peer_handler::SocketDescriptor for SocketDescriptor { let _ = us.read_waker.try_send(()); } if data.is_empty() { return 0; } - let waker = unsafe { task::Waker::from_raw(write_avail_to_waker(&us.write_avail)) }; + let waker = unsafe { task::Waker::from_raw(write_avail_to_waker(&self.write_avail_sender)) }; let mut ctx = task::Context::from_waker(&waker); let mut written_len = 0; loop { - match std::pin::Pin::new(us.writer.as_mut().unwrap()).poll_write(&mut ctx, &data[written_len..]) { - task::Poll::Ready(Ok(res)) => { - // The tokio docs *seem* to indicate this can't happen, and I certainly don't - // know how to handle it if it does (cause it should be a Poll::Pending - // instead): - assert_ne!(res, 0); - written_len += res; - if written_len == data.len() { return written_len; } - }, - task::Poll::Ready(Err(e)) => { - // The tokio docs *seem* to indicate this can't happen, and I certainly don't - // know how to handle it if it does (cause it should be a Poll::Pending - // instead): - assert_ne!(e.kind(), io::ErrorKind::WouldBlock); - // Probably we've already been closed, just return what we have and let the - // read thread handle closing logic. - return written_len; + match us.writer.as_ref().unwrap().poll_write_ready(&mut ctx) { + task::Poll::Ready(Ok(())) => { + match us.writer.as_ref().unwrap().try_write(&data[written_len..]) { + Ok(res) => { + debug_assert_ne!(res, 0); + written_len += res; + if written_len == data.len() { return written_len; } + }, + Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => { + continue; + } + Err(_) => return written_len, + } }, + task::Poll::Ready(Err(_)) => return written_len, task::Poll::Pending => { // We're queued up for a write event now, but we need to make sure we also // pause read given we're now waiting on the remote end to ACK (and in // accordance with the send_data() docs). us.read_paused = true; + // Further, to avoid any current pending read causing a `read_event` call, wake + // up the read_waker and restart its loop. 
+ let _ = us.read_waker.try_send(()); return written_len; }, } @@ -441,18 +529,10 @@ impl peer_handler::SocketDescriptor for SocketDescriptor { } fn disconnect_socket(&mut self) { - { - let mut us = self.conn.lock().unwrap(); - us.rl_requested_disconnect = true; - us.read_paused = true; - // Wake up the sending thread, assuming it is still alive - let _ = us.write_avail.try_send(()); - // Happy-path return: - if !us.block_disconnect_socket { return; } - } - while self.conn.lock().unwrap().block_disconnect_socket { - thread::yield_now(); - } + let mut us = self.conn.lock().unwrap(); + us.rl_requested_disconnect = true; + // Wake up the sending thread, assuming it is still alive + let _ = us.write_avail.try_send(()); } } impl Clone for SocketDescriptor { @@ -460,6 +540,7 @@ impl Clone for SocketDescriptor { Self { conn: Arc::clone(&self.conn), id: self.id, + write_avail_sender: Arc::clone(&self.write_avail_sender), } } } @@ -480,18 +561,23 @@ mod tests { use lightning::ln::features::*; use lightning::ln::msgs::*; use lightning::ln::peer_handler::{MessageHandler, PeerManager}; - use lightning::util::events::*; + use lightning::routing::gossip::NodeId; + use lightning::events::*; + use lightning::util::test_utils::TestNodeSigner; + use bitcoin::Network; + use bitcoin::blockdata::constants::ChainHash; use bitcoin::secp256k1::{Secp256k1, SecretKey, PublicKey}; use tokio::sync::mpsc; use std::mem; + use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Mutex}; use std::time::Duration; pub struct TestLogger(); impl lightning::util::logger::Logger for TestLogger { - fn log(&self, record: &lightning::util::logger::Record) { + fn log(&self, record: lightning::util::logger::Record) { println!("{:<5} [{} : {}, {}] {}", record.level.to_string(), record.module_path, record.file, record.line, record.args); } } @@ -500,23 +586,30 @@ mod tests { expected_pubkey: PublicKey, pubkey_connected: mpsc::Sender<()>, pubkey_disconnected: mpsc::Sender<()>, + disconnected_flag: AtomicBool, msg_events: Mutex>, } impl RoutingMessageHandler for MsgHandler { fn handle_node_announcement(&self, _msg: &NodeAnnouncement) -> Result { Ok(false) } fn handle_channel_announcement(&self, _msg: &ChannelAnnouncement) -> Result { Ok(false) } fn handle_channel_update(&self, _msg: &ChannelUpdate) -> Result { Ok(false) } - fn handle_htlc_fail_channel_update(&self, _update: &HTLCFailChannelUpdate) { } - fn get_next_channel_announcements(&self, _starting_point: u64, _batch_amount: u8) -> Vec<(ChannelAnnouncement, Option, Option)> { Vec::new() } - fn get_next_node_announcements(&self, _starting_point: Option<&PublicKey>, _batch_amount: u8) -> Vec { Vec::new() } - fn should_request_full_sync(&self, _node_id: &PublicKey) -> bool { false } + fn get_next_channel_announcement(&self, _starting_point: u64) -> Option<(ChannelAnnouncement, Option, Option)> { None } + fn get_next_node_announcement(&self, _starting_point: Option<&NodeId>) -> Option { None } + fn peer_connected(&self, _their_node_id: &PublicKey, _init_msg: &Init, _inbound: bool) -> Result<(), ()> { Ok(()) } + fn handle_reply_channel_range(&self, _their_node_id: &PublicKey, _msg: ReplyChannelRange) -> Result<(), LightningError> { Ok(()) } + fn handle_reply_short_channel_ids_end(&self, _their_node_id: &PublicKey, _msg: ReplyShortChannelIdsEnd) -> Result<(), LightningError> { Ok(()) } + fn handle_query_channel_range(&self, _their_node_id: &PublicKey, _msg: QueryChannelRange) -> Result<(), LightningError> { Ok(()) } + fn handle_query_short_channel_ids(&self, _their_node_id: 
&PublicKey, _msg: QueryShortChannelIds) -> Result<(), LightningError> { Ok(()) } + fn provided_node_features(&self) -> NodeFeatures { NodeFeatures::empty() } + fn provided_init_features(&self, _their_node_id: &PublicKey) -> InitFeatures { InitFeatures::empty() } + fn processing_queue_high(&self) -> bool { false } } impl ChannelMessageHandler for MsgHandler { - fn handle_open_channel(&self, _their_node_id: &PublicKey, _their_features: InitFeatures, _msg: &OpenChannel) {} - fn handle_accept_channel(&self, _their_node_id: &PublicKey, _their_features: InitFeatures, _msg: &AcceptChannel) {} + fn handle_open_channel(&self, _their_node_id: &PublicKey, _msg: &OpenChannel) {} + fn handle_accept_channel(&self, _their_node_id: &PublicKey, _msg: &AcceptChannel) {} fn handle_funding_created(&self, _their_node_id: &PublicKey, _msg: &FundingCreated) {} fn handle_funding_signed(&self, _their_node_id: &PublicKey, _msg: &FundingSigned) {} - fn handle_funding_locked(&self, _their_node_id: &PublicKey, _msg: &FundingLocked) {} + fn handle_channel_ready(&self, _their_node_id: &PublicKey, _msg: &ChannelReady) {} fn handle_shutdown(&self, _their_node_id: &PublicKey, _msg: &Shutdown) {} fn handle_closing_signed(&self, _their_node_id: &PublicKey, _msg: &ClosingSigned) {} fn handle_update_add_htlc(&self, _their_node_id: &PublicKey, _msg: &UpdateAddHTLC) {} @@ -527,18 +620,44 @@ mod tests { fn handle_revoke_and_ack(&self, _their_node_id: &PublicKey, _msg: &RevokeAndACK) {} fn handle_update_fee(&self, _their_node_id: &PublicKey, _msg: &UpdateFee) {} fn handle_announcement_signatures(&self, _their_node_id: &PublicKey, _msg: &AnnouncementSignatures) {} - fn peer_disconnected(&self, their_node_id: &PublicKey, _no_connection_possible: bool) { + fn handle_channel_update(&self, _their_node_id: &PublicKey, _msg: &ChannelUpdate) {} + fn handle_open_channel_v2(&self, _their_node_id: &PublicKey, _msg: &OpenChannelV2) {} + fn handle_accept_channel_v2(&self, _their_node_id: &PublicKey, _msg: &AcceptChannelV2) {} + fn handle_stfu(&self, _their_node_id: &PublicKey, _msg: &Stfu) {} + #[cfg(dual_funding)] + fn handle_splice(&self, _their_node_id: &PublicKey, _msg: &Splice) {} + #[cfg(dual_funding)] + fn handle_splice_ack(&self, _their_node_id: &PublicKey, _msg: &SpliceAck) {} + #[cfg(dual_funding)] + fn handle_splice_locked(&self, _their_node_id: &PublicKey, _msg: &SpliceLocked) {} + fn handle_tx_add_input(&self, _their_node_id: &PublicKey, _msg: &TxAddInput) {} + fn handle_tx_add_output(&self, _their_node_id: &PublicKey, _msg: &TxAddOutput) {} + fn handle_tx_remove_input(&self, _their_node_id: &PublicKey, _msg: &TxRemoveInput) {} + fn handle_tx_remove_output(&self, _their_node_id: &PublicKey, _msg: &TxRemoveOutput) {} + fn handle_tx_complete(&self, _their_node_id: &PublicKey, _msg: &TxComplete) {} + fn handle_tx_signatures(&self, _their_node_id: &PublicKey, _msg: &TxSignatures) {} + fn handle_tx_init_rbf(&self, _their_node_id: &PublicKey, _msg: &TxInitRbf) {} + fn handle_tx_ack_rbf(&self, _their_node_id: &PublicKey, _msg: &TxAckRbf) {} + fn handle_tx_abort(&self, _their_node_id: &PublicKey, _msg: &TxAbort) {} + fn peer_disconnected(&self, their_node_id: &PublicKey) { if *their_node_id == self.expected_pubkey { + self.disconnected_flag.store(true, Ordering::SeqCst); self.pubkey_disconnected.clone().try_send(()).unwrap(); } } - fn peer_connected(&self, their_node_id: &PublicKey, _msg: &Init) { + fn peer_connected(&self, their_node_id: &PublicKey, _init_msg: &Init, _inbound: bool) -> Result<(), ()> { if *their_node_id == 
self.expected_pubkey { self.pubkey_connected.clone().try_send(()).unwrap(); } + Ok(()) } fn handle_channel_reestablish(&self, _their_node_id: &PublicKey, _msg: &ChannelReestablish) {} fn handle_error(&self, _their_node_id: &PublicKey, _msg: &ErrorMessage) {} + fn provided_node_features(&self) -> NodeFeatures { NodeFeatures::empty() } + fn provided_init_features(&self, _their_node_id: &PublicKey) -> InitFeatures { InitFeatures::empty() } + fn get_chain_hashes(&self) -> Option> { + Some(vec![ChainHash::using_genesis_block(Network::Testnet)]) + } } impl MessageSendEventsProvider for MsgHandler { fn get_and_clear_pending_msg_events(&self) -> Vec { @@ -548,6 +667,22 @@ mod tests { } } + fn make_tcp_connection() -> (std::net::TcpStream, std::net::TcpStream) { + if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:9735") { + (std::net::TcpStream::connect("127.0.0.1:9735").unwrap(), listener.accept().unwrap().0) + } else if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:19735") { + (std::net::TcpStream::connect("127.0.0.1:19735").unwrap(), listener.accept().unwrap().0) + } else if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:9997") { + (std::net::TcpStream::connect("127.0.0.1:9997").unwrap(), listener.accept().unwrap().0) + } else if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:9998") { + (std::net::TcpStream::connect("127.0.0.1:9998").unwrap(), listener.accept().unwrap().0) + } else if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:9999") { + (std::net::TcpStream::connect("127.0.0.1:9999").unwrap(), listener.accept().unwrap().0) + } else if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:46926") { + (std::net::TcpStream::connect("127.0.0.1:46926").unwrap(), listener.accept().unwrap().0) + } else { panic!("Failed to bind to v4 localhost on common ports"); } + } + async fn do_basic_connection_test() { let secp_ctx = Secp256k1::new(); let a_key = SecretKey::from_slice(&[1; 32]).unwrap(); @@ -561,12 +696,15 @@ mod tests { expected_pubkey: b_pub, pubkey_connected: a_connected_sender, pubkey_disconnected: a_disconnected_sender, + disconnected_flag: AtomicBool::new(false), msg_events: Mutex::new(Vec::new()), }); let a_manager = Arc::new(PeerManager::new(MessageHandler { chan_handler: Arc::clone(&a_handler), - route_handler: Arc::clone(&a_handler) as Arc, - }, a_key.clone(), &[1; 32], Arc::new(TestLogger()))); + route_handler: Arc::clone(&a_handler), + onion_message_handler: Arc::new(lightning::ln::peer_handler::IgnoringMessageHandler{}), + custom_message_handler: Arc::new(lightning::ln::peer_handler::IgnoringMessageHandler{}), + }, 0, &[1; 32], Arc::new(TestLogger()), Arc::new(TestNodeSigner::new(a_key)))); let (b_connected_sender, mut b_connected) = mpsc::channel(1); let (b_disconnected_sender, mut b_disconnected) = mpsc::channel(1); @@ -574,28 +712,24 @@ mod tests { expected_pubkey: a_pub, pubkey_connected: b_connected_sender, pubkey_disconnected: b_disconnected_sender, + disconnected_flag: AtomicBool::new(false), msg_events: Mutex::new(Vec::new()), }); let b_manager = Arc::new(PeerManager::new(MessageHandler { chan_handler: Arc::clone(&b_handler), - route_handler: Arc::clone(&b_handler) as Arc, - }, b_key.clone(), &[2; 32], Arc::new(TestLogger()))); + route_handler: Arc::clone(&b_handler), + onion_message_handler: Arc::new(lightning::ln::peer_handler::IgnoringMessageHandler{}), + custom_message_handler: Arc::new(lightning::ln::peer_handler::IgnoringMessageHandler{}), + }, 0, &[2; 32], Arc::new(TestLogger()), 
Arc::new(TestNodeSigner::new(b_key)))); // We bind on localhost, hoping the environment is properly configured with a local // address. This may not always be the case in containers and the like, so if this test is // failing for you check that you have a loopback interface and it is configured with // 127.0.0.1. - let (conn_a, conn_b) = if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:9735") { - (std::net::TcpStream::connect("127.0.0.1:9735").unwrap(), listener.accept().unwrap().0) - } else if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:9999") { - (std::net::TcpStream::connect("127.0.0.1:9999").unwrap(), listener.accept().unwrap().0) - } else if let Ok(listener) = std::net::TcpListener::bind("127.0.0.1:46926") { - (std::net::TcpStream::connect("127.0.0.1:46926").unwrap(), listener.accept().unwrap().0) - } else { panic!("Failed to bind to v4 localhost on common ports"); }; + let (conn_a, conn_b) = make_tcp_connection(); - let (sender, _receiver) = mpsc::channel(2); - let fut_a = super::setup_outbound(Arc::clone(&a_manager), sender.clone(), b_pub, tokio::net::TcpStream::from_std(conn_a).unwrap()); - let fut_b = super::setup_inbound(b_manager, sender, tokio::net::TcpStream::from_std(conn_b).unwrap()); + let fut_a = super::setup_outbound(Arc::clone(&a_manager), b_pub, conn_a); + let fut_b = super::setup_inbound(b_manager, conn_b); tokio::time::timeout(Duration::from_secs(10), a_connected.recv()).await.unwrap(); tokio::time::timeout(Duration::from_secs(1), b_connected.recv()).await.unwrap(); @@ -603,23 +737,72 @@ mod tests { a_handler.msg_events.lock().unwrap().push(MessageSendEvent::HandleError { node_id: b_pub, action: ErrorAction::DisconnectPeer { msg: None } }); - assert!(a_disconnected.try_recv().is_err()); - assert!(b_disconnected.try_recv().is_err()); + assert!(!a_handler.disconnected_flag.load(Ordering::SeqCst)); + assert!(!b_handler.disconnected_flag.load(Ordering::SeqCst)); a_manager.process_events(); tokio::time::timeout(Duration::from_secs(10), a_disconnected.recv()).await.unwrap(); tokio::time::timeout(Duration::from_secs(1), b_disconnected.recv()).await.unwrap(); + assert!(a_handler.disconnected_flag.load(Ordering::SeqCst)); + assert!(b_handler.disconnected_flag.load(Ordering::SeqCst)); fut_a.await; fut_b.await; } - #[tokio::test(threaded_scheduler)] + #[tokio::test(flavor = "multi_thread")] async fn basic_threaded_connection_test() { do_basic_connection_test().await; } + #[tokio::test] async fn basic_unthreaded_connection_test() { do_basic_connection_test().await; } + + async fn race_disconnect_accept() { + // Previously, if we handed an already-disconnected socket to `setup_inbound` we'd panic. + // This attempts to find other similar races by opening connections and shutting them down + // while connecting. Sadly in testing this did *not* reproduce the previous issue. 
+ let secp_ctx = Secp256k1::new(); + let a_key = SecretKey::from_slice(&[1; 32]).unwrap(); + let b_key = SecretKey::from_slice(&[2; 32]).unwrap(); + let b_pub = PublicKey::from_secret_key(&secp_ctx, &b_key); + + let a_manager = Arc::new(PeerManager::new(MessageHandler { + chan_handler: Arc::new(lightning::ln::peer_handler::ErroringMessageHandler::new()), + onion_message_handler: Arc::new(lightning::ln::peer_handler::IgnoringMessageHandler{}), + route_handler: Arc::new(lightning::ln::peer_handler::IgnoringMessageHandler{}), + custom_message_handler: Arc::new(lightning::ln::peer_handler::IgnoringMessageHandler{}), + }, 0, &[1; 32], Arc::new(TestLogger()), Arc::new(TestNodeSigner::new(a_key)))); + + // Make two connections, one for an inbound and one for an outbound connection + let conn_a = { + let (conn_a, _) = make_tcp_connection(); + conn_a + }; + let conn_b = { + let (_, conn_b) = make_tcp_connection(); + conn_b + }; + + // Call connection setup inside new tokio tasks. + let manager_reference = Arc::clone(&a_manager); + tokio::spawn(async move { + super::setup_inbound(manager_reference, conn_a).await + }); + tokio::spawn(async move { + super::setup_outbound(a_manager, b_pub, conn_b).await + }); + } + + #[tokio::test(flavor = "multi_thread")] + async fn threaded_race_disconnect_accept() { + race_disconnect_accept().await; + } + + #[tokio::test] + async fn unthreaded_race_disconnect_accept() { + race_disconnect_accept().await; + } }
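
The rewritten module documentation in this patch drops the old end-to-end usage example, so for orientation, a minimal sketch of driving the post-change API is shown below. Only `connect_outbound`, `setup_inbound`, and their argument lists are taken from the diff itself; `MyPeerManager` is a hypothetical alias for a fully-parameterized `lightning::ln::peer_handler::PeerManager`, and the snippet is illustrative rather than a drop-in implementation.

```rust
// Hypothetical sketch, not part of the patch: driving the entry points shown above.
// `MyPeerManager` stands in for a concrete, fully-parameterized PeerManager type
// whose Arc satisfies the `APeerManager` bound required by this crate.
use std::net::SocketAddr;
use std::sync::Arc;
use bitcoin::secp256k1::PublicKey;

async fn connect_to_node(peer_manager: Arc<MyPeerManager>, their_node_id: PublicKey, addr: SocketAddr) {
    // connect_outbound performs the TCP connect (with a 10-second timeout, per the diff)
    // and registers the connection; it returns None if the connection attempt failed.
    if let Some(connection_closed_fut) = lightning_net_tokio::connect_outbound(peer_manager, their_node_id, addr).await {
        // Awaiting the returned future is optional; it completes once the peer disconnects.
        connection_closed_fut.await;
    }
}

fn accept_socket(peer_manager: Arc<MyPeerManager>, socket: std::net::TcpStream) {
    // setup_inbound now takes a std::net::TcpStream and spawns its processing futures with
    // tokio::spawn internally, so the returned future only needs to be polled if you want
    // to learn about disconnection.
    let _connection_closed_fut = lightning_net_tokio::setup_inbound(peer_manager, socket);
}
```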