From: Matt Corallo Date: Wed, 6 Oct 2021 16:58:56 +0000 (+0000) Subject: Create a simple `FairRwLock` to avoid readers starving writers X-Git-Tag: v0.0.107~31^2~7 X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=ae4ceb71a584f0aa9e0c1a14a6219d87f1668eba;p=rust-lightning Create a simple `FairRwLock` to avoid readers starving writers Because we handle messages (which can take some time, persisting things to disk or validating cryptographic signatures) with the top-level read lock, but require the top-level write lock to connect new peers or handle disconnection, we are particularly sensitive to writer starvation issues. Rust's libstd RwLock does not provide any fairness guarantees, using whatever the OS provides as-is. On Linux, pthreads defaults to starving writers, which Rust's RwLock exposes to us (without any configurability). Here we work around that issue by blocking readers if there are pending writers, optimizing for readable code over perfectly-optimized blocking. --- diff --git a/lightning/src/debug_sync.rs b/lightning/src/debug_sync.rs index b31ceacea..6b36682f4 100644 --- a/lightning/src/debug_sync.rs +++ b/lightning/src/debug_sync.rs @@ -362,3 +362,5 @@ fn read_write_lockorder_fail() { let _a = a.write().unwrap(); } } + +pub type FairRwLock = RwLock; diff --git a/lightning/src/lib.rs b/lightning/src/lib.rs index 6d4cc50a9..abdc10c57 100644 --- a/lightning/src/lib.rs +++ b/lightning/src/lib.rs @@ -159,6 +159,8 @@ mod sync { pub use debug_sync::*; #[cfg(not(test))] pub use ::std::sync::{Arc, Mutex, Condvar, MutexGuard, RwLock, RwLockReadGuard}; + #[cfg(not(test))] + pub use crate::util::fairrwlock::FairRwLock; } #[cfg(not(feature = "std"))] diff --git a/lightning/src/ln/peer_handler.rs b/lightning/src/ln/peer_handler.rs index bb7b697e4..0685785db 100644 --- a/lightning/src/ln/peer_handler.rs +++ b/lightning/src/ln/peer_handler.rs @@ -33,7 +33,7 @@ use routing::network_graph::{NetworkGraph, NetGraphMsgHandler}; use prelude::*; use io; use alloc::collections::LinkedList; -use sync::{Arc, Mutex, MutexGuard, RwLock}; +use sync::{Arc, Mutex, MutexGuard, FairRwLock}; use core::sync::atomic::{AtomicBool, Ordering}; use core::{cmp, hash, fmt, mem}; use core::ops::Deref; @@ -428,7 +428,7 @@ pub struct PeerManager, - peers: RwLock>, + peers: FairRwLock>, /// Only add to this set when noise completes. /// Locked *after* peers. When an item is removed, it must be removed with the `peers` write /// lock held. Entries may be added with only the `peers` read lock held (though the @@ -570,7 +570,7 @@ impl P PeerManager { message_handler, - peers: RwLock::new(PeerHolder { + peers: FairRwLock::new(PeerHolder { peers: HashMap::new(), }), node_id_to_descriptor: Mutex::new(HashMap::new()), diff --git a/lightning/src/sync.rs b/lightning/src/sync.rs index bde547036..482759b8c 100644 --- a/lightning/src/sync.rs +++ b/lightning/src/sync.rs @@ -113,3 +113,5 @@ impl RwLock { Err(()) } } + +pub type FairRwLock = RwLock; diff --git a/lightning/src/util/fairrwlock.rs b/lightning/src/util/fairrwlock.rs new file mode 100644 index 000000000..8dd74f2b5 --- /dev/null +++ b/lightning/src/util/fairrwlock.rs @@ -0,0 +1,50 @@ +use std::sync::{TryLockResult, LockResult, RwLock, RwLockReadGuard, RwLockWriteGuard}; +use std::sync::atomic::{AtomicUsize, Ordering}; + +/// Rust libstd's RwLock does not provide any fairness guarantees (and, in fact, when used on +/// Linux with pthreads under the hood, readers trivially and completely starve writers). +/// Because we often hold read locks while doing message processing in multiple threads which +/// can use significant CPU time, with write locks being time-sensitive but relatively small in +/// CPU time, we can end up with starvation completely blocking incoming connections or pings, +/// especially during initial graph sync. +/// +/// Thus, we need to block readers when a writer is pending, which we do with a trivial RwLock +/// wrapper here. Its not particularly optimized, but provides some reasonable fairness by +/// blocking readers (by taking the write lock) if there are writers pending when we go to take +/// a read lock. +pub struct FairRwLock { + lock: RwLock, + waiting_writers: AtomicUsize, +} + +impl FairRwLock { + pub fn new(t: T) -> Self { + Self { lock: RwLock::new(t), waiting_writers: AtomicUsize::new(0) } + } + + // Note that all atomic accesses are relaxed, as we do not rely on the atomics here for any + // ordering at all, instead relying on the underlying RwLock to provide ordering of unrelated + // memory. + pub fn write(&self) -> LockResult> { + self.waiting_writers.fetch_add(1, Ordering::Relaxed); + let res = self.lock.write(); + self.waiting_writers.fetch_sub(1, Ordering::Relaxed); + res + } + + pub fn try_write(&self) -> TryLockResult> { + self.lock.try_write() + } + + pub fn read(&self) -> LockResult> { + if self.waiting_writers.load(Ordering::Relaxed) != 0 { + let _write_queue_lock = self.lock.write(); + } + // Note that we don't consider ensuring that an underlying RwLock allowing writers to + // starve readers doesn't exhibit the same behavior here. I'm not aware of any + // libstd-backing RwLock which exhibits this behavior, and as documented in the + // struct-level documentation, it shouldn't pose a significant issue for our current + // codebase. + self.lock.read() + } +} diff --git a/lightning/src/util/mod.rs b/lightning/src/util/mod.rs index 95826b7e0..075798331 100644 --- a/lightning/src/util/mod.rs +++ b/lightning/src/util/mod.rs @@ -25,6 +25,8 @@ pub mod persist; pub(crate) mod atomic_counter; pub(crate) mod byte_utils; pub(crate) mod chacha20; +#[cfg(feature = "std")] +pub(crate) mod fairrwlock; #[cfg(fuzzing)] pub mod zbase32; #[cfg(not(fuzzing))]