Create a simple `FairRwLock` to avoid readers starving writers
authorMatt Corallo <git@bluematt.me>
Wed, 6 Oct 2021 16:58:56 +0000 (16:58 +0000)
committerMatt Corallo <git@bluematt.me>
Tue, 10 May 2022 23:40:20 +0000 (23:40 +0000)
Because we handle messages (which can take some time, persisting
things to disk or validating cryptographic signatures) with the
top-level read lock, but require the top-level write lock to
connect new peers or handle disconnection, we are particularly
sensitive to writer starvation issues.

Rust's libstd RwLock does not provide any fairness guarantees,
using whatever the OS provides as-is. On Linux, pthreads defaults
to starving writers, which Rust's RwLock exposes to us (without
any configurability).

Here we work around that issue by blocking readers if there are
pending writers, optimizing for readable code over
perfectly-optimized blocking.

lightning/src/debug_sync.rs
lightning/src/lib.rs
lightning/src/ln/peer_handler.rs
lightning/src/sync.rs
lightning/src/util/fairrwlock.rs [new file with mode: 0644]
lightning/src/util/mod.rs

index b31ceacea15852def4203b037d8e36216094f5c4..6b36682f43233f876dcacaf2e1ae34da0fef5e4e 100644 (file)
@@ -362,3 +362,5 @@ fn read_write_lockorder_fail() {
                let _a = a.write().unwrap();
        }
 }
+
+pub type FairRwLock<T> = RwLock<T>;
index 6d4cc50a920cad4cde49b91d0329e227c6ad6379..abdc10c577a4f476b70929dad51499b26f1b0cfe 100644 (file)
@@ -159,6 +159,8 @@ mod sync {
        pub use debug_sync::*;
        #[cfg(not(test))]
        pub use ::std::sync::{Arc, Mutex, Condvar, MutexGuard, RwLock, RwLockReadGuard};
+       #[cfg(not(test))]
+       pub use crate::util::fairrwlock::FairRwLock;
 }
 
 #[cfg(not(feature = "std"))]
index bb7b697e420ea5603e18dca088808b5150938ecd..0685785db1ceb9b61cfc5276e9da77c1fc7768b2 100644 (file)
@@ -33,7 +33,7 @@ use routing::network_graph::{NetworkGraph, NetGraphMsgHandler};
 use prelude::*;
 use io;
 use alloc::collections::LinkedList;
-use sync::{Arc, Mutex, MutexGuard, RwLock};
+use sync::{Arc, Mutex, MutexGuard, FairRwLock};
 use core::sync::atomic::{AtomicBool, Ordering};
 use core::{cmp, hash, fmt, mem};
 use core::ops::Deref;
@@ -428,7 +428,7 @@ pub struct PeerManager<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: De
                L::Target: Logger,
                CMH::Target: CustomMessageHandler {
        message_handler: MessageHandler<CM, RM>,
-       peers: RwLock<PeerHolder<Descriptor>>,
+       peers: FairRwLock<PeerHolder<Descriptor>>,
        /// Only add to this set when noise completes.
        /// Locked *after* peers. When an item is removed, it must be removed with the `peers` write
        /// lock held. Entries may be added with only the `peers` read lock held (though the
@@ -570,7 +570,7 @@ impl<Descriptor: SocketDescriptor, CM: Deref, RM: Deref, L: Deref, CMH: Deref> P
 
                PeerManager {
                        message_handler,
-                       peers: RwLock::new(PeerHolder {
+                       peers: FairRwLock::new(PeerHolder {
                                peers: HashMap::new(),
                        }),
                        node_id_to_descriptor: Mutex::new(HashMap::new()),
index bde547036536b479bcf181740e0ea31a7cf42ebb..482759b8ca88b626fd0585f0e73f84d33a7d967c 100644 (file)
@@ -113,3 +113,5 @@ impl<T> RwLock<T> {
                Err(())
        }
 }
+
+pub type FairRwLock<T> = RwLock<T>;
diff --git a/lightning/src/util/fairrwlock.rs b/lightning/src/util/fairrwlock.rs
new file mode 100644 (file)
index 0000000..8dd74f2
--- /dev/null
@@ -0,0 +1,50 @@
+use std::sync::{TryLockResult, LockResult, RwLock, RwLockReadGuard, RwLockWriteGuard};
+use std::sync::atomic::{AtomicUsize, Ordering};
+
/// Rust libstd's RwLock makes no fairness guarantees — and on Linux, where it
/// is backed by pthreads, readers can trivially and completely starve writers.
/// Because we often hold read locks across CPU-heavy message processing in
/// multiple threads, while write locks are time-sensitive but cheap, such
/// starvation can entirely block incoming connections or pings, especially
/// during initial graph sync.
///
/// This trivial wrapper restores rough fairness by making prospective readers
/// yield to any pending writer. It's not particularly optimized — it favors
/// readable code over perfectly-tuned blocking.
pub struct FairRwLock<T> {
	lock: RwLock<T>,
	waiting_writers: AtomicUsize,
}

impl<T> FairRwLock<T> {
	/// Wraps `t` in a new fair lock with no writers pending.
	pub fn new(t: T) -> Self {
		FairRwLock { lock: RwLock::new(t), waiting_writers: AtomicUsize::new(0) }
	}

	// All atomic accesses below are Relaxed: the counter is merely a hint that
	// tells readers whether to step aside, and the underlying RwLock already
	// provides all ordering of unrelated memory.

	/// Takes the write lock, advertising the pending write via
	/// `waiting_writers` so that incoming readers queue up behind us.
	pub fn write(&self) -> LockResult<RwLockWriteGuard<T>> {
		self.waiting_writers.fetch_add(1, Ordering::Relaxed);
		let guard = self.lock.write();
		self.waiting_writers.fetch_sub(1, Ordering::Relaxed);
		guard
	}

	/// Attempts the write lock without blocking. As this never waits, there is
	/// no need to bump `waiting_writers`.
	pub fn try_write(&self) -> TryLockResult<RwLockWriteGuard<T>> {
		self.lock.try_write()
	}

	/// Takes the read lock. If any writer is currently waiting, we first
	/// acquire and immediately release the write lock, which lines us up
	/// behind the pending writer instead of starving it.
	pub fn read(&self) -> LockResult<RwLockReadGuard<T>> {
		if self.waiting_writers.load(Ordering::Relaxed) != 0 {
			let _yield_to_writers = self.lock.write();
		}
		// We deliberately do not defend against the opposite failure mode (an
		// underlying RwLock that lets writers starve readers): no known
		// libstd-backing RwLock behaves that way and, as documented on the
		// struct, it would not pose a significant issue for this codebase.
		self.lock.read()
	}
}
index 95826b7e06ee73e0a02b3e1b36cf8ac6ef9ed12e..0757983314e73f8698673e226a884c2752911547 100644 (file)
@@ -25,6 +25,8 @@ pub mod persist;
 pub(crate) mod atomic_counter;
 pub(crate) mod byte_utils;
 pub(crate) mod chacha20;
+#[cfg(feature = "std")]
+pub(crate) mod fairrwlock;
 #[cfg(fuzzing)]
 pub mod zbase32;
 #[cfg(not(fuzzing))]