Avoid disconnecting all peers if user code is slow

author Matt Corallo <git@bluematt.me>

Sun, 26 Sep 2021 00:09:17 +0000 (00:09 +0000)

committer Matt Corallo <git@bluematt.me>

Fri, 21 Jan 2022 00:36:59 +0000 (00:36 +0000)
author Matt Corallo <git@bluematt.me>
Sun, 26 Sep 2021 00:09:17 +0000 (00:09 +0000)
committer Matt Corallo <git@bluematt.me>
Fri, 21 Jan 2022 00:36:59 +0000 (00:36 +0000)
diff --git a/lightning-background-processor/src/lib.rs b/lightning-background-processor/src/lib.rs

index 2dbc8053b4eb1247e5d64dc482e1817ad5db08ec..363dfba93ce407efce441fc69af536fbbcd53fda 100644 (file)
--- a/lightning-background-processor/src/lib.rs
+++ b/lightning-background-processor/src/lib.rs
@@ -219,11 +219,17 @@ impl BackgroundProcessor {
                         let mut have_pruned = false;
  
                         loop {
-                               peer_manager.process_events();
+                               peer_manager.process_events(); // Note that this may block on ChannelManager's locking
                                 channel_manager.process_pending_events(&event_handler);
                                 chain_monitor.process_pending_events(&event_handler);
+
+                               // We wait up to 100ms, but track how long it takes to detect being put to sleep,
+                               // see `await_start`'s use below.
+                               let await_start = Instant::now();
                                 let updates_available =
                                         channel_manager.await_persistable_update_timeout(Duration::from_millis(100));
+                               let await_time = await_start.elapsed();
+
                                 if updates_available {
                                         log_trace!(logger, "Persisting ChannelManager...");
                                         persister.persist_manager(&*channel_manager)?;
@@ -239,15 +245,20 @@ impl BackgroundProcessor {
                                         channel_manager.timer_tick_occurred();
                                         last_freshness_call = Instant::now();
                                 }
-                               if last_ping_call.elapsed().as_secs() > PING_TIMER * 2 {
+                               if await_time > Duration::from_secs(1) {
                                         // On various platforms, we may be starved of CPU cycles for several reasons.
                                         // E.g. on iOS, if we've been in the background, we will be entirely paused.
                                         // Similarly, if we're on a desktop platform and the device has been asleep, we
                                         // may not get any cycles.
-                                       // In any case, if we've been entirely paused for more than double our ping
-                                       // timer, we should have disconnected all sockets by now (and they're probably
-                                       // dead anyway), so disconnect them by calling `timer_tick_occurred()` twice.
-                                       log_trace!(logger, "Awoke after more than double our ping timer, disconnecting peers.");
+                                       // We detect this by checking if our max-100ms-sleep, above, ran longer than a
+                                       // full second, at which point we assume sockets may have been killed (they
+                                       // appear to be at least on some platforms, even if it has only been a second).
+                                       // Note that we have to take care to not get here just because user event
+                                       // processing was slow at the top of the loop. For example, the sample client
+                                       // may call Bitcoin Core RPCs during event handling, which very often takes
+                                       // more than a handful of seconds to complete, and shouldn't disconnect all our
+                                       // peers.
+                                       log_trace!(logger, "100ms sleep took more than a second, disconnecting peers.");
                                         peer_manager.disconnect_all_peers();
                                         last_ping_call = Instant::now();
                                 } else if last_ping_call.elapsed().as_secs() > PING_TIMER {
author	Matt Corallo <git@bluematt.me>
	Sun, 26 Sep 2021 00:09:17 +0000 (00:09 +0000)
committer	Matt Corallo <git@bluematt.me>
	Fri, 21 Jan 2022 00:36:59 +0000 (00:36 +0000)