Merge pull request #1253 from TheBlueMatt/2022-01-background-persist-exit

author valentinewallace <valentinewallace@users.noreply.github.com>

Mon, 24 Jan 2022 16:23:12 +0000 (11:23 -0500)

committer GitHub <noreply@github.com>

Mon, 24 Jan 2022 16:23:12 +0000 (11:23 -0500)
author valentinewallace <valentinewallace@users.noreply.github.com>
Mon, 24 Jan 2022 16:23:12 +0000 (11:23 -0500)
committer GitHub <noreply@github.com>
Mon, 24 Jan 2022 16:23:12 +0000 (11:23 -0500)
diff --combined lightning-background-processor/src/lib.rs

index c3be857a3611f09490902386f1008b503f271b91,8f6ed657b3d8e116b5913ebb9b75e4dbdc218e41..b60dc6c830432c47777faa4a7b84db9e666ba954
--- 1/lightning-background-processor/src/lib.rs
--- 2/lightning-background-processor/src/lib.rs
+++ b/lightning-background-processor/src/lib.rs
@@@ -61,7 -61,7 +61,7 @@@ const FRESHNESS_TIMER: u64 = 60
   const FRESHNESS_TIMER: u64 = 1;
   
   #[cfg(all(not(test), not(debug_assertions)))]
- -const PING_TIMER: u64 = 5;
+ +const PING_TIMER: u64 = 10;
   /// Signature operations take a lot longer without compiler optimisations.
   /// Increasing the ping timer allows for this but slower devices will be disconnected if the
   /// timeout is reached.
@@@ -219,17 -219,11 +219,17 @@@ impl BackgroundProcessor 
                         let mut have_pruned = false;
   
                         loop {
- -                              peer_manager.process_events();
+ +                              peer_manager.process_events(); // Note that this may block on ChannelManager's locking
                                 channel_manager.process_pending_events(&event_handler);
                                 chain_monitor.process_pending_events(&event_handler);
+ +
+ +                              // We wait up to 100ms, but track how long it takes to detect being put to sleep,
+ +                              // see `await_start`'s use below.
+ +                              let await_start = Instant::now();
                                 let updates_available =
                                         channel_manager.await_persistable_update_timeout(Duration::from_millis(100));
+ +                              let await_time = await_start.elapsed();
+ +
                                 if updates_available {
                                         log_trace!(logger, "Persisting ChannelManager...");
                                         persister.persist_manager(&*channel_manager)?;
@@@ -238,27 -232,22 +238,27 @@@
                                 // Exit the loop if the background processor was requested to stop.
                                 if stop_thread.load(Ordering::Acquire) == true {
                                         log_trace!(logger, "Terminating background processor.");
-                                       return Ok(());
+                                       break;
                                 }
                                 if last_freshness_call.elapsed().as_secs() > FRESHNESS_TIMER {
                                         log_trace!(logger, "Calling ChannelManager's timer_tick_occurred");
                                         channel_manager.timer_tick_occurred();
                                         last_freshness_call = Instant::now();
                                 }
- -                              if last_ping_call.elapsed().as_secs() > PING_TIMER * 2 {
+ +                              if await_time > Duration::from_secs(1) {
                                         // On various platforms, we may be starved of CPU cycles for several reasons.
                                         // E.g. on iOS, if we've been in the background, we will be entirely paused.
                                         // Similarly, if we're on a desktop platform and the device has been asleep, we
                                         // may not get any cycles.
- -                                      // In any case, if we've been entirely paused for more than double our ping
- -                                      // timer, we should have disconnected all sockets by now (and they're probably
- -                                      // dead anyway), so disconnect them by calling `timer_tick_occurred()` twice.
- -                                      log_trace!(logger, "Awoke after more than double our ping timer, disconnecting peers.");
+ +                                      // We detect this by checking if our max-100ms-sleep, above, ran longer than a
+ +                                      // full second, at which point we assume sockets may have been killed (they
+ +                                      // appear to be at least on some platforms, even if it has only been a second).
+ +                                      // Note that we have to take care to not get here just because user event
+ +                                      // processing was slow at the top of the loop. For example, the sample client
+ +                                      // may call Bitcoin Core RPCs during event handling, which very often takes
+ +                                      // more than a handful of seconds to complete, and shouldn't disconnect all our
+ +                                      // peers.
+ +                                      log_trace!(logger, "100ms sleep took more than a second, disconnecting peers.");
                                         peer_manager.disconnect_all_peers();
                                         last_ping_call = Instant::now();
                                 } else if last_ping_call.elapsed().as_secs() > PING_TIMER {
@@@ -280,6 -269,10 +280,10 @@@
                                         }
                                 }
                         }
+                       // After we exit, ensure we persist the ChannelManager one final time - this avoids
+                       // some races where users quit while channel updates were in-flight, with
+                       // ChannelMonitor update(s) persisted without a corresponding ChannelManager update.
+                       persister.persist_manager(&*channel_manager)
                 });
                 Self { stop_thread: stop_thread_clone, thread_handle: Some(handle) }
         }
author	valentinewallace <valentinewallace@users.noreply.github.com>
	Mon, 24 Jan 2022 16:23:12 +0000 (11:23 -0500)
committer	GitHub <noreply@github.com>
	Mon, 24 Jan 2022 16:23:12 +0000 (11:23 -0500)