From e004a953d43654c14fef698d14e5aee2497dbcc4 Mon Sep 17 00:00:00 2001
From: Matt Corallo <git@bluematt.me>
Date: Sat, 15 Jul 2023 06:42:33 +0000
Subject: [PATCH] Substantially optimize reference-row-fetching

By first fetching the rows we need from a smaller index, we avoid
walking a large index which contained the full `blob_signed`. This
reduces reference-row-fetching from 680 seconds to 152 seconds when
searching today for reference rows against 7 days ago.

Old:

```
ln-gossip=# EXPLAIN ANALYZE SELECT DISTINCT ON (short_channel_id, direction) id, blob_signed, direction
        FROM channel_updates
        WHERE seen < '2023-07-07 00:00:00' AND short_channel_id IN (
            SELECT DISTINCT ON (short_channel_id) short_channel_id
            FROM channel_updates
            WHERE seen >= '2023-07-07 00:00:00'
        )
        ORDER BY short_channel_id ASC, direction ASC, seen DESC;
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Unique  (cost=186279.46..11921204.82 rows=168910 width=161) (actual time=732.365..680504.173 rows=129985 loops=1)
   ->  Merge Join  (cost=186279.46..11632998.93 rows=57641177 width=161) (actual time=732.364..679193.755 rows=31714061 loops=1)
         Merge Cond: (channel_updates.short_channel_id = channel_updates_1.short_channel_id)
         ->  Index Only Scan using channel_updates_scid_dir_seen on channel_updates  (cost=0.56..10718853.69 rows=57641177 width=161) (actual time=0.638..673675.749 rows=57408667 loops=1)
               Index Cond: (seen < '2023-07-07 00:00:00'::timestamp without time zone)
               Heap Fetches: 0
         ->  Unique  (cost=186278.90..192574.84 rows=84455 width=8) (actual time=478.881..750.241 rows=68210 loops=1)
               ->  Sort  (cost=186278.90..189426.87 rows=1259188 width=8) (actual time=478.878..653.035 rows=1452661 loops=1)
                     Sort Key: channel_updates_1.short_channel_id
                     Sort Method: external merge  Disk: 17680kB
                     ->  Index Only Scan using channel_updates_seen_scid on channel_updates channel_updates_1  (cost=0.56..41481.08 rows=1259188 width=8) (actual time=0.885..264.333 rows=1504495 loops=1)
                           Index Cond: (seen >= '2023-07-07 00:00:00'::timestamp without time zone)
                           Heap Fetches: 2273
 Planning Time: 0.164 ms
 JIT:
   Functions: 9
   Options: Inlining true, Optimization true, Expressions true, Deforming true
   Timing: Generation 21.265 ms, Inlining 37.914 ms, Optimization 113.040 ms, Emission 101.901 ms, Total 274.121 ms
 Execution Time: 680601.155 ms
(19 rows)
```

New:

```
ln-gossip=# EXPLAIN ANALYZE SELECT id, direction, blob_signed FROM channel_updates
    WHERE id IN (
        SELECT DISTINCT ON (short_channel_id, direction) id
        FROM channel_updates
        WHERE seen < '2023-07-07 00:00:00'
        ORDER BY short_channel_id ASC, direction ASC, seen DESC
    ) AND short_channel_id IN (
        SELECT DISTINCT ON (short_channel_id) short_channel_id
        FROM channel_updates
        WHERE seen >= '2023-07-07 00:00:00'
    );
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Hash Join  (cost=2942503.92..2943867.77 rows=169870 width=145) (actual time=22862.627..152436.685 rows=130116 loops=1)
   Hash Cond: (channel_updates.short_channel_id = channel_updates_2.short_channel_id)
   ->  Nested Loop  (cost=2738282.26..2739200.18 rows=169870 width=153) (actual time=22141.452..151504.140 rows=393250 loops=1)
         ->  HashAggregate  (cost=2738281.69..2738283.69 rows=200 width=4) (actual time=22139.440..22339.035 rows=393250 loops=1)
               Group Key: channel_updates_1.id
               Batches: 1  Memory Usage: 45089kB
               ->  Result  (cost=0.56..2736158.32 rows=169870 width=21) (actual time=0.102..21984.409 rows=393250 loops=1)
                     ->  Unique  (cost=0.56..2736158.32 rows=169870 width=21) (actual time=0.074..21943.089 rows=393250 loops=1)
                           ->  Index Only Scan using channel_updates_scid_dir_seen_desc_with_id on channel_updates channel_updates_1  (cost=0.56..2448011.03 rows=57629457 width=21) (actual time=0.073..19776.181 rows=57408667 loops=1)
                                 Index Cond: (seen < '2023-07-07 00:00:00'::timestamp without time zone)
                                 Heap Fetches: 0
         ->  Index Only Scan using channel_updates_id_with_scid_dir_blob on channel_updates  (cost=0.56..4.60 rows=1 width=153) (actual time=0.328..0.328 rows=1 loops=393250)
               Index Cond: (id = channel_updates_1.id)
               Heap Fetches: 0
   ->  Hash  (cost=203159.97..203159.97 rows=84935 width=8) (actual time=721.105..721.107 rows=70731 loops=1)
         Buckets: 131072  Batches: 1  Memory Usage: 3787kB
         ->  Unique  (cost=195708.67..202310.62 rows=84935 width=8) (actual time=552.965..713.465 rows=70731 loops=1)
               ->  Sort  (cost=195708.67..199009.65 rows=1320391 width=8) (actual time=552.962..650.323 rows=1537141 loops=1)
                     Sort Key: channel_updates_2.short_channel_id
                     Sort Method: external merge  Disk: 18064kB
                     ->  Index Only Scan using channel_updates_seen_scid on channel_updates channel_updates_2  (cost=0.56..43421.19 rows=1320391 width=8) (actual time=66.736..324.130 rows=1537141 loops=1)
                           Index Cond: (seen >= '2023-07-07 00:00:00'::timestamp without time zone)
                           Heap Fetches: 68
 Planning Time: 0.520 ms
 JIT:
   Functions: 21
   Options: Inlining true, Optimization true, Expressions true, Deforming true
   Timing: Generation 0.643 ms, Inlining 7.055 ms, Optimization 33.167 ms, Emission 25.782 ms, Total 66.648 ms
 Execution Time: 152458.777 ms
(29 rows)
```
---
 src/config.rs | 11 +++++++++--
 src/lookup.rs | 11 +++++++----
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/config.rs b/src/config.rs
index 6728312..2d93487 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -15,7 +15,7 @@ use lightning::util::ser::Readable;
 use lightning_block_sync::http::HttpEndpoint;
 use tokio_postgres::Config;
 
-pub(crate) const SCHEMA_VERSION: i32 = 9;
+pub(crate) const SCHEMA_VERSION: i32 = 10;
 pub(crate) const SNAPSHOT_CALCULATION_INTERVAL: u32 = 3600 * 24; // every 24 hours, in seconds
 /// If the last update in either direction was more than six days ago, we send a reminder
 /// That reminder may be either in the form of a channel announcement, or in the form of empty
@@ -105,8 +105,9 @@ pub(crate) fn db_channel_update_table_creation_query() -> &'static str {
 pub(crate) fn db_index_creation_query() -> &'static str {
 	"
 	CREATE INDEX IF NOT EXISTS channel_updates_seen_scid ON channel_updates(seen, short_channel_id);
-	CREATE INDEX IF NOT EXISTS channel_updates_scid_dir_seen ON channel_updates(short_channel_id ASC, direction ASC, seen DESC) INCLUDE (id, blob_signed);
 	CREATE INDEX IF NOT EXISTS channel_updates_scid_dir_seen_asc ON channel_updates(short_channel_id, direction, seen);
+	CREATE INDEX IF NOT EXISTS channel_updates_scid_dir_seen_desc_with_id ON channel_updates(short_channel_id ASC, direction ASC, seen DESC) INCLUDE (id);
+	CREATE INDEX IF NOT EXISTS channel_updates_id_with_scid_dir_blob ON channel_updates(id) INCLUDE (short_channel_id, direction, blob_signed);
 	CREATE UNIQUE INDEX IF NOT EXISTS channel_updates_key ON channel_updates (short_channel_id, direction, timestamp);
 	"
 }
@@ -228,6 +229,12 @@ pub(crate) async fn upgrade_db(schema: i32, client: &mut tokio_postgres::Client)
 		tx.execute("UPDATE config SET db_schema = 9 WHERE id = 1", &[]).await.unwrap();
 		tx.commit().await.unwrap();
 	}
+	if schema >= 1 && schema <= 9 {
+		let tx = client.transaction().await.unwrap();
+		tx.execute("DROP INDEX channel_updates_scid_dir_seen", &[]).await.unwrap();
+		tx.execute("UPDATE config SET db_schema = 10 WHERE id = 1", &[]).await.unwrap();
+		tx.commit().await.unwrap();
+	}
 	if schema <= 1 || schema > SCHEMA_VERSION {
 		panic!("Unknown schema in db: {}, we support up to {}", schema, SCHEMA_VERSION);
 	}
diff --git a/src/lookup.rs b/src/lookup.rs
index 8018e5c..5969b2a 100644
--- a/src/lookup.rs
+++ b/src/lookup.rs
@@ -209,14 +209,17 @@ pub(super) async fn fetch_channel_updates(delta_set: &mut DeltaSet, client: &Cli
 	// there was an update in either direction that happened after the last sync (to avoid
 	// collecting too many reference updates)
 	let reference_rows = client.query("
-		SELECT DISTINCT ON (short_channel_id, direction) id, direction, blob_signed
-		FROM channel_updates
-		WHERE seen < $1 AND short_channel_id IN (
+		SELECT id, direction, blob_signed FROM channel_updates
+		WHERE id IN (
+			SELECT DISTINCT ON (short_channel_id, direction) id
+			FROM channel_updates
+			WHERE seen < $1
+			ORDER BY short_channel_id ASC, direction ASC, seen DESC
+		) AND short_channel_id IN (
 			SELECT DISTINCT ON (short_channel_id) short_channel_id
 			FROM channel_updates
 			WHERE seen >= $1
 		)
-		ORDER BY short_channel_id ASC, direction ASC, seen DESC
 		", &[&last_sync_timestamp_object]).await.unwrap();
 
 	println!("Fetched reference rows ({}): {:?}", reference_rows.len(), start.elapsed());
-- 
2.30.2