From e004a953d43654c14fef698d14e5aee2497dbcc4 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Sat, 15 Jul 2023 06:42:33 +0000 Subject: [PATCH] Substantially optimize reference-row-fetching By first fetching the rows we need from a smaller index, we avoid walking a large index which contained the full `blob_signed`. This reduces reference-row-fetching from 680 seconds to 152 seconds when searching today for reference rows against 7 days ago. Old: ``` ln-gossip=# EXPLAIN ANALYZE SELECT DISTINCT ON (short_channel_id, direction) id, blob_signed, direction FROM channel_updates WHERE seen < '2023-07-07 00:00:00' AND short_channel_id IN ( SELECT DISTINCT ON (short_channel_id) short_channel_id FROM channel_updates WHERE seen >= '2023-07-07 00:00:00' ) ORDER BY short_channel_id ASC, direction ASC, seen DESC; ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Unique (cost=186279.46..11921204.82 rows=168910 width=161) (actual time=732.365..680504.173 rows=129985 loops=1) -> Merge Join (cost=186279.46..11632998.93 rows=57641177 width=161) (actual time=732.364..679193.755 rows=31714061 loops=1) Merge Cond: (channel_updates.short_channel_id = channel_updates_1.short_channel_id) -> Index Only Scan using channel_updates_scid_dir_seen on channel_updates (cost=0.56..10718853.69 rows=57641177 width=161) (actual time=0.638..673675.749 rows=57408667 loops=1) Index Cond: (seen < '2023-07-07 00:00:00'::timestamp without time zone) Heap Fetches: 0 -> Unique (cost=186278.90..192574.84 rows=84455 width=8) (actual time=478.881..750.241 rows=68210 loops=1) -> Sort (cost=186278.90..189426.87 rows=1259188 width=8) (actual time=478.878..653.035 rows=1452661 loops=1) Sort Key: channel_updates_1.short_channel_id Sort Method: external merge Disk: 17680kB -> Index Only Scan using channel_updates_seen_scid on channel_updates channel_updates_1 (cost=0.56..41481.08 rows=1259188 width=8) (actual time=0.885..264.333 rows=1504495 loops=1) Index Cond: (seen >= '2023-07-07 00:00:00'::timestamp without time zone) Heap Fetches: 2273 Planning Time: 0.164 ms JIT: Functions: 9 Options: Inlining true, Optimization true, Expressions true, Deforming true Timing: Generation 21.265 ms, Inlining 37.914 ms, Optimization 113.040 ms, Emission 101.901 ms, Total 274.121 ms Execution Time: 680601.155 ms (19 rows) ``` New: ``` ln-gossip=# EXPLAIN ANALYZE SELECT id, direction, blob_signed FROM channel_updates WHERE id IN ( SELECT DISTINCT ON (short_channel_id, direction) id FROM channel_updates WHERE seen < '2023-07-07 00:00:00' ORDER BY short_channel_id ASC, direction ASC, seen DESC ) AND short_channel_id IN ( SELECT DISTINCT ON (short_channel_id) short_channel_id FROM channel_updates WHERE seen >= '2023-07-07 00:00:00' ); ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ Hash Join (cost=2942503.92..2943867.77 rows=169870 width=145) (actual time=22862.627..152436.685 rows=130116 loops=1) Hash Cond: (channel_updates.short_channel_id = channel_updates_2.short_channel_id) -> Nested Loop (cost=2738282.26..2739200.18 rows=169870 width=153) (actual time=22141.452..151504.140 rows=393250 loops=1) -> HashAggregate (cost=2738281.69..2738283.69 rows=200 width=4) (actual time=22139.440..22339.035 rows=393250 loops=1) Group Key: channel_updates_1.id Batches: 1 Memory Usage: 45089kB -> Result (cost=0.56..2736158.32 rows=169870 width=21) (actual time=0.102..21984.409 rows=393250 loops=1) -> Unique (cost=0.56..2736158.32 rows=169870 width=21) (actual time=0.074..21943.089 rows=393250 loops=1) -> Index Only Scan using channel_updates_scid_dir_seen_desc_with_id on channel_updates channel_updates_1 (cost=0.56..2448011.03 rows=57629457 width=21) (actual time=0.073..19776.181 rows=57408667 loops=1) Index Cond: (seen < '2023-07-07 00:00:00'::timestamp without time zone) Heap Fetches: 0 -> Index Only Scan using channel_updates_id_with_scid_dir_blob on channel_updates (cost=0.56..4.60 rows=1 width=153) (actual time=0.328..0.328 rows=1 loops=393250) Index Cond: (id = channel_updates_1.id) Heap Fetches: 0 -> Hash (cost=203159.97..203159.97 rows=84935 width=8) (actual time=721.105..721.107 rows=70731 loops=1) Buckets: 131072 Batches: 1 Memory Usage: 3787kB -> Unique (cost=195708.67..202310.62 rows=84935 width=8) (actual time=552.965..713.465 rows=70731 loops=1) -> Sort (cost=195708.67..199009.65 rows=1320391 width=8) (actual time=552.962..650.323 rows=1537141 loops=1) Sort Key: channel_updates_2.short_channel_id Sort Method: external merge Disk: 18064kB -> Index Only Scan using channel_updates_seen_scid on channel_updates channel_updates_2 (cost=0.56..43421.19 rows=1320391 width=8) (actual time=66.736..324.130 rows=1537141 loops=1) Index Cond: (seen >= '2023-07-07 00:00:00'::timestamp without time zone) Heap Fetches: 68 Planning Time: 0.520 ms JIT: Functions: 21 Options: Inlining true, Optimization true, Expressions true, Deforming true Timing: Generation 0.643 ms, Inlining 7.055 ms, Optimization 33.167 ms, Emission 25.782 ms, Total 66.648 ms Execution Time: 152458.777 ms (29 rows) ``` --- src/config.rs | 11 +++++++++-- src/lookup.rs | 11 +++++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/config.rs b/src/config.rs index 6728312..2d93487 100644 --- a/src/config.rs +++ b/src/config.rs @@ -15,7 +15,7 @@ use lightning::util::ser::Readable; use lightning_block_sync::http::HttpEndpoint; use tokio_postgres::Config; -pub(crate) const SCHEMA_VERSION: i32 = 9; +pub(crate) const SCHEMA_VERSION: i32 = 10; pub(crate) const SNAPSHOT_CALCULATION_INTERVAL: u32 = 3600 * 24; // every 24 hours, in seconds /// If the last update in either direction was more than six days ago, we send a reminder /// That reminder may be either in the form of a channel announcement, or in the form of empty @@ -105,8 +105,9 @@ pub(crate) fn db_channel_update_table_creation_query() -> &'static str { pub(crate) fn db_index_creation_query() -> &'static str { " CREATE INDEX IF NOT EXISTS channel_updates_seen_scid ON channel_updates(seen, short_channel_id); - CREATE INDEX IF NOT EXISTS channel_updates_scid_dir_seen ON channel_updates(short_channel_id ASC, direction ASC, seen DESC) INCLUDE (id, blob_signed); CREATE INDEX IF NOT EXISTS channel_updates_scid_dir_seen_asc ON channel_updates(short_channel_id, direction, seen); + CREATE INDEX IF NOT EXISTS channel_updates_scid_dir_seen_desc_with_id ON channel_updates(short_channel_id ASC, direction ASC, seen DESC) INCLUDE (id); + CREATE INDEX IF NOT EXISTS channel_updates_id_with_scid_dir_blob ON channel_updates(id) INCLUDE (short_channel_id, direction, blob_signed); CREATE UNIQUE INDEX IF NOT EXISTS channel_updates_key ON channel_updates (short_channel_id, direction, timestamp); " } @@ -228,6 +229,12 @@ pub(crate) async fn upgrade_db(schema: i32, client: &mut tokio_postgres::Client) tx.execute("UPDATE config SET db_schema = 9 WHERE id = 1", &[]).await.unwrap(); tx.commit().await.unwrap(); } + if schema >= 1 && schema <= 9 { + let tx = client.transaction().await.unwrap(); + tx.execute("DROP INDEX channel_updates_scid_dir_seen", &[]).await.unwrap(); + tx.execute("UPDATE config SET db_schema = 10 WHERE id = 1", &[]).await.unwrap(); + tx.commit().await.unwrap(); + } if schema <= 1 || schema > SCHEMA_VERSION { panic!("Unknown schema in db: {}, we support up to {}", schema, SCHEMA_VERSION); } diff --git a/src/lookup.rs b/src/lookup.rs index 8018e5c..5969b2a 100644 --- a/src/lookup.rs +++ b/src/lookup.rs @@ -209,14 +209,17 @@ pub(super) async fn fetch_channel_updates(delta_set: &mut DeltaSet, client: &Cli // there was an update in either direction that happened after the last sync (to avoid // collecting too many reference updates) let reference_rows = client.query(" - SELECT DISTINCT ON (short_channel_id, direction) id, direction, blob_signed - FROM channel_updates - WHERE seen < $1 AND short_channel_id IN ( + SELECT id, direction, blob_signed FROM channel_updates + WHERE id IN ( + SELECT DISTINCT ON (short_channel_id, direction) id + FROM channel_updates + WHERE seen < $1 + ORDER BY short_channel_id ASC, direction ASC, seen DESC + ) AND short_channel_id IN ( SELECT DISTINCT ON (short_channel_id) short_channel_id FROM channel_updates WHERE seen >= $1 ) - ORDER BY short_channel_id ASC, direction ASC, seen DESC ", &[&last_sync_timestamp_object]).await.unwrap(); println!("Fetched reference rows ({}): {:?}", reference_rows.len(), start.elapsed()); -- 2.30.2