From 9974cc5c593bfac1bcdd51e572f53bf89dc8f6a2 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Sat, 15 Jun 2024 22:31:14 +0000 Subject: [PATCH] Add additional comments explaining how the rate limiter works --- xdp.c | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/xdp.c b/xdp.c index 4eea3e4..122a9eb 100644 --- a/xdp.c +++ b/xdp.c @@ -178,6 +178,11 @@ struct { } // Rate limits are done in a static-sized leaky bucket with a decimal counter +// +// They are stored in a single uint64_t with the top RATE_BUCKET_BITS holding +// the packet count/size and the remaining low bits holding the the time (as a +// fixed-point decimal). +// // Bucket size is always exactly (1 << RATE_BUCKET_INTEGER_BITS) #define RATE_BUCKET_DECIMAL_BITS 8 #define RATE_BUCKET_INTEGER_BITS 4 @@ -193,7 +198,7 @@ struct { #ifdef RATE_CNT struct ratelimit { struct bpf_spin_lock lock; - uint64_t sent_time; + uint64_t pkts_and_time; }; struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -229,19 +234,27 @@ struct { #define DO_RATE_LIMIT(do_lock, rate, time_masked, amt_in_pkt, limit_ns_per_pkt, matchbool) do { \ if (rate) { \ do_lock; \ - int64_t bucket_pkts = (rate->sent_time & (~RATE_TIME_MASK)) >> (64 - RATE_BUCKET_BITS); \ - /* We mask the top 12 bits, so date overflows every 52 days, handled below */ \ - int64_t time_diff = time_masked - ((int64_t)(rate->sent_time & RATE_TIME_MASK)); \ - if (unlikely(time_diff < -1000000000 || time_diff > 16000000000)) { \ + int64_t bucket_pkts = (rate->pkts_and_time & (~RATE_TIME_MASK)) >> (64 - RATE_BUCKET_BITS); \ + /* We mask the top 12 bits, so date overflows every 52 days, resetting the counter */ \ + int64_t time_diff = time_masked - ((int64_t)(rate->pkts_and_time & RATE_TIME_MASK)); \ + if (unlikely(time_diff < RATE_MIN_TIME_OFFSET || time_diff > RATE_MAX_TIME_OFFSET)) { \ bucket_pkts = 0; \ } else { \ if (unlikely(time_diff < 0)) { time_diff = 0; } \ + /* To avoid storing too many bits, we make a simplifying assumption that all packets */ \ + /* hit by a rule are the same size. Thus, when a rule is denominated in bytes rather */ \ + /* than packets, we can keep counting packets and simply adjust the ratelimit by the*/ \ + /* size of the packet we're looking at. */ \ + /* Thus, here, we simply reduce our packet counter by the */ \ + /* time difference / our ns/packet limit times the size of the current packet. */ \ int64_t pkts_since_last = (time_diff << RATE_BUCKET_BITS) * ((uint64_t)amt_in_pkt) / ((uint64_t)limit_ns_per_pkt); \ bucket_pkts -= pkts_since_last; \ } \ + /* Accept as long as we can add one to our bucket without overflow */ \ if (bucket_pkts < (((1 << RATE_BUCKET_INTEGER_BITS) - 1) << RATE_BUCKET_DECIMAL_BITS)) { \ if (unlikely(bucket_pkts < 0)) bucket_pkts = 0; \ - rate->sent_time = time_masked | ((bucket_pkts + (1 << RATE_BUCKET_DECIMAL_BITS)) << (64 - RATE_BUCKET_BITS)); \ + uint64_t new_packet_count = bucket_pkts + (1 << RATE_BUCKET_DECIMAL_BITS); \ + rate->pkts_and_time = time_masked | (new_packet_count << (64 - RATE_BUCKET_BITS)); \ matchbool = 0; \ } else { \ matchbool = 1; \ @@ -251,7 +264,7 @@ if (rate) { \ #define CREATE_PERSRC_LOOKUP(IPV, IP_TYPE) \ struct persrc_rate##IPV##_entry { \ - uint64_t sent_time; \ + uint64_t pkts_and_time; \ IP_TYPE srcip; \ }; \ \ @@ -275,19 +288,19 @@ static int check_v##IPV##_persrc_ratelimit(IP_TYPE key, void *map, size_t map_li \ uint64_t bucket_idx = SRC_HASH_BUCKET_COUNT; \ uint64_t min_sent_idx = 0; \ - uint64_t min_sent_time = UINT64_MAX; \ + uint64_t min_time = UINT64_MAX; \ for (uint64_t i = 0; i < SRC_HASH_BUCKET_COUNT; i++) { \ if (first_bucket[i].srcip == key) { \ bucket_idx = i; \ break; \ } \ - int64_t time_offset = ((int64_t)cur_time_masked) - (first_bucket[i].sent_time & RATE_TIME_MASK); \ + int64_t time_offset = ((int64_t)cur_time_masked) - (first_bucket[i].pkts_and_time & RATE_TIME_MASK); \ if (time_offset < RATE_MIN_TIME_OFFSET || time_offset > RATE_MAX_TIME_OFFSET) { \ - min_sent_time = 0; \ + min_time = 0; \ min_sent_idx = i; \ } \ - if ((first_bucket[i].sent_time & RATE_TIME_MASK) < min_sent_time) { \ - min_sent_time = first_bucket[i].sent_time & RATE_TIME_MASK; \ + if ((first_bucket[i].pkts_and_time & RATE_TIME_MASK) < min_time) { \ + min_time = first_bucket[i].pkts_and_time & RATE_TIME_MASK; \ min_sent_idx = i; \ } \ } \ @@ -295,7 +308,7 @@ static int check_v##IPV##_persrc_ratelimit(IP_TYPE key, void *map, size_t map_li struct persrc_rate##IPV##_entry *entry = &first_bucket[bucket_idx]; \ if (entry->srcip != key) { \ entry->srcip = key; \ - entry->sent_time = 0; \ + entry->pkts_and_time = 0; \ } \ int matched = 0; \ DO_RATE_LIMIT(, entry, cur_time_masked, amt, limit_ns_per_pkt, matched); \ -- 2.39.5