Move ratelimits into map lookup fn to reduce BPF verifier instructions
author Matt Corallo <git@bluematt.me>
Wed, 27 Oct 2021 23:58:21 +0000 (23:58 +0000)
committer Matt Corallo <git@bluematt.me>
Thu, 28 Oct 2021 01:34:22 +0000 (01:34 +0000)
genrules.py
xdp.c

diff --git a/genrules.py b/genrules.py
index 7e71d755fbd43e1187e76651b3e4c3ede8716eeb..8cb74398f68ffd05891e775568280145e5486aba 100755 (executable)
--- a/genrules.py
+++ b/genrules.py
@@ -406,62 +406,40 @@ with open("rules.h", "w") as out:
 
                         first_action =   "int64_t time_masked = bpf_ktime_get_ns() & RATE_TIME_MASK;\n"
                         first_action += f"int64_t per_pkt_ns = (1000000000LL << RATE_BUCKET_INTEGER_BITS) / {math.floor(value)};\n"
+                        if ty == "0x8006" or ty == "0x8306":
+                            first_action += "uint64_t amt = data_end - pktdata;\n"
+                        else:
+                            first_action += "uint64_t amt = 1;\n"
                         if ty == "0x8006" or ty == "0x800c":
-                            spin_lock = "bpf_spin_lock(&rate->lock);"
-                            spin_unlock = "bpf_spin_unlock(&rate->lock);"
                             first_action += f"const uint32_t ratelimitidx = {ratelimitcnt};\n"
                             first_action += "struct ratelimit *rate = bpf_map_lookup_elem(&rate_map, &ratelimitidx);\n"
                             ratelimitcnt += 1
+                            first_action +=  "int matched = 0;\n"
+                            first_action += "DO_RATE_LIMIT(bpf_spin_lock(&rate->lock), rate, time_masked, amt, per_pkt_ns, matched);\n"
+                            first_action += "if (rate) { bpf_spin_unlock(&rate->lock); }\n"
                         else:
-                            spin_lock = "/* No locking as we're locked in get_v*_persrc_ratelimit */"
-                            spin_unlock = "bpf_spin_unlock(rate_ptr.lock);"
                             if proto == 4:
                                 if mid_byte > 32:
                                     continue
                                 first_action += f"const uint32_t srcip = ip->saddr & MASK4({mid_byte});\n"
                                 first_action += f"void *rate_map = &v4_src_rate_{len(v4persrcratelimits)};\n"
-                                first_action += f"struct persrc_rate4_ptr rate_ptr = get_v4_persrc_ratelimit(srcip, rate_map, {(high_byte + 1) * 4096}, time_masked);\n"
-                                first_action += f"struct persrc_rate4_entry *rate = rate_ptr.rate;\n"
+                                first_action += f"int matched = check_v4_persrc_ratelimit(srcip, rate_map, {(high_byte + 1) * 4096}, time_masked, amt, per_pkt_ns);\n"
                                 v4persrcratelimits.append((high_byte + 1) * 4096)
                             elif mid_byte <= 64:
                                 first_action += f"const uint64_t srcip = BE128BEHIGH64(ip6->saddr & MASK6({mid_byte}));\n"
                                 first_action += f"void *rate_map = &v5_src_rate_{len(v5persrcratelimits)};\n"
-                                first_action += f"struct persrc_rate5_ptr rate_ptr = get_v5_persrc_ratelimit(srcip, rate_map, {(high_byte + 1) * 4096}, time_masked);\n"
-                                first_action += f"struct persrc_rate5_entry *rate = rate_ptr.rate;\n"
+                                first_action += f"int matched = check_v5_persrc_ratelimit(srcip, rate_map, {(high_byte + 1) * 4096}, time_masked, amt, per_pkt_ns);\n"
                                 v5persrcratelimits.append((high_byte + 1) * 4096)
                             else:
                                 if mid_byte > 128:
                                     continue
                                 first_action += f"const uint128_t srcip = ip6->saddr & MASK6({mid_byte});\n"
                                 first_action += f"void *rate_map = &v6_src_rate_{len(v6persrcratelimits)};\n"
-                                first_action += f"struct persrc_rate6_ptr rate_ptr = get_v6_persrc_ratelimit(srcip, rate_map, {(high_byte + 1) * 4096}, time_masked);\n"
-                                first_action += f"struct persrc_rate6_entry *rate = rate_ptr.rate;\n"
+                                first_action += f"int matched = check_v6_persrc_ratelimit(srcip, rate_map, {(high_byte + 1) * 4096}, time_masked, amt, per_pkt_ns);\n"
                                 v6persrcratelimits.append((high_byte + 1) * 4096)
-                        if ty == "0x8006" or ty == "0x8306":
-                            first_action += "uint64_t amt = data_end - pktdata;\n"
-                        else:
-                            first_action += "uint64_t amt = 1;\n"
-                        first_action +=  "if (rate) {\n"
-                        first_action += f"\t{spin_lock}\n"
-                        first_action +=  "\tint64_t bucket_pkts = (rate->sent_time & (~RATE_TIME_MASK)) >> (64 - RATE_BUCKET_BITS);\n"
-                        # We mask the top 12 bits, so date overflows every 52 days, handled below
-                        first_action +=  "\tint64_t time_diff = time_masked - ((int64_t)(rate->sent_time & RATE_TIME_MASK));\n"
-                        first_action +=  "\tif (unlikely(time_diff < -1000000000 || time_diff > 16000000000)) {\n"
-                        first_action +=  "\t\tbucket_pkts = 0;\n"
-                        first_action +=  "\t} else {\n"
-                        first_action +=  "\t\tif (unlikely(time_diff < 0)) { time_diff = 0; }\n"
-                        first_action += f"\t\tint64_t pkts_since_last = (time_diff << RATE_BUCKET_BITS) * amt / per_pkt_ns;\n"
-                        first_action +=  "\t\tbucket_pkts -= pkts_since_last;\n"
-                        first_action +=  "\t}\n"
-                        first_action +=  "\tif (bucket_pkts >= (((1 << RATE_BUCKET_INTEGER_BITS) - 1) << RATE_BUCKET_DECIMAL_BITS)) {\n"
-                        first_action += f"\t\t{spin_unlock}\n"
-                        first_action +=  "\t\t{stats_replace}\n"
-                        first_action +=  "\t\treturn XDP_DROP;\n"
-                        first_action +=  "\t} else {\n"
-                        first_action +=  "\t\tif (unlikely(bucket_pkts < 0)) bucket_pkts = 0;\n"
-                        first_action += f"\t\trate->sent_time = time_masked | ((bucket_pkts + (1 << RATE_BUCKET_DECIMAL_BITS)) << (64 - RATE_BUCKET_BITS));\n"
-                        first_action += f"\t\t{spin_unlock}\n"
-                        first_action +=  "\t}\n"
+                        first_action +=  "if (matched) {\n"
+                        first_action +=  "\t{stats_replace}\n"
+                        first_action +=  "\treturn XDP_DROP;\n"
                         first_action +=  "}\n"
                 elif ty == "0x8007":
                     if low_bytes & 1 == 0:
diff --git a/xdp.c b/xdp.c
index 36a45e2a72783c2ee49a326c689edc16e69cd1a3..f59019a180411681df1844e13806f8ce35ad1f80 100644 (file)
--- a/xdp.c
+++ b/xdp.c
@@ -226,6 +226,29 @@ struct {
 #define SRC_HASH_BUCKET_COUNT_POW 4
 #define SRC_HASH_BUCKET_COUNT (1 << SRC_HASH_BUCKET_COUNT_POW)
 
+#define DO_RATE_LIMIT(do_lock, rate, time_masked, amt_in_pkt, limit_ns_per_pkt, matchbool) do { \
+if (rate) { \
+       do_lock; \
+       int64_t bucket_pkts = (rate->sent_time & (~RATE_TIME_MASK)) >> (64 - RATE_BUCKET_BITS); \
+       /* We mask the top 12 bits, so date overflows every 52 days, handled below */ \
+       int64_t time_diff = time_masked - ((int64_t)(rate->sent_time & RATE_TIME_MASK)); \
+       if (unlikely(time_diff < -1000000000 || time_diff > 16000000000)) { \
+               bucket_pkts = 0; \
+       } else { \
+               if (unlikely(time_diff < 0)) { time_diff = 0; } \
+               int64_t pkts_since_last = (time_diff << RATE_BUCKET_BITS) * ((uint64_t)amt_in_pkt) / ((uint64_t)limit_ns_per_pkt); \
+               bucket_pkts -= pkts_since_last; \
+       } \
+       if (bucket_pkts < (((1 << RATE_BUCKET_INTEGER_BITS) - 1) << RATE_BUCKET_DECIMAL_BITS)) { \
+               if (unlikely(bucket_pkts < 0)) bucket_pkts = 0; \
+               rate->sent_time = time_masked | ((bucket_pkts + (1 << RATE_BUCKET_DECIMAL_BITS)) << (64 - RATE_BUCKET_BITS)); \
+               matchbool = 0; \
+       } else { \
+               matchbool = 1; \
+       } \
+} \
+} while(0);
+
 #define CREATE_PERSRC_LOOKUP(IPV, IP_TYPE) \
 struct persrc_rate##IPV##_entry { \
        uint64_t sent_time; \
@@ -237,19 +260,12 @@ struct persrc_rate##IPV##_bucket { \
        struct persrc_rate##IPV##_entry entries[]; \
 }; \
  \
-struct persrc_rate##IPV##_ptr { \
-       struct persrc_rate##IPV##_entry *rate; \
-       struct bpf_spin_lock *lock; \
-}; \
- \
-__attribute__((always_inline)) \
-static inline struct persrc_rate##IPV##_ptr get_v##IPV##_persrc_ratelimit(IP_TYPE key, void *map, size_t map_limit, int64_t cur_time_masked) { \
-       struct persrc_rate##IPV##_ptr res = { .rate = NULL, .lock = NULL }; \
+static int check_v##IPV##_persrc_ratelimit(IP_TYPE key, void *map, size_t map_limit, int64_t cur_time_masked, uint64_t amt, uint64_t limit_ns_per_pkt) { \
        uint64_t hash = siphash_##IP_TYPE(key); \
  \
        const uint32_t map_key = hash % SRC_HASH_MAX_PARALLELISM; \
        struct persrc_rate##IPV##_bucket *buckets = bpf_map_lookup_elem(map, &map_key); \
-       if (!buckets) return res; \
+       if (!buckets) return 0; \
  \
        hash >>= SRC_HASH_MAX_PARALLELISM_POW; \
        map_limit >>= SRC_HASH_MAX_PARALLELISM_POW; \
@@ -261,9 +277,8 @@ static inline struct persrc_rate##IPV##_ptr get_v##IPV##_persrc_ratelimit(IP_TYP
        uint64_t min_sent_time = UINT64_MAX; \
        for (int i = 0; i < SRC_HASH_BUCKET_COUNT; i++) { \
                if (first_bucket[i].srcip == key) { \
-                       res.rate = &first_bucket[i]; \
-                       res.lock = &buckets->lock; \
-                       return res; \
+                       min_sent_idx = i; \
+                       break; \
                } \
                int64_t time_offset = ((int64_t)cur_time_masked) - (first_bucket[i].sent_time & RATE_TIME_MASK); \
                if (time_offset < RATE_MIN_TIME_OFFSET || time_offset > RATE_MAX_TIME_OFFSET) { \
@@ -275,11 +290,15 @@ static inline struct persrc_rate##IPV##_ptr get_v##IPV##_persrc_ratelimit(IP_TYP
                        min_sent_idx = i; \
                } \
        } \
-       res.rate = &first_bucket[min_sent_idx]; \
-       res.rate->srcip = key; \
-       res.rate->sent_time = 0; \
-       res.lock = &buckets->lock; \
-       return res; \
+       struct persrc_rate##IPV##_entry *entry = &first_bucket[min_sent_idx]; \
+       if (entry->srcip != key) { \
+               entry->srcip = key; \
+               entry->sent_time = 0; \
+       } \
+       int matched = 0; \
+       DO_RATE_LIMIT(, entry, cur_time_masked, amt, limit_ns_per_pkt, matched); \
+       bpf_spin_unlock(&buckets->lock); \
+       return matched; \
 }
 
 CREATE_PERSRC_LOOKUP(6, uint128_t)