}
// Rate limits are done in a static-sized leaky bucket with a decimal counter
+//
+// They are stored in a single uint64_t, with the top RATE_BUCKET_BITS holding
+// the packet count (as a fixed-point decimal) and the remaining low bits
+// holding the (masked) time in nanoseconds.
+//
// Bucket size is always exactly (1 << RATE_BUCKET_INTEGER_BITS)
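+//
+// As a rough illustration with the values below (8 decimal, 4 integer bits):
+// the count occupies the top 12 bits, so the bucket holds at most 16 packets,
+// tracked in 1/256th-of-a-packet steps, and the low 52 bits hold the ns
+// timestamp, which wraps roughly every 52 days.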
#define RATE_BUCKET_DECIMAL_BITS 8
#define RATE_BUCKET_INTEGER_BITS 4
#ifdef RATE_CNT
struct ratelimit {
struct bpf_spin_lock lock;
- uint64_t sent_time;
+ uint64_t pkts_and_time;
};
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
#define DO_RATE_LIMIT(do_lock, rate, time_masked, amt_in_pkt, limit_ns_per_pkt, matchbool) do { \
if (rate) { \
do_lock; \
- int64_t bucket_pkts = (rate->sent_time & (~RATE_TIME_MASK)) >> (64 - RATE_BUCKET_BITS); \
- /* We mask the top 12 bits, so date overflows every 52 days, handled below */ \
- int64_t time_diff = time_masked - ((int64_t)(rate->sent_time & RATE_TIME_MASK)); \
- if (unlikely(time_diff < -1000000000 || time_diff > 16000000000)) { \
+ int64_t bucket_pkts = (rate->pkts_and_time & (~RATE_TIME_MASK)) >> (64 - RATE_BUCKET_BITS); \
+ /* We mask off the top 12 bits, so the stored time wraps every ~52 days, resetting the counter below */ \
+ int64_t time_diff = time_masked - ((int64_t)(rate->pkts_and_time & RATE_TIME_MASK)); \
+ if (unlikely(time_diff < RATE_MIN_TIME_OFFSET || time_diff > RATE_MAX_TIME_OFFSET)) { \
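+ /* The entry is stale or the clock wrapped: restart with an empty bucket */ \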
bucket_pkts = 0; \
} else { \
if (unlikely(time_diff < 0)) { time_diff = 0; } \
+ /* To avoid storing too many bits, we make a simplifying assumption that all packets */ \
+ /* hit by a rule are the same size. Thus, when a rule is denominated in bytes rather */ \
+ /* than packets, we can keep counting packets and simply adjust the ratelimit by the */ \
+ /* size of the packet we're looking at. */ \
+ /* So here we reduce our packet counter by the time elapsed divided by our ns/packet */ \
+ /* limit, scaled by the size of the current packet. */ \
int64_t pkts_since_last = (time_diff << RATE_BUCKET_BITS) * ((uint64_t)amt_in_pkt) / ((uint64_t)limit_ns_per_pkt); \
bucket_pkts -= pkts_since_last; \
} \
+ /* Accept as long as we can add one to our bucket without overflow */ \
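+ /* (With 4 integer bits, that means accepting while the bucket holds fewer than 15 packets, */ \
+ /* so after adding one it never exceeds the 16-packet bucket size.) */ \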
if (bucket_pkts < (((1 << RATE_BUCKET_INTEGER_BITS) - 1) << RATE_BUCKET_DECIMAL_BITS)) { \
if (unlikely(bucket_pkts < 0)) bucket_pkts = 0; \
- rate->sent_time = time_masked | ((bucket_pkts + (1 << RATE_BUCKET_DECIMAL_BITS)) << (64 - RATE_BUCKET_BITS)); \
+ uint64_t new_packet_count = bucket_pkts + (1 << RATE_BUCKET_DECIMAL_BITS); \
+ rate->pkts_and_time = time_masked | (new_packet_count << (64 - RATE_BUCKET_BITS)); \
matchbool = 0; \
} else { \
matchbool = 1; \
#define CREATE_PERSRC_LOOKUP(IPV, IP_TYPE) \
struct persrc_rate##IPV##_entry { \
- uint64_t sent_time; \
+ uint64_t pkts_and_time; \
IP_TYPE srcip; \
}; \
\
\
uint64_t bucket_idx = SRC_HASH_BUCKET_COUNT; \
uint64_t min_sent_idx = 0; \
- uint64_t min_sent_time = UINT64_MAX; \
+ uint64_t min_time = UINT64_MAX; \
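+ /* Scan the hash bucket for this source's entry, tracking the least-recently-updated (or stale) slot as we go */ \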
for (uint64_t i = 0; i < SRC_HASH_BUCKET_COUNT; i++) { \
if (first_bucket[i].srcip == key) { \
bucket_idx = i; \
break; \
} \
- int64_t time_offset = ((int64_t)cur_time_masked) - (first_bucket[i].sent_time & RATE_TIME_MASK); \
+ int64_t time_offset = ((int64_t)cur_time_masked) - (first_bucket[i].pkts_and_time & RATE_TIME_MASK); \
if (time_offset < RATE_MIN_TIME_OFFSET || time_offset > RATE_MAX_TIME_OFFSET) { \
- min_sent_time = 0; \
+ min_time = 0; \
min_sent_idx = i; \
} \
- if ((first_bucket[i].sent_time & RATE_TIME_MASK) < min_sent_time) { \
- min_sent_time = first_bucket[i].sent_time & RATE_TIME_MASK; \
+ if ((first_bucket[i].pkts_and_time & RATE_TIME_MASK) < min_time) { \
+ min_time = first_bucket[i].pkts_and_time & RATE_TIME_MASK; \
min_sent_idx = i; \
} \
} \
struct persrc_rate##IPV##_entry *entry = &first_bucket[bucket_idx]; \
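+ /* If the slot we landed on isn't already tracking this source, claim it and start with an empty bucket */ \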
if (entry->srcip != key) { \
entry->srcip = key; \
- entry->sent_time = 0; \
+ entry->pkts_and_time = 0; \
} \
int matched = 0; \
DO_RATE_LIMIT(, entry, cur_time_masked, amt, limit_ns_per_pkt, matched); \