IP_PROTO_TCP = 6
IP_PROTO_UDP = 17
+ORD_LESS = 0
+ORD_GREATER = 1
+ORD_EQUAL = 2
+
class ASTAction(Enum):
OR = 1
AND = 2
return ASTNode(ASTAction.EXPR, BitExpr(expr))
+class IpRule:
+ def __init__(self, ty, offset, net, proto):
+ self.ty = ty
+ self.offset = offset
+ if offset is None:
+ self.offset = 0
+ self.net = net
+ self.proto = proto
+
+ def ord(self, other):
+ assert self.ty == other.ty
+ assert self.proto == other.proto
+ if self.offset < other.offset:
+ return ORD_LESS
+ if self.offset > other.offset:
+ return ORD_GREATER
+
+ if self.net.overlaps(other.net):
+ if self.net.prefixlen > other.net.prefixlen:
+ return ORD_LESS
+ elif self.net.prefixlen < other.net.prefixlen:
+ return ORD_GREATER
+ else:
+ if self.net < other.net:
+ return ORD_LESS
+ else:
+ assert self.net > other.net
+ return ORD_GREATER
+
+ return ORD_EQUAL
+
+ def __lt__(self, other):
+ return self.ord(other) == ORD_LESS
+
+ def __eq__(self, other):
+ return type(other) == IpRule and self.ty == other.ty and self.offset == other.offset and self.net == other.net and self.proto == other.proto
+
+ def __str__(self):
+ if self.proto == 4:
+ assert self.offset == 0
+ return f"""if ((ip->{self.ty} & MASK4({self.net.prefixlen})) != BIGEND32({int(self.net.network_address)}ULL))
+ break;"""
+ else:
+ u32s = [(int(self.net.network_address) >> (3*32)) & 0xffffffff,
+ (int(self.net.network_address) >> (2*32)) & 0xffffffff,
+ (int(self.net.network_address) >> (1*32)) & 0xffffffff,
+ (int(self.net.network_address) >> (0*32)) & 0xffffffff]
+ if self.offset == 0:
+ mask = f"MASK6({self.net.prefixlen})"
+ else:
+ mask = f"MASK6_OFFS({self.offset}, {self.net.prefixlen})"
+ return f"""if ((ip6->{self.ty} & {mask}) != (BIGEND128({u32s[0]}ULL, {u32s[1]}ULL, {u32s[2]}ULL, {u32s[3]}ULL) & {mask}))
+ break;"""
def ip_to_rule(proto, inip, ty, offset):
if proto == 4:
assert offset is None
net = ipaddress.IPv4Network(inip.strip())
- if net.prefixlen == 0:
- return ""
- return f"""if ((ip->{ty} & MASK4({net.prefixlen})) != BIGEND32({int(net.network_address)}ULL))
- break;"""
+ return IpRule(ty, offset, net, 4)
else:
net = ipaddress.IPv6Network(inip.strip())
- if net.prefixlen == 0:
- return ""
- u32s = [(int(net.network_address) >> (3*32)) & 0xffffffff,
- (int(net.network_address) >> (2*32)) & 0xffffffff,
- (int(net.network_address) >> (1*32)) & 0xffffffff,
- (int(net.network_address) >> (0*32)) & 0xffffffff]
- if offset is None:
- mask = f"MASK6({net.prefixlen})"
- else:
- mask = f"MASK6_OFFS({offset}, {net.prefixlen})"
- return f"""if ((ip6->{ty} & {mask}) != (BIGEND128({u32s[0]}ULL, {u32s[1]}ULL, {u32s[2]}ULL, {u32s[3]}ULL) & {mask}))
- break;"""
+ return IpRule(ty, offset, net, 6)
def fragment_to_rule(ipproto, rules):
ast = parse_ast(rules, parse_frag_expr, False)
return f"""if (ip6 == NULL) break;
if (!( {ast.write("((((uint32_t)(ip6->flow_lbl[0] & 0xf)) << 2*8) | (((uint32_t)ip6->flow_lbl[1]) << 1*8) | (uint32_t)ip6->flow_lbl[0])")} )) break;"""
+class RuleAction(Enum):
+ CONDITIONS = 1
+ ACTION = 2
+ LIST = 3
+class RuleNode:
+ def __init__(self, ty, action, inner):
+ self.ty = ty
+ self.action = action
+ self.inner = inner
+ if ty == RuleAction.ACTION:
+ assert inner is None
+ assert type(action) == str
+ elif ty == RuleAction.LIST:
+ assert type(inner) == list
+ assert action is None
+ for item in inner:
+ assert type(item) == RuleNode
+ else:
+ assert ty == RuleAction.CONDITIONS
+ assert type(action) == list
+ assert type(inner) == RuleNode
+
+ def __lt__(self, other):
+ assert self.ty == RuleAction.CONDITIONS
+ assert other.ty == RuleAction.CONDITIONS
+
+ o = ORD_EQUAL
+
+ # RFC first has us sort by dest, then source, then other conditions. We don't implement the
+ # other conditions because doing so requires re-implementing the Flowspec wire format,
+ # which isn't trivial. However, we do implement the source/dest sorting in the hopes it
+ # allows us to group rules according to source/dest IP and hopefully LLVM optimizes out
+ # later rules.
+
+ selfdest = next(filter(lambda a : type(a) == IpRule and a.ty == "daddr", self.action), None)
+ otherdest = next(filter(lambda a : type(a) == IpRule and a.ty == "daddr", self.action), None)
+ if o == ORD_EQUAL and selfdest is not None and otherdest is not None:
+ o = selfdest.ord(otherdest)
+
+ if o == ORD_EQUAL:
+ selfsrc = next(filter(lambda a : type(a) == IpRule and a.ty == "saddr", self.action), None)
+ othersrc = next(filter(lambda a : type(a) == IpRule and a.ty == "saddr", self.action), None)
+ if selfsrc is not None and othersrc is None:
+ return True
+ elif selfsrc is None and othersrc is not None:
+ return False
+ elif selfsrc is not None and othersrc is not None:
+ o = selfsrc.ord(othersrc)
+
+ if o == ORD_LESS:
+ return True
+ return self.action < other.action
+
+ def maybe_join(self, neighbor):
+ if self.ty == RuleAction.CONDITIONS and neighbor.ty == RuleAction.CONDITIONS:
+ overlapping_conditions = [x for x in self.action if x in neighbor.action]
+ if len(overlapping_conditions) != 0:
+ us = RuleNode(RuleAction.CONDITIONS, [x for x in self.action if x not in overlapping_conditions], self.inner)
+ them = RuleNode(RuleAction.CONDITIONS, [x for x in neighbor.action if x not in overlapping_conditions], neighbor.inner)
+ self.action = overlapping_conditions
+ if self.inner.ty == RuleAction.LIST and us.action == []:
+ self.inner.inner.append(them)
+ else:
+ self.inner = RuleNode(RuleAction.LIST, None, [us, them])
+ self.inner.flatten()
+ return True
+ return False
+
+ def flatten(self):
+ # LLVM can be pretty bad at optimizing out common subexpressions. Thus, we have to do a
+ # pass here to toptimize out common subexpressions in back-to-back rules.
+ # See https://bugs.llvm.org/show_bug.cgi?id=52455
+ assert self.ty == RuleAction.LIST
+ did_update = True
+ while did_update:
+ did_update = False
+ for i in range(0, len(self.inner) - 1):
+ if self.inner[i].maybe_join(self.inner[i + 1]):
+ del self.inner[i + 1]
+ did_update = True
+ break
+
+ def write(self, out, pfx="\t"):
+ if self.ty == RuleAction.CONDITIONS:
+ out.write(pfx + "do {\\\n")
+ for cond in self.action:
+ out.write("\t" + pfx + str(cond).replace("\n", " \\\n\t" + pfx) + " \\\n")
+ self.inner.write(out, pfx)
+ out.write(pfx + "} while(0);\\\n")
+ elif self.ty == RuleAction.LIST:
+ for item in self.inner:
+ item.write(out, pfx + "\t")
+ else:
+ out.write("\t" + pfx + self.action.strip().replace("\n", " \\\n\t" + pfx) + " \\\n")
with open("rules.h", "w") as out:
parse = argparse.ArgumentParser()
assert False
out.write("#define REQ_8021Q " + args.vlan_tag + "\n")
- rules6 = ""
- rules4 = ""
+ rules6 = []
+ rules4 = []
use_v6_frags = False
- rulecnt = 0
+ stats_rulecnt = 0
ratelimitcnt = 0
v4persrcratelimits = []
v5persrcratelimits = []
t = lastrule.split("{")
if t[0].strip() == "flow4":
proto = 4
- rules4 += "\tdo {\\\n"
elif t[0].strip() == "flow6":
proto = 6
- rules6 += "\tdo {\\\n"
else:
continue
+ conditions = []
def write_rule(r):
- global rules4, rules6
- if proto == 6:
- rules6 += "\t\t" + r.replace("\n", " \\\n\t\t") + " \\\n"
- else:
- rules4 += "\t\t" + r.replace("\n", " \\\n\t\t") + " \\\n"
+ global conditions
+ conditions.append(r)
rule = t[1].split("}")[0].strip()
for step in rule.split(";"):
else:
assert False
+ actions = ""
+ def write_rule(r):
+ global actions
+ actions += r + "\n"
+
# Now write the match handling!
first_action = None
- stats_action = None
+ stats_action = ""
last_action = None
for community in line.split("("):
if not community.startswith("generic, "):
value = 1.0 + mantissa / (2**23)
value *= 2**(exp-127)
- first_action = "int64_t time = bpf_ktime_get_ns() & RATE_TIME_MASK;\n"
+ first_action = "int64_t time_masked = bpf_ktime_get_ns() & RATE_TIME_MASK;\n"
first_action += f"int64_t per_pkt_ns = (1000000000LL << RATE_BUCKET_INTEGER_BITS) / {math.floor(value)};\n"
+ if ty == "0x8006" or ty == "0x8306":
+ first_action += "uint64_t amt = data_end - pktdata;\n"
+ else:
+ first_action += "uint64_t amt = 1;\n"
if ty == "0x8006" or ty == "0x800c":
- spin_lock = "bpf_spin_lock(&rate->lock);"
- spin_unlock = "bpf_spin_unlock(&rate->lock);"
first_action += f"const uint32_t ratelimitidx = {ratelimitcnt};\n"
first_action += "struct ratelimit *rate = bpf_map_lookup_elem(&rate_map, &ratelimitidx);\n"
ratelimitcnt += 1
+ first_action += "int matched = 0;\n"
+ first_action += "DO_RATE_LIMIT(bpf_spin_lock(&rate->lock), rate, time_masked, amt, per_pkt_ns, matched);\n"
+ first_action += "if (rate) { bpf_spin_unlock(&rate->lock); }\n"
else:
- spin_lock = "/* No locking as we're locked in get_v*_persrc_ratelimit */"
- spin_unlock = "bpf_spin_unlock(rate_ptr.lock);"
if proto == 4:
if mid_byte > 32:
continue
first_action += f"const uint32_t srcip = ip->saddr & MASK4({mid_byte});\n"
first_action += f"void *rate_map = &v4_src_rate_{len(v4persrcratelimits)};\n"
- first_action += f"struct persrc_rate4_ptr rate_ptr = get_v4_persrc_ratelimit(srcip, rate_map, {(high_byte + 1) * 4096});\n"
- first_action += f"struct persrc_rate4_entry *rate = rate_ptr.rate;\n"
+ first_action += f"int matched = check_v4_persrc_ratelimit(srcip, rate_map, {(high_byte + 1) * 4096}, time_masked, amt, per_pkt_ns);\n"
v4persrcratelimits.append((high_byte + 1) * 4096)
elif mid_byte <= 64:
first_action += f"const uint64_t srcip = BE128BEHIGH64(ip6->saddr & MASK6({mid_byte}));\n"
first_action += f"void *rate_map = &v5_src_rate_{len(v5persrcratelimits)};\n"
- first_action += f"struct persrc_rate5_ptr rate_ptr = get_v5_persrc_ratelimit(srcip, rate_map, {(high_byte + 1) * 4096});\n"
- first_action += f"struct persrc_rate5_entry *rate = rate_ptr.rate;\n"
+ first_action += f"int matched = check_v5_persrc_ratelimit(srcip, rate_map, {(high_byte + 1) * 4096}, time_masked, amt, per_pkt_ns);\n"
v5persrcratelimits.append((high_byte + 1) * 4096)
else:
if mid_byte > 128:
continue
first_action += f"const uint128_t srcip = ip6->saddr & MASK6({mid_byte});\n"
first_action += f"void *rate_map = &v6_src_rate_{len(v6persrcratelimits)};\n"
- first_action += f"struct persrc_rate6_ptr rate_ptr = get_v6_persrc_ratelimit(srcip, rate_map, {(high_byte + 1) * 4096});\n"
- first_action += f"struct persrc_rate6_entry *rate = rate_ptr.rate;\n"
+ first_action += f"int matched = check_v6_persrc_ratelimit(srcip, rate_map, {(high_byte + 1) * 4096}, time_masked, amt, per_pkt_ns);\n"
v6persrcratelimits.append((high_byte + 1) * 4096)
- if ty == "0x8006" or ty == "0x8306":
- first_action += "uint64_t amt = data_end - pktdata;\n"
- else:
- first_action += "uint64_t amt = 1;\n"
- first_action += "if (rate) {\n"
- first_action += f"\t{spin_lock}\n"
- first_action += "\tint64_t bucket_pkts = (rate->sent_time & (~RATE_TIME_MASK)) >> (64 - RATE_BUCKET_BITS);\n"
- # We mask the top 12 bits, so date overflows every 52 days, handled below
- first_action += "\tint64_t time_diff = time - ((int64_t)(rate->sent_time & RATE_TIME_MASK));\n"
- first_action += "\tif (unlikely(time_diff < -1000000000 || time_diff > 16000000000)) {\n"
- first_action += "\t\tbucket_pkts = 0;\n"
- first_action += "\t} else {\n"
- first_action += "\t\tif (unlikely(time_diff < 0)) { time_diff = 0; }\n"
- first_action += f"\t\tint64_t pkts_since_last = (time_diff << RATE_BUCKET_BITS) * amt / per_pkt_ns;\n"
- first_action += "\t\tbucket_pkts -= pkts_since_last;\n"
- first_action += "\t}\n"
- first_action += "\tif (bucket_pkts >= (((1 << RATE_BUCKET_INTEGER_BITS) - 1) << RATE_BUCKET_DECIMAL_BITS)) {\n"
- first_action += f"\t\t{spin_unlock}\n"
- first_action += "\t\t{stats_replace}\n"
- first_action += "\t\treturn XDP_DROP;\n"
- first_action += "\t} else {\n"
- first_action += "\t\tif (unlikely(bucket_pkts < 0)) bucket_pkts = 0;\n"
- first_action += f"\t\trate->sent_time = time | ((bucket_pkts + (1 << RATE_BUCKET_DECIMAL_BITS)) << (64 - RATE_BUCKET_BITS));\n"
- first_action += f"\t\t{spin_unlock}\n"
- first_action += "\t}\n"
+ first_action += "if (matched) {\n"
+ first_action += "\t{stats_replace}\n"
+ first_action += "\treturn XDP_DROP;\n"
first_action += "}\n"
elif ty == "0x8007":
if low_bytes & 1 == 0:
last_action = "return XDP_PASS;"
if low_bytes & 2 == 2:
- stats_action = f"const uint32_t ruleidx = STATIC_RULE_CNT + {rulecnt};\n"
+ stats_action = f"const uint32_t ruleidx = STATIC_RULE_CNT + {stats_rulecnt};\n"
stats_action += "INCREMENT_MATCH(ruleidx);"
elif ty == "0x8008":
assert False # We do not implement the redirect action
write_rule("ip6->flow_lbl[0] = (ip6->flow_lbl[0] & 0x3f) | " + str((low_bytes & 3) << 6) + ";")
if first_action is not None:
write_rule(first_action.replace("{stats_replace}", stats_action))
- if stats_action is not None and (first_action is None or "{stats_replace}" not in first_action):
+ if stats_action != "" and (first_action is None or "{stats_replace}" not in first_action):
write_rule(stats_action)
if last_action is not None:
write_rule(last_action)
if proto == 6:
- rules6 += "\t} while(0);\\\n"
+ rules6.append(RuleNode(RuleAction.CONDITIONS, conditions, RuleNode(RuleAction.ACTION, actions, None)))
else:
- rules4 += "\t} while(0);\\\n"
- rulecnt += 1
+ rules4.append(RuleNode(RuleAction.CONDITIONS, conditions, RuleNode(RuleAction.ACTION, actions, None)))
+ if stats_action != "":
+ print(rule)
+ stats_rulecnt += 1
lastrule = None
out.write("\n")
- out.write(f"#define RULECNT {rulecnt}\n")
+ out.write(f"#define STATS_RULECNT {stats_rulecnt}\n")
if ratelimitcnt != 0:
out.write(f"#define RATE_CNT {ratelimitcnt}\n")
- if rules4 != "":
+
+ if len(rules4) != 0:
out.write("#define NEED_V4_PARSE\n")
- out.write("#define RULES4 {\\\n" + rules4 + "}\n")
- if rules6:
+ out.write("#define RULES4 {\\\n")
+ # First sort the rules according to the RFC, then make it a single
+ # LIST rule and call flatten() to unify redundant conditions
+ rules4.sort()
+ rules4 = RuleNode(RuleAction.LIST, None, rules4)
+ rules4.flatten()
+ rules4.write(out)
+ out.write("}\n")
+
+ if len(rules6) != 0:
out.write("#define NEED_V6_PARSE\n")
- out.write("#define RULES6 {\\\n" + rules6 + "}\n")
+ out.write("#define RULES6 {\\\n")
+ # First sort the rules according to the RFC, then make it a single
+ # LIST rule and call flatten() to unify redundant conditions
+ rules6.sort()
+ rules6 = RuleNode(RuleAction.LIST, None, rules6)
+ rules6.flatten()
+ rules6.write(out)
+ out.write("}\n")
+
if args.v6frag == "ignore-parse-if-rule":
if use_v6_frags:
out.write("#define PARSE_V6_FRAG PARSE\n")