import ipaddress
from enum import Enum
import argparse
+import math
IP_PROTO_ICMP = 1
if proto == 4:
return "if (!( " + ast.write("((ip->tos & 0xfc) >> 2)") + " )) break;"
else:
- return "if (!( " + ast.write("((ip6->priority << 4) | ((ip6->flow_lbl[0] & 0xc0) >> 4) >> 2)") + " )) break;"
+ return "if (!( " + ast.write("((ip6->priority << 2) | ((ip6->flow_lbl[0] & 0xc0) >> 6))") + " )) break;"
def port_to_rule(ty, rules):
+ # Build the C match snippet for a port step: ty is the step keyword and rules is the
+ # expression text, parsed with parse_numbers_expr. For ty == "port" both "sport" and
+ # "dport" are handed to ast.write(); otherwise ty itself names the field to test.
+ # The emitted guard now breaks out of the rule unless ports_valid is set (it used to
+ # test that tcp and udp were not both NULL); ports_valid is defined outside this view.
if ty == "port" :
ast = parse_ast(rules, parse_numbers_expr)
- return "if (tcp == NULL && udp == NULL) break;\nif (!( " + ast.write("sport", "dport") + " )) break;"
+ return "if (!ports_valid) break;\nif (!( " + ast.write("sport", "dport") + " )) break;"
ast = parse_ast(rules, parse_numbers_expr)
- return "if (tcp == NULL && udp == NULL) break;\nif (!( " + ast.write(ty) + " )) break;"
+ return "if (!ports_valid) break;\nif (!( " + ast.write(ty) + " )) break;"
def tcp_flags_to_rule(rules):
ast = parse_ast(rules, parse_bit_expr)
return f"""if (ip6 == NULL) break;
if (!( {ast.write("((((uint32_t)(ip6->flow_lbl[0] & 0xf)) << 2*8) | (((uint32_t)ip6->flow_lbl[1]) << 1*8) | (uint32_t)ip6->flow_lbl[0])")} )) break;"""
+
with open("rules.h", "w") as out:
parse = argparse.ArgumentParser()
parse.add_argument("--ihl", dest="ihl", required=True, choices=["drop-options","accept-options","parse-options"])
rules4 = ""
use_v6_frags = False
rulecnt = 0
+ ratelimitcnt = 0
+ lastrule = None
for line in sys.stdin.readlines():
- t = line.split("{")
- if len(t) != 2:
- continue
- if t[0].strip() == "flow4":
- proto = 4
- rules4 += "\tdo {\\\n"
- elif t[0].strip() == "flow6":
- proto = 6
- rules6 += "\tdo {\\\n"
- else:
+ # A line containing "{" is a rule line. It is only remembered here; code is generated
+ # later, when a "BGP.ext_community: " line arrives and consumes lastrule.
+ if "{" in line:
+ if lastrule is not None:
+ # The previously remembered rule is still pending (it was never consumed and
+ # cleared), so it is reported and replaced by the new one.
+ print("Skipped rule due to lack of understood community tag: " + lastrule)
+ lastrule = line
continue
-
- def write_rule(r):
- global rules4, rules6
- if proto == 6:
- rules6 += "\t\t" + r.replace("\n", " \\\n\t\t") + " \\\n"
+ if "BGP.ext_community: " in line:
+ assert lastrule is not None
+
+ t = lastrule.split("{")
+ if t[0].strip() == "flow4":
+ proto = 4
+ rules4 += "\tdo {\\\n"
+ elif t[0].strip() == "flow6":
+ proto = 6
+ rules6 += "\tdo {\\\n"
else:
- rules4 += "\t\t" + r.replace("\n", " \\\n\t\t") + " \\\n"
-
- rule = t[1].split("}")[0].strip()
- for step in rule.split(";"):
- if step.strip().startswith("src") or step.strip().startswith("dst"):
- nets = step.strip()[3:].strip().split(" ")
- if len(nets) > 1:
- assert nets[1] == "offset"
- offset = nets[2]
+ continue
+
+ def write_rule(r):
+ global rules4, rules6
+ if proto == 6:
+ rules6 += "\t\t" + r.replace("\n", " \\\n\t\t") + " \\\n"
else:
- offset = None
- if step.strip().startswith("src"):
- write_rule(ip_to_rule(proto, nets[0], "saddr", offset))
+ rules4 += "\t\t" + r.replace("\n", " \\\n\t\t") + " \\\n"
+
+ rule = t[1].split("}")[0].strip()
+ for step in rule.split(";"):
+ if step.strip().startswith("src") or step.strip().startswith("dst"):
+ nets = step.strip()[3:].strip().split(" ")
+ if len(nets) > 1:
+ assert nets[1] == "offset"
+ offset = nets[2]
+ else:
+ offset = None
+ if step.strip().startswith("src"):
+ write_rule(ip_to_rule(proto, nets[0], "saddr", offset))
+ else:
+ write_rule(ip_to_rule(proto, nets[0], "daddr", offset))
+ elif step.strip().startswith("proto") and proto == 4:
+ write_rule(proto_to_rule(4, step.strip()[6:]))
+ elif step.strip().startswith("next header") and proto == 6:
+ write_rule(proto_to_rule(6, step.strip()[12:]))
+ elif step.strip().startswith("icmp type"):
+ write_rule(icmp_type_to_rule(proto, step.strip()[10:]))
+ elif step.strip().startswith("icmp code"):
+ write_rule(icmp_code_to_rule(proto, step.strip()[10:]))
+ elif step.strip().startswith("sport") or step.strip().startswith("dport") or step.strip().startswith("port"):
+ write_rule(port_to_rule(step.strip().split(" ")[0], step.strip().split(" ", 1)[1]))
+ elif step.strip().startswith("length"):
+ write_rule(len_to_rule(step.strip()[7:]))
+ elif step.strip().startswith("dscp"):
+ write_rule(dscp_to_rule(proto, step.strip()[5:]))
+ elif step.strip().startswith("tcp flags"):
+ write_rule(tcp_flags_to_rule(step.strip()[10:]))
+ elif step.strip().startswith("label"):
+ write_rule(flow_label_to_rule(step.strip()[6:]))
+ elif step.strip().startswith("fragment"):
+ if proto == 6:
+ use_v6_frags = True
+ write_rule(fragment_to_rule(proto, step.strip()[9:]))
+ elif step.strip() == "":
+ pass
else:
- write_rule(ip_to_rule(proto, nets[0], "daddr", offset))
- elif step.strip().startswith("proto") and proto == 4:
- write_rule(proto_to_rule(4, step.strip()[6:]))
- elif step.strip().startswith("next header") and proto == 6:
- write_rule(proto_to_rule(6, step.strip()[12:]))
- elif step.strip().startswith("icmp type"):
- write_rule(icmp_type_to_rule(proto, step.strip()[10:]))
- elif step.strip().startswith("icmp code"):
- write_rule(icmp_code_to_rule(proto, step.strip()[10:]))
- elif step.strip().startswith("sport") or step.strip().startswith("dport") or step.strip().startswith("port"):
- write_rule(port_to_rule(step.strip().split(" ")[0], step.strip().split(" ", 1)[1]))
- elif step.strip().startswith("length"):
- write_rule(len_to_rule(step.strip()[7:]))
- elif step.strip().startswith("dscp"):
- write_rule(dscp_to_rule(proto, step.strip()[5:]))
- elif step.strip().startswith("tcp flags"):
- write_rule(tcp_flags_to_rule(step.strip()[10:]))
- elif step.strip().startswith("label"):
- write_rule(flow_label_to_rule(step.strip()[6:]))
- elif step.strip().startswith("fragment"):
- if proto == 6:
- use_v6_frags = True
- write_rule(fragment_to_rule(proto, step.strip()[9:]))
- elif step.strip() == "":
- pass
+ assert False
+
+            # Now write the match handling!
+            # The community line is split on "(" and only pieces of the form
+            # "generic, 0xTTTTxxxx, 0xVVVVVVVV)" are used: the first four hex digits of
+            # the middle field select the action type (ty) and the last field is the
+            # action value (low_bytes).
+            # first_action collects the drop / rate-limit snippet and last_action the
+            # final return; both are written after the loop, first_action first, so
+            # they follow any statements the loop emits directly via write_rule().
+            first_action = None
+            last_action = None
+            for community in line.split("("):
+                if not community.startswith("generic, "):
+                    continue
+                blocks = community.split(",")
+                assert len(blocks) == 3
+                type_field = blocks[1].strip()
+                if len(type_field) != 10: # Should be 0x12345678
+                    continue
+                ty = type_field[:6]
+                low_bytes = int(blocks[2].strip(") \n"), 16)
+                if ty in ("0x8006", "0x800c"):
+                    # Rate limit: 0x8006 is accounted in bytes, 0x800c in packets.
+                    # low_bytes is decoded as a single-precision float: sign in bit 31,
+                    # 8-bit exponent biased by 127 in bits 23-30, 23-bit mantissa below.
+                    if first_action is not None:
+                        # Two ratelimit actions, just drop the old one. RFC 8955 says we can.
+                        first_action = None
+                    exp = (low_bytes & (0xff << 23)) >> 23
+                    if low_bytes == 0:
+                        first_action = "return XDP_DROP;"
+                    elif (low_bytes & (1 << 31)) != 0:
+                        # Negative limit, just drop
+                        first_action = "return XDP_DROP;"
+                    elif exp == 0xff:
+                        # NaN/INF. Just treat as INF and accept
+                        first_action = None
+                    elif exp < 127: # < 1
+                        # A biased exponent of exactly 127 encodes [1, 2), which is a
+                        # real limit of 1 per second and must not be turned into a drop.
+                        first_action = "return XDP_DROP;"
+                    elif exp >= 127 + 34:
+                        # The generated code multiplies the limit by an elapsed time of
+                        # up to 1000000000 ns (< 2**30) in 64 bits. A limit of 2**35 or
+                        # more can overflow that product, so treat it as unlimited.
+                        first_action = None
+                    else:
+                        mantissa = low_bytes & ((1 << 23) - 1)
+                        value = 1.0 + mantissa / (2**23)
+                        value *= 2**(exp-127)
+                        if ty == "0x8006":
+                            accessor = "rate->rate.sent_bytes"
+                        else:
+                            accessor = "rate->rate.sent_packets"
+                        # Note that int64_t will overflow after 292 years of uptime
+                        first_action = "int64_t time = bpf_ktime_get_ns();\n"
+                        first_action += f"const uint32_t ratelimitidx = {ratelimitcnt};\n"
+                        first_action += "uint64_t allowed_since_last = 0;\n"
+                        first_action += "struct ratelimit *rate = bpf_map_lookup_elem(&rate_map, &ratelimitidx);\n"
+                        first_action += "if (rate) {\n"
+                        first_action += "\tbpf_spin_lock(&rate->lock);\n"
+                        first_action += f"\tif (likely({accessor} > 0))" + " {\n"
+                        first_action += "\t\tint64_t diff = time - rate->sent_time;\n"
+                        # Unlikely or not, if the flow is slow, take a perf hit (though with the else if branch it doesn't matter)
+                        first_action += "\t\tif (unlikely(diff > 1000000000))\n"
+                        first_action += f"\t\t\t{accessor} = 0;\n"
+                        first_action += "\t\telse if (likely(diff > 0))\n"
+                        # The limit is truncated to a whole number of bytes/packets per second.
+                        first_action += f"\t\t\tallowed_since_last = ((uint64_t)diff) * {math.floor(value)} / 1000000000;\n"
+                        first_action += "\t}\n"
+                        first_action += f"\tif ({accessor} - ((int64_t)allowed_since_last) <= 0)" + " {\n"
+                        if ty == "0x8006":
+                            first_action += f"\t\t{accessor} = data_end - pktdata;\n"
+                        else:
+                            first_action += f"\t\t{accessor} = 1;\n"
+                        first_action += "\t\trate->sent_time = time;\n"
+                        first_action += "\t\tbpf_spin_unlock(&rate->lock);\n"
+                        first_action += "\t} else {\n"
+                        first_action += "\t\tbpf_spin_unlock(&rate->lock);\n"
+                        first_action += "\t\treturn XDP_DROP;\n"
+                        first_action += "\t}\n"
+                        first_action += "}\n"
+                        # Each emitted limiter gets its own slot index in rate_map.
+                        ratelimitcnt += 1
+                elif ty == "0x8007":
+                    # Flag bits: with bit 0 clear the rule ends in XDP_PASS; with bit 1
+                    # set the rule's match counter is incremented.
+                    if (low_bytes & 1) == 0:
+                        last_action = "return XDP_PASS;"
+                    if (low_bytes & 2) == 2:
+                        write_rule(f"const uint32_t ruleidx = STATIC_RULE_CNT + {rulecnt};")
+                        write_rule("INCREMENT_MATCH(ruleidx);")
+                elif ty == "0x8008":
+                    assert False, "We do not implement the redirect action"
+                elif ty == "0x8009":
+                    # Rewrite the packet's DSCP; only the low six bits may be set.
+                    assert (low_bytes & ~0b111111) == 0, "Invalid DSCP value"
+                    if proto == 4:
+                        # DSCP is the top six bits of ip->tos. The header checksum is
+                        # adjusted by the tos delta and folded back into 16 bits.
+                        write_rule("int32_t chk = ~BE16(ip->check) & 0xffff;")
+                        write_rule("uint8_t orig_tos = ip->tos;")
+                        write_rule("ip->tos = (ip->tos & 3) | " + str(low_bytes << 2) + ";")
+                        write_rule("chk = (chk - orig_tos + ip->tos);")
+                        write_rule("if (unlikely(chk > 0xffff)) { chk -= 65535; }")
+                        write_rule("else if (unlikely(chk < 0)) { chk += 65535; }")
+                        write_rule("ip->check = ~BE16(chk);")
+                    else:
+                        # The upper four DSCP bits go to ip6->priority and the lower two
+                        # to the top two bits of ip6->flow_lbl[0].
+                        write_rule("ip6->priority = " + str(low_bytes >> 2) + ";")
+                        write_rule("ip6->flow_lbl[0] = (ip6->flow_lbl[0] & 0x3f) | " + str((low_bytes & 3) << 6) + ";")
+            if first_action is not None:
+                write_rule(first_action)
+            if last_action is not None:
+                write_rule(last_action)
+ if proto == 6:
+ rules6 += "\t} while(0);\\\n"
else:
- assert False
- write_rule(f"const uint32_t ruleidx = STATIC_RULE_CNT + {rulecnt};")
- write_rule("DO_RETURN(ruleidx, XDP_DROP);")
- if proto == 6:
- rules6 += "\t} while(0);\\\n"
- else:
- rules4 += "\t} while(0);\\\n"
- rulecnt += 1
+ rules4 += "\t} while(0);\\\n"
+ rulecnt += 1
+ lastrule = None
out.write("\n")
out.write(f"#define RULECNT {rulecnt}\n")
+ if ratelimitcnt != 0:
+ out.write(f"#define RATE_CNT {ratelimitcnt}\n")
if rules4 != "":
out.write("#define NEED_V4_PARSE\n")
out.write("#define RULES4 {\\\n" + rules4 + "}\n")