From: Matt Corallo
Date: Tue, 11 May 2021 20:27:37 +0000 (+0000)
Subject: Initial checkin
X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=a89c8e77ebd6b257bb622df741f71b3262896a84;p=bpfnofrags

Initial checkin
---

a89c8e77ebd6b257bb622df741f71b3262896a84

[review note: the line breaks below were reconstructed from the '+' markers of
 a whitespace-collapsed copy of this patch; the added-line counts match every
 hunk header. Indentation (tabs) is assumed, not recovered. The bare
 "#include" lines and the missing author address look like angle-bracket text
 lost in extraction -- TODO restore them from the repository before applying.]

diff --git a/egress_tc.c b/egress_tc.c
new file mode 100644
index 0000000..db53626
--- /dev/null
+++ b/egress_tc.c
@@ -0,0 +1,13 @@
+#include "swapper.h"
+
+#include
+#include
+#include
+
+SEC("egress")
+int tc_egress(struct __sk_buff *ctx)
+{
+	maybe_swap_egress_eth((void *)(size_t)ctx->data, (void *)(size_t)ctx->data_end);
+
+	return TC_ACT_OK;
+}
diff --git a/flowspec_wrapper_xdp.c b/flowspec_wrapper_xdp.c
new file mode 100644
index 0000000..5a44a30
--- /dev/null
+++ b/flowspec_wrapper_xdp.c
@@ -0,0 +1,16 @@
+#include "swapper.h"
+
+#include
+#include
+#include
+
+// Wrapper file that can call out to flowspec-xdp-generated XDP libs
+
+int xdp_drop_prog(struct xdp_md *ctx);
+
+SEC("xdp_ingress")
+int xdp_ingress_prog(struct xdp_md *ctx)
+{
+	maybe_swap_ingress_eth((void *)(size_t)ctx->data, (void *)(size_t)ctx->data_end);
+	return xdp_drop_prog(ctx);
+}
diff --git a/ingress_xdp.c b/ingress_xdp.c
new file mode 100644
index 0000000..797ea8b
--- /dev/null
+++ b/ingress_xdp.c
@@ -0,0 +1,13 @@
+#include "swapper.h"
+
+#include
+#include
+#include
+
+SEC("xdp_ingress")
+int xdp_ingress_prog(struct xdp_md *ctx)
+{
+	maybe_swap_ingress_eth((void *)(size_t)ctx->data, (void *)(size_t)ctx->data_end);
+
+	return XDP_PASS;
+}
diff --git a/swapper.h b/swapper.h
new file mode 100644
index 0000000..9795149
--- /dev/null
+++ b/swapper.h
@@ -0,0 +1,158 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#if defined(__LITTLE_ENDIAN)
+#define BE16(a) ((((uint16_t)(a & 0xff00)) >> 8) | (((uint16_t)(a & 0xff)) << 8))
+#define IP32(a, b, c, d) (((((uint32_t)a) & 0xff) << 0*8) | \
+                          ((((uint32_t)b) & 0xff) << 1*8) | \
+                          ((((uint32_t)c) & 0xff) << 2*8) | \
+                          ((((uint32_t)d) & 0xff) << 3*8))
+#elif defined(__BIG_ENDIAN)
+#define BE16(a) ((uint16_t)a)
+#define IP32(a, b, c, d) (((((uint32_t)a) & 0xff) << 3*8) | \
+                          ((((uint32_t)b) & 0xff) << 2*8) | \
+                          ((((uint32_t)c) & 0xff) << 1*8) | \
+                          ((((uint32_t)d) & 0xff) << 0*8))
+#else
+#error "Need endian info"
+#endif
+
+#include "ip_filter.h"
+
+/* IP flags. */
+#define IP_CE		0x8000		/* Flag: "Congestion" */
+#define IP_DF		0x4000		/* Flag: "Don't Fragment" */
+#define IP_MF		0x2000		/* Flag: "More Fragments" */
+#define IP_OFFSET	0x1FFF		/* "Fragment Offset" part */
+
+#define IP_PROTO_UDP 17
+#define IP_PROTO_FIRST_FRAG 253
+#define IP_PROTO_SECOND_FRAG 254
+
+#define unlikely(a) __builtin_expect(a, 0)
+#define likely(a) __builtin_expect(a, 1)
+
+struct packet_count {
+	uint64_t packets;
+};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 2);
+	__u32 *key;
+	struct packet_count *value;
+} frag_count_map SEC(".maps");
+
+#define INC_COUNTER(counter_id) do { \
+	const int reason = counter_id; \
+	struct packet_count *value = bpf_map_lookup_elem(&frag_count_map, &reason); \
+	if (value) { \
+		value->packets += 1; \
+	} \
+} while (0)
+
+static inline void _maybe_swap_egress(struct iphdr *ip) {
+	if (unlikely(ip->ihl != 5)) return;
+
+	CHECK_SRC_DST(ip->daddr);
+
+	if (ip->protocol == IP_PROTO_UDP) {
+		if (ip->frag_off == BE16(IP_MF)) {
+			int32_t chk = ~BE16(ip->check) & 0xffff;
+			chk = chk - IP_MF - IP_PROTO_UDP + IP_PROTO_FIRST_FRAG;
+			// We're only decreasing the checksum here
+			if (unlikely(chk < 0)) { chk += 65535; }
+			ip->check = ~BE16(chk);
+
+			ip->frag_off = 0;
+			ip->protocol = IP_PROTO_FIRST_FRAG;
+			INC_COUNTER(0);
+		} else if (ip->frag_off == BE16(185)) {
+			int32_t chk = ~BE16(ip->check) & 0xffff;
+			chk = chk - 185 - IP_PROTO_UDP + IP_PROTO_SECOND_FRAG;
+			// We're only increasing the checksum here
+			if (unlikely(chk > 0xffff)) { chk -= 65535; }
+			ip->check = ~BE16(chk);
+
+			ip->frag_off = 0;
+			ip->protocol = IP_PROTO_SECOND_FRAG;
+			INC_COUNTER(1);
+		}
+	}
+}
+
+static inline void _maybe_swap_ingress(struct iphdr *ip) {
+	if (unlikely(ip->ihl != 5)) return;
+
+	if (ip->protocol == IP_PROTO_SECOND_FRAG) {
+		int32_t chk = ~BE16(ip->check) & 0xffff;
+		chk = chk + 185 + IP_PROTO_UDP - IP_PROTO_SECOND_FRAG;
+		// We're only decreasing the checksum here
+		if (unlikely(chk < 0)) { chk += 65535; }
+		ip->check = ~BE16(chk);
+
+		ip->frag_off = BE16(185);
+		ip->protocol = IP_PROTO_UDP;
+		INC_COUNTER(0);
+	} else if (ip->protocol == IP_PROTO_FIRST_FRAG) {
+		int32_t chk = ~BE16(ip->check) & 0xffff;
+		chk = chk + IP_MF + IP_PROTO_UDP - IP_PROTO_FIRST_FRAG;
+		// We're only increasing the checksum here
+		if (unlikely(chk > 0xffff)) { chk -= 65535; }
+		ip->check = ~BE16(chk);
+
+		ip->frag_off = BE16(IP_MF);
+		ip->protocol = IP_PROTO_UDP;
+		INC_COUNTER(1);
+	}
+}
+
+
+// Our own ethhdr with optional vlan tags
+struct _ethhdr_vlan {
+	unsigned char	h_dest[ETH_ALEN];	/* destination eth addr */
+	unsigned char	h_source[ETH_ALEN];	/* source ether addr */
+	__be16		vlan_magic;		/* 0x8100 */
+	__be16		tci;		/* PCP (3 bits), DEI (1 bit), and VLAN (12 bits) */
+	__be16		h_proto;		/* packet type ID field */
+} __attribute__((packed));
+
+#define _CHECK_LEN_RETURN(start, struc) \
+	if (unlikely((void*)(start) + sizeof(struct struc) > data_end)) return;
+
+#define _CHECK_ETH_TO_HEADER(swap_fn) \
+	void * pktdata; \
+	unsigned short eth_proto; \
+	\
+	_CHECK_LEN_RETURN(data, ethhdr); \
+	struct ethhdr *const eth = (void*)data; \
+	pktdata = (void *)data + sizeof(struct ethhdr); \
+	\
+	if (eth->h_proto == BE16(ETH_P_8021Q)) { \
+		_CHECK_LEN_RETURN(data, _ethhdr_vlan); \
+		struct _ethhdr_vlan *const eth_vlan = (void*)data; \
+		pktdata = (void *)data + sizeof(struct _ethhdr_vlan); \
+		eth_proto = eth_vlan->h_proto; \
+	} else { \
+		eth_proto = eth->h_proto; \
+	} \
+\
+	if (eth_proto == BE16(ETH_P_IP)) { \
+		_CHECK_LEN_RETURN(pktdata, iphdr); \
+		struct iphdr *ip = (struct iphdr*) pktdata; \
+		swap_fn(ip); \
+	} else if (eth_proto == BE16(ETH_P_IPV6)) { \
+		/* TODO: Support v6? */ \
+	}
+
+static inline void maybe_swap_egress_eth(void *data, void* data_end) {
+	_CHECK_ETH_TO_HEADER(_maybe_swap_egress);
+}
+static inline void maybe_swap_ingress_eth(void *data, void* data_end) {
+	_CHECK_ETH_TO_HEADER(_maybe_swap_ingress);
+}