Initial checkin
author Matt Corallo <git@bluematt.me>
Tue, 11 May 2021 20:27:37 +0000 (20:27 +0000)
committer Matt Corallo <git@bluematt.me>
Tue, 11 May 2021 23:52:49 +0000 (23:52 +0000)
egress_tc.c [new file with mode: 0644]
flowspec_wrapper_xdp.c [new file with mode: 0644]
ingress_xdp.c [new file with mode: 0644]
swapper.h [new file with mode: 0644]

diff --git a/egress_tc.c b/egress_tc.c
new file mode 100644 (file)
index 0000000..db53626
--- /dev/null
@@ -0,0 +1,13 @@
+#include "swapper.h"
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <bpf/bpf_helpers.h>
+
+/*
+ * TC egress classifier: gives the fragment swapper a chance to rewrite the
+ * outgoing IPv4 header in place, then always lets the packet continue.
+ */
+SEC("egress")
+int tc_egress(struct __sk_buff *ctx)
+{
+       /* ctx->data / ctx->data_end are 32-bit fields; widen before casting. */
+       void *pkt_start = (void *)(size_t)ctx->data;
+       void *pkt_end = (void *)(size_t)ctx->data_end;
+
+       maybe_swap_egress_eth(pkt_start, pkt_end);
+       return TC_ACT_OK;
+}
diff --git a/flowspec_wrapper_xdp.c b/flowspec_wrapper_xdp.c
new file mode 100644 (file)
index 0000000..5a44a30
--- /dev/null
@@ -0,0 +1,16 @@
+#include "swapper.h"
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <bpf/bpf_helpers.h>
+
+// Wrapper file that can call out to flowspec-xdp-generated XDP libs
+
+/* Defined outside this file; presumably the flowspec-xdp-generated filter. */
+int xdp_drop_prog(struct xdp_md *ctx);
+
+/*
+ * XDP ingress entry point: first undoes the fragment swap on the incoming
+ * IPv4 header (if any), then hands the packet to xdp_drop_prog() and returns
+ * whatever verdict that program produces.
+ */
+SEC("xdp_ingress")
+int xdp_ingress_prog(struct xdp_md *ctx)
+{
+       void *pkt_start = (void *)(size_t)ctx->data;
+       void *pkt_end = (void *)(size_t)ctx->data_end;
+
+       maybe_swap_ingress_eth(pkt_start, pkt_end);
+
+       return xdp_drop_prog(ctx);
+}
diff --git a/ingress_xdp.c b/ingress_xdp.c
new file mode 100644 (file)
index 0000000..797ea8b
--- /dev/null
@@ -0,0 +1,13 @@
+#include "swapper.h"
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <bpf/bpf_helpers.h>
+
+/*
+ * XDP ingress entry point: undoes the fragment swap on the incoming IPv4
+ * header (if any) and always passes the packet up the stack.
+ */
+SEC("xdp_ingress")
+int xdp_ingress_prog(struct xdp_md *ctx)
+{
+       /* ctx->data / ctx->data_end are 32-bit fields; widen before casting. */
+       void *pkt_start = (void *)(size_t)ctx->data;
+       void *pkt_end = (void *)(size_t)ctx->data_end;
+
+       maybe_swap_ingress_eth(pkt_start, pkt_end);
+       return XDP_PASS;
+}
diff --git a/swapper.h b/swapper.h
new file mode 100644 (file)
index 0000000..9795149
--- /dev/null
+++ b/swapper.h
@@ -0,0 +1,158 @@
+#include <stdint.h>
+#include <unistd.h>
+#include <endian.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/*
+ * Compile-time byte-order helpers.
+ *
+ * BE16(a)           - converts a 16-bit value between host and network order
+ *                     (the operation is its own inverse).
+ * IP32(a, b, c, d)  - builds the in-memory (network order) representation of
+ *                     the IPv4 address a.b.c.d.
+ *
+ * The branch is selected with the compiler-predefined __BYTE_ORDER__ rather
+ * than defined(__LITTLE_ENDIAN): glibc's <endian.h> defines BOTH
+ * __LITTLE_ENDIAN and __BIG_ENDIAN (as the constants 1234 and 4321), so a
+ * plain defined() test always picks the little-endian branch. Every macro
+ * argument is parenthesised so that expressions such as BE16(x | y) expand
+ * correctly.
+ */
+#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
+    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#define BE16(a) ((((uint16_t)((a) & 0xff00)) >> 8) | (((uint16_t)((a) & 0xff)) << 8))
+#define IP32(a, b, c, d) (((((uint32_t)(a)) & 0xff) << (0*8)) | \
+                          ((((uint32_t)(b)) & 0xff) << (1*8)) | \
+                          ((((uint32_t)(c)) & 0xff) << (2*8)) | \
+                          ((((uint32_t)(d)) & 0xff) << (3*8)))
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
+      (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#define BE16(a) ((uint16_t)(a))
+#define IP32(a, b, c, d) (((((uint32_t)(a)) & 0xff) << (3*8)) | \
+                          ((((uint32_t)(b)) & 0xff) << (2*8)) | \
+                          ((((uint32_t)(c)) & 0xff) << (1*8)) | \
+                          ((((uint32_t)(d)) & 0xff) << (0*8)))
+#else
+#error "Need endian info"
+#endif
+
+#include "ip_filter.h"
+
+/*
+ * IP flags: host-order masks over the 16-bit frag_off field of struct iphdr.
+ * The swap helpers convert them with BE16() (e.g. BE16(IP_MF)) before
+ * comparing against or storing into the on-wire field.
+ */
+#define IP_CE          0x8000          /* Flag: "Congestion"           */
+#define IP_DF          0x4000          /* Flag: "Don't Fragment"       */
+#define IP_MF          0x2000          /* Flag: "More Fragments"       */
+#define IP_OFFSET      0x1FFF          /* "Fragment Offset" part       */
+
+#define IP_PROTO_UDP 17 /* protocol value the swap helpers look for / restore */
+#define IP_PROTO_FIRST_FRAG 253 /* stand-in protocol written in place of UDP on a first fragment */
+#define IP_PROTO_SECOND_FRAG 254 /* stand-in protocol written in place of UDP on a second fragment */
+
+/*
+ * Branch-prediction hints. The condition is normalised with !! so that any
+ * truthy scalar (including a pointer, or a value other than 1) is compared
+ * against the expected value as exactly 0 or 1.
+ */
+#define unlikely(a) __builtin_expect(!!(a), 0)
+#define likely(a) __builtin_expect(!!(a), 1)
+
+/* Value stored in each slot of frag_count_map: a running packet total. */
+struct packet_count {
+       uint64_t packets;
+};
+
+/*
+ * Two-slot per-CPU array of packet counters, indexed by the id passed to
+ * INC_COUNTER().
+ */
+struct {
+       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+       __uint(max_entries, 2);
+       __type(key, __u32);
+       __type(value, struct packet_count);
+} frag_count_map SEC(".maps");
+
+/*
+ * Adds one to the frag_count_map slot selected by counter_id. A failed
+ * lookup is silently ignored.
+ */
+#define INC_COUNTER(counter_id) do { \
+       const int slot = (counter_id); \
+       struct packet_count *entry = bpf_map_lookup_elem(&frag_count_map, &slot); \
+       if (entry) { \
+               entry->packets++; \
+       } \
+} while (0)
+
+/*
+ * Egress half of the swap: rewrites a matching UDP fragment so that it looks
+ * like an unfragmented packet carrying a stand-in protocol number.
+ *
+ *   frag_off == MF only (offset 0)      -> protocol IP_PROTO_FIRST_FRAG
+ *   frag_off == offset 185, MF clear    -> protocol IP_PROTO_SECOND_FRAG
+ *
+ * In both cases frag_off is cleared and the header checksum is patched
+ * incrementally: the stored value is un-complemented, adjusted by the
+ * host-order difference of the rewritten fields, folded back into 16 bits
+ * (ones'-complement wrap, hence 65535) and complemented again.
+ *
+ * Only option-less headers (ihl == 5) are touched. CHECK_SRC_DST comes from
+ * ip_filter.h, which is not shown here; presumably it returns early for
+ * addresses that must be left alone -- TODO confirm.
+ */
+static inline void _maybe_swap_egress(struct iphdr *ip) {
+       if (unlikely(ip->ihl != 5)) {
+               return;
+       }
+
+       CHECK_SRC_DST(ip->daddr);
+
+       if (ip->protocol != IP_PROTO_UDP) {
+               return;
+       }
+
+       if (ip->frag_off == BE16(IP_MF)) {
+               int32_t sum = ~BE16(ip->check) & 0xffff;
+
+               /* Net change is negative, so only an underflow can occur. */
+               sum += IP_PROTO_FIRST_FRAG - IP_PROTO_UDP - IP_MF;
+               if (unlikely(sum < 0)) {
+                       sum += 65535;
+               }
+
+               ip->check = ~BE16(sum);
+               ip->frag_off = 0;
+               ip->protocol = IP_PROTO_FIRST_FRAG;
+               INC_COUNTER(0);
+               return;
+       }
+
+       if (ip->frag_off == BE16(185)) {
+               int32_t sum = ~BE16(ip->check) & 0xffff;
+
+               /* Net change is positive, so only an overflow can occur. */
+               sum += IP_PROTO_SECOND_FRAG - IP_PROTO_UDP - 185;
+               if (unlikely(sum > 0xffff)) {
+                       sum -= 65535;
+               }
+
+               ip->check = ~BE16(sum);
+               ip->frag_off = 0;
+               ip->protocol = IP_PROTO_SECOND_FRAG;
+               INC_COUNTER(1);
+       }
+}
+
+/*
+ * Ingress half of the swap: turns a packet carrying one of the stand-in
+ * protocol numbers back into the UDP fragment it was made from.
+ *
+ *   IP_PROTO_SECOND_FRAG -> UDP, frag_off = offset 185, MF clear
+ *   IP_PROTO_FIRST_FRAG  -> UDP, frag_off = MF only (offset 0)
+ *
+ * The header checksum is patched incrementally in host order and folded back
+ * into 16 bits with a ones'-complement wrap (hence 65535). Only option-less
+ * headers (ihl == 5) are touched.
+ *
+ * NOTE(review): the counter slots here are the reverse of
+ * _maybe_swap_egress (there slot 0 counts first fragments) -- confirm that
+ * this is intended.
+ */
+static inline void _maybe_swap_ingress(struct iphdr *ip) {
+       if (unlikely(ip->ihl != 5)) {
+               return;
+       }
+
+       if (ip->protocol == IP_PROTO_SECOND_FRAG) {
+               int32_t sum = ~BE16(ip->check) & 0xffff;
+
+               /* Net change is negative, so only an underflow can occur. */
+               sum += 185 + IP_PROTO_UDP - IP_PROTO_SECOND_FRAG;
+               if (unlikely(sum < 0)) {
+                       sum += 65535;
+               }
+
+               ip->check = ~BE16(sum);
+               ip->frag_off = BE16(185);
+               ip->protocol = IP_PROTO_UDP;
+               INC_COUNTER(0);
+               return;
+       }
+
+       if (ip->protocol == IP_PROTO_FIRST_FRAG) {
+               int32_t sum = ~BE16(ip->check) & 0xffff;
+
+               /* Net change is positive, so only an overflow can occur. */
+               sum += IP_MF + IP_PROTO_UDP - IP_PROTO_FIRST_FRAG;
+               if (unlikely(sum > 0xffff)) {
+                       sum -= 65535;
+               }
+
+               ip->check = ~BE16(sum);
+               ip->frag_off = BE16(IP_MF);
+               ip->protocol = IP_PROTO_UDP;
+               INC_COUNTER(1);
+       }
+}
+
+
+/*
+ * Ethernet header followed by exactly one VLAN tag, packed so it can be
+ * overlaid directly on packet data. It is only used once the plain
+ * struct ethhdr's h_proto has been seen to equal ETH_P_8021Q; untagged
+ * frames keep using struct ethhdr.
+ */
+struct _ethhdr_vlan {
+       unsigned char   h_dest[ETH_ALEN];       /* destination eth addr */
+       unsigned char   h_source[ETH_ALEN];     /* source ether addr    */
+       __be16          vlan_magic;             /* 0x8100 */
+       __be16          tci;            /* PCP (3 bits), DEI (1 bit), and VLAN (12 bits) */
+       __be16          h_proto;                /* packet type ID field */
+} __attribute__((packed));
+
+/*
+ * Returns from the enclosing (void) function unless a whole `struct struc`
+ * starting at `start` fits before `data_end`, which must be a variable in
+ * the caller's scope. Wrapped in do { } while (0) so the macro behaves as a
+ * single statement (safe inside if/else) and takes its ';' at the call site.
+ */
+#define _CHECK_LEN_RETURN(start, struc) \
+       do { \
+               if (unlikely((void*)(start) + sizeof(struct struc) > data_end)) \
+                       return; \
+       } while (0)
+
+/*
+ * Body of the maybe_swap_*_eth() helpers. Expects `data` and `data_end` in
+ * the caller's scope, walks past the Ethernet header (plus one 802.1Q tag
+ * when present) and, for IPv4 payloads whose header fits inside the packet,
+ * calls swap_fn() on it. Returns from the caller as soon as a header would
+ * run past data_end. Everything that is not IPv4 is left untouched.
+ */
+#define _CHECK_ETH_TO_HEADER(swap_fn) \
+       _CHECK_LEN_RETURN(data, ethhdr); \
+       const struct ethhdr *const eth_hdr = (void *)data; \
+       void *l3_start = (void *)data + sizeof(struct ethhdr); \
+       unsigned short l3_proto = eth_hdr->h_proto; \
+ \
+       if (l3_proto == BE16(ETH_P_8021Q)) { \
+               /* Tagged frame: the real ethertype sits after the tag. */ \
+               _CHECK_LEN_RETURN(data, _ethhdr_vlan); \
+               const struct _ethhdr_vlan *const tagged = (void *)data; \
+               l3_start = (void *)data + sizeof(struct _ethhdr_vlan); \
+               l3_proto = tagged->h_proto; \
+       } \
+ \
+       /* TODO: Support v6? (ETH_P_IPV6 is currently ignored.) */ \
+       if (l3_proto == BE16(ETH_P_IP)) { \
+               _CHECK_LEN_RETURN(l3_start, iphdr); \
+               swap_fn((struct iphdr *)l3_start); \
+       }
+
+/*
+ * Egress entry point for a raw frame: `data` points at the Ethernet header
+ * and `data_end` one past the last accessible byte. Applies
+ * _maybe_swap_egress() to the IPv4 header, if there is one.
+ */
+static inline void maybe_swap_egress_eth(void *data, void *data_end)
+{
+       _CHECK_ETH_TO_HEADER(_maybe_swap_egress);
+}
+/*
+ * Ingress entry point for a raw frame: `data` points at the Ethernet header
+ * and `data_end` one past the last accessible byte. Applies
+ * _maybe_swap_ingress() to the IPv4 header, if there is one.
+ */
+static inline void maybe_swap_ingress_eth(void *data, void *data_end)
+{
+       _CHECK_ETH_TO_HEADER(_maybe_swap_ingress);
+}