Fix AS path detection
[dnsseed-rust] / src / bgp_client.rs
index c20323d0e9fe6b7a7150e743b479a394fdba04c6..8dfb3eba579d94cbc0c3297f1cea9a6e7fccca1a 100644 (file)
@@ -1,7 +1,7 @@
 use std::sync::{Arc, Mutex};
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::cmp;
-use std::collections::HashMap;
+use std::collections::{HashMap, hash_map};
 use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
 use std::time::{Duration, Instant};
 
@@ -24,23 +24,68 @@ use crate::timeout_stream::TimeoutStream;
 
 const PATH_SUFFIX_LEN: usize = 3;
 #[derive(Clone)]
-struct Route { // 32 bytes
+struct Route { // 32 bytes with a path id u32
        path_suffix: [u32; PATH_SUFFIX_LEN],
        path_len: u32,
        pref: u32,
        med: u32,
 }
+#[allow(dead_code)]
+const ROUTE_LEN: usize = 36 - std::mem::size_of::<(u32, Route)>();
+
+// To keep memory tight (and since we don't need such close alignment), newtype the v4/v6 routing
+// table entries to make sure they are aligned to single bytes.
+
+#[repr(packed)]
+#[derive(PartialEq, Eq, Hash)]
+struct V4Addr {
+       addr: [u8; 4],
+       pfxlen: u8,
+}
+impl From<(Ipv4Addr, u8)> for V4Addr {
+       fn from(p: (Ipv4Addr, u8)) -> Self {
+               Self {
+                       addr: p.0.octets(),
+                       pfxlen: p.1,
+               }
+       }
+}
+#[allow(dead_code)]
+const V4_ALIGN: usize = 1 - std::mem::align_of::<V4Addr>();
+#[allow(dead_code)]
+const V4_SIZE: usize = 5 - std::mem::size_of::<V4Addr>();
+
+#[repr(packed)]
+#[derive(PartialEq, Eq, Hash)]
+struct V6Addr {
+       addr: [u8; 16],
+       pfxlen: u8,
+}
+impl From<(Ipv6Addr, u8)> for V6Addr {
+       fn from(p: (Ipv6Addr, u8)) -> Self {
+               Self {
+                       addr: p.0.octets(),
+                       pfxlen: p.1,
+               }
+       }
+}
+#[allow(dead_code)]
+const V6_ALIGN: usize = 1 - std::mem::align_of::<V6Addr>();
+#[allow(dead_code)]
+const V6_SIZE: usize = 17 - std::mem::size_of::<V6Addr>();
 
 struct RoutingTable {
-       v4_table: HashMap<(Ipv4Addr, u8), HashMap<u32, Route>>,
-       v6_table: HashMap<(Ipv6Addr, u8), HashMap<u32, Route>>,
+       // We really want a HashMap for the values here, but they'll only ever contain a few entries,
+       // and Vecs are way more memory-efficient in that case.
+       v4_table: HashMap<V4Addr, Vec<(u32, Route)>>,
+       v6_table: HashMap<V6Addr, Vec<(u32, Route)>>,
 }
 
 impl RoutingTable {
        fn new() -> Self {
                Self {
-                       v4_table: HashMap::new(),
-                       v6_table: HashMap::new(),
+                       v4_table: HashMap::with_capacity(900_000),
+                       v6_table: HashMap::with_capacity(100_000),
                }
        }
 
@@ -48,62 +93,88 @@ impl RoutingTable {
                macro_rules! lookup_res {
                        ($addrty: ty, $addr: expr, $table: expr, $addr_bits: expr) => { {
                                //TODO: Optimize this (probably means making the tables btrees)!
-                               let mut lookup = $addr.octets();
+                               let mut lookup = <$addrty>::from(($addr, $addr_bits));
                                for i in 0..$addr_bits {
-                                       let lookup_addr = <$addrty>::from(lookup);
-                                       if let Some(routes) = $table.get(&(lookup_addr, $addr_bits - i as u8)).map(|hm| hm.values()) {
+                                       if let Some(routes) = $table.get(&lookup) {
                                                if routes.len() > 0 {
-                                                       return ($addr_bits - i as u8, routes.collect());
+                                                       return (lookup.pfxlen, routes.iter().map(|v| &v.1).collect());
                                                }
                                        }
-                                       lookup[lookup.len() - (i/8) - 1] &= !(1u8 << (i % 8));
+                                       lookup.addr[lookup.addr.len() - (i/8) - 1] &= !(1u8 << (i % 8));
+                                       lookup.pfxlen -= 1;
                                }
                                (0, vec![])
                        } }
                }
                match ip {
-                       IpAddr::V4(v4a) => lookup_res!(Ipv4Addr, v4a, self.v4_table, 32),
-                       IpAddr::V6(v6a) => lookup_res!(Ipv6Addr, v6a, self.v6_table, 128)
+                       IpAddr::V4(v4a) => lookup_res!(V4Addr, v4a, self.v4_table, 32),
+                       IpAddr::V6(v6a) => lookup_res!(V6Addr, v6a, self.v6_table, 128)
                }
        }
 
        fn withdraw(&mut self, route: NLRIEncoding) {
+               macro_rules! remove {
+                       ($rt: expr, $v: expr, $id: expr) => { {
+                               match $rt.entry($v.into()) {
+                                       hash_map::Entry::Occupied(mut entry) => {
+                                               entry.get_mut().retain(|e| e.0 != $id);
+                                               if entry.get_mut().is_empty() {
+                                                       entry.remove();
+                                               }
+                                       },
+                                       _ => {},
+                               }
+                       } }
+               }
                match route {
                        NLRIEncoding::IP(p) => {
                                let (ip, len) = <(IpAddr, u8)>::from(&p);
                                match ip {
-                                       IpAddr::V4(v4a) => self.v4_table.get_mut(&(v4a, len)).and_then(|hm| hm.remove(&0)),
-                                       IpAddr::V6(v6a) => self.v6_table.get_mut(&(v6a, len)).and_then(|hm| hm.remove(&0)),
+                                       IpAddr::V4(v4a) => remove!(self.v4_table, (v4a, len), 0),
+                                       IpAddr::V6(v6a) => remove!(self.v6_table, (v6a, len), 0),
                                }
                        },
                        NLRIEncoding::IP_WITH_PATH_ID((p, id)) => {
                                let (ip, len) = <(IpAddr, u8)>::from(&p);
                                match ip {
-                                       IpAddr::V4(v4a) => self.v4_table.get_mut(&(v4a, len)).and_then(|hm| hm.remove(&id)),
-                                       IpAddr::V6(v6a) => self.v6_table.get_mut(&(v6a, len)).and_then(|hm| hm.remove(&id)),
+                                       IpAddr::V4(v4a) => remove!(self.v4_table, (v4a, len), id),
+                                       IpAddr::V6(v6a) => remove!(self.v6_table, (v6a, len), id),
                                }
                        },
-                       NLRIEncoding::IP_MPLS(_) => None,
+                       NLRIEncoding::IP_MPLS(_) => (),
+                       NLRIEncoding::IP_MPLS_WITH_PATH_ID(_) => (),
+                       NLRIEncoding::IP_VPN_MPLS(_) => (),
+                       NLRIEncoding::L2VPN(_) => (),
                };
        }
 
        fn announce(&mut self, prefix: NLRIEncoding, route: Route) {
+               macro_rules! insert {
+                       ($rt: expr, $v: expr, $id: expr) => { {
+                               let entry = $rt.entry($v.into()).or_insert(Vec::new());
+                               entry.retain(|e| e.0 != $id);
+                               entry.push(($id, route));
+                       } }
+               }
                match prefix {
                        NLRIEncoding::IP(p) => {
                                let (ip, len) = <(IpAddr, u8)>::from(&p);
                                match ip {
-                                       IpAddr::V4(v4a) => self.v4_table.entry((v4a, len)).or_insert(HashMap::new()).insert(0, route),
-                                       IpAddr::V6(v6a) => self.v6_table.entry((v6a, len)).or_insert(HashMap::new()).insert(0, route),
+                                       IpAddr::V4(v4a) => insert!(self.v4_table, (v4a, len), 0),
+                                       IpAddr::V6(v6a) => insert!(self.v6_table, (v6a, len), 0),
                                }
                        },
                        NLRIEncoding::IP_WITH_PATH_ID((p, id)) => {
                                let (ip, len) = <(IpAddr, u8)>::from(&p);
                                match ip {
-                                       IpAddr::V4(v4a) => self.v4_table.entry((v4a, len)).or_insert(HashMap::new()).insert(id, route),
-                                       IpAddr::V6(v6a) => self.v6_table.entry((v6a, len)).or_insert(HashMap::new()).insert(id, route),
+                                       IpAddr::V4(v4a) => insert!(self.v4_table, (v4a, len), id),
+                                       IpAddr::V6(v6a) => insert!(self.v6_table, (v6a, len), id),
                                }
                        },
-                       NLRIEncoding::IP_MPLS(_) => None,
+                       NLRIEncoding::IP_MPLS(_) => (),
+                       NLRIEncoding::IP_MPLS_WITH_PATH_ID(_) => (),
+                       NLRIEncoding::IP_VPN_MPLS(_) => (),
+                       NLRIEncoding::L2VPN(_) => (),
                };
        }
 }
@@ -131,8 +202,8 @@ impl<'a> std::io::Read for BytesDecoder<'a> {
        }
 }
 
-struct MsgCoder<'a>(&'a Printer);
-impl<'a> codec::Decoder for MsgCoder<'a> {
+struct MsgCoder(Option<Capabilities>);
+impl codec::Decoder for MsgCoder {
        type Item = Message;
        type Error = std::io::Error;
 
@@ -141,15 +212,17 @@ impl<'a> codec::Decoder for MsgCoder<'a> {
                        buf: bytes,
                        pos: 0
                };
-               match (Reader {
+               let def_cap = Default::default();
+               let mut reader = Reader {
                        stream: &mut decoder,
-                       capabilities: Capabilities {
-                               FOUR_OCTET_ASN_SUPPORT: true,
-                               EXTENDED_PATH_NLRI_SUPPORT: true,
-                       }
-               }).read() {
+                       capabilities: if let Some(cap) = &self.0 { cap } else { &def_cap },
+               };
+               match reader.read() {
                        Ok((_header, msg)) => {
                                decoder.buf.advance(decoder.pos);
+                               if let Message::Open(ref o) = &msg {
+                                       self.0 = Some(Capabilities::from_parameters(o.parameters.clone()));
+                               }
                                Ok(Some(msg))
                        },
                        Err(e) => match e.kind() {
@@ -159,12 +232,12 @@ impl<'a> codec::Decoder for MsgCoder<'a> {
                }
        }
 }
-impl<'a> codec::Encoder for MsgCoder<'a> {
+impl codec::Encoder for MsgCoder {
        type Item = Message;
        type Error = std::io::Error;
 
        fn encode(&mut self, msg: Message, res: &mut bytes::BytesMut) -> Result<(), std::io::Error> {
-               msg.write(&mut BytesCoder(res))?;
+               msg.encode(&mut BytesCoder(res))?;
                Ok(())
        }
 }
@@ -186,14 +259,35 @@ impl BGPClient {
                });
 
                let primary_route = path_vecs.pop().unwrap();
-               'asn_candidates: for asn in primary_route.path_suffix.iter().rev() {
-                       if *asn == 0 { continue 'asn_candidates; }
-                       for secondary_route in path_vecs.iter() {
-                               if !secondary_route.path_suffix.contains(asn) {
-                                       continue 'asn_candidates;
+               if path_vecs.len() > 3 {
+                       // If we have more than 3 paths besides the primary one, try to find the last unique ASN which doesn't show up in other paths
+                       // If we hit a T1 that is reasonably assumed to care about net neutrality, return the
+                       // previous ASN.
+                       let mut prev_asn = 0;
+                       'asn_candidates: for asn in primary_route.path_suffix.iter().rev() {
+                               if *asn == 0 { continue 'asn_candidates; }
+                               match *asn {
+                                       // Included: CenturyLink (L3), Cogent, Telia, NTT, GTT, Level3,
+                                       //           GBLX (L3), Zayo, TI Sparkle Seabone, HE, Telefonica
+                                       // Left out from Caida top-20: TATA, PCCW, Vodafone, RETN, Orange, Telstra,
+                                       //                             Singtel, Rostelecom, DTAG
+                                       209|174|1299|2914|3257|3356|3549|6461|6762|6939|12956 if prev_asn != 0 => return prev_asn,
+                                       _ => if path_vecs.iter().any(|route| !route.path_suffix.contains(asn)) {
+                                               if prev_asn != 0 { return prev_asn } else {
+                                                       // Multi-origin prefix, just give up and take the last AS in the
+                                                       // default path
+                                                       break 'asn_candidates;
+                                               }
+                                       } else {
+                                               // We only ever possibly return an ASN if it appears in all paths
+                                               prev_asn = *asn;
+                                       },
                                }
                        }
-                       return *asn;
+                       // All paths were the same, if the first ASN is non-0, return it.
+                       if prev_asn != 0 {
+                               return prev_asn;
+                       }
                }
 
                for asn in primary_route.path_suffix.iter().rev() {
@@ -275,7 +369,7 @@ impl BGPClient {
                                                future::err(())
                                        })
                                }).and_then(move |stream| {
-                                       let (write, read) = Framed::new(stream.0, MsgCoder(printer)).split();
+                                       let (write, read) = Framed::new(stream.0, MsgCoder(None)).split();
                                        let (mut sender, receiver) = mpsc::channel(10); // We never really should send more than 10 messages unless they're dumb
                                        tokio::spawn(write.sink_map_err(|_| { () }).send_all(receiver)
                                                .then(|_| {
@@ -294,7 +388,7 @@ impl BGPClient {
                                                        OpenCapability::AddPath(vec![
                                                                (AFI::IPV4, SAFI::Unicast, AddPathDirection::ReceivePaths),
                                                                (AFI::IPV6, SAFI::Unicast, AddPathDirection::ReceivePaths)]),
-                                               ])]
+                                               ])],
                                        }));
                                        TimeoutStream::new_persistent(read, timeout).for_each(move |bgp_msg| {
                                                if client.shutdown.load(Ordering::Relaxed) {