Fix AS path detection
[dnsseed-rust] / src / bgp_client.rs
index 4d393c39de094026a263d7557392a1f289e3b564..8dfb3eba579d94cbc0c3297f1cea9a6e7fccca1a 100644 (file)
@@ -1,8 +1,7 @@
 use std::sync::{Arc, Mutex};
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::cmp;
-use std::ops::Bound::Included;
-use std::collections::BTreeMap;
+use std::collections::{HashMap, hash_map};
 use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
 use std::time::{Duration, Instant};
 
@@ -23,88 +22,159 @@ use futures::sync::mpsc;
 use crate::printer::{Printer, Stat};
 use crate::timeout_stream::TimeoutStream;
 
-struct Route {
-       path: Vec<u32>,
+const PATH_SUFFIX_LEN: usize = 3;
+#[derive(Clone)]
+struct Route { // 32 bytes with a path id u32
+       path_suffix: [u32; PATH_SUFFIX_LEN],
+       path_len: u32,
        pref: u32,
        med: u32,
 }
+#[allow(dead_code)]
+const ROUTE_LEN: usize = 36 - std::mem::size_of::<(u32, Route)>();
+
+// To keep memory tight (and since we don't need such close alignment), newtype the v4/v6 routing
+// table entries to make sure they are aligned to single bytes.
+
+#[repr(packed)]
+#[derive(PartialEq, Eq, Hash)]
+struct V4Addr {
+       addr: [u8; 4],
+       pfxlen: u8,
+}
+impl From<(Ipv4Addr, u8)> for V4Addr {
+       fn from(p: (Ipv4Addr, u8)) -> Self {
+               Self {
+                       addr: p.0.octets(),
+                       pfxlen: p.1,
+               }
+       }
+}
+#[allow(dead_code)]
+const V4_ALIGN: usize = 1 - std::mem::align_of::<V4Addr>();
+#[allow(dead_code)]
+const V4_SIZE: usize = 5 - std::mem::size_of::<V4Addr>();
+
+#[repr(packed)]
+#[derive(PartialEq, Eq, Hash)]
+struct V6Addr {
+       addr: [u8; 16],
+       pfxlen: u8,
+}
+impl From<(Ipv6Addr, u8)> for V6Addr {
+       fn from(p: (Ipv6Addr, u8)) -> Self {
+               Self {
+                       addr: p.0.octets(),
+                       pfxlen: p.1,
+               }
+       }
+}
+#[allow(dead_code)]
+const V6_ALIGN: usize = 1 - std::mem::align_of::<V6Addr>();
+#[allow(dead_code)]
+const V6_SIZE: usize = 17 - std::mem::size_of::<V6Addr>();
 
 struct RoutingTable {
-       v4_table: BTreeMap<(Ipv4Addr, u8, u32), Arc<Route>>,
-       v6_table: BTreeMap<(Ipv6Addr, u8, u32), Arc<Route>>,
+       // We really want a HashMap for the values here, but they'll only ever contain a few entries,
+       // and Vecs are way more memory-efficient in that case.
+       v4_table: HashMap<V4Addr, Vec<(u32, Route)>>,
+       v6_table: HashMap<V6Addr, Vec<(u32, Route)>>,
 }
 
 impl RoutingTable {
        fn new() -> Self {
                Self {
-                       v4_table: BTreeMap::new(),
-                       v6_table: BTreeMap::new(),
+                       v4_table: HashMap::with_capacity(900_000),
+                       v6_table: HashMap::with_capacity(100_000),
                }
        }
 
-       fn get_route_attrs(&self, ip: IpAddr) -> Vec<Arc<Route>> {
+       fn get_route_attrs(&self, ip: IpAddr) -> (u8, Vec<&Route>) {
                macro_rules! lookup_res {
                        ($addrty: ty, $addr: expr, $table: expr, $addr_bits: expr) => { {
-                               let mut res = Vec::new();
-                               //TODO: Optimize this!
-                               for i in (0..$addr_bits).rev() {
-                                       let mut lookup = $addr.octets();
-                                       for b in 0..(i / 8) {
-                                               lookup[lookup.len() - b - 1] = 0;
-                                       }
-                                       lookup[lookup.len() - (i/8) - 1] &= !(((1u16 << (i % 8)) - 1) as u8);
-                                       let lookup_addr = <$addrty>::from(lookup);
-                                       for attrs in $table.range((Included((lookup_addr, $addr_bits - i as u8, 0)), Included((lookup_addr, $addr_bits - i as u8, std::u32::MAX)))) {
-                                               res.push(Arc::clone(&attrs.1));
+                               //TODO: Optimize this (probably means making the tables btrees)!
+                               let mut lookup = <$addrty>::from(($addr, $addr_bits));
+                               for i in 0..$addr_bits {
+                                       if let Some(routes) = $table.get(&lookup) {
+                                               if routes.len() > 0 {
+                                                       return (lookup.pfxlen, routes.iter().map(|v| &v.1).collect());
+                                               }
                                        }
-                                       if !res.is_empty() { break; }
+                                       lookup.addr[lookup.addr.len() - (i/8) - 1] &= !(1u8 << (i % 8));
+                                       lookup.pfxlen -= 1;
                                }
-                               res
+                               (0, vec![])
                        } }
                }
                match ip {
-                       IpAddr::V4(v4a) => lookup_res!(Ipv4Addr, v4a, self.v4_table, 32),
-                       IpAddr::V6(v6a) => lookup_res!(Ipv6Addr, v6a, self.v6_table, 128)
+                       IpAddr::V4(v4a) => lookup_res!(V4Addr, v4a, self.v4_table, 32),
+                       IpAddr::V6(v6a) => lookup_res!(V6Addr, v6a, self.v6_table, 128)
                }
        }
 
        fn withdraw(&mut self, route: NLRIEncoding) {
+               macro_rules! remove {
+                       ($rt: expr, $v: expr, $id: expr) => { {
+                               match $rt.entry($v.into()) {
+                                       hash_map::Entry::Occupied(mut entry) => {
+                                               entry.get_mut().retain(|e| e.0 != $id);
+                                               if entry.get_mut().is_empty() {
+                                                       entry.remove();
+                                               }
+                                       },
+                                       _ => {},
+                               }
+                       } }
+               }
                match route {
                        NLRIEncoding::IP(p) => {
                                let (ip, len) = <(IpAddr, u8)>::from(&p);
                                match ip {
-                                       IpAddr::V4(v4a) => self.v4_table.remove(&(v4a, len, 0)),
-                                       IpAddr::V6(v6a) => self.v6_table.remove(&(v6a, len, 0)),
+                                       IpAddr::V4(v4a) => remove!(self.v4_table, (v4a, len), 0),
+                                       IpAddr::V6(v6a) => remove!(self.v6_table, (v6a, len), 0),
                                }
                        },
                        NLRIEncoding::IP_WITH_PATH_ID((p, id)) => {
                                let (ip, len) = <(IpAddr, u8)>::from(&p);
                                match ip {
-                                       IpAddr::V4(v4a) => self.v4_table.remove(&(v4a, len, id)),
-                                       IpAddr::V6(v6a) => self.v6_table.remove(&(v6a, len, id)),
+                                       IpAddr::V4(v4a) => remove!(self.v4_table, (v4a, len), id),
+                                       IpAddr::V6(v6a) => remove!(self.v6_table, (v6a, len), id),
                                }
                        },
-                       NLRIEncoding::IP_MPLS(_) => None,
+                       NLRIEncoding::IP_MPLS(_) => (),
+                       NLRIEncoding::IP_MPLS_WITH_PATH_ID(_) => (),
+                       NLRIEncoding::IP_VPN_MPLS(_) => (),
+                       NLRIEncoding::L2VPN(_) => (),
                };
        }
 
-       fn announce(&mut self, prefix: NLRIEncoding, route: Arc<Route>) {
+       fn announce(&mut self, prefix: NLRIEncoding, route: Route) {
+               macro_rules! insert {
+                       ($rt: expr, $v: expr, $id: expr) => { {
+                               let entry = $rt.entry($v.into()).or_insert(Vec::new());
+                               entry.retain(|e| e.0 != $id);
+                               entry.push(($id, route));
+                       } }
+               }
                match prefix {
                        NLRIEncoding::IP(p) => {
                                let (ip, len) = <(IpAddr, u8)>::from(&p);
                                match ip {
-                                       IpAddr::V4(v4a) => self.v4_table.insert((v4a, len, 0), route),
-                                       IpAddr::V6(v6a) => self.v6_table.insert((v6a, len, 0), route),
+                                       IpAddr::V4(v4a) => insert!(self.v4_table, (v4a, len), 0),
+                                       IpAddr::V6(v6a) => insert!(self.v6_table, (v6a, len), 0),
                                }
                        },
                        NLRIEncoding::IP_WITH_PATH_ID((p, id)) => {
                                let (ip, len) = <(IpAddr, u8)>::from(&p);
                                match ip {
-                                       IpAddr::V4(v4a) => self.v4_table.insert((v4a, len, id), route),
-                                       IpAddr::V6(v6a) => self.v6_table.insert((v6a, len, id), route),
+                                       IpAddr::V4(v4a) => insert!(self.v4_table, (v4a, len), id),
+                                       IpAddr::V6(v6a) => insert!(self.v6_table, (v6a, len), id),
                                }
                        },
-                       NLRIEncoding::IP_MPLS(_) => None,
+                       NLRIEncoding::IP_MPLS(_) => (),
+                       NLRIEncoding::IP_MPLS_WITH_PATH_ID(_) => (),
+                       NLRIEncoding::IP_VPN_MPLS(_) => (),
+                       NLRIEncoding::L2VPN(_) => (),
                };
        }
 }
@@ -132,8 +202,8 @@ impl<'a> std::io::Read for BytesDecoder<'a> {
        }
 }
 
-struct MsgCoder<'a>(&'a Printer);
-impl<'a> codec::Decoder for MsgCoder<'a> {
+struct MsgCoder(Option<Capabilities>);
+impl codec::Decoder for MsgCoder {
        type Item = Message;
        type Error = std::io::Error;
 
@@ -142,15 +212,17 @@ impl<'a> codec::Decoder for MsgCoder<'a> {
                        buf: bytes,
                        pos: 0
                };
-               match (Reader {
+               let def_cap = Default::default();
+               let mut reader = Reader {
                        stream: &mut decoder,
-                       capabilities: Capabilities {
-                               FOUR_OCTET_ASN_SUPPORT: true,
-                               EXTENDED_PATH_NLRI_SUPPORT: true,
-                       }
-               }).read() {
+                       capabilities: if let Some(cap) = &self.0 { cap } else { &def_cap },
+               };
+               match reader.read() {
                        Ok((_header, msg)) => {
                                decoder.buf.advance(decoder.pos);
+                               if let Message::Open(ref o) = &msg {
+                                       self.0 = Some(Capabilities::from_parameters(o.parameters.clone()));
+                               }
                                Ok(Some(msg))
                        },
                        Err(e) => match e.kind() {
@@ -160,12 +232,12 @@ impl<'a> codec::Decoder for MsgCoder<'a> {
                }
        }
 }
-impl<'a> codec::Encoder for MsgCoder<'a> {
+impl codec::Encoder for MsgCoder {
        type Item = Message;
        type Error = std::io::Error;
 
        fn encode(&mut self, msg: Message, res: &mut bytes::BytesMut) -> Result<(), std::io::Error> {
-               msg.write(&mut BytesCoder(res))?;
+               msg.encode(&mut BytesCoder(res))?;
                Ok(())
        }
 }
@@ -176,25 +248,69 @@ pub struct BGPClient {
 }
 impl BGPClient {
        pub fn get_asn(&self, addr: IpAddr) -> u32 {
-               let mut path_vecs = self.routes.lock().unwrap().get_route_attrs(addr).clone();
+               let lock = self.routes.lock().unwrap();
+               let mut path_vecs = lock.get_route_attrs(addr).1;
                if path_vecs.is_empty() { return 0; }
 
                path_vecs.sort_unstable_by(|path_a, path_b| {
                        path_a.pref.cmp(&path_b.pref)
-                               .then(path_b.path.len().cmp(&path_a.path.len()))
+                               .then(path_b.path_len.cmp(&path_a.path_len))
                                .then(path_b.med.cmp(&path_a.med))
                });
 
                let primary_route = path_vecs.pop().unwrap();
-               'asn_candidates: for asn in primary_route.path.iter().rev() {
-                       for secondary_route in path_vecs.iter() {
-                               if !secondary_route.path.contains(asn) {
-                                       continue 'asn_candidates;
+               if path_vecs.len() > 3 {
+                       // If we have more than 3 paths besides the primary, try to find the last unique ASN which doesn't show up in other paths
+                       // If we hit a T1 that is reasonably assumed to care about net neutrality, return the
+                       // previous ASN.
+                       let mut prev_asn = 0;
+                       'asn_candidates: for asn in primary_route.path_suffix.iter().rev() {
+                               if *asn == 0 { continue 'asn_candidates; }
+                               match *asn {
+                                       // Included: CenturyLink (L3), Cogent, Telia, NTT, GTT, Level3,
+                                       //           GBLX (L3), Zayo, TI Sparkle Seabone, HE, Telefonica
+                                       // Left out from Caida top-20: TATA, PCCW, Vodafone, RETN, Orange, Telstra,
+                                       //                             Singtel, Rostelecom, DTAG
+                                       209|174|1299|2914|3257|3356|3549|6461|6762|6939|12956 if prev_asn != 0 => return prev_asn,
+                                       _ => if path_vecs.iter().any(|route| !route.path_suffix.contains(asn)) {
+                                               if prev_asn != 0 { return prev_asn } else {
+                                                       // Multi-origin prefix, just give up and take the last AS in the
+                                                       // default path
+                                                       break 'asn_candidates;
+                                               }
+                                       } else {
+                                               // We only ever possibly return an ASN if it appears in all paths
+                                               prev_asn = *asn;
+                                       },
                                }
                        }
-                       return *asn;
+                       // All paths were the same, if the first ASN is non-0, return it.
+                       if prev_asn != 0 {
+                               return prev_asn;
+                       }
                }
-               *primary_route.path.last().unwrap_or(&0)
+
+               for asn in primary_route.path_suffix.iter().rev() {
+                       if *asn != 0 {
+                               return *asn;
+                       }
+               }
+               0
+       }
+
+       pub fn get_path(&self, addr: IpAddr) -> (u8, [u32; PATH_SUFFIX_LEN]) {
+               let lock = self.routes.lock().unwrap();
+               let (prefixlen, mut path_vecs) = lock.get_route_attrs(addr);
+               if path_vecs.is_empty() { return (0, [0; PATH_SUFFIX_LEN]); }
+
+               path_vecs.sort_unstable_by(|path_a, path_b| {
+                       path_a.pref.cmp(&path_b.pref)
+                               .then(path_b.path_len.cmp(&path_a.path_len))
+                               .then(path_b.med.cmp(&path_a.med))
+               });
+
+               let primary_route = path_vecs.pop().unwrap();
+               (prefixlen, primary_route.path_suffix)
        }
 
        pub fn disconnect(&self) {
@@ -216,15 +332,25 @@ impl BGPClient {
                        }
                }
                if let Some(mut aspath) = as4_path.or(as_path) {
-                       let mut path = Vec::new();
+                       let mut pathvec = Vec::new();
                        for seg in aspath.segments.drain(..) {
                                match seg {
-                                       Segment::AS_SEQUENCE(mut asn) => path.append(&mut asn),
+                                       Segment::AS_SEQUENCE(mut asn) => pathvec.append(&mut asn),
                                        Segment::AS_SET(_) => {}, // Ignore sets for now, they're not that common anyway
                                }
                        }
+                       let path_len = pathvec.len() as u32;
+                       pathvec.dedup_by(|a, b| (*a).eq(b)); // Drop prepends, cause we don't care in this case
+
+                       let mut path_suffix = [0; PATH_SUFFIX_LEN];
+                       for (idx, asn) in pathvec.iter().rev().enumerate() {
+                               path_suffix[PATH_SUFFIX_LEN - idx - 1] = *asn;
+                               if idx == PATH_SUFFIX_LEN - 1 { break; }
+                       }
+
                        return Some(Route {
-                               path: path.clone(),
+                               path_suffix,
+                               path_len,
                                pref,
                                med,
                        })
@@ -243,7 +369,7 @@ impl BGPClient {
                                                future::err(())
                                        })
                                }).and_then(move |stream| {
-                                       let (write, read) = Framed::new(stream.0, MsgCoder(printer)).split();
+                                       let (write, read) = Framed::new(stream.0, MsgCoder(None)).split();
                                        let (mut sender, receiver) = mpsc::channel(10); // We never really should send more than 10 messages unless they're dumb
                                        tokio::spawn(write.sink_map_err(|_| { () }).send_all(receiver)
                                                .then(|_| {
@@ -262,7 +388,7 @@ impl BGPClient {
                                                        OpenCapability::AddPath(vec![
                                                                (AFI::IPV4, SAFI::Unicast, AddPathDirection::ReceivePaths),
                                                                (AFI::IPV6, SAFI::Unicast, AddPathDirection::ReceivePaths)]),
-                                               ])]
+                                               ])],
                                        }));
                                        TimeoutStream::new_persistent(read, timeout).for_each(move |bgp_msg| {
                                                if client.shutdown.load(Ordering::Relaxed) {
@@ -284,9 +410,8 @@ impl BGPClient {
                                                                        route_table.withdraw(r);
                                                                }
                                                                if let Some(path) = Self::map_attrs(upd.attributes) {
-                                                                       let path_arc = Arc::new(path);
                                                                        for r in upd.announced_routes {
-                                                                               route_table.announce(r, Arc::clone(&path_arc));
+                                                                               route_table.announce(r, path.clone());
                                                                        }
                                                                }
                                                                printer.set_stat(Stat::V4RoutingTableSize(route_table.v4_table.len()));