use std::sync::{Arc, Mutex};
use std::sync::atomic::{AtomicBool, Ordering};
use std::cmp;
-use std::ops::Bound::Included;
-use std::collections::BTreeMap;
+use std::collections::{HashMap, hash_map};
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
use std::time::{Duration, Instant};
use crate::printer::{Printer, Stat};
use crate::timeout_stream::TimeoutStream;
-struct Route {
- path: Vec<u32>,
+const PATH_SUFFIX_LEN: usize = 3;
+#[derive(Clone)]
+struct Route { // 32 bytes with a path id u32
+ path_suffix: [u32; PATH_SUFFIX_LEN],
+ path_len: u32,
pref: u32,
med: u32,
}
+#[allow(dead_code)]
+const ROUTE_LEN: usize = 36 - std::mem::size_of::<(u32, Route)>();
+
+// To keep memory tight (and since we dont' need such close alignment), newtype the v4/v6 routing
+// table entries to make sure they are aligned to single bytes.
+
+#[repr(packed)]
+#[derive(PartialEq, Eq, Hash)]
+struct V4Addr {
+ addr: [u8; 4],
+ pfxlen: u8,
+}
+impl From<(Ipv4Addr, u8)> for V4Addr {
+ fn from(p: (Ipv4Addr, u8)) -> Self {
+ Self {
+ addr: p.0.octets(),
+ pfxlen: p.1,
+ }
+ }
+}
+#[allow(dead_code)]
+const V4_ALIGN: usize = 1 - std::mem::align_of::<V4Addr>();
+#[allow(dead_code)]
+const V4_SIZE: usize = 5 - std::mem::size_of::<V4Addr>();
+
+#[repr(packed)]
+#[derive(PartialEq, Eq, Hash)]
+struct V6Addr {
+ addr: [u8; 16],
+ pfxlen: u8,
+}
+impl From<(Ipv6Addr, u8)> for V6Addr {
+ fn from(p: (Ipv6Addr, u8)) -> Self {
+ Self {
+ addr: p.0.octets(),
+ pfxlen: p.1,
+ }
+ }
+}
+#[allow(dead_code)]
+const V6_ALIGN: usize = 1 - std::mem::align_of::<V6Addr>();
+#[allow(dead_code)]
+const V6_SIZE: usize = 17 - std::mem::size_of::<V6Addr>();
struct RoutingTable {
- v4_table: BTreeMap<(Ipv4Addr, u8, u32), Arc<Route>>,
- v6_table: BTreeMap<(Ipv6Addr, u8, u32), Arc<Route>>,
+ // We really want a HashMap for the values here, but they'll only ever contain a few entries,
+ // and Vecs are way more memory-effecient in that case.
+ v4_table: HashMap<V4Addr, Vec<(u32, Route)>>,
+ v6_table: HashMap<V6Addr, Vec<(u32, Route)>>,
+ max_paths: usize,
+ routes_with_max: usize,
}
impl RoutingTable {
fn new() -> Self {
Self {
- v4_table: BTreeMap::new(),
- v6_table: BTreeMap::new(),
+ v4_table: HashMap::with_capacity(900_000),
+ v6_table: HashMap::with_capacity(100_000),
+ max_paths: 0,
+ routes_with_max: 0,
}
}
- fn get_route_attrs(&self, ip: IpAddr) -> Vec<Arc<Route>> {
+ fn get_route_attrs(&self, ip: IpAddr) -> (u8, Vec<&Route>) {
macro_rules! lookup_res {
($addrty: ty, $addr: expr, $table: expr, $addr_bits: expr) => { {
- let mut res = Vec::new();
- //TODO: Optimize this!
- for i in (0..$addr_bits).rev() {
- let mut lookup = $addr.octets();
- for b in 0..(i / 8) {
- lookup[lookup.len() - b - 1] = 0;
- }
- lookup[lookup.len() - (i/8) - 1] &= !(((1u16 << (i % 8)) - 1) as u8);
- let lookup_addr = <$addrty>::from(lookup);
- for attrs in $table.range((Included((lookup_addr, $addr_bits - i as u8, 0)), Included((lookup_addr, $addr_bits - i as u8, std::u32::MAX)))) {
- res.push(Arc::clone(&attrs.1));
+ //TODO: Optimize this (probably means making the tables btrees)!
+ let mut lookup = <$addrty>::from(($addr, $addr_bits));
+ for i in 0..$addr_bits {
+ if let Some(routes) = $table.get(&lookup) {
+ if routes.len() > 0 {
+ return (lookup.pfxlen, routes.iter().map(|v| &v.1).collect());
+ }
}
- if !res.is_empty() { break; }
+ lookup.addr[lookup.addr.len() - (i/8) - 1] &= !(1u8 << (i % 8));
+ lookup.pfxlen -= 1;
}
- res
+ (0, vec![])
} }
}
match ip {
- IpAddr::V4(v4a) => lookup_res!(Ipv4Addr, v4a, self.v4_table, 32),
- IpAddr::V6(v6a) => lookup_res!(Ipv6Addr, v6a, self.v6_table, 128)
+ IpAddr::V4(v4a) => lookup_res!(V4Addr, v4a, self.v4_table, 32),
+ IpAddr::V6(v6a) => lookup_res!(V6Addr, v6a, self.v6_table, 128)
}
}
fn withdraw(&mut self, route: NLRIEncoding) {
+ macro_rules! remove {
+ ($rt: expr, $v: expr, $id: expr) => { {
+ match $rt.entry($v.into()) {
+ hash_map::Entry::Occupied(mut entry) => {
+ if entry.get().len() == self.max_paths {
+ self.routes_with_max -= 1;
+ if self.routes_with_max == 0 {
+ self.max_paths = 0;
+ }
+ }
+ entry.get_mut().retain(|e| e.0 != $id);
+ if entry.get_mut().is_empty() {
+ entry.remove();
+ }
+ },
+ _ => {},
+ }
+ } }
+ }
match route {
NLRIEncoding::IP(p) => {
let (ip, len) = <(IpAddr, u8)>::from(&p);
match ip {
- IpAddr::V4(v4a) => self.v4_table.remove(&(v4a, len, 0)),
- IpAddr::V6(v6a) => self.v6_table.remove(&(v6a, len, 0)),
+ IpAddr::V4(v4a) => remove!(self.v4_table, (v4a, len), 0),
+ IpAddr::V6(v6a) => remove!(self.v6_table, (v6a, len), 0),
}
},
NLRIEncoding::IP_WITH_PATH_ID((p, id)) => {
let (ip, len) = <(IpAddr, u8)>::from(&p);
match ip {
- IpAddr::V4(v4a) => self.v4_table.remove(&(v4a, len, id)),
- IpAddr::V6(v6a) => self.v6_table.remove(&(v6a, len, id)),
+ IpAddr::V4(v4a) => remove!(self.v4_table, (v4a, len), id),
+ IpAddr::V6(v6a) => remove!(self.v6_table, (v6a, len), id),
}
},
- NLRIEncoding::IP_MPLS(_) => None,
+ NLRIEncoding::IP_MPLS(_) => (),
+ NLRIEncoding::IP_MPLS_WITH_PATH_ID(_) => (),
+ NLRIEncoding::IP_VPN_MPLS(_) => (),
+ NLRIEncoding::L2VPN(_) => (),
};
}
- fn announce(&mut self, prefix: NLRIEncoding, route: Arc<Route>) {
+ fn announce(&mut self, prefix: NLRIEncoding, route: Route) {
+ macro_rules! insert {
+ ($rt: expr, $v: expr, $id: expr) => { {
+ let old_max_paths = self.max_paths;
+ let entry = $rt.entry($v.into()).or_insert_with(|| Vec::with_capacity(old_max_paths));
+ let entry_had_max = entry.len() == self.max_paths;
+ entry.retain(|e| e.0 != $id);
+ if entry_had_max {
+ entry.reserve_exact(1);
+ } else {
+ entry.reserve_exact(cmp::max(self.max_paths, entry.len() + 1) - entry.len());
+ }
+ entry.push(($id, route));
+ if entry.len() > self.max_paths {
+ self.max_paths = entry.len();
+ self.routes_with_max = 1;
+ } else if entry.len() == self.max_paths {
+ if !entry_had_max { self.routes_with_max += 1; }
+ }
+ } }
+ }
match prefix {
NLRIEncoding::IP(p) => {
let (ip, len) = <(IpAddr, u8)>::from(&p);
match ip {
- IpAddr::V4(v4a) => self.v4_table.insert((v4a, len, 0), route),
- IpAddr::V6(v6a) => self.v6_table.insert((v6a, len, 0), route),
+ IpAddr::V4(v4a) => insert!(self.v4_table, (v4a, len), 0),
+ IpAddr::V6(v6a) => insert!(self.v6_table, (v6a, len), 0),
}
},
NLRIEncoding::IP_WITH_PATH_ID((p, id)) => {
let (ip, len) = <(IpAddr, u8)>::from(&p);
match ip {
- IpAddr::V4(v4a) => self.v4_table.insert((v4a, len, id), route),
- IpAddr::V6(v6a) => self.v6_table.insert((v6a, len, id), route),
+ IpAddr::V4(v4a) => insert!(self.v4_table, (v4a, len), id),
+ IpAddr::V6(v6a) => insert!(self.v6_table, (v6a, len), id),
}
},
- NLRIEncoding::IP_MPLS(_) => None,
+ NLRIEncoding::IP_MPLS(_) => (),
+ NLRIEncoding::IP_MPLS_WITH_PATH_ID(_) => (),
+ NLRIEncoding::IP_VPN_MPLS(_) => (),
+ NLRIEncoding::L2VPN(_) => (),
};
}
}
}
}
-struct MsgCoder<'a>(&'a Printer);
-impl<'a> codec::Decoder for MsgCoder<'a> {
+struct MsgCoder(Option<Capabilities>);
+impl codec::Decoder for MsgCoder {
type Item = Message;
type Error = std::io::Error;
buf: bytes,
pos: 0
};
- match (Reader {
+ let def_cap = Default::default();
+ let mut reader = Reader {
stream: &mut decoder,
- capabilities: Capabilities {
- FOUR_OCTET_ASN_SUPPORT: true,
- EXTENDED_PATH_NLRI_SUPPORT: true,
- }
- }).read() {
+ capabilities: if let Some(cap) = &self.0 { cap } else { &def_cap },
+ };
+ match reader.read() {
Ok((_header, msg)) => {
decoder.buf.advance(decoder.pos);
+ if let Message::Open(ref o) = &msg {
+ self.0 = Some(Capabilities::from_parameters(o.parameters.clone()));
+ }
Ok(Some(msg))
},
Err(e) => match e.kind() {
}
}
}
-impl<'a> codec::Encoder for MsgCoder<'a> {
+impl codec::Encoder for MsgCoder {
type Item = Message;
type Error = std::io::Error;
fn encode(&mut self, msg: Message, res: &mut bytes::BytesMut) -> Result<(), std::io::Error> {
- msg.write(&mut BytesCoder(res))?;
+ msg.encode(&mut BytesCoder(res))?;
Ok(())
}
}
}
impl BGPClient {
pub fn get_asn(&self, addr: IpAddr) -> u32 {
- let mut path_vecs = self.routes.lock().unwrap().get_route_attrs(addr).clone();
+ let lock = self.routes.lock().unwrap();
+ let mut path_vecs = lock.get_route_attrs(addr).1;
if path_vecs.is_empty() { return 0; }
path_vecs.sort_unstable_by(|path_a, path_b| {
path_a.pref.cmp(&path_b.pref)
- .then(path_b.path.len().cmp(&path_a.path.len()))
+ .then(path_b.path_len.cmp(&path_a.path_len))
.then(path_b.med.cmp(&path_a.med))
});
let primary_route = path_vecs.pop().unwrap();
- 'asn_candidates: for asn in primary_route.path.iter().rev() {
- for secondary_route in path_vecs.iter() {
- if !secondary_route.path.contains(asn) {
- continue 'asn_candidates;
+ if path_vecs.len() > 3 {
+ // If we have at least 3 paths, try to find the last unique ASN which doesn't show up in other paths
+ // If we hit a T1 that is reasonably assumed to care about net neutrality, return the
+ // previous ASN.
+ let mut prev_asn = 0;
+ 'asn_candidates: for asn in primary_route.path_suffix.iter().rev() {
+ if *asn == 0 { continue 'asn_candidates; }
+ match *asn {
+ // Included: CenturyLink (L3), Cogent, Telia, NTT, GTT, Level3,
+ // GBLX (L3), Zayo, TI Sparkle Seabone, HE, Telefonica
+ // Left out from Caida top-20: TATA, PCCW, Vodafone, RETN, Orange, Telstra,
+ // Singtel, Rostelecom, DTAG
+ 209|174|1299|2914|3257|3356|3549|6461|6762|6939|12956 if prev_asn != 0 => return prev_asn,
+ _ => if path_vecs.iter().any(|route| !route.path_suffix.contains(asn)) {
+ if prev_asn != 0 { return prev_asn } else {
+ // Multi-origin prefix, just give up and take the last AS in the
+ // default path
+ break 'asn_candidates;
+ }
+ } else {
+ // We only ever possibly return an ASN if it appears in all paths
+ prev_asn = *asn;
+ },
}
}
- return *asn;
+ // All paths were the same, if the first ASN is non-0, return it.
+ if prev_asn != 0 {
+ return prev_asn;
+ }
}
- *primary_route.path.last().unwrap_or(&0)
+
+ for asn in primary_route.path_suffix.iter().rev() {
+ if *asn != 0 {
+ return *asn;
+ }
+ }
+ 0
+ }
+
+ pub fn get_path(&self, addr: IpAddr) -> (u8, [u32; PATH_SUFFIX_LEN]) {
+ let lock = self.routes.lock().unwrap();
+ let (prefixlen, mut path_vecs) = lock.get_route_attrs(addr);
+ if path_vecs.is_empty() { return (0, [0; PATH_SUFFIX_LEN]); }
+
+ path_vecs.sort_unstable_by(|path_a, path_b| {
+ path_a.pref.cmp(&path_b.pref)
+ .then(path_b.path_len.cmp(&path_a.path_len))
+ .then(path_b.med.cmp(&path_a.med))
+ });
+
+ let primary_route = path_vecs.pop().unwrap();
+ (prefixlen, primary_route.path_suffix)
}
pub fn disconnect(&self) {
}
}
if let Some(mut aspath) = as4_path.or(as_path) {
- let mut path = Vec::new();
+ let mut pathvec = Vec::new();
for seg in aspath.segments.drain(..) {
match seg {
- Segment::AS_SEQUENCE(mut asn) => path.append(&mut asn),
+ Segment::AS_SEQUENCE(mut asn) => pathvec.append(&mut asn),
Segment::AS_SET(_) => {}, // Ignore sets for now, they're not that common anyway
}
}
+ let path_len = pathvec.len() as u32;
+ pathvec.dedup_by(|a, b| (*a).eq(b)); // Drop prepends, cause we don't care in this case
+
+ let mut path_suffix = [0; PATH_SUFFIX_LEN];
+ for (idx, asn) in pathvec.iter().rev().enumerate() {
+ path_suffix[PATH_SUFFIX_LEN - idx - 1] = *asn;
+ if idx == PATH_SUFFIX_LEN - 1 { break; }
+ }
+
return Some(Route {
- path: path.clone(),
+ path_suffix,
+ path_len,
pref,
med,
})
} else { None }
}
- fn connect_given_client(addr: SocketAddr, timeout: Duration, printer: &'static Printer, client: Arc<BGPClient>) {
+ fn connect_given_client(remote_asn: u32, addr: SocketAddr, timeout: Duration, printer: &'static Printer, client: Arc<BGPClient>) {
tokio::spawn(Delay::new(Instant::now() + timeout / 4).then(move |_| {
let connect_timeout = Delay::new(Instant::now() + timeout.clone()).then(|_| {
future::err(std::io::Error::new(std::io::ErrorKind::TimedOut, "timeout reached"))
future::err(())
})
}).and_then(move |stream| {
- let (write, read) = Framed::new(stream.0, MsgCoder(printer)).split();
+ let (write, read) = Framed::new(stream.0, MsgCoder(None)).split();
let (mut sender, receiver) = mpsc::channel(10); // We never really should send more than 10 messages unless they're dumb
tokio::spawn(write.sink_map_err(|_| { () }).send_all(receiver)
.then(|_| {
future::err(())
}));
+ let peer_asn = if remote_asn > u16::max_value() as u32 { 23456 } else { remote_asn as u16 };
let _ = sender.try_send(Message::Open(Open {
version: 4,
- peer_asn: 23456,
+ peer_asn,
hold_timer: timeout.as_secs() as u16,
- identifier: 0x453b1215, // 69.59.18.21
+ identifier: 0x453b1215, // 69.59.18.21. Note that you never actually need to change this.
parameters: vec![OpenParameter::Capabilities(vec![
OpenCapability::MultiProtocol((AFI::IPV4, SAFI::Unicast)),
OpenCapability::MultiProtocol((AFI::IPV6, SAFI::Unicast)),
- OpenCapability::FourByteASN(397444),
+ OpenCapability::FourByteASN(remote_asn),
OpenCapability::RouteRefresh,
OpenCapability::AddPath(vec![
(AFI::IPV4, SAFI::Unicast, AddPathDirection::ReceivePaths),
(AFI::IPV6, SAFI::Unicast, AddPathDirection::ReceivePaths)]),
- ])]
+ ])],
}));
TimeoutStream::new_persistent(read, timeout).for_each(move |bgp_msg| {
if client.shutdown.load(Ordering::Relaxed) {
route_table.withdraw(r);
}
if let Some(path) = Self::map_attrs(upd.attributes) {
- let path_arc = Arc::new(path);
for r in upd.announced_routes {
- route_table.announce(r, Arc::clone(&path_arc));
+ route_table.announce(r, path.clone());
}
}
printer.set_stat(Stat::V4RoutingTableSize(route_table.v4_table.len()));
printer.set_stat(Stat::V6RoutingTableSize(route_table.v6_table.len()));
+ printer.set_stat(Stat::RoutingTablePaths(route_table.max_paths));
},
_ => {}
}
})
}).then(move |_| {
if !client_reconn.shutdown.load(Ordering::Relaxed) {
- BGPClient::connect_given_client(addr, timeout, printer, client_reconn);
+ BGPClient::connect_given_client(remote_asn, addr, timeout, printer, client_reconn);
}
future::ok(())
})
);
}
- pub fn new(addr: SocketAddr, timeout: Duration, printer: &'static Printer) -> Arc<BGPClient> {
+ pub fn new(remote_asn: u32, addr: SocketAddr, timeout: Duration, printer: &'static Printer) -> Arc<BGPClient> {
let client = Arc::new(BGPClient {
routes: Mutex::new(RoutingTable::new()),
shutdown: AtomicBool::new(false),
});
- BGPClient::connect_given_client(addr, timeout, printer, Arc::clone(&client));
+ BGPClient::connect_given_client(remote_asn, addr, timeout, printer, Arc::clone(&client));
client
}
}