Fix AS path detection
[dnsseed-rust] / src / bgp_client.rs
1 use std::sync::{Arc, Mutex};
2 use std::sync::atomic::{AtomicBool, Ordering};
3 use std::cmp;
4 use std::collections::{HashMap, hash_map};
5 use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
6 use std::time::{Duration, Instant};
7
8 use bgp_rs::{AFI, SAFI, AddPathDirection, Open, OpenCapability, OpenParameter, NLRIEncoding, PathAttribute};
9 use bgp_rs::Capabilities;
10 use bgp_rs::Segment;
11 use bgp_rs::Message;
12 use bgp_rs::Reader;
13
14 use tokio::prelude::*;
15 use tokio::codec;
16 use tokio::codec::Framed;
17 use tokio::net::TcpStream;
18 use tokio::timer::Delay;
19
20 use futures::sync::mpsc;
21
22 use crate::printer::{Printer, Stat};
23 use crate::timeout_stream::TimeoutStream;
24
// Number of trailing (origin-side) ASNs of every AS path that are kept per route.
const PATH_SUFFIX_LEN: usize = 3;
// Attributes retained for a single announced route.
//
// With PATH_SUFFIX_LEN == 3 this is six u32 fields (24 bytes); together with the u32 path id it
// is stored next to in the routing table, one table entry takes 28 bytes.
#[derive(Clone)]
struct Route {
        // Last PATH_SUFFIX_LEN ASNs of the (prepend-deduplicated) AS path, right-aligned: the
        // origin AS is the last element and unused leading slots are 0 (see map_attrs).
        path_suffix: [u32; PATH_SUFFIX_LEN],
        // Number of ASNs collected from the path's AS_SEQUENCE segments, counted before prepends
        // are dropped.
        path_len: u32,
        // LOCAL_PREF attribute; map_attrs defaults it to 100 when absent.
        pref: u32,
        // MULTI_EXIT_DISC attribute; map_attrs defaults it to 0 when absent.
        med: u32,
}
// Compile-time size guard: the subtraction underflows (and const evaluation is expected to fail)
// if a `(path id, Route)` table entry ever grows beyond 36 bytes. The value itself is unused.
#[allow(dead_code)]
const ROUTE_LEN: usize = 36 - std::mem::size_of::<(u32, Route)>();
35
// To keep memory tight (and since we don't need such close alignment), newtype the v4/v6 routing
// table entries to make sure they are aligned to single bytes.
38
// IPv4 routing-table key: the prefix's address octets plus its prefix length.
//
// `repr(packed)` keeps the key at exactly 5 bytes with 1-byte alignment. `Clone`/`Copy` are
// derived alongside `PartialEq`/`Eq`/`Hash` so that the derived impls can copy fields out by
// value instead of borrowing fields of a packed struct, which some compiler versions warn
// about or reject for non-`Copy` packed types.
#[repr(packed)]
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
struct V4Addr {
        // Address octets in network order, as returned by `Ipv4Addr::octets`.
        addr: [u8; 4],
        // Prefix length in bits.
        pfxlen: u8,
}
// Builds a key from an `(address, prefix length)` pair. The address is stored as given; host
// bits are not masked here.
impl From<(Ipv4Addr, u8)> for V4Addr {
        fn from(p: (Ipv4Addr, u8)) -> Self {
                let (ip, pfxlen) = p;
                Self {
                        addr: ip.octets(),
                        pfxlen,
                }
        }
}
// Compile-time layout guards: each subtraction underflows (so const evaluation is expected to
// fail) if the alignment exceeds 1 or the size exceeds 5 bytes. The values are unused.
#[allow(dead_code)]
const V4_ALIGN: usize = 1 - std::mem::align_of::<V4Addr>();
#[allow(dead_code)]
const V4_SIZE: usize = 5 - std::mem::size_of::<V4Addr>();
57
// IPv6 routing-table key: the prefix's address octets plus its prefix length.
//
// `repr(packed)` keeps the key at exactly 17 bytes with 1-byte alignment. `Clone`/`Copy` are
// derived alongside `PartialEq`/`Eq`/`Hash` so that the derived impls can copy fields out by
// value instead of borrowing fields of a packed struct, which some compiler versions warn
// about or reject for non-`Copy` packed types.
#[repr(packed)]
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
struct V6Addr {
        // Address octets in network order, as returned by `Ipv6Addr::octets`.
        addr: [u8; 16],
        // Prefix length in bits.
        pfxlen: u8,
}
// Builds a key from an `(address, prefix length)` pair. The address is stored as given; host
// bits are not masked here.
impl From<(Ipv6Addr, u8)> for V6Addr {
        fn from(p: (Ipv6Addr, u8)) -> Self {
                let (ip, pfxlen) = p;
                Self {
                        addr: ip.octets(),
                        pfxlen,
                }
        }
}
// Compile-time layout guards: each subtraction underflows (so const evaluation is expected to
// fail) if the alignment exceeds 1 or the size exceeds 17 bytes. The values are unused.
#[allow(dead_code)]
const V6_ALIGN: usize = 1 - std::mem::align_of::<V6Addr>();
#[allow(dead_code)]
const V6_SIZE: usize = 17 - std::mem::size_of::<V6Addr>();
76
// In-memory routing table: one map per address family, keyed by prefix, holding every known
// path for that prefix as a `(path id, Route)` pair.
struct RoutingTable {
        // We really want a HashMap for the values here, but they'll only ever contain a few entries,
        // and Vecs are way more memory-efficient in that case.
        v4_table: HashMap<V4Addr, Vec<(u32, Route)>>,
        v6_table: HashMap<V6Addr, Vec<(u32, Route)>>,
}
83
84 impl RoutingTable {
85         fn new() -> Self {
86                 Self {
87                         v4_table: HashMap::with_capacity(900_000),
88                         v6_table: HashMap::with_capacity(100_000),
89                 }
90         }
91
92         fn get_route_attrs(&self, ip: IpAddr) -> (u8, Vec<&Route>) {
93                 macro_rules! lookup_res {
94                         ($addrty: ty, $addr: expr, $table: expr, $addr_bits: expr) => { {
95                                 //TODO: Optimize this (probably means making the tables btrees)!
96                                 let mut lookup = <$addrty>::from(($addr, $addr_bits));
97                                 for i in 0..$addr_bits {
98                                         if let Some(routes) = $table.get(&lookup) {
99                                                 if routes.len() > 0 {
100                                                         return (lookup.pfxlen, routes.iter().map(|v| &v.1).collect());
101                                                 }
102                                         }
103                                         lookup.addr[lookup.addr.len() - (i/8) - 1] &= !(1u8 << (i % 8));
104                                         lookup.pfxlen -= 1;
105                                 }
106                                 (0, vec![])
107                         } }
108                 }
109                 match ip {
110                         IpAddr::V4(v4a) => lookup_res!(V4Addr, v4a, self.v4_table, 32),
111                         IpAddr::V6(v6a) => lookup_res!(V6Addr, v6a, self.v6_table, 128)
112                 }
113         }
114
115         fn withdraw(&mut self, route: NLRIEncoding) {
116                 macro_rules! remove {
117                         ($rt: expr, $v: expr, $id: expr) => { {
118                                 match $rt.entry($v.into()) {
119                                         hash_map::Entry::Occupied(mut entry) => {
120                                                 entry.get_mut().retain(|e| e.0 != $id);
121                                                 if entry.get_mut().is_empty() {
122                                                         entry.remove();
123                                                 }
124                                         },
125                                         _ => {},
126                                 }
127                         } }
128                 }
129                 match route {
130                         NLRIEncoding::IP(p) => {
131                                 let (ip, len) = <(IpAddr, u8)>::from(&p);
132                                 match ip {
133                                         IpAddr::V4(v4a) => remove!(self.v4_table, (v4a, len), 0),
134                                         IpAddr::V6(v6a) => remove!(self.v6_table, (v6a, len), 0),
135                                 }
136                         },
137                         NLRIEncoding::IP_WITH_PATH_ID((p, id)) => {
138                                 let (ip, len) = <(IpAddr, u8)>::from(&p);
139                                 match ip {
140                                         IpAddr::V4(v4a) => remove!(self.v4_table, (v4a, len), id),
141                                         IpAddr::V6(v6a) => remove!(self.v6_table, (v6a, len), id),
142                                 }
143                         },
144                         NLRIEncoding::IP_MPLS(_) => (),
145                         NLRIEncoding::IP_MPLS_WITH_PATH_ID(_) => (),
146                         NLRIEncoding::IP_VPN_MPLS(_) => (),
147                         NLRIEncoding::L2VPN(_) => (),
148                 };
149         }
150
151         fn announce(&mut self, prefix: NLRIEncoding, route: Route) {
152                 macro_rules! insert {
153                         ($rt: expr, $v: expr, $id: expr) => { {
154                                 let entry = $rt.entry($v.into()).or_insert(Vec::new());
155                                 entry.retain(|e| e.0 != $id);
156                                 entry.push(($id, route));
157                         } }
158                 }
159                 match prefix {
160                         NLRIEncoding::IP(p) => {
161                                 let (ip, len) = <(IpAddr, u8)>::from(&p);
162                                 match ip {
163                                         IpAddr::V4(v4a) => insert!(self.v4_table, (v4a, len), 0),
164                                         IpAddr::V6(v6a) => insert!(self.v6_table, (v6a, len), 0),
165                                 }
166                         },
167                         NLRIEncoding::IP_WITH_PATH_ID((p, id)) => {
168                                 let (ip, len) = <(IpAddr, u8)>::from(&p);
169                                 match ip {
170                                         IpAddr::V4(v4a) => insert!(self.v4_table, (v4a, len), id),
171                                         IpAddr::V6(v6a) => insert!(self.v6_table, (v6a, len), id),
172                                 }
173                         },
174                         NLRIEncoding::IP_MPLS(_) => (),
175                         NLRIEncoding::IP_MPLS_WITH_PATH_ID(_) => (),
176                         NLRIEncoding::IP_VPN_MPLS(_) => (),
177                         NLRIEncoding::L2VPN(_) => (),
178                 };
179         }
180 }
181
182 struct BytesCoder<'a>(&'a mut bytes::BytesMut);
183 impl<'a> std::io::Write for BytesCoder<'a> {
184         fn write(&mut self, b: &[u8]) -> Result<usize, std::io::Error> {
185                 self.0.extend_from_slice(&b);
186                 Ok(b.len())
187         }
188         fn flush(&mut self) -> Result<(), std::io::Error> {
189                 Ok(())
190         }
191 }
192 struct BytesDecoder<'a> {
193         buf: &'a mut bytes::BytesMut,
194         pos: usize,
195 }
196 impl<'a> std::io::Read for BytesDecoder<'a> {
197         fn read(&mut self, b: &mut [u8]) -> Result<usize, std::io::Error> {
198                 let copy_len = cmp::min(b.len(), self.buf.len() - self.pos);
199                 b[..copy_len].copy_from_slice(&self.buf[self.pos..self.pos + copy_len]);
200                 self.pos += copy_len;
201                 Ok(copy_len)
202         }
203 }
204
205 struct MsgCoder(Option<Capabilities>);
206 impl codec::Decoder for MsgCoder {
207         type Item = Message;
208         type Error = std::io::Error;
209
210         fn decode(&mut self, bytes: &mut bytes::BytesMut) -> Result<Option<Message>, std::io::Error> {
211                 let mut decoder = BytesDecoder {
212                         buf: bytes,
213                         pos: 0
214                 };
215                 let def_cap = Default::default();
216                 let mut reader = Reader {
217                         stream: &mut decoder,
218                         capabilities: if let Some(cap) = &self.0 { cap } else { &def_cap },
219                 };
220                 match reader.read() {
221                         Ok((_header, msg)) => {
222                                 decoder.buf.advance(decoder.pos);
223                                 if let Message::Open(ref o) = &msg {
224                                         self.0 = Some(Capabilities::from_parameters(o.parameters.clone()));
225                                 }
226                                 Ok(Some(msg))
227                         },
228                         Err(e) => match e.kind() {
229                                 std::io::ErrorKind::UnexpectedEof => Ok(None),
230                                 _ => Err(e),
231                         },
232                 }
233         }
234 }
235 impl codec::Encoder for MsgCoder {
236         type Item = Message;
237         type Error = std::io::Error;
238
239         fn encode(&mut self, msg: Message, res: &mut bytes::BytesMut) -> Result<(), std::io::Error> {
240                 msg.encode(&mut BytesCoder(res))?;
241                 Ok(())
242         }
243 }
244
// Handle to a BGP session that maintains a routing table in the background and answers
// ASN / path lookups against it.
pub struct BGPClient {
        // Routing table, filled by the connection task and read by get_asn / get_path.
        routes: Mutex<RoutingTable>,
        // Set by disconnect(); checked by the connection task to stop processing and to skip
        // reconnecting.
        shutdown: AtomicBool,
}
249 impl BGPClient {
250         pub fn get_asn(&self, addr: IpAddr) -> u32 {
251                 let lock = self.routes.lock().unwrap();
252                 let mut path_vecs = lock.get_route_attrs(addr).1;
253                 if path_vecs.is_empty() { return 0; }
254
255                 path_vecs.sort_unstable_by(|path_a, path_b| {
256                         path_a.pref.cmp(&path_b.pref)
257                                 .then(path_b.path_len.cmp(&path_a.path_len))
258                                 .then(path_b.med.cmp(&path_a.med))
259                 });
260
261                 let primary_route = path_vecs.pop().unwrap();
262                 if path_vecs.len() > 3 {
263                         // If we have at least 3 paths, try to find the last unique ASN which doesn't show up in other paths
264                         // If we hit a T1 that is reasonably assumed to care about net neutrality, return the
265                         // previous ASN.
266                         let mut prev_asn = 0;
267                         'asn_candidates: for asn in primary_route.path_suffix.iter().rev() {
268                                 if *asn == 0 { continue 'asn_candidates; }
269                                 match *asn {
270                                         // Included: CenturyLink (L3), Cogent, Telia, NTT, GTT, Level3,
271                                         //           GBLX (L3), Zayo, TI Sparkle Seabone, HE, Telefonica
272                                         // Left out from Caida top-20: TATA, PCCW, Vodafone, RETN, Orange, Telstra,
273                                         //                             Singtel, Rostelecom, DTAG
274                                         209|174|1299|2914|3257|3356|3549|6461|6762|6939|12956 if prev_asn != 0 => return prev_asn,
275                                         _ => if path_vecs.iter().any(|route| !route.path_suffix.contains(asn)) {
276                                                 if prev_asn != 0 { return prev_asn } else {
277                                                         // Multi-origin prefix, just give up and take the last AS in the
278                                                         // default path
279                                                         break 'asn_candidates;
280                                                 }
281                                         } else {
282                                                 // We only ever possibly return an ASN if it appears in all paths
283                                                 prev_asn = *asn;
284                                         },
285                                 }
286                         }
287                         // All paths were the same, if the first ASN is non-0, return it.
288                         if prev_asn != 0 {
289                                 return prev_asn;
290                         }
291                 }
292
293                 for asn in primary_route.path_suffix.iter().rev() {
294                         if *asn != 0 {
295                                 return *asn;
296                         }
297                 }
298                 0
299         }
300
301         pub fn get_path(&self, addr: IpAddr) -> (u8, [u32; PATH_SUFFIX_LEN]) {
302                 let lock = self.routes.lock().unwrap();
303                 let (prefixlen, mut path_vecs) = lock.get_route_attrs(addr);
304                 if path_vecs.is_empty() { return (0, [0; PATH_SUFFIX_LEN]); }
305
306                 path_vecs.sort_unstable_by(|path_a, path_b| {
307                         path_a.pref.cmp(&path_b.pref)
308                                 .then(path_b.path_len.cmp(&path_a.path_len))
309                                 .then(path_b.med.cmp(&path_a.med))
310                 });
311
312                 let primary_route = path_vecs.pop().unwrap();
313                 (prefixlen, primary_route.path_suffix)
314         }
315
        // Requests shutdown by setting the `shutdown` flag. The connection task checks the flag
        // when it handles the next incoming message and before scheduling a reconnect, so the
        // session is not torn down synchronously by this call.
        pub fn disconnect(&self) {
                self.shutdown.store(true, Ordering::Relaxed);
        }
319
320         fn map_attrs(mut attrs: Vec<PathAttribute>) -> Option<Route> {
321                 let mut as4_path = None;
322                 let mut as_path = None;
323                 let mut pref = 100;
324                 let mut med = 0;
325                 for attr in attrs.drain(..) {
326                         match attr {
327                                 PathAttribute::AS4_PATH(path) => as4_path = Some(path),
328                                 PathAttribute::AS_PATH(path) => as_path = Some(path),
329                                 PathAttribute::LOCAL_PREF(p) => pref = p,
330                                 PathAttribute::MULTI_EXIT_DISC(m) => med = m,
331                                 _ => {},
332                         }
333                 }
334                 if let Some(mut aspath) = as4_path.or(as_path) {
335                         let mut pathvec = Vec::new();
336                         for seg in aspath.segments.drain(..) {
337                                 match seg {
338                                         Segment::AS_SEQUENCE(mut asn) => pathvec.append(&mut asn),
339                                         Segment::AS_SET(_) => {}, // Ignore sets for now, they're not that common anyway
340                                 }
341                         }
342                         let path_len = pathvec.len() as u32;
343                         pathvec.dedup_by(|a, b| (*a).eq(b)); // Drop prepends, cause we don't care in this case
344
345                         let mut path_suffix = [0; PATH_SUFFIX_LEN];
346                         for (idx, asn) in pathvec.iter().rev().enumerate() {
347                                 path_suffix[PATH_SUFFIX_LEN - idx - 1] = *asn;
348                                 if idx == PATH_SUFFIX_LEN - 1 { break; }
349                         }
350
351                         return Some(Route {
352                                 path_suffix,
353                                 path_len,
354                                 pref,
355                                 med,
356                         })
357                 } else { None }
358         }
359
360         fn connect_given_client(addr: SocketAddr, timeout: Duration, printer: &'static Printer, client: Arc<BGPClient>) {
361                 tokio::spawn(Delay::new(Instant::now() + timeout / 4).then(move |_| {
362                         let connect_timeout = Delay::new(Instant::now() + timeout.clone()).then(|_| {
363                                 future::err(std::io::Error::new(std::io::ErrorKind::TimedOut, "timeout reached"))
364                         });
365                         let client_reconn = Arc::clone(&client);
366                         TcpStream::connect(&addr).select(connect_timeout)
367                                 .or_else(move |_| {
368                                         Delay::new(Instant::now() + timeout / 2).then(|_| {
369                                                 future::err(())
370                                         })
371                                 }).and_then(move |stream| {
372                                         let (write, read) = Framed::new(stream.0, MsgCoder(None)).split();
373                                         let (mut sender, receiver) = mpsc::channel(10); // We never really should send more than 10 messages unless they're dumb
374                                         tokio::spawn(write.sink_map_err(|_| { () }).send_all(receiver)
375                                                 .then(|_| {
376                                                         future::err(())
377                                                 }));
378                                         let _ = sender.try_send(Message::Open(Open {
379                                                 version: 4,
380                                                 peer_asn: 23456,
381                                                 hold_timer: timeout.as_secs() as u16,
382                                                 identifier: 0x453b1215, // 69.59.18.21
383                                                 parameters: vec![OpenParameter::Capabilities(vec![
384                                                         OpenCapability::MultiProtocol((AFI::IPV4, SAFI::Unicast)),
385                                                         OpenCapability::MultiProtocol((AFI::IPV6, SAFI::Unicast)),
386                                                         OpenCapability::FourByteASN(397444),
387                                                         OpenCapability::RouteRefresh,
388                                                         OpenCapability::AddPath(vec![
389                                                                 (AFI::IPV4, SAFI::Unicast, AddPathDirection::ReceivePaths),
390                                                                 (AFI::IPV6, SAFI::Unicast, AddPathDirection::ReceivePaths)]),
391                                                 ])],
392                                         }));
393                                         TimeoutStream::new_persistent(read, timeout).for_each(move |bgp_msg| {
394                                                 if client.shutdown.load(Ordering::Relaxed) {
395                                                         return future::err(std::io::Error::new(std::io::ErrorKind::Other, "Shutting Down"));
396                                                 }
397                                                 match bgp_msg {
398                                                         Message::Open(_) => {
399                                                                 client.routes.lock().unwrap().v4_table.clear();
400                                                                 client.routes.lock().unwrap().v6_table.clear();
401                                                                 printer.add_line("Connected to BGP route provider".to_string(), false);
402                                                         },
403                                                         Message::KeepAlive => {
404                                                                 let _ = sender.try_send(Message::KeepAlive);
405                                                         },
406                                                         Message::Update(mut upd) => {
407                                                                 upd.normalize();
408                                                                 let mut route_table = client.routes.lock().unwrap();
409                                                                 for r in upd.withdrawn_routes {
410                                                                         route_table.withdraw(r);
411                                                                 }
412                                                                 if let Some(path) = Self::map_attrs(upd.attributes) {
413                                                                         for r in upd.announced_routes {
414                                                                                 route_table.announce(r, path.clone());
415                                                                         }
416                                                                 }
417                                                                 printer.set_stat(Stat::V4RoutingTableSize(route_table.v4_table.len()));
418                                                                 printer.set_stat(Stat::V6RoutingTableSize(route_table.v6_table.len()));
419                                                         },
420                                                         _ => {}
421                                                 }
422                                                 future::ok(())
423                                         }).or_else(move |e| {
424                                                 printer.add_line(format!("Got error from BGP stream: {:?}", e), true);
425                                                 future::ok(())
426                                         })
427                                 }).then(move |_| {
428                                         if !client_reconn.shutdown.load(Ordering::Relaxed) {
429                                                 BGPClient::connect_given_client(addr, timeout, printer, client_reconn);
430                                         }
431                                         future::ok(())
432                                 })
433                         })
434                 );
435         }
436
437         pub fn new(addr: SocketAddr, timeout: Duration, printer: &'static Printer) -> Arc<BGPClient> {
438                 let client = Arc::new(BGPClient {
439                         routes: Mutex::new(RoutingTable::new()),
440                         shutdown: AtomicBool::new(false),
441                 });
442                 BGPClient::connect_given_client(addr, timeout, printer, Arc::clone(&client));
443                 client
444         }
445 }