WIP tokio 1 conversion
[dnsseed-rust] / src / bgp_client.rs
use std::sync::{Arc, Mutex};
use std::sync::atomic::{AtomicBool, Ordering};
use std::cmp;
use std::collections::{HashMap, hash_map};
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr};
use std::time::{Duration, Instant};

use bgp_rs::{AFI, SAFI, AddPathDirection, Open, OpenCapability, OpenParameter, NLRIEncoding, PathAttribute};
use bgp_rs::Capabilities;
use bgp_rs::Segment;
use bgp_rs::Message;
use bgp_rs::Reader;

use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::TcpStream;
use tokio::time;

use crate::printer::{Printer, Stat};
18
/// Number of trailing AS-path hops kept for every stored route.
const PATH_SUFFIX_LEN: usize = 3;

/// Compact summary of one BGP path towards a prefix.
///
/// With `PATH_SUFFIX_LEN == 3` this is 24 bytes, i.e. 28 bytes once paired with the `u32` path id
/// it is stored next to in the routing table.
#[derive(Clone)]
struct Route {
        /// Last hops of the AS path, right-aligned; leading slots that are not needed stay zero.
        path_suffix: [u32; PATH_SUFFIX_LEN],
        /// Number of ASNs in the path's AS_SEQUENCE segments, counted before prepends are dropped.
        path_len: u32,
        /// LOCAL_PREF of the path (100 when the attribute is absent).
        pref: u32,
        /// MULTI_EXIT_DISC of the path (0 when the attribute is absent).
        med: u32,
}

/// Compile-time size guard: the subtraction underflows during constant evaluation if a
/// `(path id, Route)` table entry ever grows beyond 36 bytes.
#[allow(dead_code)]
const ROUTE_LEN: usize = 36 - std::mem::size_of::<(u32, Route)>();
29
// To keep memory tight (and since we don't need such close alignment), newtype the v4/v6 routing
// table entries to make sure they are aligned to single bytes.
32
/// IPv4 prefix (network octets plus prefix length) used as the key of the v4 routing table.
#[repr(packed)]
#[derive(Hash, Eq, PartialEq)]
struct V4Addr {
        /// Address octets in network order.
        addr: [u8; 4],
        /// Prefix length in bits.
        pfxlen: u8,
}

impl From<(Ipv4Addr, u8)> for V4Addr {
        /// Builds a key from an address and its prefix length.
        fn from((ip, pfxlen): (Ipv4Addr, u8)) -> Self {
                V4Addr { addr: ip.octets(), pfxlen }
        }
}

/// Compile-time guard: underflows during constant evaluation unless `V4Addr` is byte-aligned.
#[allow(dead_code)]
const V4_ALIGN: usize = 1 - std::mem::align_of::<V4Addr>();
/// Compile-time guard: underflows during constant evaluation if `V4Addr` exceeds 5 bytes.
#[allow(dead_code)]
const V4_SIZE: usize = 5 - std::mem::size_of::<V4Addr>();
51
/// IPv6 prefix (network octets plus prefix length) used as the key of the v6 routing table.
#[repr(packed)]
#[derive(Hash, Eq, PartialEq)]
struct V6Addr {
        /// Address octets in network order.
        addr: [u8; 16],
        /// Prefix length in bits.
        pfxlen: u8,
}

impl From<(Ipv6Addr, u8)> for V6Addr {
        /// Builds a key from an address and its prefix length.
        fn from((ip, pfxlen): (Ipv6Addr, u8)) -> Self {
                V6Addr { addr: ip.octets(), pfxlen }
        }
}

/// Compile-time guard: underflows during constant evaluation unless `V6Addr` is byte-aligned.
#[allow(dead_code)]
const V6_ALIGN: usize = 1 - std::mem::align_of::<V6Addr>();
/// Compile-time guard: underflows during constant evaluation if `V6Addr` exceeds 17 bytes.
#[allow(dead_code)]
const V6_SIZE: usize = 17 - std::mem::size_of::<V6Addr>();
70
71 struct RoutingTable {
72         // We really want a HashMap for the values here, but they'll only ever contain a few entries,
73         // and Vecs are way more memory-effecient in that case.
74         v4_table: HashMap<V4Addr, Vec<(u32, Route)>>,
75         v6_table: HashMap<V6Addr, Vec<(u32, Route)>>,
76         max_paths: usize,
77         routes_with_max: usize,
78 }
79
80 impl RoutingTable {
81         fn new() -> Self {
82                 Self {
83                         v4_table: HashMap::with_capacity(900_000),
84                         v6_table: HashMap::with_capacity(100_000),
85                         max_paths: 0,
86                         routes_with_max: 0,
87                 }
88         }
89
90         fn get_route_attrs(&self, ip: IpAddr) -> (u8, Vec<&Route>) {
91                 macro_rules! lookup_res {
92                         ($addrty: ty, $addr: expr, $table: expr, $addr_bits: expr) => { {
93                                 //TODO: Optimize this (probably means making the tables btrees)!
94                                 let mut lookup = <$addrty>::from(($addr, $addr_bits));
95                                 for i in 0..$addr_bits {
96                                         if let Some(routes) = $table.get(&lookup) {
97                                                 if routes.len() > 0 {
98                                                         return (lookup.pfxlen, routes.iter().map(|v| &v.1).collect());
99                                                 }
100                                         }
101                                         lookup.addr[lookup.addr.len() - (i/8) - 1] &= !(1u8 << (i % 8));
102                                         lookup.pfxlen -= 1;
103                                 }
104                                 (0, vec![])
105                         } }
106                 }
107                 match ip {
108                         IpAddr::V4(v4a) => lookup_res!(V4Addr, v4a, self.v4_table, 32),
109                         IpAddr::V6(v6a) => lookup_res!(V6Addr, v6a, self.v6_table, 128)
110                 }
111         }
112
113         fn withdraw(&mut self, route: NLRIEncoding) {
114                 macro_rules! remove {
115                         ($rt: expr, $v: expr, $id: expr) => { {
116                                 match $rt.entry($v.into()) {
117                                         hash_map::Entry::Occupied(mut entry) => {
118                                                 if entry.get().len() == self.max_paths {
119                                                         self.routes_with_max -= 1;
120                                                         if self.routes_with_max == 0 {
121                                                                 self.max_paths = 0;
122                                                         }
123                                                 }
124                                                 entry.get_mut().retain(|e| e.0 != $id);
125                                                 if entry.get_mut().is_empty() {
126                                                         entry.remove();
127                                                 }
128                                         },
129                                         _ => {},
130                                 }
131                         } }
132                 }
133                 match route {
134                         NLRIEncoding::IP(p) => {
135                                 let (ip, len) = <(IpAddr, u8)>::from(&p);
136                                 match ip {
137                                         IpAddr::V4(v4a) => remove!(self.v4_table, (v4a, len), 0),
138                                         IpAddr::V6(v6a) => remove!(self.v6_table, (v6a, len), 0),
139                                 }
140                         },
141                         NLRIEncoding::IP_WITH_PATH_ID((p, id)) => {
142                                 let (ip, len) = <(IpAddr, u8)>::from(&p);
143                                 match ip {
144                                         IpAddr::V4(v4a) => remove!(self.v4_table, (v4a, len), id),
145                                         IpAddr::V6(v6a) => remove!(self.v6_table, (v6a, len), id),
146                                 }
147                         },
148                         NLRIEncoding::IP_MPLS(_) => (),
149                         NLRIEncoding::IP_MPLS_WITH_PATH_ID(_) => (),
150                         NLRIEncoding::IP_VPN_MPLS(_) => (),
151                         NLRIEncoding::L2VPN(_) => (),
152                 };
153         }
154
155         fn announce(&mut self, prefix: NLRIEncoding, route: Route) {
156                 macro_rules! insert {
157                         ($rt: expr, $v: expr, $id: expr) => { {
158                                 let old_max_paths = self.max_paths;
159                                 let entry = $rt.entry($v.into()).or_insert_with(|| Vec::with_capacity(old_max_paths));
160                                 let entry_had_max = entry.len() == self.max_paths;
161                                 entry.retain(|e| e.0 != $id);
162                                 if entry_had_max {
163                                         entry.reserve_exact(1);
164                                 } else {
165                                         entry.reserve_exact(cmp::max(self.max_paths, entry.len() + 1) - entry.len());
166                                 }
167                                 entry.push(($id, route));
168                                 if entry.len() > self.max_paths {
169                                         self.max_paths = entry.len();
170                                         self.routes_with_max = 1;
171                                 } else if entry.len() == self.max_paths {
172                                         if !entry_had_max { self.routes_with_max += 1; }
173                                 }
174                         } }
175                 }
176                 match prefix {
177                         NLRIEncoding::IP(p) => {
178                                 let (ip, len) = <(IpAddr, u8)>::from(&p);
179                                 match ip {
180                                         IpAddr::V4(v4a) => insert!(self.v4_table, (v4a, len), 0),
181                                         IpAddr::V6(v6a) => insert!(self.v6_table, (v6a, len), 0),
182                                 }
183                         },
184                         NLRIEncoding::IP_WITH_PATH_ID((p, id)) => {
185                                 let (ip, len) = <(IpAddr, u8)>::from(&p);
186                                 match ip {
187                                         IpAddr::V4(v4a) => insert!(self.v4_table, (v4a, len), id),
188                                         IpAddr::V6(v6a) => insert!(self.v6_table, (v6a, len), id),
189                                 }
190                         },
191                         NLRIEncoding::IP_MPLS(_) => (),
192                         NLRIEncoding::IP_MPLS_WITH_PATH_ID(_) => (),
193                         NLRIEncoding::IP_VPN_MPLS(_) => (),
194                         NLRIEncoding::L2VPN(_) => (),
195                 };
196         }
197 }
198
199 /*struct BytesCoder<'a>(&'a mut bytes::BytesMut);
200 impl<'a> std::io::Write for BytesCoder<'a> {
201         fn write(&mut self, b: &[u8]) -> Result<usize, std::io::Error> {
202                 self.0.extend_from_slice(&b);
203                 Ok(b.len())
204         }
205         fn flush(&mut self) -> Result<(), std::io::Error> {
206                 Ok(())
207         }
208 }
209 struct BytesDecoder<'a> {
210         buf: &'a mut bytes::BytesMut,
211         pos: usize,
212 }
213 impl<'a> std::io::Read for BytesDecoder<'a> {
214         fn read(&mut self, b: &mut [u8]) -> Result<usize, std::io::Error> {
215                 let copy_len = cmp::min(b.len(), self.buf.len() - self.pos);
216                 b[..copy_len].copy_from_slice(&self.buf[self.pos..self.pos + copy_len]);
217                 self.pos += copy_len;
218                 Ok(copy_len)
219         }
220 }
221
222 struct MsgCoder(Option<Capabilities>);
223 impl codec::Decoder for MsgCoder {
224         type Item = Message;
225         type Error = std::io::Error;
226
227         fn decode(&mut self, bytes: &mut bytes::BytesMut) -> Result<Option<Message>, std::io::Error> {
228                 let mut decoder = BytesDecoder {
229                         buf: bytes,
230                         pos: 0
231                 };
232                 let def_cap = Default::default();
233                 let mut reader = Reader {
234                         stream: &mut decoder,
235                         capabilities: if let Some(cap) = &self.0 { cap } else { &def_cap },
236                 };
237                 match reader.read() {
238                         Ok((_header, msg)) => {
239                                 decoder.buf.advance(decoder.pos);
240                                 if let Message::Open(ref o) = &msg {
241                                 }
242                                 Ok(Some(msg))
243                         },
244                         Err(e) => match e.kind() {
245                                 std::io::ErrorKind::UnexpectedEof => Ok(None),
246                                 _ => Err(e),
247                         },
248                 }
249         }
250 }
251 impl codec::Encoder for MsgCoder {
252         type Item = Message;
253         type Error = std::io::Error;
254
255         fn encode(&mut self, msg: Message, res: &mut bytes::BytesMut) -> Result<(), std::io::Error> {
256                 msg.encode(&mut BytesCoder(res))?;
257                 Ok(())
258         }
259 }*/
260
261 pub struct BGPClient {
262         routes: Mutex<RoutingTable>,
263         shutdown: AtomicBool,
264 }
265 impl BGPClient {
266         pub fn get_asn(&self, addr: IpAddr) -> u32 {
267                 let lock = self.routes.lock().unwrap();
268                 let mut path_vecs = lock.get_route_attrs(addr).1;
269                 if path_vecs.is_empty() { return 0; }
270
271                 path_vecs.sort_unstable_by(|path_a, path_b| {
272                         path_a.pref.cmp(&path_b.pref)
273                                 .then(path_b.path_len.cmp(&path_a.path_len))
274                                 .then(path_b.med.cmp(&path_a.med))
275                 });
276
277                 let primary_route = path_vecs.pop().unwrap();
278                 if path_vecs.len() > 3 {
279                         // If we have at least 3 paths, try to find the last unique ASN which doesn't show up in other paths
280                         // If we hit a T1 that is reasonably assumed to care about net neutrality, return the
281                         // previous ASN.
282                         let mut prev_asn = 0;
283                         'asn_candidates: for asn in primary_route.path_suffix.iter().rev() {
284                                 if *asn == 0 { continue 'asn_candidates; }
285                                 match *asn {
286                                         // Included: CenturyLink (L3), Cogent, Telia, NTT, GTT, Level3,
287                                         //           GBLX (L3), Zayo, TI Sparkle Seabone, HE, Telefonica
288                                         // Left out from Caida top-20: TATA, PCCW, Vodafone, RETN, Orange, Telstra,
289                                         //                             Singtel, Rostelecom, DTAG
290                                         209|174|1299|2914|3257|3356|3549|6461|6762|6939|12956 if prev_asn != 0 => return prev_asn,
291                                         _ => if path_vecs.iter().any(|route| !route.path_suffix.contains(asn)) {
292                                                 if prev_asn != 0 { return prev_asn } else {
293                                                         // Multi-origin prefix, just give up and take the last AS in the
294                                                         // default path
295                                                         break 'asn_candidates;
296                                                 }
297                                         } else {
298                                                 // We only ever possibly return an ASN if it appears in all paths
299                                                 prev_asn = *asn;
300                                         },
301                                 }
302                         }
303                         // All paths were the same, if the first ASN is non-0, return it.
304                         if prev_asn != 0 {
305                                 return prev_asn;
306                         }
307                 }
308
309                 for asn in primary_route.path_suffix.iter().rev() {
310                         if *asn != 0 {
311                                 return *asn;
312                         }
313                 }
314                 0
315         }
316
317         pub fn get_path(&self, addr: IpAddr) -> (u8, [u32; PATH_SUFFIX_LEN]) {
318                 let lock = self.routes.lock().unwrap();
319                 let (prefixlen, mut path_vecs) = lock.get_route_attrs(addr);
320                 if path_vecs.is_empty() { return (0, [0; PATH_SUFFIX_LEN]); }
321
322                 path_vecs.sort_unstable_by(|path_a, path_b| {
323                         path_a.pref.cmp(&path_b.pref)
324                                 .then(path_b.path_len.cmp(&path_a.path_len))
325                                 .then(path_b.med.cmp(&path_a.med))
326                 });
327
328                 let primary_route = path_vecs.pop().unwrap();
329                 (prefixlen, primary_route.path_suffix)
330         }
331
332         pub fn disconnect(&self) {
333                 self.shutdown.store(true, Ordering::Relaxed);
334         }
335
336         fn map_attrs(mut attrs: Vec<PathAttribute>) -> Option<Route> {
337                 let mut as4_path = None;
338                 let mut as_path = None;
339                 let mut pref = 100;
340                 let mut med = 0;
341                 for attr in attrs.drain(..) {
342                         match attr {
343                                 PathAttribute::AS4_PATH(path) => as4_path = Some(path),
344                                 PathAttribute::AS_PATH(path) => as_path = Some(path),
345                                 PathAttribute::LOCAL_PREF(p) => pref = p,
346                                 PathAttribute::MULTI_EXIT_DISC(m) => med = m,
347                                 _ => {},
348                         }
349                 }
350                 if let Some(mut aspath) = as4_path.or(as_path) {
351                         let mut pathvec = Vec::new();
352                         for seg in aspath.segments.drain(..) {
353                                 match seg {
354                                         Segment::AS_SEQUENCE(mut asn) => pathvec.append(&mut asn),
355                                         Segment::AS_SET(_) => {}, // Ignore sets for now, they're not that common anyway
356                                 }
357                         }
358                         let path_len = pathvec.len() as u32;
359                         pathvec.dedup_by(|a, b| (*a).eq(b)); // Drop prepends, cause we don't care in this case
360
361                         let mut path_suffix = [0; PATH_SUFFIX_LEN];
362                         for (idx, asn) in pathvec.iter().rev().enumerate() {
363                                 path_suffix[PATH_SUFFIX_LEN - idx - 1] = *asn;
364                                 if idx == PATH_SUFFIX_LEN - 1 { break; }
365                         }
366
367                         return Some(Route {
368                                 path_suffix,
369                                 path_len,
370                                 pref,
371                                 med,
372                         })
373                 } else { None }
374         }
375
376         async fn handle_peer(open_msg: Message, stream: TcpStream, timeout: Duration, printer: &'static Printer, client: Arc<BGPClient>) -> Result<(), std::io::Error> {
377                 let mut open_bytes = [0; 64];
378                         let len = {
379                                 let mut write_cursor = std::io::Cursor::new(&mut open_bytes);
380                                 open_msg.encode(&mut write_cursor);
381                                 write_cursor.position()
382                         };
383                         stream.write_all(&open_bytes[..len]).await?;
384                         let mut cap = Default::default();
385
386                         let mut readpending = Vec::new();
387                         let mut readbuf = [0; 8192];
388                         let mut msg_timeout = time::sleep(timeout);
389                         'read_loop: loop {
390                                 if client.shutdown.load(Ordering::Relaxed) {
391                                         return std::io::Error::new(std::io::ErrorKind::Other, "Shutting Down");
392                                 }
393                                 tokio::select! {
394                                         _ = msg_timeout => {
395                                                 return Err(std::io::Error::new(std::io::ErrorKind::TimedOut, "Keepalive expired"));
396                                         },
397                                         res = stream.read(&mut readbuf) => {
398                                                 let mut msg_opt = None;
399                                                 let bytecnt = res?;
400                                                 if readpending.is_empty() {
401                                                         let mut cursor = std::io::Cursor::new(&readbuf[..bytecnt]);
402                                                         let mut reader = Reader { stream: &mut cursor, capabilities: &cap };
403                                                         match reader.read() {
404                                                                 Ok((_header, newmsg)) => { readpending.append(&readbuf[cursor.position()..bytecnt]); newmsg = Some(msg_opt) },
405                                                                 Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
406                                                                         readpending.append(&readbuf[..bytecnt]);
407                                                                         continue 'read_loop;
408                                                                 },
409                                                                 Err(e) => return Err(e),
410                                                         }
411                                                 } else { readpending.append(&readbuf[..bytecnt]); }
412                                                 loop {
413                                                         if msg_opt.is_none() {
414                                                                 let mut cursor = std::io::Cursor::new(&readpending);
415                                                                 let mut reader = Reader { stream: &mut cursor, capabilities: &cap };
416                                                                 match reader.read() {
417                                                                         Ok((_header, newmsg)) => { newmsg = Some(msg_opt) },
418                                                                         Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {},
419                                                                         Err(e) => return Err(e),
420                                                                 }
421                                                                 readpending = readpending.split_off(cursor.position());
422                                                         }
423                                                         if let Some(bgp_msg) = msg_opt.take() {
424                                                                 match bgp_msg {
425                                                                         Message::Open(o) => {
426                                                                                 cap = Capabilities::from_parameters(o.parameters.clone());
427                                                                                 client.routes.lock().unwrap().v4_table.clear();
428                                                                                 client.routes.lock().unwrap().v6_table.clear();
429                                                                                 printer.add_line("Connected to BGP route provider".to_string(), false);
430                                                                         },
431                                                                         Message::KeepAlive => {
432                                                                                 msg_timeout = time::sleep(timeout);
433                                                                                 //XXX: let _ = sender.try_send(Message::KeepAlive);
434                                                                         },
435                                                                         Message::Update(mut upd) => {
436                                                                                 upd.normalize();
437                                                                                 let mut route_table = client.routes.lock().unwrap();
438                                                                                 for r in upd.withdrawn_routes {
439                                                                                         route_table.withdraw(r);
440                                                                                 }
441                                                                                 if let Some(path) = Self::map_attrs(upd.attributes) {
442                                                                                         for r in upd.announced_routes {
443                                                                                                 route_table.announce(r, path.clone());
444                                                                                         }
445                                                                                 }
446                                                                                 printer.set_stat(Stat::V4RoutingTableSize(route_table.v4_table.len()));
447                                                                                 printer.set_stat(Stat::V6RoutingTableSize(route_table.v6_table.len()));
448                                                                                 printer.set_stat(Stat::RoutingTablePaths(route_table.max_paths));
449                                                                         },
450                                                                         _ => {}
451                                                                 }
452                                                         } else { break; }
453                                                 }
454                                         }
455                                 };
456                 }
457         }
458
459         fn connect_given_client(remote_asn: u32, addr: SocketAddr, timeout: Duration, printer: &'static Printer, client: Arc<BGPClient>) {
460                 tokio::spawn(async move {
461                         time::sleep(timeout / 4).await;
462
463                         let client_reconn = Arc::clone(&client);
464                         tokio::select! {
465                                 _ = time::sleep(timeout) => {
466                                         time::sleep(timeout / 2).await;
467                                 },
468                                 mut stream = TcpStream::connect(&addr) => {
469                                         let peer_asn = if remote_asn > u16::max_value() as u32 { 23456 } else { remote_asn as u16 };
470                                         let open_msg = Message::Open(Open {
471                                                 version: 4,
472                                                 peer_asn,
473                                                 hold_timer: timeout.as_secs() as u16,
474                                                 identifier: 0x453b1215, // 69.59.18.21. Note that you never actually need to change this.
475                                                 parameters: vec![OpenParameter::Capabilities(vec![
476                                                         OpenCapability::MultiProtocol((AFI::IPV4, SAFI::Unicast)),
477                                                         OpenCapability::MultiProtocol((AFI::IPV6, SAFI::Unicast)),
478                                                         OpenCapability::FourByteASN(remote_asn),
479                                                         OpenCapability::RouteRefresh,
480                                                         OpenCapability::AddPath(vec![
481                                                                 (AFI::IPV4, SAFI::Unicast, AddPathDirection::ReceivePaths),
482                                                                 (AFI::IPV6, SAFI::Unicast, AddPathDirection::ReceivePaths)]),
483                                                 ])],
484                                         });
485                                         let e = Self::handle_peer(open_msg, stream, printer, client);
486                                         printer.add_line(format!("Got error from BGP stream: {:?}", e), true);
487                                 }
488                         };
489                         if !client_reconn.shutdown.load(Ordering::Relaxed) {
490                                 BGPClient::connect_given_client(remote_asn, addr, timeout, printer, client_reconn);
491                         }
492                 });
493         }
494
495         pub fn new(remote_asn: u32, addr: SocketAddr, timeout: Duration, printer: &'static Printer) -> Arc<BGPClient> {
496                 let client = Arc::new(BGPClient {
497                         routes: Mutex::new(RoutingTable::new()),
498                         shutdown: AtomicBool::new(false),
499                 });
500                 BGPClient::connect_given_client(remote_asn, addr, timeout, printer, Arc::clone(&client));
501                 client
502         }
503 }