_ Git - rust-lightning/blob - lightning/src/util/base32.rs

   1 // This is a modification of base32 encoding to support the zbase32 alphabet.
   2 // The original piece of software can be found at https://crates.io/crates/base32(v0.4.0)
   3 // The original portions of this software are Copyright (c) 2015 The base32 Developers
   4
   5 // This file is licensed under either of
   6 // Apache License, Version 2.0, (LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0) or
   7 // MIT license (LICENSE-MIT or http://opensource.org/licenses/MIT) at your option.
   8
   9
  10 use crate::prelude::*;
  11
  12 /// RFC4648 encoding table
  13 const RFC4648_ALPHABET: &'static [u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
  14
  15 /// Zbase encoding alphabet
  16 const ZBASE_ALPHABET: &'static [u8] = b"ybndrfg8ejkmcpqxot1uwisza345h769";
  17
  18 /// RFC4648 decoding table
  19 const RFC4648_INV_ALPHABET: [i8; 43] = [
  20         -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
  21         9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
  22 ];
  23
  24 /// Zbase decoding table
  25 const ZBASE_INV_ALPHABET: [i8; 43] = [
  26         -1, 18, -1, 25, 26, 27, 30, 29, 7, 31, -1, -1, -1, -1, -1, -1, -1, 24, 1, 12, 3, 8, 5, 6, 28,
  27         21, 9, 10, -1, 11, 2, 16, 13, 14, 4, 22, 17, 19, -1, 20, 15, 0, 23,
  28 ];
  29
  30 /// Alphabet used for encoding and decoding.
  31 #[derive(Copy, Clone)]
  32 pub enum Alphabet {
  33         /// RFC4648 encoding.
  34         RFC4648 {
  35                 /// Whether to use padding.
  36                 padding: bool
  37         },
  38         /// Zbase32 encoding.
  39         ZBase32
  40 }
  41
  42 impl Alphabet {
  43         /// Encode bytes into a base32 string.
  44         pub fn encode(&self, data: &[u8]) -> String {
  45                 // output_length is calculated as follows:
  46                 // / 5 divides the data length by the number of bits per chunk (5),
  47                 // * 8 multiplies the result by the number of characters per chunk (8).
  48                 // + 4 rounds up to the nearest character.
  49                 let output_length = (data.len() * 8 + 4) / 5;
  50                 let mut ret = match self {
  51                         Self::RFC4648 { padding } => {
  52                                 let mut ret = Self::encode_data(data, RFC4648_ALPHABET);
  53                                 if *padding {
  54                                         let len = ret.len();
  55                                         for i in output_length..len {
  56                                                 ret[i] = b'=';
  57                                         }
  58
  59                                         return String::from_utf8(ret).expect("Invalid UTF-8");
  60                                 }
  61                                 ret
  62                         },
  63                         Self::ZBase32 => {
  64                                 Self::encode_data(data, ZBASE_ALPHABET)
  65                         },
  66                 };
  67                 ret.truncate(output_length);
  68
  69                 #[cfg(fuzzing)]
  70                 assert_eq!(ret.capacity(), (data.len() + 4) / 5 * 8);
  71
  72                 String::from_utf8(ret).expect("Invalid UTF-8")
  73         }
  74
  75         /// Decode a base32 string into a byte vector.
  76         pub fn decode(&self, data: &str) -> Result<Vec<u8>, ()> {
  77                 let data = data.as_bytes();
  78                 let (data, alphabet) = match self {
  79                         Self::RFC4648 { padding } => {
  80                                 let mut unpadded_data_length = data.len();
  81                                 if *padding {
  82                                         if data.len() % 8 != 0 { return Err(()); }
  83                                         data.iter().rev().take(6).for_each(|&c| {
  84                                                 if c == b'=' {
  85                                                         unpadded_data_length -= 1;
  86                                                 }
  87                                         });
  88                                 }
  89                                 (&data[..unpadded_data_length], RFC4648_INV_ALPHABET)
  90                         },
  91                         Self::ZBase32 => {
  92                                 (data, ZBASE_INV_ALPHABET)
  93                         }
  94                 };
  95                 // If the string has more characters than are required to alphabet_encode the number of bytes
  96                 // decodable, treat the string as invalid.
  97                 match data.len() % 8 { 1|3|6 => return Err(()), _ => {} }
  98                 Ok(Self::decode_data(data, alphabet)?)
  99         }
 100
 101         /// Encode a byte slice into a base32 string.
 102         fn encode_data(data: &[u8], alphabet: &'static [u8]) -> Vec<u8> {
 103                 // cap is calculated as follows:
 104                 // / 5 divides the data length by the number of bits per chunk (5),
 105                 // * 8 multiplies the result by the number of characters per chunk (8).
 106                 // + 4 rounds up to the nearest character.
 107                 let cap = (data.len() + 4) / 5 * 8;
 108                 let mut ret = Vec::with_capacity(cap);
 109                 for chunk in data.chunks(5) {
 110                         let mut buf = [0u8; 5];
 111                         for (i, &b) in chunk.iter().enumerate() {
 112                                 buf[i] = b;
 113                         }
 114                         ret.push(alphabet[((buf[0] & 0xF8) >> 3) as usize]);
 115                         ret.push(alphabet[(((buf[0] & 0x07) << 2) | ((buf[1] & 0xC0) >> 6)) as usize]);
 116                         ret.push(alphabet[((buf[1] & 0x3E) >> 1) as usize]);
 117                         ret.push(alphabet[(((buf[1] & 0x01) << 4) | ((buf[2] & 0xF0) >> 4)) as usize]);
 118                         ret.push(alphabet[(((buf[2] & 0x0F) << 1) | (buf[3] >> 7)) as usize]);
 119                         ret.push(alphabet[((buf[3] & 0x7C) >> 2) as usize]);
 120                         ret.push(alphabet[(((buf[3] & 0x03) << 3) | ((buf[4] & 0xE0) >> 5)) as usize]);
 121                         ret.push(alphabet[(buf[4] & 0x1F) as usize]);
 122                 }
 123                 #[cfg(fuzzing)]
 124                 assert_eq!(ret.capacity(), cap);
 125
 126                 ret
 127         }
 128
 129         fn decode_data(data: &[u8], alphabet: [i8; 43]) -> Result<Vec<u8>, ()> {
 130                 // cap is calculated as follows:
 131                 // / 8 divides the data length by the number of characters per chunk (8),
 132                 // * 5 multiplies the result by the number of bits per chunk (5),
 133                 // + 7 rounds up to the nearest byte.
 134                 let cap = (data.len() + 7) / 8 * 5;
 135                 let mut ret = Vec::with_capacity(cap);
 136                 for chunk in data.chunks(8) {
 137                         let mut buf = [0u8; 8];
 138                         for (i, &c) in chunk.iter().enumerate() {
 139                                 match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
 140                                         Some(&-1) | None => return Err(()),
 141                                         Some(&value) => buf[i] = value as u8,
 142                                 };
 143                         }
 144                         ret.push((buf[0] << 3) | (buf[1] >> 2));
 145                         ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
 146                         ret.push((buf[3] << 4) | (buf[4] >> 1));
 147                         ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
 148                         ret.push((buf[6] << 5) | buf[7]);
 149                 }
 150                 let output_length = data.len() * 5 / 8;
 151                 for c in ret.drain(output_length..) {
 152                         if c != 0 {
 153                                 // If the original string had any bits set at positions outside of the encoded data,
 154                                 // treat the string as invalid.
 155                                 return Err(());
 156                         }
 157                 }
 158
 159                 // Check that our capacity calculation doesn't under-shoot in fuzzing
 160                 #[cfg(fuzzing)]
 161                 assert_eq!(ret.capacity(), cap);
 162                 Ok(ret)
 163         }
 164 }
 165
 166 #[cfg(test)]
 167 mod tests {
 168         use super::*;
 169
 170         const ZBASE32_TEST_DATA: &[(&str, &[u8])] = &[
 171                 ("", &[]),
 172                 ("yy", &[0x00]),
 173                 ("oy", &[0x80]),
 174                 ("tqrey", &[0x8b, 0x88, 0x80]),
 175                 ("6n9hq", &[0xf0, 0xbf, 0xc7]),
 176                 ("4t7ye", &[0xd4, 0x7a, 0x04]),
 177                 ("6im5sdy", &[0xf5, 0x57, 0xbb, 0x0c]),
 178                 ("ybndrfg8ejkmcpqxot1uwisza345h769", &[0x00, 0x44, 0x32, 0x14, 0xc7, 0x42, 0x54, 0xb6,
 179                 0x35, 0xcf, 0x84, 0x65, 0x3a, 0x56, 0xd7, 0xc6,
 180                 0x75, 0xbe, 0x77, 0xdf])
 181         ];
 182
 183         #[test]
 184         fn test_zbase32_encode() {
 185                 for &(zbase32, data) in ZBASE32_TEST_DATA {
 186                         assert_eq!(Alphabet::ZBase32.encode(data), zbase32);
 187                 }
 188         }
 189
 190         #[test]
 191         fn test_zbase32_decode() {
 192                 for &(zbase32, data) in ZBASE32_TEST_DATA {
 193                         assert_eq!(Alphabet::ZBase32.decode(zbase32).unwrap(), data);
 194                 }
 195         }
 196
 197         #[test]
 198         fn test_decode_wrong() {
 199                 const WRONG_DATA: &[&str] = &["00", "l1", "?", "="];
 200                 for &data in WRONG_DATA {
 201                         match Alphabet::ZBase32.decode(data) {
 202                                 Ok(_) => assert!(false, "Data shouldn't be decodable"),
 203                                 Err(_) => assert!(true),
 204                         }
 205                 }
 206         }
 207
 208         const RFC4648_NON_PADDED_TEST_VECTORS: &[(&[u8], &[u8])] = &[
 209                 (&[0xF8, 0x3E, 0x7F, 0x83, 0xE7], b"7A7H7A7H"),
 210                 (&[0x77, 0xC1, 0xF7, 0x7C, 0x1F], b"O7A7O7A7"),
 211                 (&[0xF8, 0x3E, 0x7F, 0x83, 0xE7], b"7A7H7A7H"),
 212                 (&[0x77, 0xC1, 0xF7, 0x7C, 0x1F], b"O7A7O7A7"),
 213         ];
 214
 215         const RFC4648_TEST_VECTORS: &[(&[u8], &str)] = &[
 216                 (b"", ""),
 217                 (b"f", "MY======"),
 218                 (b"fo", "MZXQ===="),
 219                 (b"foo", "MZXW6==="),
 220                 (b"foob", "MZXW6YQ="),
 221                 (b"fooba", "MZXW6YTB"),
 222                 (b"foobar", "MZXW6YTBOI======"),
 223                 (&[0xF8, 0x3E, 0x7F, 0x83], "7A7H7AY="),
 224         ];
 225
 226         #[test]
 227         fn test_rfc4648_encode() {
 228                 for (input, encoded) in RFC4648_TEST_VECTORS {
 229                         assert_eq!(&Alphabet::RFC4648 { padding: true }.encode(input), encoded);
 230                 }
 231
 232                 for (input, encoded) in RFC4648_NON_PADDED_TEST_VECTORS {
 233                         assert_eq!(&Alphabet::RFC4648 { padding: false }.encode(input).as_bytes(), encoded);
 234                 }
 235         }
 236
 237         #[test]
 238         fn test_rfc4648_decode() {
 239                 for (input, encoded) in RFC4648_TEST_VECTORS {
 240                         let res = &Alphabet::RFC4648 { padding: true }.decode(encoded).unwrap();
 241                         assert_eq!(&res[..], &input[..]);
 242                 }
 243
 244                 for (input, encoded) in RFC4648_NON_PADDED_TEST_VECTORS {
 245                         let res = &Alphabet::RFC4648 { padding: false }.decode(std::str::from_utf8(encoded).unwrap()).unwrap();
 246                         assert_eq!(&res[..], &input[..]);
 247                 }
 248         }
 249
 250         #[test]
 251         fn padding() {
 252                 let num_padding = [0, 6, 4, 3, 1];
 253                 for i in 1..6 {
 254                         let encoded = Alphabet::RFC4648 { padding: true }.encode(
 255                                 (0..(i as u8)).collect::<Vec<u8>>().as_ref()
 256                         );
 257                         assert_eq!(encoded.len(), 8);
 258                         for j in 0..(num_padding[i % 5]) {
 259                                 assert_eq!(encoded.as_bytes()[encoded.len() - j - 1], b'=');
 260                         }
 261                         for j in 0..(8 - num_padding[i % 5]) {
 262                                 assert!(encoded.as_bytes()[j] != b'=');
 263                         }
 264                 }
 265         }
 266
 267         #[test]
 268         fn test_decode_rfc4648_errors() {
 269                 assert!(Alphabet::RFC4648 { padding: false }.decode("abc2def===").is_err()); // Invalid char because padding is disabled
 270                 assert!(Alphabet::RFC4648 { padding: true }.decode("abc2def===").is_err()); // Invalid length
 271                 assert!(Alphabet::RFC4648 { padding: true }.decode("MZX=6YTB").is_err()); // Invalid char
 272         }
 273 }