git.bitcoin.ninja Git - rust-lightning/blob - lightning/src/util/base32.rs

   1 // This is a modification of base32 encoding to support the zbase32 alphabet.
   2 // The original piece of software can be found at https://crates.io/crates/base32(v0.4.0)
   3 // The original portions of this software are Copyright (c) 2015 The base32 Developers
   4
   5 // This file is licensed under either of
   6 // Apache License, Version 2.0, (LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0) or
   7 // MIT license (LICENSE-MIT or http://opensource.org/licenses/MIT) at your option.
   8
   9
  10 use crate::prelude::*;
  11
  12 /// RFC4648 encoding table
  13 const RFC4648_ALPHABET: &'static [u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
  14
  15 /// RFC4648 decoding table
  16 const RFC4648_INV_ALPHABET: [i8; 43] = [
  17         -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
  18         9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
  19 ];
  20
  21 /// Alphabet used for encoding and decoding.
  22 #[derive(Copy, Clone)]
  23 pub enum Alphabet {
  24         /// RFC4648 encoding.
  25         RFC4648 {
  26                 /// Whether to use padding.
  27                 padding: bool
  28         }
  29 }
  30
  31 impl Alphabet {
  32         /// Encode bytes into a base32 string.
  33         pub fn encode(&self, data: &[u8]) -> String {
  34                 // output_length is calculated as follows:
  35                 // / 5 divides the data length by the number of bits per chunk (5),
  36                 // * 8 multiplies the result by the number of characters per chunk (8).
  37                 // + 4 rounds up to the nearest character.
  38                 let output_length = (data.len() * 8 + 4) / 5;
  39                 let mut ret = match self {
  40                         Self::RFC4648 { padding } => {
  41                                 let mut ret = Self::encode_data(data, RFC4648_ALPHABET);
  42                                 if *padding {
  43                                         let len = ret.len();
  44                                         for i in output_length..len {
  45                                                 ret[i] = b'=';
  46                                         }
  47
  48                                         return String::from_utf8(ret).expect("Invalid UTF-8");
  49                                 }
  50                                 ret
  51                         }
  52                 };
  53                 ret.truncate(output_length);
  54
  55                 #[cfg(fuzzing)]
  56                 assert_eq!(ret.capacity(), (data.len() + 4) / 5 * 8);
  57
  58                 String::from_utf8(ret).expect("Invalid UTF-8")
  59         }
  60
  61         /// Decode a base32 string into a byte vector.
  62         pub fn decode(&self, data: &str) -> Result<Vec<u8>, ()> {
  63                 let data = data.as_bytes();
  64                 let (data, alphabet) = match self {
  65                         Self::RFC4648 { padding } => {
  66                                 let mut unpadded_data_length = data.len();
  67                                 if *padding {
  68                                         if data.len() % 8 != 0 { return Err(()); }
  69                                         data.iter().rev().take(6).for_each(|&c| {
  70                                                 if c == b'=' {
  71                                                         unpadded_data_length -= 1;
  72                                                 }
  73                                         });
  74                                 }
  75                                 (&data[..unpadded_data_length], RFC4648_INV_ALPHABET)
  76                         }
  77                 };
  78                 // If the string has more characters than are required to alphabet_encode the number of bytes
  79                 // decodable, treat the string as invalid.
  80                 match data.len() % 8 { 1|3|6 => return Err(()), _ => {} }
  81                 Ok(Self::decode_data(data, alphabet)?)
  82         }
  83
  84         /// Encode a byte slice into a base32 string.
  85         fn encode_data(data: &[u8], alphabet: &'static [u8]) -> Vec<u8> {
  86                 // cap is calculated as follows:
  87                 // / 5 divides the data length by the number of bits per chunk (5),
  88                 // * 8 multiplies the result by the number of characters per chunk (8).
  89                 // + 4 rounds up to the nearest character.
  90                 let cap = (data.len() + 4) / 5 * 8;
  91                 let mut ret = Vec::with_capacity(cap);
  92                 for chunk in data.chunks(5) {
  93                         let mut buf = [0u8; 5];
  94                         for (i, &b) in chunk.iter().enumerate() {
  95                                 buf[i] = b;
  96                         }
  97                         ret.push(alphabet[((buf[0] & 0xF8) >> 3) as usize]);
  98                         ret.push(alphabet[(((buf[0] & 0x07) << 2) | ((buf[1] & 0xC0) >> 6)) as usize]);
  99                         ret.push(alphabet[((buf[1] & 0x3E) >> 1) as usize]);
 100                         ret.push(alphabet[(((buf[1] & 0x01) << 4) | ((buf[2] & 0xF0) >> 4)) as usize]);
 101                         ret.push(alphabet[(((buf[2] & 0x0F) << 1) | (buf[3] >> 7)) as usize]);
 102                         ret.push(alphabet[((buf[3] & 0x7C) >> 2) as usize]);
 103                         ret.push(alphabet[(((buf[3] & 0x03) << 3) | ((buf[4] & 0xE0) >> 5)) as usize]);
 104                         ret.push(alphabet[(buf[4] & 0x1F) as usize]);
 105                 }
 106                 #[cfg(fuzzing)]
 107                 assert_eq!(ret.capacity(), cap);
 108
 109                 ret
 110         }
 111
 112         fn decode_data(data: &[u8], alphabet: [i8; 43]) -> Result<Vec<u8>, ()> {
 113                 // cap is calculated as follows:
 114                 // / 8 divides the data length by the number of characters per chunk (8),
 115                 // * 5 multiplies the result by the number of bits per chunk (5),
 116                 // + 7 rounds up to the nearest byte.
 117                 let cap = (data.len() + 7) / 8 * 5;
 118                 let mut ret = Vec::with_capacity(cap);
 119                 for chunk in data.chunks(8) {
 120                         let mut buf = [0u8; 8];
 121                         for (i, &c) in chunk.iter().enumerate() {
 122                                 match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
 123                                         Some(&-1) | None => return Err(()),
 124                                         Some(&value) => buf[i] = value as u8,
 125                                 };
 126                         }
 127                         ret.push((buf[0] << 3) | (buf[1] >> 2));
 128                         ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
 129                         ret.push((buf[3] << 4) | (buf[4] >> 1));
 130                         ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
 131                         ret.push((buf[6] << 5) | buf[7]);
 132                 }
 133                 let output_length = data.len() * 5 / 8;
 134                 for c in ret.drain(output_length..) {
 135                         if c != 0 {
 136                                 // If the original string had any bits set at positions outside of the encoded data,
 137                                 // treat the string as invalid.
 138                                 return Err(());
 139                         }
 140                 }
 141
 142                 // Check that our capacity calculation doesn't under-shoot in fuzzing
 143                 #[cfg(fuzzing)]
 144                 assert_eq!(ret.capacity(), cap);
 145                 Ok(ret)
 146         }
 147 }
 148
 149 #[cfg(test)]
 150 mod tests {
 151         use super::*;
 152
 153         const RFC4648_NON_PADDED_TEST_VECTORS: &[(&[u8], &[u8])] = &[
 154                 (&[0xF8, 0x3E, 0x7F, 0x83, 0xE7], b"7A7H7A7H"),
 155                 (&[0x77, 0xC1, 0xF7, 0x7C, 0x1F], b"O7A7O7A7"),
 156                 (&[0xF8, 0x3E, 0x7F, 0x83, 0xE7], b"7A7H7A7H"),
 157                 (&[0x77, 0xC1, 0xF7, 0x7C, 0x1F], b"O7A7O7A7"),
 158         ];
 159
 160         const RFC4648_TEST_VECTORS: &[(&[u8], &str)] = &[
 161                 (b"", ""),
 162                 (b"f", "MY======"),
 163                 (b"fo", "MZXQ===="),
 164                 (b"foo", "MZXW6==="),
 165                 (b"foob", "MZXW6YQ="),
 166                 (b"fooba", "MZXW6YTB"),
 167                 (b"foobar", "MZXW6YTBOI======"),
 168                 (&[0xF8, 0x3E, 0x7F, 0x83], "7A7H7AY="),
 169         ];
 170
 171         #[test]
 172         fn test_rfc4648_encode() {
 173                 for (input, encoded) in RFC4648_TEST_VECTORS {
 174                         assert_eq!(&Alphabet::RFC4648 { padding: true }.encode(input), encoded);
 175                 }
 176
 177                 for (input, encoded) in RFC4648_NON_PADDED_TEST_VECTORS {
 178                         assert_eq!(&Alphabet::RFC4648 { padding: false }.encode(input).as_bytes(), encoded);
 179                 }
 180         }
 181
 182         #[test]
 183         fn test_rfc4648_decode() {
 184                 for (input, encoded) in RFC4648_TEST_VECTORS {
 185                         let res = &Alphabet::RFC4648 { padding: true }.decode(encoded).unwrap();
 186                         assert_eq!(&res[..], &input[..]);
 187                 }
 188
 189                 for (input, encoded) in RFC4648_NON_PADDED_TEST_VECTORS {
 190                         let res = &Alphabet::RFC4648 { padding: false }.decode(std::str::from_utf8(encoded).unwrap()).unwrap();
 191                         assert_eq!(&res[..], &input[..]);
 192                 }
 193         }
 194
 195         #[test]
 196         fn padding() {
 197                 let num_padding = [0, 6, 4, 3, 1];
 198                 for i in 1..6 {
 199                         let encoded = Alphabet::RFC4648 { padding: true }.encode(
 200                                 (0..(i as u8)).collect::<Vec<u8>>().as_ref()
 201                         );
 202                         assert_eq!(encoded.len(), 8);
 203                         for j in 0..(num_padding[i % 5]) {
 204                                 assert_eq!(encoded.as_bytes()[encoded.len() - j - 1], b'=');
 205                         }
 206                         for j in 0..(8 - num_padding[i % 5]) {
 207                                 assert!(encoded.as_bytes()[j] != b'=');
 208                         }
 209                 }
 210         }
 211
 212         #[test]
 213         fn test_decode_rfc4648_errors() {
 214                 assert!(Alphabet::RFC4648 { padding: false }.decode("abc2def===").is_err()); // Invalid char because padding is disabled
 215                 assert!(Alphabet::RFC4648 { padding: true }.decode("abc2def===").is_err()); // Invalid length
 216                 assert!(Alphabet::RFC4648 { padding: true }.decode("MZX=6YTB").is_err()); // Invalid char
 217         }
 218 }