Add RFC4648 base32 `encode` and `decode` functions
authorjbesraa <jbesraa@gmail.com>
Fri, 25 Aug 2023 02:52:38 +0000 (05:52 +0300)
committerjbesraa <jbesraa@gmail.com>
Wed, 6 Sep 2023 12:07:25 +0000 (15:07 +0300)
fuzz/src/base32.rs [new file with mode: 0644]
fuzz/src/bin/base32_target.rs [new file with mode: 0644]
fuzz/src/bin/gen_target.sh
fuzz/src/lib.rs
fuzz/targets.h
lightning/src/util/base32.rs [new file with mode: 0644]
lightning/src/util/mod.rs

diff --git a/fuzz/src/base32.rs b/fuzz/src/base32.rs
new file mode 100644 (file)
index 0000000..8171f19
--- /dev/null
@@ -0,0 +1,52 @@
+// This file is Copyright its original authors, visible in version control
+// history.
+//
+// This file is licensed under the Apache License, Version 2.0 <LICENSE-APACHE
+// or http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your option.
+// You may not use this file except in accordance with one or both of these
+// licenses.
+
+use lightning::util::base32;
+
+use crate::utils::test_logger;
+
+#[inline]
+pub fn do_test(data: &[u8]) {
+       // Part one: if the raw input is itself an acceptable base32 string, decoding it and
+       // re-encoding the result must reproduce the input (modulo letter case), and decoding
+       // that re-encoding must yield the same bytes again. The padded alphabet is checked
+       // before the unpadded one.
+       if let Ok(text) = std::str::from_utf8(data) {
+               for &padding in [true, false].iter() {
+                       let alphabet = base32::Alphabet::RFC4648 { padding };
+                       let decoded = match alphabet.decode(text) {
+                               Ok(bytes) => bytes,
+                               Err(()) => continue,
+                       };
+                       let reencoded = alphabet.encode(&decoded);
+                       assert_eq!(reencoded, text.to_ascii_uppercase());
+                       let redecoded = alphabet.decode(&reencoded).unwrap();
+                       assert_eq!(decoded, redecoded);
+               }
+       }
+
+       // Part two: treating the raw input as arbitrary bytes, encoding followed by decoding
+       // must round-trip exactly, whatever the input is. Here the unpadded alphabet is checked
+       // before the padded one.
+       for &padding in [false, true].iter() {
+               let alphabet = base32::Alphabet::RFC4648 { padding };
+               let encoded = alphabet.encode(data);
+               let decoded = alphabet.decode(&encoded).unwrap();
+               assert_eq!(data, decoded);
+       }
+}
+
+pub fn base32_test<Out: test_logger::Output>(data: &[u8], _out: Out) { // `_out` is accepted but never written to
+       do_test(data);
+}
+
+#[no_mangle]
+pub extern "C" fn base32_run(data: *const u8, datalen: usize) {
+       do_test(unsafe { std::slice::from_raw_parts(data, datalen) }); // SAFETY: sound only if the caller passes `data` valid for reads of `datalen` bytes
+}
diff --git a/fuzz/src/bin/base32_target.rs b/fuzz/src/bin/base32_target.rs
new file mode 100644 (file)
index 0000000..a7951c7
--- /dev/null
@@ -0,0 +1,113 @@
+// This file is Copyright its original authors, visible in version control
+// history.
+//
+// This file is licensed under the Apache License, Version 2.0 <LICENSE-APACHE
+// or http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your option.
+// You may not use this file except in accordance with one or both of these
+// licenses.
+
+// This file is auto-generated by gen_target.sh based on target_template.txt
+// To modify it, modify target_template.txt and run gen_target.sh instead.
+
+#![cfg_attr(feature = "libfuzzer_fuzz", no_main)]
+
+#[cfg(not(fuzzing))]
+compile_error!("Fuzz targets need cfg=fuzzing");
+
+extern crate lightning_fuzz;
+use lightning_fuzz::base32::*;
+
+#[cfg(feature = "afl")]
+#[macro_use] extern crate afl;
+#[cfg(feature = "afl")]
+fn main() {
+       fuzz!(|data| {
+               base32_run(data.as_ptr(), data.len());
+       });
+}
+
+#[cfg(feature = "honggfuzz")]
+#[macro_use] extern crate honggfuzz;
+#[cfg(feature = "honggfuzz")]
+fn main() {
+       loop {
+               fuzz!(|data| {
+                       base32_run(data.as_ptr(), data.len());
+               });
+       }
+}
+
+#[cfg(feature = "libfuzzer_fuzz")]
+#[macro_use] extern crate libfuzzer_sys;
+#[cfg(feature = "libfuzzer_fuzz")]
+fuzz_target!(|data: &[u8]| {
+       base32_run(data.as_ptr(), data.len());
+});
+
+#[cfg(feature = "stdin_fuzz")]
+fn main() {
+       use std::io::Read;
+
+       let mut data = Vec::with_capacity(8192);
+       std::io::stdin().read_to_end(&mut data).unwrap();
+       base32_run(data.as_ptr(), data.len());
+}
+
+#[test]
+fn run_test_cases() {
+       use std::fs;
+       use std::io::Read;
+       use lightning_fuzz::utils::test_logger::StringBuffer;
+
+       use std::sync::{atomic, Arc};
+       {
+               let data: Vec<u8> = vec![0];
+               base32_run(data.as_ptr(), data.len());
+       }
+       let mut threads = Vec::new();
+       let threads_running = Arc::new(atomic::AtomicUsize::new(0));
+       if let Ok(tests) = fs::read_dir("test_cases/base32") {
+               for test in tests {
+                       let mut data: Vec<u8> = Vec::new();
+                       let path = test.unwrap().path();
+                       fs::File::open(&path).unwrap().read_to_end(&mut data).unwrap();
+                       threads_running.fetch_add(1, atomic::Ordering::AcqRel);
+
+                       let thread_count_ref = Arc::clone(&threads_running);
+                       let main_thread_ref = std::thread::current();
+                       threads.push((path.file_name().unwrap().to_str().unwrap().to_string(),
+                               std::thread::spawn(move || {
+                                       let string_logger = StringBuffer::new();
+
+                                       let panic_logger = string_logger.clone();
+                                       let res = if ::std::panic::catch_unwind(move || {
+                                               base32_test(&data, panic_logger);
+                                       }).is_err() {
+                                               Some(string_logger.into_string())
+                                       } else { None };
+                                       thread_count_ref.fetch_sub(1, atomic::Ordering::AcqRel);
+                                       main_thread_ref.unpark();
+                                       res
+                               })
+                       ));
+                       while threads_running.load(atomic::Ordering::Acquire) > 32 {
+                               std::thread::park();
+                       }
+               }
+       }
+       let mut failed_outputs = Vec::new();
+       for (test, thread) in threads.drain(..) {
+               if let Some(output) = thread.join().unwrap() {
+                       println!("\nOutput of {}:\n{}\n", test, output);
+                       failed_outputs.push(test);
+               }
+       }
+       if !failed_outputs.is_empty() {
+               println!("Test cases which failed: ");
+               for case in failed_outputs {
+                       println!("{}", case);
+               }
+               panic!();
+       }
+}
index fe17e4bab8ff356c599587446a65ec6c1033528a..676bfb82156c58c7f689de8177351900df9b0f68 100755 (executable)
@@ -21,6 +21,7 @@ GEN_TEST router
 GEN_TEST zbase32
 GEN_TEST indexedmap
 GEN_TEST onion_hop_data
+GEN_TEST base32
 
 GEN_TEST msg_accept_channel msg_targets::
 GEN_TEST msg_announcement_signatures msg_targets::
index 6cdeb8ab5d205f66be9083f035ea767d878cfad8..607924eff95874ab31bec1e18f99829389e06ee3 100644 (file)
@@ -29,5 +29,6 @@ pub mod refund_deser;
 pub mod router;
 pub mod zbase32;
 pub mod onion_hop_data;
+pub mod base32;
 
 pub mod msg_targets;
index 9b5a6d4553645215a32f6679c90d1b482d3d5c16..a17231c6d6d60f5518a67d75d64c925b388dc6aa 100644 (file)
@@ -14,6 +14,7 @@ void router_run(const unsigned char* data, size_t data_len);
 void zbase32_run(const unsigned char* data, size_t data_len);
 void indexedmap_run(const unsigned char* data, size_t data_len);
 void onion_hop_data_run(const unsigned char* data, size_t data_len);
+void base32_run(const unsigned char* data, size_t data_len);
 void msg_accept_channel_run(const unsigned char* data, size_t data_len);
 void msg_announcement_signatures_run(const unsigned char* data, size_t data_len);
 void msg_channel_reestablish_run(const unsigned char* data, size_t data_len);
diff --git a/lightning/src/util/base32.rs b/lightning/src/util/base32.rs
new file mode 100644 (file)
index 0000000..ff30a02
--- /dev/null
@@ -0,0 +1,218 @@
+// This is a modification of base32 encoding to support the zbase32 alphabet.
+// The original piece of software can be found at https://crates.io/crates/base32(v0.4.0)
+// The original portions of this software are Copyright (c) 2015 The base32 Developers
+
+// This file is licensed under either of
+// Apache License, Version 2.0, (LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0) or
+// MIT license (LICENSE-MIT or http://opensource.org/licenses/MIT) at your option.
+
+
+use crate::prelude::*;
+
+/// RFC4648 encoding table: entry `i` is the character emitted for the 5-bit value `i`.
+const RFC4648_ALPHABET: &'static [u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
+
+/// RFC4648 decoding table, indexed by `c - b'0'` for uppercased ASCII `'0'..='Z'`; -1 marks a character outside the alphabet.
+const RFC4648_INV_ALPHABET: [i8; 43] = [
+       -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
+       9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+];
+
+/// Alphabet used for encoding and decoding.
+#[derive(Copy, Clone)]
+pub enum Alphabet {
+       /// RFC4648 encoding.
+       RFC4648 {
+               /// Whether to use `=` padding: `encode` emits it, and `decode` then requires a length that is a multiple of 8.
+               padding: bool
+       }
+}
+
+impl Alphabet {
+       /// Encode bytes into a base32 string, padded with `=` to whole 8-character chunks if padding is enabled.
+       pub fn encode(&self, data: &[u8]) -> String {
+               // output_length is the number of data (non-padding) characters:
+               // * 8 converts the data length from bytes to bits,
+               // / 5 divides that by the number of bits per character (5),
+               // + 4 makes the division round up to the nearest character.
+               let output_length = (data.len() * 8 + 4) / 5;
+               let mut ret = match self {
+                       Self::RFC4648 { padding } => {
+                               let mut ret = Self::encode_data(data, RFC4648_ALPHABET);
+                               if *padding {
+                                       let len = ret.len();
+                                       for i in output_length..len {
+                                               ret[i] = b'=';
+                                       }
+
+                                       return String::from_utf8(ret).expect("Invalid UTF-8");
+                               }
+                               ret
+                       }
+               };
+               ret.truncate(output_length);
+
+               #[cfg(fuzzing)]
+               assert_eq!(ret.capacity(), (data.len() + 4) / 5 * 8);
+
+               String::from_utf8(ret).expect("Invalid UTF-8")
+       }
+
+       /// Decode a base32 string (in either letter case) into a byte vector; `Err(())` means invalid input.
+       pub fn decode(&self, data: &str) -> Result<Vec<u8>, ()> {
+               let data = data.as_bytes();
+               let (data, alphabet) = match self {
+                       Self::RFC4648 { padding } => {
+                               let mut unpadded_data_length = data.len();
+                               if *padding {
+                                       if data.len() % 8 != 0 { return Err(()); }
+                                       data.iter().rev().take(6).for_each(|&c| { // every '=' in the last 6 chars trims one char
+                                               if c == b'=' {
+                                                       unpadded_data_length -= 1;
+                                               }
+                                       });
+                               }
+                               (&data[..unpadded_data_length], RFC4648_INV_ALPHABET)
+                       }
+               };
+               // If the string has more characters than are required to encode the number of bytes
+               // decodable, treat the string as invalid.
+               match data.len() % 8 { 1|3|6 => return Err(()), _ => {} }
+               Ok(Self::decode_data(data, alphabet)?)
+       }
+
+       /// Encode a byte slice into base32 characters, zero-filling the final chunk to a full 8 characters.
+       fn encode_data(data: &[u8], alphabet: &'static [u8]) -> Vec<u8> {
+               // cap is calculated as follows:
+               // + 4 rounds the data length up to the next full chunk,
+               // / 5 divides it by the number of bytes per chunk (5),
+               // * 8 multiplies the result by the number of characters per chunk (8).
+               let cap = (data.len() + 4) / 5 * 8;
+               let mut ret = Vec::with_capacity(cap);
+               for chunk in data.chunks(5) {
+                       let mut buf = [0u8; 5];
+                       for (i, &b) in chunk.iter().enumerate() {
+                               buf[i] = b;
+                       }
+                       ret.push(alphabet[((buf[0] & 0xF8) >> 3) as usize]);
+                       ret.push(alphabet[(((buf[0] & 0x07) << 2) | ((buf[1] & 0xC0) >> 6)) as usize]);
+                       ret.push(alphabet[((buf[1] & 0x3E) >> 1) as usize]);
+                       ret.push(alphabet[(((buf[1] & 0x01) << 4) | ((buf[2] & 0xF0) >> 4)) as usize]);
+                       ret.push(alphabet[(((buf[2] & 0x0F) << 1) | (buf[3] >> 7)) as usize]);
+                       ret.push(alphabet[((buf[3] & 0x7C) >> 2) as usize]);
+                       ret.push(alphabet[(((buf[3] & 0x03) << 3) | ((buf[4] & 0xE0) >> 5)) as usize]);
+                       ret.push(alphabet[(buf[4] & 0x1F) as usize]);
+               }
+               #[cfg(fuzzing)]
+               assert_eq!(ret.capacity(), cap);
+
+               ret
+       }
+
+       fn decode_data(data: &[u8], alphabet: [i8; 43]) -> Result<Vec<u8>, ()> {
+               // cap is calculated as follows:
+               // + 7 rounds the data length up to the next full chunk,
+               // / 8 divides it by the number of characters per chunk (8),
+               // * 5 multiplies the result by the number of bytes per chunk (5).
+               let cap = (data.len() + 7) / 8 * 5;
+               let mut ret = Vec::with_capacity(cap);
+               for chunk in data.chunks(8) {
+                       let mut buf = [0u8; 8];
+                       for (i, &c) in chunk.iter().enumerate() {
+                               match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
+                                       Some(&-1) | None => return Err(()),
+                                       Some(&value) => buf[i] = value as u8,
+                               };
+                       }
+                       ret.push((buf[0] << 3) | (buf[1] >> 2));
+                       ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
+                       ret.push((buf[3] << 4) | (buf[4] >> 1));
+                       ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
+                       ret.push((buf[6] << 5) | buf[7]);
+               }
+               let output_length = data.len() * 5 / 8;
+               for c in ret.drain(output_length..) {
+                       if c != 0 {
+                               // If the original string had any bits set at positions outside of the encoded data,
+                               // treat the string as invalid.
+                               return Err(());
+                       }
+               }
+
+               // Check that our capacity calculation doesn't under-shoot in fuzzing
+               #[cfg(fuzzing)]
+               assert_eq!(ret.capacity(), cap);
+               Ok(ret)
+       }
+}
+
+#[cfg(test)]
+mod tests {
+       use super::*;
+
+       const RFC4648_NON_PADDED_TEST_VECTORS: &[(&[u8], &[u8])] = &[
+               (&[0xF8, 0x3E, 0x7F, 0x83, 0xE7], b"7A7H7A7H"),
+               (&[0x77, 0xC1, 0xF7, 0x7C, 0x1F], b"O7A7O7A7"),
+               (b"f", b"MY"), // partial chunk: unpadded output is shorter than 8 characters
+               (b"foob", b"MZXW6YQ"), // partial chunk: only the single padding character is dropped
+       ];
+
+       const RFC4648_TEST_VECTORS: &[(&[u8], &str)] = &[
+               (b"", ""),
+               (b"f", "MY======"),
+               (b"fo", "MZXQ===="),
+               (b"foo", "MZXW6==="),
+               (b"foob", "MZXW6YQ="),
+               (b"fooba", "MZXW6YTB"),
+               (b"foobar", "MZXW6YTBOI======"),
+               (&[0xF8, 0x3E, 0x7F, 0x83], "7A7H7AY="),
+       ];
+
+       #[test]
+       fn test_rfc4648_encode() {
+               for (input, encoded) in RFC4648_TEST_VECTORS {
+                       assert_eq!(&Alphabet::RFC4648 { padding: true }.encode(input), encoded);
+               }
+
+               for (input, encoded) in RFC4648_NON_PADDED_TEST_VECTORS {
+                       assert_eq!(&Alphabet::RFC4648 { padding: false }.encode(input).as_bytes(), encoded);
+               }
+       }
+
+       #[test]
+       fn test_rfc4648_decode() {
+               for (input, encoded) in RFC4648_TEST_VECTORS {
+                       let res = &Alphabet::RFC4648 { padding: true }.decode(encoded).unwrap();
+                       assert_eq!(&res[..], &input[..]);
+               }
+
+               for (input, encoded) in RFC4648_NON_PADDED_TEST_VECTORS {
+                       let res = &Alphabet::RFC4648 { padding: false }.decode(std::str::from_utf8(encoded).unwrap()).unwrap();
+                       assert_eq!(&res[..], &input[..]);
+               }
+       }
+
+       #[test]
+       fn padding() {
+               let num_padding = [0, 6, 4, 3, 1]; // expected '=' count, indexed by input length % 5
+               for i in 1..6 {
+                       let encoded = Alphabet::RFC4648 { padding: true }.encode(
+                               (0..(i as u8)).collect::<Vec<u8>>().as_ref()
+                       );
+                       assert_eq!(encoded.len(), 8);
+                       for j in 0..(num_padding[i % 5]) {
+                               assert_eq!(encoded.as_bytes()[encoded.len() - j - 1], b'=');
+                       }
+                       for j in 0..(8 - num_padding[i % 5]) {
+                               assert!(encoded.as_bytes()[j] != b'=');
+                       }
+               }
+       }
+
+       #[test]
+       fn test_decode_rfc4648_errors() {
+               assert!(Alphabet::RFC4648 { padding: false }.decode("abc2def===").is_err()); // Invalid char because padding is disabled
+               assert!(Alphabet::RFC4648 { padding: true }.decode("abc2def===").is_err()); // Invalid length
+               assert!(Alphabet::RFC4648 { padding: true }.decode("MZX=6YTB").is_err()); // Invalid char
+       }
+}
index cc1b5f581afb233fd1ee5e6c265d9d5f9ea492ae..7eace221779c9ddc6d40bd0382cfd6f5a63762ff 100644 (file)
@@ -22,6 +22,10 @@ pub mod invoice;
 pub mod persist;
 pub mod string;
 pub mod wakers;
+#[cfg(fuzzing)]
+pub mod base32;
+#[cfg(not(fuzzing))]
+pub(crate) mod base32;
 
 pub(crate) mod atomic_counter;
 pub(crate) mod byte_utils;