From d736ca8595ec7de4badae675f37c221e21af33d2 Mon Sep 17 00:00:00 2001
From: jbesraa <jbesraa@gmail.com>
Date: Fri, 25 Aug 2023 05:52:38 +0300
Subject: [PATCH] Add RFC4648 base32 `encode` and `decode` functions

---
 fuzz/src/base32.rs            |  52 ++++++++
 fuzz/src/bin/base32_target.rs | 113 ++++++++++++++++++
 fuzz/src/bin/gen_target.sh    |   1 +
 fuzz/src/lib.rs               |   1 +
 fuzz/targets.h                |   1 +
 lightning/src/util/base32.rs  | 218 ++++++++++++++++++++++++++++++++++
 lightning/src/util/mod.rs     |   4 +
 7 files changed, 390 insertions(+)
 create mode 100644 fuzz/src/base32.rs
 create mode 100644 fuzz/src/bin/base32_target.rs
 create mode 100644 lightning/src/util/base32.rs
diff --git a/fuzz/src/base32.rs b/fuzz/src/base32.rs
new file mode 100644
index 000000000..8171f19f6
--- /dev/null
+++ b/fuzz/src/base32.rs
@@ -0,0 +1,52 @@
+// This file is Copyright its original authors, visible in version control
+// history.
+//
+// This file is licensed under the Apache License, Version 2.0 <LICENSE-APACHE
+// or http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your option.
+// You may not use this file except in accordance with one or both of these
+// licenses.
+
+use lightning::util::base32;
+
+use crate::utils::test_logger;
+
+/// Fuzz entry point: checks that base32 encoding and decoding are inverses of each other.
+///
+/// Panics (failing the fuzz run) if any round trip does not reproduce its input.
+#[inline]
+pub fn do_test(data: &[u8]) {
+	// Run the same checks against both the padded and the unpadded RFC4648 flavour.
+	for &padding in [true, false].iter() {
+		let alphabet = base32::Alphabet::RFC4648 { padding };
+
+		// When the input happens to be a valid base32 string, decoding it and re-encoding the
+		// result must give the input back. Decoding accepts lower-case characters while encoding
+		// only emits upper-case ones, hence the comparison against the upper-cased input.
+		if let Ok(s) = std::str::from_utf8(data) {
+			if let Ok(first_decoding) = alphabet.decode(s) {
+				let encoding_response = alphabet.encode(&first_decoding);
+				assert_eq!(encoding_response, s.to_ascii_uppercase());
+
+				// Decoding the re-encoded string must be stable as well.
+				let second_decoding = alphabet.decode(&encoding_response).unwrap();
+				assert_eq!(first_decoding, second_decoding);
+			}
+		}
+
+		// Arbitrary bytes must always survive an encode -> decode round trip, so a failed
+		// decode here is a bug and the `unwrap` is intentional.
+		let encode_response = alphabet.encode(data);
+		let decode_response = alphabet.decode(&encode_response).unwrap();
+		assert_eq!(data, decode_response);
+	}
+}
+
+pub fn base32_test<Out: test_logger::Output>(data: &[u8], _out: Out) {
+	do_test(data);
+}
+
+#[no_mangle]
+pub extern "C" fn base32_run(data: *const u8, datalen: usize) {
+	do_test(unsafe { std::slice::from_raw_parts(data, datalen) });
+}
diff --git a/fuzz/src/bin/base32_target.rs b/fuzz/src/bin/base32_target.rs
new file mode 100644
index 000000000..a7951c770
--- /dev/null
+++ b/fuzz/src/bin/base32_target.rs
@@ -0,0 +1,113 @@
+// This file is Copyright its original authors, visible in version control
+// history.
+//
+// This file is licensed under the Apache License, Version 2.0 <LICENSE-APACHE
+// or http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your option.
+// You may not use this file except in accordance with one or both of these
+// licenses.
+
+// This file is auto-generated by gen_target.sh based on target_template.txt
+// To modify it, modify target_template.txt and run gen_target.sh instead.
+
+#![cfg_attr(feature = "libfuzzer_fuzz", no_main)]
+
+#[cfg(not(fuzzing))]
+compile_error!("Fuzz targets need cfg=fuzzing");
+
+extern crate lightning_fuzz;
+use lightning_fuzz::base32::*;
+
+#[cfg(feature = "afl")]
+#[macro_use] extern crate afl;
+#[cfg(feature = "afl")]
+fn main() {
+	fuzz!(|data| {
+		base32_run(data.as_ptr(), data.len());
+	});
+}
+
+#[cfg(feature = "honggfuzz")]
+#[macro_use] extern crate honggfuzz;
+#[cfg(feature = "honggfuzz")]
+fn main() {
+	loop {
+		fuzz!(|data| {
+			base32_run(data.as_ptr(), data.len());
+		});
+	}
+}
+
+#[cfg(feature = "libfuzzer_fuzz")]
+#[macro_use] extern crate libfuzzer_sys;
+#[cfg(feature = "libfuzzer_fuzz")]
+fuzz_target!(|data: &[u8]| {
+	base32_run(data.as_ptr(), data.len());
+});
+
+#[cfg(feature = "stdin_fuzz")]
+fn main() {
+	use std::io::Read;
+
+	let mut data = Vec::with_capacity(8192);
+	std::io::stdin().read_to_end(&mut data).unwrap();
+	base32_run(data.as_ptr(), data.len());
+}
+
+#[test]
+fn run_test_cases() {
+	use std::fs;
+	use std::io::Read;
+	use lightning_fuzz::utils::test_logger::StringBuffer;
+
+	use std::sync::{atomic, Arc};
+	{
+		let data: Vec<u8> = vec![0];
+		base32_run(data.as_ptr(), data.len());
+	}
+	let mut threads = Vec::new();
+	let threads_running = Arc::new(atomic::AtomicUsize::new(0));
+	if let Ok(tests) = fs::read_dir("test_cases/base32") {
+		for test in tests {
+			let mut data: Vec<u8> = Vec::new();
+			let path = test.unwrap().path();
+			fs::File::open(&path).unwrap().read_to_end(&mut data).unwrap();
+			threads_running.fetch_add(1, atomic::Ordering::AcqRel);
+
+			let thread_count_ref = Arc::clone(&threads_running);
+			let main_thread_ref = std::thread::current();
+			threads.push((path.file_name().unwrap().to_str().unwrap().to_string(),
+				std::thread::spawn(move || {
+					let string_logger = StringBuffer::new();
+
+					let panic_logger = string_logger.clone();
+					let res = if ::std::panic::catch_unwind(move || {
+						base32_test(&data, panic_logger);
+					}).is_err() {
+						Some(string_logger.into_string())
+					} else { None };
+					thread_count_ref.fetch_sub(1, atomic::Ordering::AcqRel);
+					main_thread_ref.unpark();
+					res
+				})
+			));
+			while threads_running.load(atomic::Ordering::Acquire) > 32 {
+				std::thread::park();
+			}
+		}
+	}
+	let mut failed_outputs = Vec::new();
+	for (test, thread) in threads.drain(..) {
+		if let Some(output) = thread.join().unwrap() {
+			println!("\nOutput of {}:\n{}\n", test, output);
+			failed_outputs.push(test);
+		}
+	}
+	if !failed_outputs.is_empty() {
+		println!("Test cases which failed: ");
+		for case in failed_outputs {
+			println!("{}", case);
+		}
+		panic!();
+	}
+}
diff --git a/fuzz/src/bin/gen_target.sh b/fuzz/src/bin/gen_target.sh
index fe17e4bab..676bfb821 100755
--- a/fuzz/src/bin/gen_target.sh
+++ b/fuzz/src/bin/gen_target.sh
@@ -21,6 +21,7 @@ GEN_TEST router
 GEN_TEST zbase32
 GEN_TEST indexedmap
 GEN_TEST onion_hop_data
+GEN_TEST base32
 
 GEN_TEST msg_accept_channel msg_targets::
 GEN_TEST msg_announcement_signatures msg_targets::
diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs
index 6cdeb8ab5..607924eff 100644
--- a/fuzz/src/lib.rs
+++ b/fuzz/src/lib.rs
@@ -29,5 +29,6 @@ pub mod refund_deser;
 pub mod router;
 pub mod zbase32;
 pub mod onion_hop_data;
+pub mod base32;
 
 pub mod msg_targets;
diff --git a/fuzz/targets.h b/fuzz/targets.h
index 9b5a6d455..a17231c6d 100644
--- a/fuzz/targets.h
+++ b/fuzz/targets.h
@@ -14,6 +14,7 @@ void router_run(const unsigned char* data, size_t data_len);
 void zbase32_run(const unsigned char* data, size_t data_len);
 void indexedmap_run(const unsigned char* data, size_t data_len);
 void onion_hop_data_run(const unsigned char* data, size_t data_len);
+void base32_run(const unsigned char* data, size_t data_len);
 void msg_accept_channel_run(const unsigned char* data, size_t data_len);
 void msg_announcement_signatures_run(const unsigned char* data, size_t data_len);
 void msg_channel_reestablish_run(const unsigned char* data, size_t data_len);
diff --git a/lightning/src/util/base32.rs b/lightning/src/util/base32.rs
new file mode 100644
index 000000000..ff30a024f
--- /dev/null
+++ b/lightning/src/util/base32.rs
@@ -0,0 +1,218 @@
+// This is a modification of base32 encoding to support the zbase32 alphabet.
+// The original piece of software can be found at https://crates.io/crates/base32(v0.4.0)
+// The original portions of this software are Copyright (c) 2015 The base32 Developers
+
+// This file is licensed under either of
+// Apache License, Version 2.0, (LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0) or
+// MIT license (LICENSE-MIT or http://opensource.org/licenses/MIT) at your option.
+
+
+use crate::prelude::*;
+
+/// RFC4648 encoding table, indexed by the 5-bit value being encoded.
+const RFC4648_ALPHABET: &'static [u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
+
+/// RFC4648 decoding table, indexed by `byte - b'0'` (covering `'0'..='Z'`); `-1` marks invalid bytes.
+const RFC4648_INV_ALPHABET: [i8; 43] = [
+	-1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
+	9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+];
+
+/// Alphabet used for encoding and decoding.
+#[derive(Copy, Clone)]
+pub enum Alphabet {
+	/// RFC4648 encoding.
+	RFC4648 {
+		/// Whether encoded output is padded with `=`; when set, decoding requires padded input.
+		padding: bool
+	}
+}
+
+impl Alphabet {
+	/// Encode bytes into a base32 string.
+	pub fn encode(&self, data: &[u8]) -> String {
+		// output_length is the number of non-padding characters and is calculated as follows:
+		// * 8 converts the data length from bytes to bits,
+		// / 5 divides by the number of bits carried by each character (5),
+		// + 4 makes the division round up, so a trailing partial group still gets a character.
+		let output_length = (data.len() * 8 + 4) / 5;
+		let mut ret = match self {
+			Self::RFC4648 { padding } => {
+				let mut ret = Self::encode_data(data, RFC4648_ALPHABET);
+				if *padding {
+					let len = ret.len();
+					for i in output_length..len {
+						ret[i] = b'=';
+					}
+					// Padded output is returned whole, skipping the truncation below.
+					return String::from_utf8(ret).expect("Invalid UTF-8");
+				}
+				ret
+			}
+		};
+		ret.truncate(output_length);
+
+		#[cfg(fuzzing)]
+		assert_eq!(ret.capacity(), (data.len() + 4) / 5 * 8);
+
+		String::from_utf8(ret).expect("Invalid UTF-8")
+	}
+
+	/// Decode a base32 string into a byte vector, returning `Err(())` if it is not validly encoded.
+	pub fn decode(&self, data: &str) -> Result<Vec<u8>, ()> {
+		let data = data.as_bytes();
+		let (data, alphabet) = match self {
+			Self::RFC4648 { padding } => {
+				let mut unpadded_data_length = data.len();
+				if *padding {
+					if data.len() % 8 != 0 { return Err(()); }
+					data.iter().rev().take(6).for_each(|&c| {
+						if c == b'=' {
+							unpadded_data_length -= 1;
+						}
+					});
+				}
+				(&data[..unpadded_data_length], RFC4648_INV_ALPHABET)
+			}
+		};
+		// A trailing group of 1, 3 or 6 characters can never result from encoding whole bytes
+		// (those produce 2, 4, 5 or 7), so treat strings of such a length as invalid.
+		match data.len() % 8 { 1|3|6 => return Err(()), _ => {} }
+		Ok(Self::decode_data(data, alphabet)?)
+	}
+
+	/// Encode a byte slice into base32 characters, always emitting whole 8-character chunks.
+	fn encode_data(data: &[u8], alphabet: &'static [u8]) -> Vec<u8> {
+		// cap is calculated as follows:
+		// + 4 makes the division below round up to a whole chunk,
+		// / 5 divides the data length by the number of bytes per chunk (5),
+		// * 8 multiplies the result by the number of characters per chunk (8).
+		let cap = (data.len() + 4) / 5 * 8;
+		let mut ret = Vec::with_capacity(cap);
+		for chunk in data.chunks(5) {
+			let mut buf = [0u8; 5];
+			for (i, &b) in chunk.iter().enumerate() {
+				buf[i] = b;
+			}
+			ret.push(alphabet[((buf[0] & 0xF8) >> 3) as usize]);
+			ret.push(alphabet[(((buf[0] & 0x07) << 2) | ((buf[1] & 0xC0) >> 6)) as usize]);
+			ret.push(alphabet[((buf[1] & 0x3E) >> 1) as usize]);
+			ret.push(alphabet[(((buf[1] & 0x01) << 4) | ((buf[2] & 0xF0) >> 4)) as usize]);
+			ret.push(alphabet[(((buf[2] & 0x0F) << 1) | (buf[3] >> 7)) as usize]);
+			ret.push(alphabet[((buf[3] & 0x7C) >> 2) as usize]);
+			ret.push(alphabet[(((buf[3] & 0x03) << 3) | ((buf[4] & 0xE0) >> 5)) as usize]);
+			ret.push(alphabet[(buf[4] & 0x1F) as usize]);
+		}
+		#[cfg(fuzzing)]
+		assert_eq!(ret.capacity(), cap);
+
+		ret
+	}
+
+	fn decode_data(data: &[u8], alphabet: [i8; 43]) -> Result<Vec<u8>, ()> {
+		// cap is calculated as follows:
+		// + 7 makes the division below round up to a whole chunk,
+		// / 8 divides the data length by the number of characters per chunk (8),
+		// * 5 multiplies the result by the number of bytes per chunk (5).
+		let cap = (data.len() + 7) / 8 * 5;
+		let mut ret = Vec::with_capacity(cap);
+		for chunk in data.chunks(8) {
+			let mut buf = [0u8; 8];
+			for (i, &c) in chunk.iter().enumerate() {
+				match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
+					Some(&-1) | None => return Err(()),
+					Some(&value) => buf[i] = value as u8,
+				};
+			}
+			ret.push((buf[0] << 3) | (buf[1] >> 2));
+			ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
+			ret.push((buf[3] << 4) | (buf[4] >> 1));
+			ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
+			ret.push((buf[6] << 5) | buf[7]);
+		}
+		let output_length = data.len() * 5 / 8;
+		for c in ret.drain(output_length..) {
+			if c != 0 {
+				// If the original string had any bits set at positions outside of the encoded data,
+				// treat the string as invalid.
+				return Err(());
+			}
+		}
+
+		// Check that our capacity calculation doesn't under-shoot in fuzzing
+		#[cfg(fuzzing)]
+		assert_eq!(ret.capacity(), cap);
+		Ok(ret)
+	}
+}
+
+#[cfg(test)]
+mod tests {
+	use super::*;
+
+	const RFC4648_NON_PADDED_TEST_VECTORS: &[(&[u8], &[u8])] = &[
+		(&[0xF8, 0x3E, 0x7F, 0x83, 0xE7], b"7A7H7A7H"),
+		(&[0x77, 0xC1, 0xF7, 0x7C, 0x1F], b"O7A7O7A7"),
+		(b"f", b"MY"), // partial group: output is truncated rather than padded with `=`
+		(b"foob", b"MZXW6YQ"),
+	];
+
+	const RFC4648_TEST_VECTORS: &[(&[u8], &str)] = &[
+		(b"", ""),
+		(b"f", "MY======"),
+		(b"fo", "MZXQ===="),
+		(b"foo", "MZXW6==="),
+		(b"foob", "MZXW6YQ="),
+		(b"fooba", "MZXW6YTB"),
+		(b"foobar", "MZXW6YTBOI======"),
+		(&[0xF8, 0x3E, 0x7F, 0x83], "7A7H7AY="),
+	];
+
+	#[test]
+	fn test_rfc4648_encode() {
+		for (input, encoded) in RFC4648_TEST_VECTORS {
+			assert_eq!(&Alphabet::RFC4648 { padding: true }.encode(input), encoded);
+		}
+
+		for (input, encoded) in RFC4648_NON_PADDED_TEST_VECTORS {
+			assert_eq!(&Alphabet::RFC4648 { padding: false }.encode(input).as_bytes(), encoded);
+		}
+	}
+
+	#[test]
+	fn test_rfc4648_decode() {
+		for (input, encoded) in RFC4648_TEST_VECTORS {
+			let res = &Alphabet::RFC4648 { padding: true }.decode(encoded).unwrap();
+			assert_eq!(&res[..], &input[..]);
+		}
+
+		for (input, encoded) in RFC4648_NON_PADDED_TEST_VECTORS {
+			let res = &Alphabet::RFC4648 { padding: false }.decode(std::str::from_utf8(encoded).unwrap()).unwrap();
+			assert_eq!(&res[..], &input[..]);
+		}
+	}
+
+	#[test]
+	fn padding() {
+		let num_padding = [0, 6, 4, 3, 1]; // trailing `=` expected, indexed by input length % 5
+		for i in 1..6 {
+			let encoded = Alphabet::RFC4648 { padding: true }.encode(
+				(0..(i as u8)).collect::<Vec<u8>>().as_ref()
+			);
+			assert_eq!(encoded.len(), 8);
+			for j in 0..(num_padding[i % 5]) {
+				assert_eq!(encoded.as_bytes()[encoded.len() - j - 1], b'=');
+			}
+			for j in 0..(8 - num_padding[i % 5]) {
+				assert!(encoded.as_bytes()[j] != b'=');
+			}
+		}
+	}
+
+	#[test]
+	fn test_decode_rfc4648_errors() {
+		assert!(Alphabet::RFC4648 { padding: false }.decode("abc2def===").is_err()); // Invalid char because padding is disabled
+		assert!(Alphabet::RFC4648 { padding: true }.decode("abc2def===").is_err()); // Invalid length
+		assert!(Alphabet::RFC4648 { padding: true }.decode("MZX=6YTB").is_err()); // Invalid char
+	}
+}
diff --git a/lightning/src/util/mod.rs b/lightning/src/util/mod.rs
index cc1b5f581..7eace2217 100644
--- a/lightning/src/util/mod.rs
+++ b/lightning/src/util/mod.rs
@@ -22,6 +22,10 @@ pub mod invoice;
 pub mod persist;
 pub mod string;
 pub mod wakers;
+#[cfg(fuzzing)]
+pub mod base32;
+#[cfg(not(fuzzing))]
+pub(crate) mod base32;
 
 pub(crate) mod atomic_counter;
 pub(crate) mod byte_utils;
-- 
2.39.5