From d736ca8595ec7de4badae675f37c221e21af33d2 Mon Sep 17 00:00:00 2001
From: jbesraa
Date: Fri, 25 Aug 2023 05:52:38 +0300
Subject: [PATCH] Add RFC4648 base32 `encode` and `decode` functions

---
 fuzz/src/base32.rs            |  52 ++++++++
 fuzz/src/bin/base32_target.rs | 113 ++++++++++++++++++
 fuzz/src/bin/gen_target.sh    |   1 +
 fuzz/src/lib.rs               |   1 +
 fuzz/targets.h                |   1 +
 lightning/src/util/base32.rs  | 218 ++++++++++++++++++++++++++++++++++
 lightning/src/util/mod.rs     |   4 +
 7 files changed, 390 insertions(+)
 create mode 100644 fuzz/src/base32.rs
 create mode 100644 fuzz/src/bin/base32_target.rs
 create mode 100644 lightning/src/util/base32.rs

diff --git a/fuzz/src/base32.rs b/fuzz/src/base32.rs
new file mode 100644
index 000000000..8171f19f6
--- /dev/null
+++ b/fuzz/src/base32.rs
@@ -0,0 +1,52 @@
+// This file is Copyright its original authors, visible in version control
+// history.
+//
+// This file is licensed under the Apache License, Version 2.0 <LICENSE-APACHE
+// or http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your option.
+// You may not use this file except in accordance with one or both of these
+// licenses.
+
+use lightning::util::base32;
+
+use crate::utils::test_logger;
+
+/// Checks that base32 encoding and decoding round-trip in both directions, both with
+/// and without padding.
+#[inline]
+pub fn do_test(data: &[u8]) {
+	for &padding in [true, false].iter() {
+		let alphabet = base32::Alphabet::RFC4648 { padding };
+
+		// Anything that decodes successfully must re-encode to the same string (modulo
+		// ASCII case, as the encoder always emits upper case) and decode to the same bytes.
+		if let Ok(s) = std::str::from_utf8(data) {
+			if let Ok(first_decoding) = alphabet.decode(s) {
+				let encoding_response = alphabet.encode(&first_decoding);
+				assert_eq!(encoding_response, s.to_ascii_uppercase());
+				let second_decoding = alphabet.decode(&encoding_response).unwrap();
+				assert_eq!(first_decoding, second_decoding);
+			}
+		}
+
+		// Arbitrary bytes must survive an encode/decode round trip.
+		let encode_response = alphabet.encode(data);
+		let decode_response = alphabet.decode(&encode_response).unwrap();
+		assert_eq!(data, decode_response);
+	}
+}
+
+/// Entry point used by the generated test harness; the logger argument is unused.
+pub fn base32_test<Out: test_logger::Output>(data: &[u8], _out: Out) {
+	do_test(data);
+}
+
+/// C ABI entry point for the fuzz targets.
+///
+/// `data` must point to `datalen` readable bytes.
+#[no_mangle]
+pub extern "C" fn base32_run(data: *const u8, datalen: usize) {
+	// SAFETY: the caller guarantees that `data` points to `datalen` initialized bytes which
+	// stay valid for the duration of this call.
+	do_test(unsafe { std::slice::from_raw_parts(data, datalen) });
+}
diff --git a/fuzz/src/bin/base32_target.rs b/fuzz/src/bin/base32_target.rs
new file mode 100644
index 000000000..a7951c770
--- /dev/null
+++ b/fuzz/src/bin/base32_target.rs
@@ -0,0 +1,113 @@
+// This file is Copyright its original authors, visible 
in version control
+// history.
+//
+// This file is licensed under the Apache License, Version 2.0 <LICENSE-APACHE
+// or http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your option.
+// You may not use this file except in accordance with one or both of these
+// licenses.
+
+// This file is auto-generated by gen_target.sh based on target_template.txt
+// To modify it, modify target_template.txt and run gen_target.sh instead.
+
+#![cfg_attr(feature = "libfuzzer_fuzz", no_main)]
+
+#[cfg(not(fuzzing))]
+compile_error!("Fuzz targets need cfg=fuzzing");
+
+extern crate lightning_fuzz;
+use lightning_fuzz::base32::*;
+
+#[cfg(feature = "afl")]
+#[macro_use] extern crate afl;
+#[cfg(feature = "afl")]
+fn main() {
+	fuzz!(|data| {
+		base32_run(data.as_ptr(), data.len());
+	});
+}
+
+#[cfg(feature = "honggfuzz")]
+#[macro_use] extern crate honggfuzz;
+#[cfg(feature = "honggfuzz")]
+fn main() {
+	loop {
+		fuzz!(|data| {
+			base32_run(data.as_ptr(), data.len());
+		});
+	}
+}
+
+#[cfg(feature = "libfuzzer_fuzz")]
+#[macro_use] extern crate libfuzzer_sys;
+#[cfg(feature = "libfuzzer_fuzz")]
+fuzz_target!(|data: &[u8]| {
+	base32_run(data.as_ptr(), data.len());
+});
+
+#[cfg(feature = "stdin_fuzz")]
+fn main() {
+	use std::io::Read;
+
+	let mut data = Vec::with_capacity(8192);
+	std::io::stdin().read_to_end(&mut data).unwrap();
+	base32_run(data.as_ptr(), data.len());
+}
+
+#[test]
+fn run_test_cases() {
+	use std::fs;
+	use std::io::Read;
+	use lightning_fuzz::utils::test_logger::StringBuffer;
+
+	use std::sync::{atomic, Arc};
+	{
+		let data: Vec<u8> = vec![0];
+		base32_run(data.as_ptr(), data.len());
+	}
+	let mut threads = Vec::new();
+	let threads_running = Arc::new(atomic::AtomicUsize::new(0));
+	if let Ok(tests) = fs::read_dir("test_cases/base32") {
+		for test in tests {
+			let mut data: Vec<u8> = Vec::new();
+			let path = test.unwrap().path();
+			fs::File::open(&path).unwrap().read_to_end(&mut data).unwrap();
+			threads_running.fetch_add(1, atomic::Ordering::AcqRel);
+
+			let thread_count_ref = Arc::clone(&threads_running);
+			let main_thread_ref = std::thread::current();
+			threads.push((path.file_name().unwrap().to_str().unwrap().to_string(),
+				std::thread::spawn(move || {
+					let string_logger = StringBuffer::new();
+
+					let panic_logger = string_logger.clone();
+					let res = if ::std::panic::catch_unwind(move || {
+						base32_test(&data, panic_logger);
+					}).is_err() {
+						Some(string_logger.into_string())
+					} else { None };
+					thread_count_ref.fetch_sub(1, atomic::Ordering::AcqRel);
+					main_thread_ref.unpark();
+					res
+				})
+			));
+			while threads_running.load(atomic::Ordering::Acquire) > 32 {
+				std::thread::park();
+			}
+		}
+	}
+	let mut failed_outputs = Vec::new();
+	for (test, thread) in threads.drain(..) {
+		if let Some(output) = thread.join().unwrap() {
+			println!("\nOutput of {}:\n{}\n", test, output);
+			failed_outputs.push(test);
+		}
+	}
+	if !failed_outputs.is_empty() {
+		println!("Test cases which failed: ");
+		for case in failed_outputs {
+			println!("{}", case);
+		}
+		panic!();
+	}
+}
diff --git a/fuzz/src/bin/gen_target.sh b/fuzz/src/bin/gen_target.sh
index fe17e4bab..676bfb821 100755
--- a/fuzz/src/bin/gen_target.sh
+++ b/fuzz/src/bin/gen_target.sh
@@ -21,6 +21,7 @@ GEN_TEST router
 GEN_TEST zbase32
 GEN_TEST indexedmap
 GEN_TEST onion_hop_data
+GEN_TEST base32
 
 GEN_TEST msg_accept_channel msg_targets::
 GEN_TEST msg_announcement_signatures msg_targets::
diff --git a/fuzz/src/lib.rs b/fuzz/src/lib.rs
index 6cdeb8ab5..607924eff 100644
--- a/fuzz/src/lib.rs
+++ b/fuzz/src/lib.rs
@@ -29,5 +29,6 @@ pub mod refund_deser;
 pub mod router;
 pub mod zbase32;
 pub mod onion_hop_data;
+pub mod base32;
 
 pub mod msg_targets;
diff --git a/fuzz/targets.h b/fuzz/targets.h
index 9b5a6d455..a17231c6d 100644
--- a/fuzz/targets.h
+++ b/fuzz/targets.h
@@ -14,6 +14,7 @@ void router_run(const unsigned char* data, size_t data_len);
 void zbase32_run(const unsigned char* data, size_t data_len);
 void indexedmap_run(const unsigned char* data, size_t data_len);
 void onion_hop_data_run(const unsigned char* data, size_t 
data_len);
+void base32_run(const unsigned char* data, size_t data_len);
 void msg_accept_channel_run(const unsigned char* data, size_t data_len);
 void msg_announcement_signatures_run(const unsigned char* data, size_t data_len);
 void msg_channel_reestablish_run(const unsigned char* data, size_t data_len);
diff --git a/lightning/src/util/base32.rs b/lightning/src/util/base32.rs
new file mode 100644
index 000000000..ff30a024f
--- /dev/null
+++ b/lightning/src/util/base32.rs
@@ -0,0 +1,218 @@
+// This is a modification of the base32 crate's encoding; only the RFC4648 alphabet is supported.
+// The original piece of software can be found at https://crates.io/crates/base32 (v0.4.0)
+// The original portions of this software are Copyright (c) 2015 The base32 Developers
+
+// This file is licensed under either of
+// Apache License, Version 2.0, (LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0) or
+// MIT license (LICENSE-MIT or http://opensource.org/licenses/MIT) at your option.
+
+
+use crate::prelude::*;
+
+/// RFC4648 encoding table
+const RFC4648_ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
+
+/// RFC4648 decoding table, indexed by `byte - b'0'`; -1 marks an invalid character
+const RFC4648_INV_ALPHABET: [i8; 43] = [
+	-1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8,
+	9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+];
+
+/// Alphabet used for encoding and decoding.
+#[derive(Copy, Clone)]
+pub enum Alphabet {
+	/// RFC4648 encoding.
+	RFC4648 {
+		/// Whether to use padding.
+		padding: bool
+	}
+}
+
+impl Alphabet {
+	/// Encode bytes into a base32 string.
+	pub fn encode(&self, data: &[u8]) -> String {
+		// output_length is the number of non-padding characters, calculated as follows:
+		// * 8 converts the data length from bytes to bits,
+		// / 5 divides that by the number of bits per character (5),
+		// + 4 rounds up to the nearest character.
+		let output_length = (data.len() * 8 + 4) / 5;
+		let mut ret = match self {
+			Self::RFC4648 { padding } => {
+				let mut ret = Self::encode_data(data, RFC4648_ALPHABET);
+				if *padding {
+					// `encode_data` always emits whole 8-character chunks, so replacing
+					// everything past `output_length` with `=` yields the padded form.
+					for c in &mut ret[output_length..] {
+						*c = b'=';
+					}
+					return String::from_utf8(ret).expect("Invalid UTF-8");
+				}
+				ret
+			}
+		};
+		ret.truncate(output_length);
+
+		#[cfg(fuzzing)]
+		assert_eq!(ret.capacity(), (data.len() + 4) / 5 * 8);
+
+		String::from_utf8(ret).expect("Invalid UTF-8")
+	}
+
+	/// Decode a base32 string into a byte vector. Lower-case input is accepted.
+	pub fn decode(&self, data: &str) -> Result<Vec<u8>, ()> {
+		let data = data.as_bytes();
+		let (data, alphabet) = match self {
+			Self::RFC4648 { padding } => {
+				let mut unpadded_data_length = data.len();
+				if *padding {
+					if data.len() % 8 != 0 { return Err(()); }
+					// Strip only the contiguous run of trailing `=` (never more than six in
+					// valid input). Any other `=` stays in the data and is rejected as an
+					// invalid character by `decode_data`.
+					let pad_len = data.iter().rev().take(6).take_while(|&&c| c == b'=').count();
+					unpadded_data_length -= pad_len;
+				}
+				(&data[..unpadded_data_length], RFC4648_INV_ALPHABET)
+			}
+		};
+		// If the string has more characters than are required to encode the number of bytes
+		// decodable, treat the string as invalid.
+		if matches!(data.len() % 8, 1 | 3 | 6) { return Err(()); }
+		Self::decode_data(data, alphabet)
+	}
+
+	/// Encode a byte slice into whole 8-character base32 chunks, without trimming or padding.
+	fn encode_data(data: &[u8], alphabet: &'static [u8]) -> Vec<u8> {
+		// cap is calculated as follows:
+		// + 4 rounds the data length up to the nearest 5-byte chunk,
+		// / 5 divides it by the number of bytes per chunk (5),
+		// * 8 multiplies the result by the number of characters per chunk (8).
+		let cap = (data.len() + 4) / 5 * 8;
+		let mut ret = Vec::with_capacity(cap);
+		for chunk in data.chunks(5) {
+			// A short final chunk is zero-filled; `encode` trims or pads the extra output.
+			let mut buf = [0u8; 5];
+			buf[..chunk.len()].copy_from_slice(chunk);
+			ret.push(alphabet[((buf[0] & 0xF8) >> 3) as usize]);
+			ret.push(alphabet[(((buf[0] & 0x07) << 2) | ((buf[1] & 0xC0) >> 6)) as usize]);
+			ret.push(alphabet[((buf[1] & 0x3E) >> 1) as usize]);
+			ret.push(alphabet[(((buf[1] & 0x01) << 4) | ((buf[2] & 0xF0) >> 4)) as usize]);
+			ret.push(alphabet[(((buf[2] & 0x0F) << 1) | (buf[3] >> 7)) as usize]);
+			ret.push(alphabet[((buf[3] & 0x7C) >> 2) as usize]);
+			ret.push(alphabet[(((buf[3] & 0x03) << 3) | ((buf[4] & 0xE0) >> 5)) as usize]);
+			ret.push(alphabet[(buf[4] & 0x1F) as usize]);
+		}
+		#[cfg(fuzzing)]
+		assert_eq!(ret.capacity(), cap);
+
+		ret
+	}
+
+	/// Decode unpadded base32 characters, rejecting invalid characters and stray trailing bits.
+	fn decode_data(data: &[u8], alphabet: [i8; 43]) -> Result<Vec<u8>, ()> {
+		// cap is calculated as follows:
+		// + 7 rounds the data length up to the nearest 8-character chunk,
+		// / 8 divides it by the number of characters per chunk (8),
+		// * 5 multiplies the result by the number of bytes per chunk (5).
+		let cap = (data.len() + 7) / 8 * 5;
+		let mut ret = Vec::with_capacity(cap);
+		for chunk in data.chunks(8) {
+			let mut buf = [0u8; 8];
+			for (i, &c) in chunk.iter().enumerate() {
+				match alphabet.get(c.to_ascii_uppercase().wrapping_sub(b'0') as usize) {
+					Some(&-1) | None => return Err(()),
+					Some(&value) => buf[i] = value as u8,
+				};
+			}
+			ret.push((buf[0] << 3) | (buf[1] >> 2));
+			ret.push((buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4));
+			ret.push((buf[3] << 4) | (buf[4] >> 1));
+			ret.push((buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3));
+			ret.push((buf[6] << 5) | buf[7]);
+		}
+		let output_length = data.len() * 5 / 8;
+		for c in ret.drain(output_length..) {
+			if c != 0 {
+				// If the original string had any bits set at positions outside of the encoded data,
+				// treat the string as invalid.
+				return Err(());
+			}
+		}
+
+		// Check that our capacity calculation doesn't under-shoot in fuzzing
+		#[cfg(fuzzing)]
+		assert_eq!(ret.capacity(), cap);
+		Ok(ret)
+	}
+}
+
+#[cfg(test)]
+mod tests {
+	use super::*;
+
+	const RFC4648_NON_PADDED_TEST_VECTORS: &[(&[u8], &[u8])] = &[
+		(&[0xF8, 0x3E, 0x7F, 0x83, 0xE7], b"7A7H7A7H"),
+		(&[0x77, 0xC1, 0xF7, 0x7C, 0x1F], b"O7A7O7A7"),
+		(b"f", b"MY"),
+		(b"foob", b"MZXW6YQ"),
+	];
+
+	const RFC4648_TEST_VECTORS: &[(&[u8], &str)] = &[
+		(b"", ""),
+		(b"f", "MY======"),
+		(b"fo", "MZXQ===="),
+		(b"foo", "MZXW6==="),
+		(b"foob", "MZXW6YQ="),
+		(b"fooba", "MZXW6YTB"),
+		(b"foobar", "MZXW6YTBOI======"),
+		(&[0xF8, 0x3E, 0x7F, 0x83], "7A7H7AY="),
+	];
+
+	#[test]
+	fn test_rfc4648_encode() {
+		for (input, encoded) in RFC4648_TEST_VECTORS {
+			assert_eq!(&Alphabet::RFC4648 { padding: true }.encode(input), encoded);
+		}
+
+		for (input, encoded) in RFC4648_NON_PADDED_TEST_VECTORS {
+			assert_eq!(&Alphabet::RFC4648 { padding: false }.encode(input).as_bytes(), encoded);
+		}
+	}
+
+	#[test]
+	fn test_rfc4648_decode() {
+		for (input, encoded) in RFC4648_TEST_VECTORS {
+			let res = &Alphabet::RFC4648 { padding: true }.decode(encoded).unwrap();
+			assert_eq!(&res[..], &input[..]);
+		}
+
+		for (input, encoded) in RFC4648_NON_PADDED_TEST_VECTORS {
+			let res = &Alphabet::RFC4648 { padding: false }.decode(std::str::from_utf8(encoded).unwrap()).unwrap();
+			assert_eq!(&res[..], &input[..]);
+		}
+	}
+
+	#[test]
+	fn padding() {
+		let num_padding = [0, 6, 4, 3, 1];
+		for i in 1..6 {
+			let encoded = Alphabet::RFC4648 { padding: true }.encode(
+				(0..(i as u8)).collect::<Vec<u8>>().as_ref()
+			);
+			assert_eq!(encoded.len(), 8);
+			for j in 0..(num_padding[i % 5]) {
+				assert_eq!(encoded.as_bytes()[encoded.len() - j - 1], b'=');
+			}
+			for j in 0..(8 - num_padding[i % 5]) {
+				assert!(encoded.as_bytes()[j] != b'=');
+			}
+		}
+	}
+
+	#[test]
+	fn test_decode_rfc4648_errors() {
+		assert!(Alphabet::RFC4648 { padding: false }.decode("abc2def===").is_err()); // Invalid char because padding is disabled
+		assert!(Alphabet::RFC4648 { padding: true }.decode("abc2def===").is_err()); // Invalid length
+		assert!(Alphabet::RFC4648 { padding: true }.decode("MZX=6YTB").is_err()); // Invalid char
+	}
+}
diff --git a/lightning/src/util/mod.rs b/lightning/src/util/mod.rs
index cc1b5f581..7eace2217 100644
--- a/lightning/src/util/mod.rs
+++ b/lightning/src/util/mod.rs
@@ -22,6 +22,10 @@ pub mod invoice;
 pub mod persist;
 pub mod string;
 pub mod wakers;
+#[cfg(fuzzing)]
+pub mod base32;
+#[cfg(not(fuzzing))]
+pub(crate) mod base32;
 
 pub(crate) mod atomic_counter;
 pub(crate) mod byte_utils;
-- 
2.39.5