From: Matt Corallo <git@bluematt.me>
Date: Tue, 30 Apr 2024 17:11:54 +0000 (+0000)
Subject: Use consistent byte/char offsets when parsing invoice HRPs
X-Git-Tag: v0.0.124-beta~129^2
X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=8db1226ae46b9c63275dcba1227e5d3c123fb258;p=rust-lightning

Use consistent byte/char offsets when parsing invoice HRPs

When parsing lightning-invoice HRPs we want to read them
char-by-char, tracking the offset at which each field is located.
Prior to this commit, this was done by first reading char-by-char and
then indexing using the byte offset, which works for ASCII strings
but fails on multi-byte characters.

This commit fixes this issue by simply always walking byte-by-byte
and rejecting multi-byte characters, which don't belong in HRPs.
---

diff --git a/lightning-invoice/src/de.rs b/lightning-invoice/src/de.rs
index 56e5c53ba..c38aeb8a5 100644
--- a/lightning-invoice/src/de.rs
+++ b/lightning-invoice/src/de.rs
@@ -43,7 +43,11 @@ mod hrp_sm {
 	}
 
 	impl States {
-		fn next_state(&self, read_symbol: char) -> Result<States, super::Bolt11ParseError> {
+		fn next_state(&self, read_byte: u8) -> Result<States, super::Bolt11ParseError> {
+			let read_symbol = match char::from_u32(read_byte.into()) {
+				Some(symb) if symb.is_ascii() => symb,
+				_ => return Err(super::Bolt11ParseError::MalformedHRP),
+			};
 			match *self {
 				States::Start => {
 					if read_symbol == 'l' {
@@ -119,7 +123,7 @@ mod hrp_sm {
 			*range = Some(new_range);
 		}
 
-		fn step(&mut self, c: char) -> Result<(), super::Bolt11ParseError> {
+		fn step(&mut self, c: u8) -> Result<(), super::Bolt11ParseError> {
 			let next_state = self.state.next_state(c)?;
 			match next_state {
 				States::ParseCurrencyPrefix => {
@@ -158,7 +162,7 @@ mod hrp_sm {
 
 	pub fn parse_hrp(input: &str) -> Result<(&str, &str, &str), super::Bolt11ParseError> {
 		let mut sm = StateMachine::new();
-		for c in input.chars() {
+		for c in input.bytes() {
 			sm.step(c)?;
 		}