/// Reads bytes 0..8 of `b` as one big-endian `u64`.
///
/// Panics if `b` is shorter than 8 bytes.
const fn u64_from_bytes_a_panicking(b: &[u8]) -> u64 {
    // Preserve the original's explicit-panic behavior on short input.
    if b.len() < 8 { panic!(); }
    u64::from_be_bytes([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]])
}
+
/// Reads bytes 8..16 of `b` as one big-endian `u64`.
///
/// Panics if `b` is shorter than 16 bytes.
const fn u64_from_bytes_b_panicking(b: &[u8]) -> u64 {
    // Preserve the original's explicit-panic behavior on short input.
    if b.len() < 16 { panic!(); }
    u64::from_be_bytes([b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]])
}
+
/// Reads bytes 16..24 of `b` as one big-endian `u64`.
///
/// Panics if `b` is shorter than 24 bytes.
const fn u64_from_bytes_c_panicking(b: &[u8]) -> u64 {
    // Preserve the original's explicit-panic behavior on short input.
    if b.len() < 24 { panic!(); }
    u64::from_be_bytes([b[16], b[17], b[18], b[19], b[20], b[21], b[22], b[23]])
}
+
/// Reads bytes 24..32 of `b` as one big-endian `u64`.
///
/// Panics if `b` is shorter than 32 bytes.
const fn u64_from_bytes_d_panicking(b: &[u8]) -> u64 {
    // Preserve the original's explicit-panic behavior on short input.
    if b.len() < 32 { panic!(); }
    u64::from_be_bytes([b[24], b[25], b[26], b[27], b[28], b[29], b[30], b[31]])
}
+
/// Reads bytes 32..40 of `b` as one big-endian `u64`.
///
/// Panics if `b` is shorter than 40 bytes.
const fn u64_from_bytes_e_panicking(b: &[u8]) -> u64 {
    // Preserve the original's explicit-panic behavior on short input.
    if b.len() < 40 { panic!(); }
    u64::from_be_bytes([b[32], b[33], b[34], b[35], b[36], b[37], b[38], b[39]])
}
+
/// Reads bytes 40..48 of `b` as one big-endian `u64`.
///
/// Panics if `b` is shorter than 48 bytes.
const fn u64_from_bytes_f_panicking(b: &[u8]) -> u64 {
    // Preserve the original's explicit-panic behavior on short input.
    if b.len() < 48 { panic!(); }
    u64::from_be_bytes([b[40], b[41], b[42], b[43], b[44], b[45], b[46], b[47]])
}
+
+impl U256 {
+ /// Constructs a new [`U256`] from a variable number of big-endian bytes.
+ pub(super) fn from_be_bytes(bytes: &[u8]) -> Result<U256, ()> {
+ if bytes.len() > 256/8 { return Err(()); }
+ let u64s = (bytes.len() + 7) / 8;
+ let mut res = [0; WORD_COUNT_256];
+ for i in 0..u64s {
+ let mut b = [0; 8];
+ let pos = (u64s - i) * 8;
+ let start = bytes.len().saturating_sub(pos);
+ let end = bytes.len() + 8 - pos;
+ b[8 + start - end..].copy_from_slice(&bytes[start..end]);
+ res[i + WORD_COUNT_256 - u64s] = u64::from_be_bytes(b);
+ }
+ Ok(U256(res))
+ }
+
+ /// Constructs a new [`U256`] from a fixed number of big-endian bytes.
+ pub(super) const fn from_32_be_bytes_panicking(bytes: &[u8; 32]) -> U256 {
+ let res = [
+ u64_from_bytes_a_panicking(bytes),
+ u64_from_bytes_b_panicking(bytes),
+ u64_from_bytes_c_panicking(bytes),
+ u64_from_bytes_d_panicking(bytes),
+ ];
+ U256(res)
+ }
+
+ pub(super) const fn zero() -> U256 { U256([0, 0, 0, 0]) }
+ pub(super) const fn one() -> U256 { U256([0, 0, 0, 1]) }
+ pub(super) const fn three() -> U256 { U256([0, 0, 0, 3]) }
+}
+
impl<M: PrimeModulus<U256>> U256Mod<M> {
    /// Montgomery reduction of an 8-word (512-bit) intermediate `mu`: computes
    /// `mu * R^-1 mod PRIME` for `R = 2^256`, i.e. takes the result of a
    /// Montgomery-form multiplication back down to a 4-word residue.
    const fn mont_reduction(mu: [u64; 8]) -> Self {
        #[cfg(debug_assertions)] {
            // Check NEGATIVE_PRIME_INV_MOD_R is correct. Since this is all const, the compiler
            // should be able to do it at compile time alone.
            let minus_one_mod_r = mul_4(&M::PRIME.0, &M::NEGATIVE_PRIME_INV_MOD_R.0);
            assert!(slice_equal(const_subslice(&minus_one_mod_r, 4, 8), &[0xffff_ffff_ffff_ffff; 4]));
        }

        #[cfg(debug_assertions)] {
            // Check R_SQUARED_MOD_PRIME is correct. Since this is all const, the compiler
            // should be able to do it at compile time alone.
            let r_minus_one = [0xffff_ffff_ffff_ffff; 4];
            let (mut r_mod_prime, _) = sub_4(&r_minus_one, &M::PRIME.0);
            add_one!(r_mod_prime);
            let r_squared = sqr_4(&r_mod_prime);
            let mut prime_extended = [0; 8];
            let prime = M::PRIME.0;
            copy_from_slice!(prime_extended, 4, 8, prime);
            let (_, r_squared_mod_prime) = if let Ok(v) = div_rem_8(&r_squared, &prime_extended) { v } else { panic!() };
            assert!(slice_greater_than(&prime_extended, &r_squared_mod_prime));
            assert!(slice_equal(const_subslice(&r_squared_mod_prime, 4, 8), &M::R_SQUARED_MOD_PRIME.0));
        }

        // REDC step: v = (mu mod R) * -PRIME^-1 mod R. Words 4..8 are the
        // least-significant half (word 0 is most significant throughout).
        let mu_mod_r = const_subslice(&mu, 4, 8);
        let mut v = mul_4(&mu_mod_r, &M::NEGATIVE_PRIME_INV_MOD_R.0);
        const ZEROS: &[u64; 4] = &[0; 4];
        copy_from_slice!(v, 0, 4, ZEROS); // mod R
        // t1 = mu + v * PRIME is a multiple of R by construction, so dividing
        // by R amounts to taking the high half (words 0..4) of t1.
        let t0 = mul_4(const_subslice(&v, 4, 8), &M::PRIME.0);
        let (t1, t1_extra_bit) = add_8(&t0, &mu);
        let t1_on_r = const_subslice(&t1, 0, 4);
        let mut res = [0; 4];
        // t1 / R lies in [0, 2 * PRIME), so at most one conditional
        // subtraction of PRIME brings it into range.
        if t1_extra_bit || slice_greater_than(&t1_on_r, &M::PRIME.0) {
            let underflow;
            (res, underflow) = sub_4(&t1_on_r, &M::PRIME.0);
            debug_assert!(t1_extra_bit == underflow);
        } else {
            copy_from_slice!(res, 0, 4, t1_on_r);
        }
        Self(U256(res), PhantomData)
    }

    /// Const-context constructor: converts `v` into Montgomery form.
    ///
    /// Panics if `v >= M::PRIME` or if `M::PRIME` is zero (when used in const
    /// context these become compile-time errors).
    pub(super) const fn from_u256_panicking(v: U256) -> Self {
        // Manual word-by-word lexicographic check that v < PRIME (word 0 is
        // the most significant word); const fns cannot call the derived
        // array/struct comparison operators.
        assert!(v.0[0] <= M::PRIME.0[0]);
        if v.0[0] == M::PRIME.0[0] {
            assert!(v.0[1] <= M::PRIME.0[1]);
            if v.0[1] == M::PRIME.0[1] {
                assert!(v.0[2] <= M::PRIME.0[2]);
                if v.0[2] == M::PRIME.0[2] {
                    assert!(v.0[3] < M::PRIME.0[3]);
                }
            }
        }
        assert!(M::PRIME.0[0] != 0 || M::PRIME.0[1] != 0 || M::PRIME.0[2] != 0 || M::PRIME.0[3] != 0);
        // Multiplying by R^2 then Montgomery-reducing yields v * R mod PRIME,
        // i.e. v in Montgomery form.
        Self::mont_reduction(mul_4(&M::R_SQUARED_MOD_PRIME.0, &v.0))
    }

    /// Converts `v` into Montgomery form, reducing it mod `PRIME` first.
    pub(super) fn from_u256(mut v: U256) -> Self {
        debug_assert!(M::PRIME.0 != [0; 4]);
        debug_assert!(M::PRIME.0[0] > (1 << 63), "PRIME should have the top bit set");
        // With PRIME's top bit set, any 256-bit v is < 2 * PRIME, so this loop
        // iterates at most once.
        while v >= M::PRIME {
            let (new_v, spurious_underflow) = sub_4(&v.0, &M::PRIME.0);
            debug_assert!(!spurious_underflow);
            v = U256(new_v);
        }
        Self::mont_reduction(mul_4(&M::R_SQUARED_MOD_PRIME.0, &v.0))
    }

    /// Computes the modular inverse of `v` mod `PRIME`, in Montgomery form.
    ///
    /// Returns `Err(())` when `mod_inv_4` fails (presumably when no inverse
    /// exists — confirm against `mod_inv_4`'s contract).
    pub(super) fn from_modinv_of(v: U256) -> Result<Self, ()> {
        Ok(Self::from_u256(U256(mod_inv_4(&v.0, &M::PRIME.0)?)))
    }

    /// Multiplies `self` * `b` mod `m`.
    ///
    /// Panics if `self`'s modulus is not equal to `b`'s (note: the modulus is
    /// fixed by the type parameter `M`, so the two cannot actually differ).
    pub(super) fn mul(&self, b: &Self) -> Self {
        Self::mont_reduction(mul_4(&self.0.0, &b.0.0))
    }

    /// Doubles `self` mod `m`.
    pub(super) fn double(&self) -> Self {
        let mut res = self.0.0;
        let overflow = double!(res);
        // After doubling, the value is < 2 * PRIME, so one conditional
        // subtraction suffices to re-normalize.
        if overflow || !slice_greater_than(&M::PRIME.0, &res) {
            let underflow;
            (res, underflow) = sub_4(&res, &M::PRIME.0);
            debug_assert_eq!(overflow, underflow);
        }
        Self(U256(res), PhantomData)
    }

    /// Multiplies `self` by 3 mod `m`.
    pub(super) fn times_three(&self) -> Self {
        // TODO: Optimize this a lot
        self.mul(&U256Mod::from_u256(U256::three()))
    }

    /// Multiplies `self` by 4 mod `m`.
    pub(super) fn times_four(&self) -> Self {
        // TODO: Optimize this somewhat?
        self.double().double()
    }

    /// Multiplies `self` by 8 mod `m`.
    pub(super) fn times_eight(&self) -> Self {
        // TODO: Optimize this somewhat?
        self.double().double().double()
    }

    /// Squares `self` mod `m`.
    pub(super) fn square(&self) -> Self {
        Self::mont_reduction(sqr_4(&self.0.0))
    }

    /// Subtracts `b` from `self` % `m`.
    pub(super) fn sub(&self, b: &Self) -> Self {
        let (mut val, underflow) = sub_4(&self.0.0, &b.0.0);
        // On underflow, wrap back into range by adding PRIME once.
        if underflow {
            let overflow;
            (val, overflow) = add_4(&val, &M::PRIME.0);
            debug_assert_eq!(overflow, underflow);
        }
        Self(U256(val), PhantomData)
    }

    /// Adds `b` to `self` % `m`.
    pub(super) fn add(&self, b: &Self) -> Self {
        let (mut val, overflow) = add_4(&self.0.0, &b.0.0);
        // Both operands are < PRIME, so the sum is < 2 * PRIME and one
        // conditional subtraction re-normalizes.
        if overflow || !slice_greater_than(&M::PRIME.0, &val) {
            let underflow;
            (val, underflow) = sub_4(&val, &M::PRIME.0);
            debug_assert_eq!(overflow, underflow);
        }
        Self(U256(val), PhantomData)
    }

    /// Returns the underlying [`U256`].
    pub(super) fn into_u256(self) -> U256 {
        // Placing the residue in the low half (words 4..8) and Montgomery-
        // reducing multiplies by R^-1, converting out of Montgomery form.
        let mut expanded_self = [0; 8];
        expanded_self[4..].copy_from_slice(&self.0.0);
        Self::mont_reduction(expanded_self).0
    }
}
+
+impl U384 {
+ /// Constructs a new [`U384`] from a variable number of big-endian bytes.
+ pub(super) fn from_be_bytes(bytes: &[u8]) -> Result<U384, ()> {
+ if bytes.len() > 384/8 { return Err(()); }
+ let u64s = (bytes.len() + 7) / 8;
+ let mut res = [0; WORD_COUNT_384];
+ for i in 0..u64s {
+ let mut b = [0; 8];
+ let pos = (u64s - i) * 8;
+ let start = bytes.len().saturating_sub(pos);
+ let end = bytes.len() + 8 - pos;
+ b[8 + start - end..].copy_from_slice(&bytes[start..end]);
+ res[i + WORD_COUNT_384 - u64s] = u64::from_be_bytes(b);
+ }
+ Ok(U384(res))
+ }
+
+ /// Constructs a new [`U384`] from a fixed number of big-endian bytes.
+ pub(super) const fn from_48_be_bytes_panicking(bytes: &[u8; 48]) -> U384 {
+ let res = [
+ u64_from_bytes_a_panicking(bytes),
+ u64_from_bytes_b_panicking(bytes),
+ u64_from_bytes_c_panicking(bytes),
+ u64_from_bytes_d_panicking(bytes),
+ u64_from_bytes_e_panicking(bytes),
+ u64_from_bytes_f_panicking(bytes),
+ ];
+ U384(res)
+ }
+
+ pub(super) const fn zero() -> U384 { U384([0, 0, 0, 0, 0, 0]) }
+ pub(super) const fn one() -> U384 { U384([0, 0, 0, 0, 0, 1]) }
+ pub(super) const fn three() -> U384 { U384([0, 0, 0, 0, 0, 3]) }
+}
+
impl<M: PrimeModulus<U384>> U384Mod<M> {
    /// Montgomery reduction of a 12-word (768-bit) intermediate `mu`: computes
    /// `mu * R^-1 mod PRIME` for `R = 2^384`, i.e. takes the result of a
    /// Montgomery-form multiplication back down to a 6-word residue.
    const fn mont_reduction(mu: [u64; 12]) -> Self {
        #[cfg(debug_assertions)] {
            // Check NEGATIVE_PRIME_INV_MOD_R is correct. Since this is all const, the compiler
            // should be able to do it at compile time alone.
            let minus_one_mod_r = mul_6(&M::PRIME.0, &M::NEGATIVE_PRIME_INV_MOD_R.0);
            assert!(slice_equal(const_subslice(&minus_one_mod_r, 6, 12), &[0xffff_ffff_ffff_ffff; 6]));
        }

        #[cfg(debug_assertions)] {
            // Check R_SQUARED_MOD_PRIME is correct. Since this is all const, the compiler
            // should be able to do it at compile time alone.
            let r_minus_one = [0xffff_ffff_ffff_ffff; 6];
            let (mut r_mod_prime, _) = sub_6(&r_minus_one, &M::PRIME.0);
            add_one!(r_mod_prime);
            let r_squared = sqr_6(&r_mod_prime);
            let mut prime_extended = [0; 12];
            let prime = M::PRIME.0;
            copy_from_slice!(prime_extended, 6, 12, prime);
            let (_, r_squared_mod_prime) = if let Ok(v) = div_rem_12(&r_squared, &prime_extended) { v } else { panic!() };
            assert!(slice_greater_than(&prime_extended, &r_squared_mod_prime));
            assert!(slice_equal(const_subslice(&r_squared_mod_prime, 6, 12), &M::R_SQUARED_MOD_PRIME.0));
        }

        // REDC step: v = (mu mod R) * -PRIME^-1 mod R. Words 6..12 are the
        // least-significant half (word 0 is most significant throughout).
        let mu_mod_r = const_subslice(&mu, 6, 12);
        let mut v = mul_6(&mu_mod_r, &M::NEGATIVE_PRIME_INV_MOD_R.0);
        const ZEROS: &[u64; 6] = &[0; 6];
        copy_from_slice!(v, 0, 6, ZEROS); // mod R
        // t1 = mu + v * PRIME is a multiple of R by construction, so dividing
        // by R amounts to taking the high half (words 0..6) of t1.
        let t0 = mul_6(const_subslice(&v, 6, 12), &M::PRIME.0);
        let (t1, t1_extra_bit) = add_12(&t0, &mu);
        let t1_on_r = const_subslice(&t1, 0, 6);
        let mut res = [0; 6];
        // t1 / R lies in [0, 2 * PRIME), so at most one conditional
        // subtraction of PRIME brings it into range.
        if t1_extra_bit || slice_greater_than(&t1_on_r, &M::PRIME.0) {
            let underflow;
            (res, underflow) = sub_6(&t1_on_r, &M::PRIME.0);
            debug_assert!(t1_extra_bit == underflow);
        } else {
            copy_from_slice!(res, 0, 6, t1_on_r);
        }
        Self(U384(res), PhantomData)
    }

    /// Const-context constructor: converts `v` into Montgomery form.
    ///
    /// Panics if `v >= M::PRIME` or if `M::PRIME` is zero (when used in const
    /// context these become compile-time errors).
    pub(super) const fn from_u384_panicking(v: U384) -> Self {
        // Manual word-by-word lexicographic check that v < PRIME (word 0 is
        // the most significant word); const fns cannot call the derived
        // array/struct comparison operators.
        assert!(v.0[0] <= M::PRIME.0[0]);
        if v.0[0] == M::PRIME.0[0] {
            assert!(v.0[1] <= M::PRIME.0[1]);
            if v.0[1] == M::PRIME.0[1] {
                assert!(v.0[2] <= M::PRIME.0[2]);
                if v.0[2] == M::PRIME.0[2] {
                    assert!(v.0[3] <= M::PRIME.0[3]);
                    if v.0[3] == M::PRIME.0[3] {
                        assert!(v.0[4] <= M::PRIME.0[4]);
                        if v.0[4] == M::PRIME.0[4] {
                            assert!(v.0[5] < M::PRIME.0[5]);
                        }
                    }
                }
            }
        }
        assert!(M::PRIME.0[0] != 0 || M::PRIME.0[1] != 0 || M::PRIME.0[2] != 0
            || M::PRIME.0[3] != 0|| M::PRIME.0[4] != 0|| M::PRIME.0[5] != 0);
        // Multiplying by R^2 then Montgomery-reducing yields v * R mod PRIME,
        // i.e. v in Montgomery form.
        Self::mont_reduction(mul_6(&M::R_SQUARED_MOD_PRIME.0, &v.0))
    }

    /// Converts `v` into Montgomery form, reducing it mod `PRIME` first.
    pub(super) fn from_u384(mut v: U384) -> Self {
        debug_assert!(M::PRIME.0 != [0; 6]);
        debug_assert!(M::PRIME.0[0] > (1 << 63), "PRIME should have the top bit set");
        // With PRIME's top bit set, any 384-bit v is < 2 * PRIME, so this loop
        // iterates at most once.
        while v >= M::PRIME {
            let (new_v, spurious_underflow) = sub_6(&v.0, &M::PRIME.0);
            debug_assert!(!spurious_underflow);
            v = U384(new_v);
        }
        Self::mont_reduction(mul_6(&M::R_SQUARED_MOD_PRIME.0, &v.0))
    }

    /// Computes the modular inverse of `v` mod `PRIME`, in Montgomery form.
    ///
    /// Returns `Err(())` when `mod_inv_6` fails (presumably when no inverse
    /// exists — confirm against `mod_inv_6`'s contract).
    pub(super) fn from_modinv_of(v: U384) -> Result<Self, ()> {
        Ok(Self::from_u384(U384(mod_inv_6(&v.0, &M::PRIME.0)?)))
    }

    /// Multiplies `self` * `b` mod `m`.
    ///
    /// Panics if `self`'s modulus is not equal to `b`'s (note: the modulus is
    /// fixed by the type parameter `M`, so the two cannot actually differ).
    pub(super) fn mul(&self, b: &Self) -> Self {
        Self::mont_reduction(mul_6(&self.0.0, &b.0.0))
    }

    /// Doubles `self` mod `m`.
    pub(super) fn double(&self) -> Self {
        let mut res = self.0.0;
        let overflow = double!(res);
        // After doubling, the value is < 2 * PRIME, so one conditional
        // subtraction suffices to re-normalize.
        if overflow || !slice_greater_than(&M::PRIME.0, &res) {
            let underflow;
            (res, underflow) = sub_6(&res, &M::PRIME.0);
            debug_assert_eq!(overflow, underflow);
        }
        Self(U384(res), PhantomData)
    }

    /// Multiplies `self` by 3 mod `m`.
    pub(super) fn times_three(&self) -> Self {
        // TODO: Optimize this a lot
        self.mul(&U384Mod::from_u384(U384::three()))
    }

    /// Multiplies `self` by 4 mod `m`.
    pub(super) fn times_four(&self) -> Self {
        // TODO: Optimize this somewhat?
        self.double().double()
    }

    /// Multiplies `self` by 8 mod `m`.
    pub(super) fn times_eight(&self) -> Self {
        // TODO: Optimize this somewhat?
        self.double().double().double()
    }

    /// Squares `self` mod `m`.
    pub(super) fn square(&self) -> Self {
        Self::mont_reduction(sqr_6(&self.0.0))
    }

    /// Subtracts `b` from `self` % `m`.
    pub(super) fn sub(&self, b: &Self) -> Self {
        let (mut val, underflow) = sub_6(&self.0.0, &b.0.0);
        // On underflow, wrap back into range by adding PRIME once.
        if underflow {
            let overflow;
            (val, overflow) = add_6(&val, &M::PRIME.0);
            debug_assert_eq!(overflow, underflow);
        }
        Self(U384(val), PhantomData)
    }

    /// Adds `b` to `self` % `m`.
    pub(super) fn add(&self, b: &Self) -> Self {
        let (mut val, overflow) = add_6(&self.0.0, &b.0.0);
        // Both operands are < PRIME, so the sum is < 2 * PRIME and one
        // conditional subtraction re-normalizes.
        if overflow || !slice_greater_than(&M::PRIME.0, &val) {
            let underflow;
            (val, underflow) = sub_6(&val, &M::PRIME.0);
            debug_assert_eq!(overflow, underflow);
        }
        Self(U384(val), PhantomData)
    }

    /// Returns the underlying [`U384`].
    pub(super) fn into_u384(self) -> U384 {
        // Placing the residue in the low half (words 6..12) and Montgomery-
        // reducing multiplies by R^-1, converting out of Montgomery form.
        let mut expanded_self = [0; 12];
        expanded_self[6..].copy_from_slice(&self.0.0);
        Self::mont_reduction(expanded_self).0
    }
}
+
#[cfg(fuzzing)]
mod fuzz_moduli {
    //! Concrete [`PrimeModulus`] parameter sets, compiled only for fuzzing.
    use super::*;

    /// Modulus parameters for the NIST P-256 (secp256r1) base field.
    pub struct P256();
    impl PrimeModulus<U256> for P256 {
        const PRIME: U256 = U256::from_32_be_bytes_panicking(&hex_lit::hex!(
            "ffffffff00000001000000000000000000000000ffffffffffffffffffffffff"));
        // Precomputed R^2 mod PRIME and -PRIME^-1 mod R for R = 2^256; these
        // are re-validated by the debug assertions in mont_reduction.
        const R_SQUARED_MOD_PRIME: U256 = U256::from_32_be_bytes_panicking(&hex_lit::hex!(
            "00000004fffffffdfffffffffffffffefffffffbffffffff0000000000000003"));
        const NEGATIVE_PRIME_INV_MOD_R: U256 = U256::from_32_be_bytes_panicking(&hex_lit::hex!(
            "ffffffff00000002000000000000000000000001000000000000000000000001"));
    }

    /// Modulus parameters for the NIST P-384 (secp384r1) base field.
    pub struct P384();
    impl PrimeModulus<U384> for P384 {
        const PRIME: U384 = U384::from_48_be_bytes_panicking(&hex_lit::hex!(
            "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff"));
        // Precomputed R^2 mod PRIME and -PRIME^-1 mod R for R = 2^384; these
        // are re-validated by the debug assertions in mont_reduction.
        const R_SQUARED_MOD_PRIME: U384 = U384::from_48_be_bytes_panicking(&hex_lit::hex!(
            "000000000000000000000000000000010000000200000000fffffffe000000000000000200000000fffffffe00000001"));
        const NEGATIVE_PRIME_INV_MOD_R: U384 = U384::from_48_be_bytes_panicking(&hex_lit::hex!(
            "00000014000000140000000c00000002fffffffcfffffffafffffffbfffffffe00000000000000010000000100000001"));
    }
}
+