1 //! Simple variable-time big integer implementation
4 use core::marker::PhantomData;
6 const WORD_COUNT_4096: usize = 4096 / 64;
7 const WORD_COUNT_256: usize = 256 / 64;
8 const WORD_COUNT_384: usize = 384 / 64;
10 // RFC 5702 indicates RSA keys can be up to 4096 bits
11 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
12 pub(super) struct U4096([u64; WORD_COUNT_4096]);
14 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
15 pub(super) struct U256([u64; WORD_COUNT_256]);
17 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
18 pub(super) struct U384([u64; WORD_COUNT_384]);
20 pub(super) trait Int: Clone + Ord + Sized {
23 fn from_be_bytes(b: &[u8]) -> Result<Self, ()>;
24 fn limbs(&self) -> &[u64];
27 const ZERO: U256 = U256([0; 4]);
28 const BYTES: usize = 32;
29 fn from_be_bytes(b: &[u8]) -> Result<Self, ()> { Self::from_be_bytes(b) }
30 fn limbs(&self) -> &[u64] { &self.0 }
33 const ZERO: U384 = U384([0; 6]);
34 const BYTES: usize = 48;
35 fn from_be_bytes(b: &[u8]) -> Result<Self, ()> { Self::from_be_bytes(b) }
36 fn limbs(&self) -> &[u64] { &self.0 }
39 /// Defines a *PRIME* Modulus
40 pub(super) trait PrimeModulus<I: Int> {
42 const R_SQUARED_MOD_PRIME: I;
43 const NEGATIVE_PRIME_INV_MOD_R: I;
46 #[derive(Clone, Debug, PartialEq, Eq)] // Ord doesn't make sense cause we have an R factor
47 pub(super) struct U256Mod<M: PrimeModulus<U256>>(U256, PhantomData<M>);
49 #[derive(Clone, Debug, PartialEq, Eq)] // Ord doesn't make sense cause we have an R factor
50 pub(super) struct U384Mod<M: PrimeModulus<U384>>(U384, PhantomData<M>);
52 macro_rules! debug_unwrap { ($v: expr) => { {
54 debug_assert!(v.is_ok());
57 Err(e) => return Err(e),
61 // Various const versions of existing slice utilities
62 /// Const version of `&a[start..end]`
63 const fn const_subslice<'a, T>(a: &'a [T], start: usize, end: usize) -> &'a [T] {
64 assert!(start <= a.len());
65 assert!(end <= a.len());
66 assert!(end >= start);
67 let mut startptr = a.as_ptr();
68 startptr = unsafe { startptr.add(start) };
69 let len = end - start;
70 // The docs for from_raw_parts do not mention any requirements that the pointer be valid if the
71 // length is zero, aside from requiring proper alignment (which is met here). Thus,
72 // one-past-the-end should be an acceptable pointer for a 0-length slice.
73 unsafe { alloc::slice::from_raw_parts(startptr, len) }
76 /// Const version of `dest[dest_start..dest_end].copy_from_slice(source)`
78 /// Once `const_mut_refs` is stable we can convert this to a function
79 macro_rules! copy_from_slice {
80 ($dest: ident, $dest_start: expr, $dest_end: expr, $source: ident) => { {
81 let dest_start = $dest_start;
82 let dest_end = $dest_end;
83 assert!(dest_start <= $dest.len());
84 assert!(dest_end <= $dest.len());
85 assert!(dest_end >= dest_start);
86 assert!(dest_end - dest_start == $source.len());
88 while i < $source.len() {
89 $dest[i + dest_start] = $source[i];
95 /// Const version of a > b
96 const fn slice_greater_than(a: &[u64], b: &[u64]) -> bool {
97 debug_assert!(a.len() == b.len());
98 let len = if a.len() <= b.len() { a.len() } else { b.len() };
101 if a[i] > b[i] { return true; }
102 else if a[i] < b[i] { return false; }
108 /// Const version of a == b
109 const fn slice_equal(a: &[u64], b: &[u64]) -> bool {
110 debug_assert!(a.len() == b.len());
111 let len = if a.len() <= b.len() { a.len() } else { b.len() };
114 if a[i] != b[i] { return false; }
120 /// Adds one in-place, returning an overflow flag, in which case one out-of-bounds high bit is
121 /// implicitly included in the result.
123 /// Once `const_mut_refs` is stable we can convert this to a function
124 macro_rules! add_one { ($a: ident) => { {
129 let (v, carry) = $a[len - 1 - i].overflowing_add(1);
131 if !carry { res = false; break; }
137 /// Negates the given u64 slice.
139 /// Once `const_mut_refs` is stable we can convert this to a function
140 macro_rules! negate { ($v: ident) => { {
143 $v[i] ^= 0xffff_ffff_ffff_ffff;
146 let overflow = add_one!($v);
147 debug_assert!(!overflow);
150 /// Doubles in-place, returning an overflow flag, in which case one out-of-bounds high bit is
151 /// implicitly included in the result.
153 /// Once `const_mut_refs` is stable we can convert this to a function
154 macro_rules! double { ($a: ident) => { {
155 { let _: &[u64] = &$a; } // Force type resolution
157 let mut carry = false;
160 let mut next_carry = ($a[len - 1 - i] & (1 << 63)) != 0;
161 let (v, next_carry_2) = ($a[len - 1 - i] << 1).overflowing_add(carry as u64);
163 debug_assert!(!next_carry || !next_carry_2);
164 next_carry |= next_carry_2;
171 macro_rules! define_add { ($name: ident, $len: expr) => {
172 /// Adds two $len-64-bit integers together, returning a new $len-64-bit integer and an overflow
173 /// bit, with the same semantics as the std [`u64::overflowing_add`] method.
174 const fn $name(a: &[u64], b: &[u64]) -> ([u64; $len], bool) {
175 debug_assert!(a.len() == $len);
176 debug_assert!(b.len() == $len);
177 let mut r = [0; $len];
178 let mut carry = false;
181 let pos = $len - 1 - i;
182 let (v, mut new_carry) = a[pos].overflowing_add(b[pos]);
183 let (v2, new_new_carry) = v.overflowing_add(carry as u64);
184 new_carry |= new_new_carry;
193 define_add!(add_2, 2);
194 define_add!(add_3, 3);
195 define_add!(add_4, 4);
196 define_add!(add_6, 6);
197 define_add!(add_8, 8);
198 define_add!(add_12, 12);
199 define_add!(add_16, 16);
200 define_add!(add_32, 32);
201 define_add!(add_64, 64);
202 define_add!(add_128, 128);
204 macro_rules! define_sub { ($name: ident, $name_abs: ident, $len: expr) => {
205 /// Subtracts the `b` $len-64-bit integer from the `a` $len-64-bit integer, returning a new
206 /// $len-64-bit integer and an overflow bit, with the same semantics as the std
207 /// [`u64::overflowing_sub`] method.
208 const fn $name(a: &[u64], b: &[u64]) -> ([u64; $len], bool) {
209 debug_assert!(a.len() == $len);
210 debug_assert!(b.len() == $len);
211 let mut r = [0; $len];
212 let mut carry = false;
215 let pos = $len - 1 - i;
216 let (v, mut new_carry) = a[pos].overflowing_sub(b[pos]);
217 let (v2, new_new_carry) = v.overflowing_sub(carry as u64);
218 new_carry |= new_new_carry;
226 /// Subtracts the `b` $len-64-bit integer from the `a` $len-64-bit integer, returning a new
227 /// $len-64-bit integer representing the absolute value of the result, as well as a sign bit.
229 const fn $name_abs(a: &[u64], b: &[u64]) -> ([u64; $len], bool) {
230 let (mut res, neg) = $name(a, b);
238 define_sub!(sub_2, sub_abs_2, 2);
239 define_sub!(sub_3, sub_abs_3, 3);
240 define_sub!(sub_4, sub_abs_4, 4);
241 define_sub!(sub_6, sub_abs_6, 6);
242 define_sub!(sub_8, sub_abs_8, 8);
243 define_sub!(sub_12, sub_abs_12, 12);
244 define_sub!(sub_16, sub_abs_16, 16);
245 define_sub!(sub_32, sub_abs_32, 32);
246 define_sub!(sub_64, sub_abs_64, 64);
247 define_sub!(sub_128, sub_abs_128, 128);
249 /// Multiplies two 128-bit integers together, returning a new 256-bit integer.
251 /// This is the base case for our multiplication, taking advantage of Rust's native 128-bit int
252 /// types to do multiplication (potentially) natively.
253 const fn mul_2(a: &[u64], b: &[u64]) -> [u64; 4] {
254 debug_assert!(a.len() == 2);
255 debug_assert!(b.len() == 2);
257 // Gradeschool multiplication is way faster here.
258 let (a0, a1) = (a[0] as u128, a[1] as u128);
259 let (b0, b1) = (b[0] as u128, b[1] as u128);
263 let (z1, i_carry) = z1i.overflowing_add(z1j);
266 let z2a = ((z2 >> 64) & 0xffff_ffff_ffff_ffff) as u64;
267 let z1a = ((z1 >> 64) & 0xffff_ffff_ffff_ffff) as u64;
268 let z0a = ((z0 >> 64) & 0xffff_ffff_ffff_ffff) as u64;
269 let z2b = (z2 & 0xffff_ffff_ffff_ffff) as u64;
270 let z1b = (z1 & 0xffff_ffff_ffff_ffff) as u64;
271 let z0b = (z0 & 0xffff_ffff_ffff_ffff) as u64;
274 let (k, j_carry) = z0a.overflowing_add(z1b);
275 let (mut j, mut second_i_carry) = z1a.overflowing_add(z2b);
278 (j, new_i_carry) = j.overflowing_add(j_carry as u64);
279 debug_assert!(!second_i_carry || !new_i_carry);
280 second_i_carry |= new_i_carry;
283 let mut spurious_overflow;
284 (i, spurious_overflow) = i.overflowing_add(i_carry as u64);
285 debug_assert!(!spurious_overflow);
286 (i, spurious_overflow) = i.overflowing_add(second_i_carry as u64);
287 debug_assert!(!spurious_overflow);
292 const fn mul_3(a: &[u64], b: &[u64]) -> [u64; 6] {
293 debug_assert!(a.len() == 3);
294 debug_assert!(b.len() == 3);
296 let (a0, a1, a2) = (a[0] as u128, a[1] as u128, a[2] as u128);
297 let (b0, b1, b2) = (b[0] as u128, b[1] as u128, b[2] as u128);
309 let r5 = ((m4 >> 0) & 0xffff_ffff_ffff_ffff) as u64;
311 let r4a = ((m4 >> 64) & 0xffff_ffff_ffff_ffff) as u64;
312 let r4b = ((m3a >> 0) & 0xffff_ffff_ffff_ffff) as u64;
313 let r4c = ((m3b >> 0) & 0xffff_ffff_ffff_ffff) as u64;
315 let r3a = ((m3a >> 64) & 0xffff_ffff_ffff_ffff) as u64;
316 let r3b = ((m3b >> 64) & 0xffff_ffff_ffff_ffff) as u64;
317 let r3c = ((m2a >> 0 ) & 0xffff_ffff_ffff_ffff) as u64;
318 let r3d = ((m2b >> 0 ) & 0xffff_ffff_ffff_ffff) as u64;
319 let r3e = ((m2c >> 0 ) & 0xffff_ffff_ffff_ffff) as u64;
321 let r2a = ((m2a >> 64) & 0xffff_ffff_ffff_ffff) as u64;
322 let r2b = ((m2b >> 64) & 0xffff_ffff_ffff_ffff) as u64;
323 let r2c = ((m2c >> 64) & 0xffff_ffff_ffff_ffff) as u64;
324 let r2d = ((m1a >> 0 ) & 0xffff_ffff_ffff_ffff) as u64;
325 let r2e = ((m1b >> 0 ) & 0xffff_ffff_ffff_ffff) as u64;
327 let r1a = ((m1a >> 64) & 0xffff_ffff_ffff_ffff) as u64;
328 let r1b = ((m1b >> 64) & 0xffff_ffff_ffff_ffff) as u64;
329 let r1c = ((m0 >> 0 ) & 0xffff_ffff_ffff_ffff) as u64;
331 let r0a = ((m0 >> 64) & 0xffff_ffff_ffff_ffff) as u64;
333 let (r4, r3_ca) = r4a.overflowing_add(r4b);
334 let (r4, r3_cb) = r4.overflowing_add(r4c);
335 let r3_c = r3_ca as u64 + r3_cb as u64;
337 let (r3, r2_ca) = r3a.overflowing_add(r3b);
338 let (r3, r2_cb) = r3.overflowing_add(r3c);
339 let (r3, r2_cc) = r3.overflowing_add(r3d);
340 let (r3, r2_cd) = r3.overflowing_add(r3e);
341 let (r3, r2_ce) = r3.overflowing_add(r3_c);
342 let r2_c = r2_ca as u64 + r2_cb as u64 + r2_cc as u64 + r2_cd as u64 + r2_ce as u64;
344 let (r2, r1_ca) = r2a.overflowing_add(r2b);
345 let (r2, r1_cb) = r2.overflowing_add(r2c);
346 let (r2, r1_cc) = r2.overflowing_add(r2d);
347 let (r2, r1_cd) = r2.overflowing_add(r2e);
348 let (r2, r1_ce) = r2.overflowing_add(r2_c);
349 let r1_c = r1_ca as u64 + r1_cb as u64 + r1_cc as u64 + r1_cd as u64 + r1_ce as u64;
351 let (r1, r0_ca) = r1a.overflowing_add(r1b);
352 let (r1, r0_cb) = r1.overflowing_add(r1c);
353 let (r1, r0_cc) = r1.overflowing_add(r1_c);
354 let r0_c = r0_ca as u64 + r0_cb as u64 + r0_cc as u64;
356 let (r0, must_not_overflow) = r0a.overflowing_add(r0_c);
357 debug_assert!(!must_not_overflow);
359 [r0, r1, r2, r3, r4, r5]
362 macro_rules! define_mul { ($name: ident, $len: expr, $submul: ident, $add: ident, $subadd: ident, $sub: ident, $subsub: ident) => {
363 /// Multiplies two $len-64-bit integers together, returning a new $len*2-64-bit integer.
364 const fn $name(a: &[u64], b: &[u64]) -> [u64; $len * 2] {
365 // We could probably get a bit faster doing gradeschool multiplication for some smaller
366 // sizes, but its easier to just have one variable-length multiplication, so we do
367 // Karatsuba always here.
368 debug_assert!(a.len() == $len);
369 debug_assert!(b.len() == $len);
371 let a0 = const_subslice(a, 0, $len / 2);
372 let a1 = const_subslice(a, $len / 2, $len);
373 let b0 = const_subslice(b, 0, $len / 2);
374 let b1 = const_subslice(b, $len / 2, $len);
376 let z2 = $submul(a0, b0);
377 let z0 = $submul(a1, b1);
379 let (z1a_max, z1a_min, z1a_sign) =
380 if slice_greater_than(&a1, &a0) { (a1, a0, true) } else { (a0, a1, false) };
381 let (z1b_max, z1b_min, z1b_sign) =
382 if slice_greater_than(&b1, &b0) { (b1, b0, true) } else { (b0, b1, false) };
384 let z1a = $subsub(z1a_max, z1a_min);
385 debug_assert!(!z1a.1);
386 let z1b = $subsub(z1b_max, z1b_min);
387 debug_assert!(!z1b.1);
388 let z1m_sign = z1a_sign == z1b_sign;
390 let z1m = $submul(&z1a.0, &z1b.0);
391 let z1n = $add(&z0, &z2);
392 let mut z1_carry = z1n.1;
393 let z1 = if z1m_sign {
394 let r = $sub(&z1n.0, &z1m);
395 if r.1 { z1_carry ^= true; }
398 let r = $add(&z1n.0, &z1m);
399 if r.1 { z1_carry = true; }
403 let l = const_subslice(&z0, $len / 2, $len);
404 let (k, j_carry) = $subadd(const_subslice(&z0, 0, $len / 2), const_subslice(&z1, $len / 2, $len));
405 let (mut j, mut i_carry) = $subadd(const_subslice(&z1, 0, $len / 2), const_subslice(&z2, $len / 2, $len));
407 let new_i_carry = add_one!(j);
408 debug_assert!(!i_carry || !new_i_carry);
409 i_carry |= new_i_carry;
411 let mut i = [0; $len / 2];
412 let i_source = const_subslice(&z2, 0, $len / 2);
413 copy_from_slice!(i, 0, $len / 2, i_source);
415 let spurious_carry = add_one!(i);
416 debug_assert!(!spurious_carry);
419 let spurious_carry = add_one!(i);
420 debug_assert!(!spurious_carry);
423 let mut res = [0; $len * 2];
424 copy_from_slice!(res, $len * 2 * 0 / 4, $len * 2 * 1 / 4, i);
425 copy_from_slice!(res, $len * 2 * 1 / 4, $len * 2 * 2 / 4, j);
426 copy_from_slice!(res, $len * 2 * 2 / 4, $len * 2 * 3 / 4, k);
427 copy_from_slice!(res, $len * 2 * 3 / 4, $len * 2 * 4 / 4, l);
432 define_mul!(mul_4, 4, mul_2, add_4, add_2, sub_4, sub_2);
433 define_mul!(mul_6, 6, mul_3, add_6, add_3, sub_6, sub_3);
434 define_mul!(mul_8, 8, mul_4, add_8, add_4, sub_8, sub_4);
435 define_mul!(mul_16, 16, mul_8, add_16, add_8, sub_16, sub_8);
436 define_mul!(mul_32, 32, mul_16, add_32, add_16, sub_32, sub_16);
437 define_mul!(mul_64, 64, mul_32, add_64, add_32, sub_64, sub_32);
440 /// Squares a 128-bit integer, returning a new 256-bit integer.
442 /// This is the base case for our squaring, taking advantage of Rust's native 128-bit int
443 /// types to do multiplication (potentially) natively.
444 const fn sqr_2(a: &[u64]) -> [u64; 4] {
445 debug_assert!(a.len() == 2);
447 let (a0, a1) = (a[0] as u128, a[1] as u128);
449 let mut z1 = a0 * a1;
450 let i_carry = z1 & (1u128 << 127) != 0;
454 let z2a = ((z2 >> 64) & 0xffff_ffff_ffff_ffff) as u64;
455 let z1a = ((z1 >> 64) & 0xffff_ffff_ffff_ffff) as u64;
456 let z0a = ((z0 >> 64) & 0xffff_ffff_ffff_ffff) as u64;
457 let z2b = (z2 & 0xffff_ffff_ffff_ffff) as u64;
458 let z1b = (z1 & 0xffff_ffff_ffff_ffff) as u64;
459 let z0b = (z0 & 0xffff_ffff_ffff_ffff) as u64;
462 let (k, j_carry) = z0a.overflowing_add(z1b);
463 let (mut j, mut second_i_carry) = z1a.overflowing_add(z2b);
466 (j, new_i_carry) = j.overflowing_add(j_carry as u64);
467 debug_assert!(!second_i_carry || !new_i_carry);
468 second_i_carry |= new_i_carry;
471 let mut spurious_overflow;
472 (i, spurious_overflow) = i.overflowing_add(i_carry as u64);
473 debug_assert!(!spurious_overflow);
474 (i, spurious_overflow) = i.overflowing_add(second_i_carry as u64);
475 debug_assert!(!spurious_overflow);
480 macro_rules! define_sqr { ($name: ident, $len: expr, $submul: ident, $subsqr: ident, $subadd: ident) => {
481 /// Squares a $len-64-bit integers, returning a new $len*2-64-bit integer.
482 const fn $name(a: &[u64]) -> [u64; $len * 2] {
483 debug_assert!(a.len() == $len);
485 let hi = const_subslice(a, 0, $len / 2);
486 let lo = const_subslice(a, $len / 2, $len);
488 let v0 = $subsqr(lo);
489 let mut v1 = $submul(hi, lo);
490 let i_carry = double!(v1);
491 let v2 = $subsqr(hi);
493 let l = const_subslice(&v0, $len / 2, $len);
494 let (k, j_carry) = $subadd(const_subslice(&v0, 0, $len / 2), const_subslice(&v1, $len / 2, $len));
495 let (mut j, mut i_carry_2) = $subadd(const_subslice(&v1, 0, $len / 2), const_subslice(&v2, $len / 2, $len));
497 let mut i = [0; $len / 2];
498 let i_source = const_subslice(&v2, 0, $len / 2);
499 copy_from_slice!(i, 0, $len / 2, i_source);
502 let new_i_carry = add_one!(j);
503 debug_assert!(!i_carry_2 || !new_i_carry);
504 i_carry_2 |= new_i_carry;
507 let spurious_carry = add_one!(i);
508 debug_assert!(!spurious_carry);
511 let spurious_carry = add_one!(i);
512 debug_assert!(!spurious_carry);
515 let mut res = [0; $len * 2];
516 copy_from_slice!(res, $len * 2 * 0 / 4, $len * 2 * 1 / 4, i);
517 copy_from_slice!(res, $len * 2 * 1 / 4, $len * 2 * 2 / 4, j);
518 copy_from_slice!(res, $len * 2 * 2 / 4, $len * 2 * 3 / 4, k);
519 copy_from_slice!(res, $len * 2 * 3 / 4, $len * 2 * 4 / 4, l);
524 // TODO: Write an optimized sqr_3 (though secp384r1 is barely used)
525 const fn sqr_3(a: &[u64]) -> [u64; 6] { mul_3(a, a) }
527 define_sqr!(sqr_4, 4, mul_2, sqr_2, add_2);
528 define_sqr!(sqr_6, 6, mul_3, sqr_3, add_3);
529 define_sqr!(sqr_8, 8, mul_4, sqr_4, add_4);
530 define_sqr!(sqr_16, 16, mul_8, sqr_8, add_8);
531 define_sqr!(sqr_32, 32, mul_16, sqr_16, add_16);
532 define_sqr!(sqr_64, 64, mul_32, sqr_32, add_32);
534 macro_rules! dummy_pre_push { ($name: ident, $len: expr) => {} }
535 macro_rules! vec_pre_push { ($name: ident, $len: expr) => { $name.push([0; $len]); } }
537 macro_rules! define_div_rem { ($name: ident, $len: expr, $sub: ident, $heap_init: expr, $pre_push: ident $(, $const_opt: tt)?) => {
538 /// Divides two $len-64-bit integers, `a` by `b`, returning the quotient and remainder
540 /// Fails iff `b` is zero.
541 $($const_opt)? fn $name(a: &[u64; $len], b: &[u64; $len]) -> Result<([u64; $len], [u64; $len]), ()> {
542 if slice_equal(b, &[0; $len]) { return Err(()); }
545 let mut pow2s = $heap_init;
546 let mut pow2s_count = 0;
547 while slice_greater_than(a, &b_pow) {
548 $pre_push!(pow2s, $len);
549 pow2s[pow2s_count] = b_pow;
551 let double_overflow = double!(b_pow);
552 if double_overflow { break; }
554 let mut quot = [0; $len];
556 let mut pow2 = pow2s_count as isize - 1;
558 let b_pow = pow2s[pow2 as usize];
559 let overflow = double!(quot);
560 debug_assert!(!overflow);
561 if slice_greater_than(&rem, &b_pow) {
562 let (r, carry) = $sub(&rem, &b_pow);
563 debug_assert!(!carry);
569 if slice_equal(&rem, b) {
570 let overflow = add_one!(quot);
571 debug_assert!(!overflow);
572 Ok((quot, [0; $len]))
580 define_div_rem!(div_rem_2, 2, sub_2, [[0; 2]; 2 * 64], dummy_pre_push, const);
581 define_div_rem!(div_rem_4, 4, sub_4, [[0; 4]; 4 * 64], dummy_pre_push, const); // Uses 8 KiB of stack
582 define_div_rem!(div_rem_6, 6, sub_6, [[0; 6]; 6 * 64], dummy_pre_push, const); // Uses 18 KiB of stack!
583 #[cfg(debug_assertions)]
584 define_div_rem!(div_rem_8, 8, sub_8, [[0; 8]; 8 * 64], dummy_pre_push, const); // Uses 32 KiB of stack!
585 #[cfg(debug_assertions)]
586 define_div_rem!(div_rem_12, 12, sub_12, [[0; 12]; 12 * 64], dummy_pre_push, const); // Uses 72 KiB of stack!
587 define_div_rem!(div_rem_64, 64, sub_64, Vec::new(), vec_pre_push); // Uses up to 2 MiB of heap
588 #[cfg(debug_assertions)]
589 define_div_rem!(div_rem_128, 128, sub_128, Vec::new(), vec_pre_push); // Uses up to 8 MiB of heap
591 macro_rules! define_mod_inv { ($name: ident, $len: expr, $div: ident, $add: ident, $sub_abs: ident, $mul: ident) => {
592 /// Calculates the modular inverse of a $len-64-bit number with respect to the given modulus,
594 const fn $name(a: &[u64; $len], m: &[u64; $len]) -> Result<[u64; $len], ()> {
595 if slice_equal(a, &[0; $len]) || slice_equal(m, &[0; $len]) { return Err(()); }
597 let (mut s, mut old_s) = ([0; $len], [0; $len]);
602 let (mut old_s_neg, mut s_neg) = (false, false);
604 while !slice_equal(&r, &[0; $len]) {
605 let (quot, new_r) = debug_unwrap!($div(&old_r, &r));
607 let new_sa = $mul(", &s);
608 debug_assert!(slice_equal(const_subslice(&new_sa, 0, $len), &[0; $len]), "S overflowed");
609 let (new_s, new_s_neg) = match (old_s_neg, s_neg) {
611 let (new_s, overflow) = $add(&old_s, const_subslice(&new_sa, $len, new_sa.len()));
612 debug_assert!(!overflow);
616 let (new_s, overflow) = $add(&old_s, const_subslice(&new_sa, $len, new_sa.len()));
617 debug_assert!(!overflow);
621 let (new_s, overflow) = $add(&old_s, const_subslice(&new_sa, $len, new_sa.len()));
622 debug_assert!(!overflow);
625 (false, false) => $sub_abs(&old_s, const_subslice(&new_sa, $len, new_sa.len())),
637 // At this point old_r contains our GCD and old_s our first Bézout's identity coefficient.
638 if !slice_equal(const_subslice(&old_r, 0, $len - 1), &[0; $len - 1]) || old_r[$len - 1] != 1 {
641 debug_assert!(slice_greater_than(m, &old_s));
643 let (modinv, underflow) = $sub_abs(m, &old_s);
644 debug_assert!(!underflow);
645 debug_assert!(slice_greater_than(m, &modinv));
654 define_mod_inv!(mod_inv_2, 2, div_rem_2, add_2, sub_abs_2, mul_2);
655 define_mod_inv!(mod_inv_4, 4, div_rem_4, add_4, sub_abs_4, mul_4);
656 define_mod_inv!(mod_inv_6, 6, div_rem_6, add_6, sub_abs_6, mul_6);
658 define_mod_inv!(mod_inv_8, 8, div_rem_8, add_8, sub_abs_8, mul_8);
661 /// Constructs a new [`U4096`] from a variable number of big-endian bytes.
662 pub(super) fn from_be_bytes(bytes: &[u8]) -> Result<U4096, ()> {
663 if bytes.len() > 4096/8 { return Err(()); }
664 let u64s = (bytes.len() + 7) / 8;
665 let mut res = [0; WORD_COUNT_4096];
668 let pos = (u64s - i) * 8;
669 let start = bytes.len().saturating_sub(pos);
670 let end = bytes.len() + 8 - pos;
671 b[8 + start - end..].copy_from_slice(&bytes[start..end]);
672 res[i + WORD_COUNT_4096 - u64s] = u64::from_be_bytes(b);
677 /// Naively multiplies `self` * `b` mod `m`, returning a new [`U4096`].
679 /// Fails iff m is 0 or self or b are greater than m.
680 #[cfg(debug_assertions)]
681 fn mulmod_naive(&self, b: &U4096, m: &U4096) -> Result<U4096, ()> {
682 if m.0 == [0; WORD_COUNT_4096] { return Err(()); }
683 if self > m || b > m { return Err(()); }
685 let mul = mul_64(&self.0, &b.0);
687 let mut m_zeros = [0; 128];
688 m_zeros[WORD_COUNT_4096..].copy_from_slice(&m.0);
689 let (_, rem) = div_rem_128(&mul, &m_zeros)?;
690 let mut res = [0; WORD_COUNT_4096];
691 debug_assert_eq!(&rem[..WORD_COUNT_4096], &[0; WORD_COUNT_4096]);
692 res.copy_from_slice(&rem[WORD_COUNT_4096..]);
696 /// Calculates `self` ^ `exp` mod `m`, returning a new [`U4096`].
698 /// Fails iff m is 0, even, or self or b are greater than m.
699 pub(super) fn expmod_odd_mod(&self, mut exp: u32, m: &U4096) -> Result<U4096, ()> {
700 #![allow(non_camel_case_types)]
702 if m.0 == [0; WORD_COUNT_4096] { return Err(()); }
703 if m.0[WORD_COUNT_4096 - 1] & 1 == 0 { return Err(()); }
704 if self > m { return Err(()); }
706 let mut t = [0; WORD_COUNT_4096];
707 if &m.0[..WORD_COUNT_4096 - 1] == &[0; WORD_COUNT_4096 - 1] && m.0[WORD_COUNT_4096 - 1] == 1 {
710 t[WORD_COUNT_4096 - 1] = 1;
711 if exp == 0 { return Ok(U4096(t)); }
713 // Because m is not even, using 2^4096 as the Montgomery R value is always safe - it is
714 // guaranteed to be co-prime with any non-even integer.
716 type mul_ty = fn(&[u64], &[u64]) -> [u64; WORD_COUNT_4096 * 2];
717 type sqr_ty = fn(&[u64]) -> [u64; WORD_COUNT_4096 * 2];
718 type add_double_ty = fn(&[u64], &[u64]) -> ([u64; WORD_COUNT_4096 * 2], bool);
719 type sub_ty = fn(&[u64], &[u64]) -> ([u64; WORD_COUNT_4096], bool);
720 let (word_count, log_bits, mul, sqr, add_double, sub) =
721 if m.0[..WORD_COUNT_4096 / 2] == [0; WORD_COUNT_4096 / 2] {
722 if m.0[..WORD_COUNT_4096 * 3 / 4] == [0; WORD_COUNT_4096 * 3 / 4] {
723 fn mul_16_subarr(a: &[u64], b: &[u64]) -> [u64; WORD_COUNT_4096 * 2] {
724 debug_assert_eq!(a.len(), WORD_COUNT_4096);
725 debug_assert_eq!(b.len(), WORD_COUNT_4096);
726 debug_assert_eq!(&a[..WORD_COUNT_4096 * 3 / 4], &[0; WORD_COUNT_4096 * 3 / 4]);
727 debug_assert_eq!(&b[..WORD_COUNT_4096 * 3 / 4], &[0; WORD_COUNT_4096 * 3 / 4]);
728 let mut res = [0; WORD_COUNT_4096 * 2];
729 res[WORD_COUNT_4096 + WORD_COUNT_4096 / 2..].copy_from_slice(
730 &mul_16(&a[WORD_COUNT_4096 * 3 / 4..], &b[WORD_COUNT_4096 * 3 / 4..]));
733 fn sqr_16_subarr(a: &[u64]) -> [u64; WORD_COUNT_4096 * 2] {
734 debug_assert_eq!(a.len(), WORD_COUNT_4096);
735 debug_assert_eq!(&a[..WORD_COUNT_4096 * 3 / 4], &[0; WORD_COUNT_4096 * 3 / 4]);
736 let mut res = [0; WORD_COUNT_4096 * 2];
737 res[WORD_COUNT_4096 + WORD_COUNT_4096 / 2..].copy_from_slice(
738 &sqr_16(&a[WORD_COUNT_4096 * 3 / 4..]));
741 fn add_32_subarr(a: &[u64], b: &[u64]) -> ([u64; WORD_COUNT_4096 * 2], bool) {
742 debug_assert_eq!(a.len(), WORD_COUNT_4096 * 2);
743 debug_assert_eq!(b.len(), WORD_COUNT_4096 * 2);
744 debug_assert_eq!(&a[..WORD_COUNT_4096 * 3 / 2], &[0; WORD_COUNT_4096 * 3 / 2]);
745 debug_assert_eq!(&b[..WORD_COUNT_4096 * 3 / 2], &[0; WORD_COUNT_4096 * 3 / 2]);
746 let (add, overflow) = add_32(&a[WORD_COUNT_4096 * 3 / 2..], &b[WORD_COUNT_4096 * 3 / 2..]);
747 let mut res = [0; WORD_COUNT_4096 * 2];
748 res[WORD_COUNT_4096 * 3 / 2..].copy_from_slice(&add);
751 fn sub_16_subarr(a: &[u64], b: &[u64]) -> ([u64; WORD_COUNT_4096], bool) {
752 debug_assert_eq!(a.len(), WORD_COUNT_4096);
753 debug_assert_eq!(b.len(), WORD_COUNT_4096);
754 debug_assert_eq!(&a[..WORD_COUNT_4096 * 3 / 4], &[0; WORD_COUNT_4096 * 3 / 4]);
755 debug_assert_eq!(&b[..WORD_COUNT_4096 * 3 / 4], &[0; WORD_COUNT_4096 * 3 / 4]);
756 let (sub, underflow) = sub_16(&a[WORD_COUNT_4096 * 3 / 4..], &b[WORD_COUNT_4096 * 3 / 4..]);
757 let mut res = [0; WORD_COUNT_4096];
758 res[WORD_COUNT_4096 * 3 / 4..].copy_from_slice(&sub);
761 (16, 10, mul_16_subarr as mul_ty, sqr_16_subarr as sqr_ty, add_32_subarr as add_double_ty, sub_16_subarr as sub_ty)
763 fn mul_32_subarr(a: &[u64], b: &[u64]) -> [u64; WORD_COUNT_4096 * 2] {
764 debug_assert_eq!(a.len(), WORD_COUNT_4096);
765 debug_assert_eq!(b.len(), WORD_COUNT_4096);
766 debug_assert_eq!(&a[..WORD_COUNT_4096 / 2], &[0; WORD_COUNT_4096 / 2]);
767 debug_assert_eq!(&b[..WORD_COUNT_4096 / 2], &[0; WORD_COUNT_4096 / 2]);
768 let mut res = [0; WORD_COUNT_4096 * 2];
769 res[WORD_COUNT_4096..].copy_from_slice(
770 &mul_32(&a[WORD_COUNT_4096 / 2..], &b[WORD_COUNT_4096 / 2..]));
773 fn sqr_32_subarr(a: &[u64]) -> [u64; WORD_COUNT_4096 * 2] {
774 debug_assert_eq!(a.len(), WORD_COUNT_4096);
775 debug_assert_eq!(&a[..WORD_COUNT_4096 / 2], &[0; WORD_COUNT_4096 / 2]);
776 let mut res = [0; WORD_COUNT_4096 * 2];
777 res[WORD_COUNT_4096..].copy_from_slice(
778 &sqr_32(&a[WORD_COUNT_4096 / 2..]));
781 fn add_64_subarr(a: &[u64], b: &[u64]) -> ([u64; WORD_COUNT_4096 * 2], bool) {
782 debug_assert_eq!(a.len(), WORD_COUNT_4096 * 2);
783 debug_assert_eq!(b.len(), WORD_COUNT_4096 * 2);
784 debug_assert_eq!(&a[..WORD_COUNT_4096], &[0; WORD_COUNT_4096]);
785 debug_assert_eq!(&b[..WORD_COUNT_4096], &[0; WORD_COUNT_4096]);
786 let (add, overflow) = add_64(&a[WORD_COUNT_4096..], &b[WORD_COUNT_4096..]);
787 let mut res = [0; WORD_COUNT_4096 * 2];
788 res[WORD_COUNT_4096..].copy_from_slice(&add);
791 fn sub_32_subarr(a: &[u64], b: &[u64]) -> ([u64; WORD_COUNT_4096], bool) {
792 debug_assert_eq!(a.len(), WORD_COUNT_4096);
793 debug_assert_eq!(b.len(), WORD_COUNT_4096);
794 debug_assert_eq!(&a[..WORD_COUNT_4096 / 2], &[0; WORD_COUNT_4096 / 2]);
795 debug_assert_eq!(&b[..WORD_COUNT_4096 / 2], &[0; WORD_COUNT_4096 / 2]);
796 let (sub, underflow) = sub_32(&a[WORD_COUNT_4096 / 2..], &b[WORD_COUNT_4096 / 2..]);
797 let mut res = [0; WORD_COUNT_4096];
798 res[WORD_COUNT_4096 / 2..].copy_from_slice(&sub);
801 (32, 11, mul_32_subarr as mul_ty, sqr_32_subarr as sqr_ty, add_64_subarr as add_double_ty, sub_32_subarr as sub_ty)
804 (64, 12, mul_64 as mul_ty, sqr_64 as sqr_ty, add_128 as add_double_ty, sub_64 as sub_ty)
807 let mut r = [0; WORD_COUNT_4096 * 2];
808 r[WORD_COUNT_4096 * 2 - word_count - 1] = 1;
810 let mut m_inv_pos = [0; WORD_COUNT_4096];
811 m_inv_pos[WORD_COUNT_4096 - 1] = 1;
812 let mut two = [0; WORD_COUNT_4096];
813 two[WORD_COUNT_4096 - 1] = 2;
814 for _ in 0..log_bits {
815 let mut m_m_inv = mul(&m_inv_pos, &m.0);
816 m_m_inv[..WORD_COUNT_4096 * 2 - word_count].fill(0);
817 let m_inv = mul(&sub(&two, &m_m_inv[WORD_COUNT_4096..]).0, &m_inv_pos);
818 m_inv_pos[WORD_COUNT_4096 - word_count..].copy_from_slice(&m_inv[WORD_COUNT_4096 * 2 - word_count..]);
820 m_inv_pos[..WORD_COUNT_4096 - word_count].fill(0);
822 // We want the negative modular inverse of m mod R, so subtract m_inv from R.
823 let mut m_inv = m_inv_pos;
825 m_inv[..WORD_COUNT_4096 - word_count].fill(0);
826 debug_assert_eq!(&mul(&m_inv, &m.0)[WORD_COUNT_4096 * 2 - word_count..],
828 &[0xffff_ffff_ffff_ffff; WORD_COUNT_4096][WORD_COUNT_4096 - word_count..]);
830 debug_assert_eq!(&m_inv[..WORD_COUNT_4096 - word_count], &[0; WORD_COUNT_4096][..WORD_COUNT_4096 - word_count]);
832 let mont_reduction = |mu: [u64; WORD_COUNT_4096 * 2]| -> [u64; WORD_COUNT_4096] {
833 debug_assert_eq!(&mu[..WORD_COUNT_4096 * 2 - word_count * 2],
834 &[0; WORD_COUNT_4096 * 2][..WORD_COUNT_4096 * 2 - word_count * 2]);
835 let mut mu_mod_r = [0; WORD_COUNT_4096];
836 mu_mod_r[WORD_COUNT_4096 - word_count..].copy_from_slice(&mu[WORD_COUNT_4096 * 2 - word_count..]);
837 let mut v = mul(&mu_mod_r, &m_inv);
838 v[..WORD_COUNT_4096 * 2 - word_count].fill(0); // mod R
839 let t0 = mul(&v[WORD_COUNT_4096..], &m.0);
840 let (t1, t1_extra_bit) = add_double(&t0, &mu);
841 let mut t1_on_r = [0; WORD_COUNT_4096];
842 debug_assert_eq!(&t1[WORD_COUNT_4096 * 2 - word_count..], &[0; WORD_COUNT_4096][WORD_COUNT_4096 - word_count..],
843 "t1 should be divisible by r");
844 t1_on_r[WORD_COUNT_4096 - word_count..].copy_from_slice(&t1[WORD_COUNT_4096 * 2 - word_count * 2..WORD_COUNT_4096 * 2 - word_count]);
845 if t1_extra_bit || t1_on_r >= m.0 {
847 (t1_on_r, underflow) = sub(&t1_on_r, &m.0);
848 debug_assert_eq!(t1_extra_bit, underflow);
853 // Calculate R^2 mod m as ((2^DOUBLES * R) mod m)^(log_bits - LOG2_DOUBLES) mod R
854 let mut r_minus_one = [0xffff_ffff_ffff_ffffu64; WORD_COUNT_4096];
855 r_minus_one[..WORD_COUNT_4096 - word_count].fill(0);
856 // While we do a full div here, in general R should be less than 2x m (assuming the RSA
857 // modulus used its full bit range and is 1024, 2048, or 4096 bits), so it should be cheap.
858 // In cases with a nonstandard RSA modulus we may end up being pretty slow here, but we'll
860 // If we ever find a problem with this we should reduce R to be tigher on m, as we're
861 // wasting extra bits of calculation if R is too far from m.
862 let (_, mut r_mod_m) = debug_unwrap!(div_rem_64(&r_minus_one, &m.0));
863 let r_mod_m_overflow = add_one!(r_mod_m);
864 if r_mod_m_overflow || r_mod_m >= m.0 {
865 (r_mod_m, _) = sub_64(&r_mod_m, &m.0);
868 let mut r2_mod_m: [u64; 64] = r_mod_m;
869 const DOUBLES: usize = 32;
870 const LOG2_DOUBLES: usize = 5;
872 for _ in 0..DOUBLES {
873 let overflow = double!(r2_mod_m);
874 if overflow || r2_mod_m > m.0 {
875 (r2_mod_m, _) = sub_64(&r2_mod_m, &m.0);
878 for _ in 0..log_bits - LOG2_DOUBLES {
879 r2_mod_m = mont_reduction(sqr(&r2_mod_m));
881 // Clear excess high bits
882 for (m_limb, r2_limb) in m.0.iter().zip(r2_mod_m.iter_mut()) {
883 let clear_bits = m_limb.leading_zeros();
884 if clear_bits == 0 { break; }
885 *r2_limb &= !(0xffff_ffff_ffff_ffffu64 << (64 - clear_bits));
886 if *m_limb != 0 { break; }
888 debug_assert!(r2_mod_m < m.0);
890 // Calculate t * R and a * R as mont multiplications by R^2 mod m
891 let mut tr = mont_reduction(mul(&r2_mod_m, &t));
892 let mut ar = mont_reduction(mul(&r2_mod_m, &self.0));
894 #[cfg(debug_assertions)] {
895 debug_assert_eq!(r2_mod_m, U4096(r_mod_m).mulmod_naive(&U4096(r_mod_m), &m).unwrap().0);
896 debug_assert_eq!(&tr, &U4096(t).mulmod_naive(&U4096(r_mod_m), &m).unwrap().0);
897 debug_assert_eq!(&ar, &self.mulmod_naive(&U4096(r_mod_m), &m).unwrap().0);
902 tr = mont_reduction(mul(&tr, &ar));
905 ar = mont_reduction(sqr(&ar));
908 ar = mont_reduction(mul(&ar, &tr));
909 let mut resr = [0; WORD_COUNT_4096 * 2];
910 resr[WORD_COUNT_4096..].copy_from_slice(&ar);
911 Ok(U4096(mont_reduction(resr)))
915 const fn u64_from_bytes_a_panicking(b: &[u8]) -> u64 {
917 [a, b, c, d, e, f, g, h, ..] => {
918 ((*a as u64) << 8*7) |
919 ((*b as u64) << 8*6) |
920 ((*c as u64) << 8*5) |
921 ((*d as u64) << 8*4) |
922 ((*e as u64) << 8*3) |
923 ((*f as u64) << 8*2) |
924 ((*g as u64) << 8*1) |
931 const fn u64_from_bytes_b_panicking(b: &[u8]) -> u64 {
933 [_, _, _, _, _, _, _, _,
934 a, b, c, d, e, f, g, h, ..] => {
935 ((*a as u64) << 8*7) |
936 ((*b as u64) << 8*6) |
937 ((*c as u64) << 8*5) |
938 ((*d as u64) << 8*4) |
939 ((*e as u64) << 8*3) |
940 ((*f as u64) << 8*2) |
941 ((*g as u64) << 8*1) |
948 const fn u64_from_bytes_c_panicking(b: &[u8]) -> u64 {
950 [_, _, _, _, _, _, _, _,
951 _, _, _, _, _, _, _, _,
952 a, b, c, d, e, f, g, h, ..] => {
953 ((*a as u64) << 8*7) |
954 ((*b as u64) << 8*6) |
955 ((*c as u64) << 8*5) |
956 ((*d as u64) << 8*4) |
957 ((*e as u64) << 8*3) |
958 ((*f as u64) << 8*2) |
959 ((*g as u64) << 8*1) |
966 const fn u64_from_bytes_d_panicking(b: &[u8]) -> u64 {
968 [_, _, _, _, _, _, _, _,
969 _, _, _, _, _, _, _, _,
970 _, _, _, _, _, _, _, _,
971 a, b, c, d, e, f, g, h, ..] => {
972 ((*a as u64) << 8*7) |
973 ((*b as u64) << 8*6) |
974 ((*c as u64) << 8*5) |
975 ((*d as u64) << 8*4) |
976 ((*e as u64) << 8*3) |
977 ((*f as u64) << 8*2) |
978 ((*g as u64) << 8*1) |
985 const fn u64_from_bytes_e_panicking(b: &[u8]) -> u64 {
987 [_, _, _, _, _, _, _, _,
988 _, _, _, _, _, _, _, _,
989 _, _, _, _, _, _, _, _,
990 _, _, _, _, _, _, _, _,
991 a, b, c, d, e, f, g, h, ..] => {
992 ((*a as u64) << 8*7) |
993 ((*b as u64) << 8*6) |
994 ((*c as u64) << 8*5) |
995 ((*d as u64) << 8*4) |
996 ((*e as u64) << 8*3) |
997 ((*f as u64) << 8*2) |
998 ((*g as u64) << 8*1) |
1005 const fn u64_from_bytes_f_panicking(b: &[u8]) -> u64 {
1007 [_, _, _, _, _, _, _, _,
1008 _, _, _, _, _, _, _, _,
1009 _, _, _, _, _, _, _, _,
1010 _, _, _, _, _, _, _, _,
1011 _, _, _, _, _, _, _, _,
1012 a, b, c, d, e, f, g, h, ..] => {
1013 ((*a as u64) << 8*7) |
1014 ((*b as u64) << 8*6) |
1015 ((*c as u64) << 8*5) |
1016 ((*d as u64) << 8*4) |
1017 ((*e as u64) << 8*3) |
1018 ((*f as u64) << 8*2) |
1019 ((*g as u64) << 8*1) |
1020 ((*h as u64) << 8*0)
1027 /// Constructs a new [`U256`] from a variable number of big-endian bytes.
1028 pub(super) fn from_be_bytes(bytes: &[u8]) -> Result<U256, ()> {
1029 if bytes.len() > 256/8 { return Err(()); }
1030 let u64s = (bytes.len() + 7) / 8;
1031 let mut res = [0; WORD_COUNT_256];
1034 let pos = (u64s - i) * 8;
1035 let start = bytes.len().saturating_sub(pos);
1036 let end = bytes.len() + 8 - pos;
1037 b[8 + start - end..].copy_from_slice(&bytes[start..end]);
1038 res[i + WORD_COUNT_256 - u64s] = u64::from_be_bytes(b);
1043 /// Constructs a new [`U256`] from a fixed number of big-endian bytes.
1044 pub(super) const fn from_32_be_bytes_panicking(bytes: &[u8; 32]) -> U256 {
1046 u64_from_bytes_a_panicking(bytes),
1047 u64_from_bytes_b_panicking(bytes),
1048 u64_from_bytes_c_panicking(bytes),
1049 u64_from_bytes_d_panicking(bytes),
1054 pub(super) const fn zero() -> U256 { U256([0, 0, 0, 0]) }
1055 pub(super) const fn one() -> U256 { U256([0, 0, 0, 1]) }
1056 pub(super) const fn three() -> U256 { U256([0, 0, 0, 3]) }
1059 impl<M: PrimeModulus<U256>> U256Mod<M> {
1060 const fn mont_reduction(mu: [u64; 8]) -> Self {
1061 #[cfg(debug_assertions)] {
1062 // Check NEGATIVE_PRIME_INV_MOD_R is correct. Since this is all const, the compiler
1063 // should be able to do it at compile time alone.
1064 let minus_one_mod_r = mul_4(&M::PRIME.0, &M::NEGATIVE_PRIME_INV_MOD_R.0);
1065 assert!(slice_equal(const_subslice(&minus_one_mod_r, 4, 8), &[0xffff_ffff_ffff_ffff; 4]));
1068 #[cfg(debug_assertions)] {
1069 // Check R_SQUARED_MOD_PRIME is correct. Since this is all const, the compiler
1070 // should be able to do it at compile time alone.
1071 let r_minus_one = [0xffff_ffff_ffff_ffff; 4];
1072 let (mut r_mod_prime, _) = sub_4(&r_minus_one, &M::PRIME.0);
1073 add_one!(r_mod_prime);
1074 let r_squared = sqr_4(&r_mod_prime);
1075 let mut prime_extended = [0; 8];
1076 let prime = M::PRIME.0;
1077 copy_from_slice!(prime_extended, 4, 8, prime);
1078 let (_, r_squared_mod_prime) = if let Ok(v) = div_rem_8(&r_squared, &prime_extended) { v } else { panic!() };
1079 assert!(slice_greater_than(&prime_extended, &r_squared_mod_prime));
1080 assert!(slice_equal(const_subslice(&r_squared_mod_prime, 4, 8), &M::R_SQUARED_MOD_PRIME.0));
1083 let mu_mod_r = const_subslice(&mu, 4, 8);
1084 let mut v = mul_4(&mu_mod_r, &M::NEGATIVE_PRIME_INV_MOD_R.0);
1085 const ZEROS: &[u64; 4] = &[0; 4];
1086 copy_from_slice!(v, 0, 4, ZEROS); // mod R
1087 let t0 = mul_4(const_subslice(&v, 4, 8), &M::PRIME.0);
1088 let (t1, t1_extra_bit) = add_8(&t0, &mu);
1089 let t1_on_r = const_subslice(&t1, 0, 4);
1090 let mut res = [0; 4];
1091 if t1_extra_bit || slice_greater_than(&t1_on_r, &M::PRIME.0) {
1093 (res, underflow) = sub_4(&t1_on_r, &M::PRIME.0);
1094 debug_assert!(t1_extra_bit == underflow);
1096 copy_from_slice!(res, 0, 4, t1_on_r);
1098 Self(U256(res), PhantomData)
1101 pub(super) const fn from_u256_panicking(v: U256) -> Self {
1102 assert!(v.0[0] <= M::PRIME.0[0]);
1103 if v.0[0] == M::PRIME.0[0] {
1104 assert!(v.0[1] <= M::PRIME.0[1]);
1105 if v.0[1] == M::PRIME.0[1] {
1106 assert!(v.0[2] <= M::PRIME.0[2]);
1107 if v.0[2] == M::PRIME.0[2] {
1108 assert!(v.0[3] < M::PRIME.0[3]);
1112 assert!(M::PRIME.0[0] != 0 || M::PRIME.0[1] != 0 || M::PRIME.0[2] != 0 || M::PRIME.0[3] != 0);
1113 Self::mont_reduction(mul_4(&M::R_SQUARED_MOD_PRIME.0, &v.0))
1116 pub(super) fn from_u256(mut v: U256) -> Self {
1117 debug_assert!(M::PRIME.0 != [0; 4]);
1118 debug_assert!(M::PRIME.0[0] > (1 << 63), "PRIME should have the top bit set");
1119 while v >= M::PRIME {
1120 let (new_v, spurious_underflow) = sub_4(&v.0, &M::PRIME.0);
1121 debug_assert!(!spurious_underflow);
1124 Self::mont_reduction(mul_4(&M::R_SQUARED_MOD_PRIME.0, &v.0))
1127 pub(super) fn from_modinv_of(v: U256) -> Result<Self, ()> {
1128 Ok(Self::from_u256(U256(mod_inv_4(&v.0, &M::PRIME.0)?)))
1131 /// Multiplies `self` * `b` mod `m`.
1133 /// Panics if `self`'s modulus is not equal to `b`'s
1134 pub(super) fn mul(&self, b: &Self) -> Self {
1135 Self::mont_reduction(mul_4(&self.0.0, &b.0.0))
1138 /// Doubles `self` mod `m`.
1139 pub(super) fn double(&self) -> Self {
1140 let mut res = self.0.0;
1141 let overflow = double!(res);
1142 if overflow || !slice_greater_than(&M::PRIME.0, &res) {
1144 (res, underflow) = sub_4(&res, &M::PRIME.0);
1145 debug_assert_eq!(overflow, underflow);
1147 Self(U256(res), PhantomData)
1150 /// Multiplies `self` by 3 mod `m`.
1151 pub(super) fn times_three(&self) -> Self {
1152 // TODO: Optimize this a lot
1153 self.mul(&U256Mod::from_u256(U256::three()))
1156 /// Multiplies `self` by 4 mod `m`.
1157 pub(super) fn times_four(&self) -> Self {
1158 // TODO: Optimize this somewhat?
1159 self.double().double()
1162 /// Multiplies `self` by 8 mod `m`.
1163 pub(super) fn times_eight(&self) -> Self {
1164 // TODO: Optimize this somewhat?
1165 self.double().double().double()
1168 /// Multiplies `self` by 8 mod `m`.
1169 pub(super) fn square(&self) -> Self {
1170 Self::mont_reduction(sqr_4(&self.0.0))
1173 /// Subtracts `b` from `self` % `m`.
1174 pub(super) fn sub(&self, b: &Self) -> Self {
1175 let (mut val, underflow) = sub_4(&self.0.0, &b.0.0);
1178 (val, overflow) = add_4(&val, &M::PRIME.0);
1179 debug_assert_eq!(overflow, underflow);
1181 Self(U256(val), PhantomData)
1184 /// Adds `b` to `self` % `m`.
1185 pub(super) fn add(&self, b: &Self) -> Self {
1186 let (mut val, overflow) = add_4(&self.0.0, &b.0.0);
1187 if overflow || !slice_greater_than(&M::PRIME.0, &val) {
1189 (val, underflow) = sub_4(&val, &M::PRIME.0);
1190 debug_assert_eq!(overflow, underflow);
1192 Self(U256(val), PhantomData)
1195 /// Returns the underlying [`U256`].
1196 pub(super) fn into_u256(self) -> U256 {
1197 let mut expanded_self = [0; 8];
1198 expanded_self[4..].copy_from_slice(&self.0.0);
1199 Self::mont_reduction(expanded_self).0
1204 /// Constructs a new [`U384`] from a variable number of big-endian bytes.
1205 pub(super) fn from_be_bytes(bytes: &[u8]) -> Result<U384, ()> {
1206 if bytes.len() > 384/8 { return Err(()); }
1207 let u64s = (bytes.len() + 7) / 8;
1208 let mut res = [0; WORD_COUNT_384];
1211 let pos = (u64s - i) * 8;
1212 let start = bytes.len().saturating_sub(pos);
1213 let end = bytes.len() + 8 - pos;
1214 b[8 + start - end..].copy_from_slice(&bytes[start..end]);
1215 res[i + WORD_COUNT_384 - u64s] = u64::from_be_bytes(b);
1220 /// Constructs a new [`U384`] from a fixed number of big-endian bytes.
1221 pub(super) const fn from_48_be_bytes_panicking(bytes: &[u8; 48]) -> U384 {
1223 u64_from_bytes_a_panicking(bytes),
1224 u64_from_bytes_b_panicking(bytes),
1225 u64_from_bytes_c_panicking(bytes),
1226 u64_from_bytes_d_panicking(bytes),
1227 u64_from_bytes_e_panicking(bytes),
1228 u64_from_bytes_f_panicking(bytes),
1233 pub(super) const fn zero() -> U384 { U384([0, 0, 0, 0, 0, 0]) }
1234 pub(super) const fn one() -> U384 { U384([0, 0, 0, 0, 0, 1]) }
1235 pub(super) const fn three() -> U384 { U384([0, 0, 0, 0, 0, 3]) }
1238 impl<M: PrimeModulus<U384>> U384Mod<M> {
1239 const fn mont_reduction(mu: [u64; 12]) -> Self {
1240 #[cfg(debug_assertions)] {
1241 // Check NEGATIVE_PRIME_INV_MOD_R is correct. Since this is all const, the compiler
1242 // should be able to do it at compile time alone.
1243 let minus_one_mod_r = mul_6(&M::PRIME.0, &M::NEGATIVE_PRIME_INV_MOD_R.0);
1244 assert!(slice_equal(const_subslice(&minus_one_mod_r, 6, 12), &[0xffff_ffff_ffff_ffff; 6]));
1247 #[cfg(debug_assertions)] {
1248 // Check R_SQUARED_MOD_PRIME is correct. Since this is all const, the compiler
1249 // should be able to do it at compile time alone.
1250 let r_minus_one = [0xffff_ffff_ffff_ffff; 6];
1251 let (mut r_mod_prime, _) = sub_6(&r_minus_one, &M::PRIME.0);
1252 add_one!(r_mod_prime);
1253 let r_squared = sqr_6(&r_mod_prime);
1254 let mut prime_extended = [0; 12];
1255 let prime = M::PRIME.0;
1256 copy_from_slice!(prime_extended, 6, 12, prime);
1257 let (_, r_squared_mod_prime) = if let Ok(v) = div_rem_12(&r_squared, &prime_extended) { v } else { panic!() };
1258 assert!(slice_greater_than(&prime_extended, &r_squared_mod_prime));
1259 assert!(slice_equal(const_subslice(&r_squared_mod_prime, 6, 12), &M::R_SQUARED_MOD_PRIME.0));
1262 let mu_mod_r = const_subslice(&mu, 6, 12);
1263 let mut v = mul_6(&mu_mod_r, &M::NEGATIVE_PRIME_INV_MOD_R.0);
1264 const ZEROS: &[u64; 6] = &[0; 6];
1265 copy_from_slice!(v, 0, 6, ZEROS); // mod R
1266 let t0 = mul_6(const_subslice(&v, 6, 12), &M::PRIME.0);
1267 let (t1, t1_extra_bit) = add_12(&t0, &mu);
1268 let t1_on_r = const_subslice(&t1, 0, 6);
1269 let mut res = [0; 6];
1270 if t1_extra_bit || slice_greater_than(&t1_on_r, &M::PRIME.0) {
1272 (res, underflow) = sub_6(&t1_on_r, &M::PRIME.0);
1273 debug_assert!(t1_extra_bit == underflow);
1275 copy_from_slice!(res, 0, 6, t1_on_r);
1277 Self(U384(res), PhantomData)
1280 pub(super) const fn from_u384_panicking(v: U384) -> Self {
1281 assert!(v.0[0] <= M::PRIME.0[0]);
1282 if v.0[0] == M::PRIME.0[0] {
1283 assert!(v.0[1] <= M::PRIME.0[1]);
1284 if v.0[1] == M::PRIME.0[1] {
1285 assert!(v.0[2] <= M::PRIME.0[2]);
1286 if v.0[2] == M::PRIME.0[2] {
1287 assert!(v.0[3] <= M::PRIME.0[3]);
1288 if v.0[3] == M::PRIME.0[3] {
1289 assert!(v.0[4] <= M::PRIME.0[4]);
1290 if v.0[4] == M::PRIME.0[4] {
1291 assert!(v.0[5] < M::PRIME.0[5]);
1297 assert!(M::PRIME.0[0] != 0 || M::PRIME.0[1] != 0 || M::PRIME.0[2] != 0
1298 || M::PRIME.0[3] != 0|| M::PRIME.0[4] != 0|| M::PRIME.0[5] != 0);
1299 Self::mont_reduction(mul_6(&M::R_SQUARED_MOD_PRIME.0, &v.0))
1302 pub(super) fn from_u384(mut v: U384) -> Self {
1303 debug_assert!(M::PRIME.0 != [0; 6]);
1304 debug_assert!(M::PRIME.0[0] > (1 << 63), "PRIME should have the top bit set");
1305 while v >= M::PRIME {
1306 let (new_v, spurious_underflow) = sub_6(&v.0, &M::PRIME.0);
1307 debug_assert!(!spurious_underflow);
1310 Self::mont_reduction(mul_6(&M::R_SQUARED_MOD_PRIME.0, &v.0))
1313 pub(super) fn from_modinv_of(v: U384) -> Result<Self, ()> {
1314 Ok(Self::from_u384(U384(mod_inv_6(&v.0, &M::PRIME.0)?)))
1317 /// Multiplies `self` * `b` mod `m`.
1319 /// Panics if `self`'s modulus is not equal to `b`'s
1320 pub(super) fn mul(&self, b: &Self) -> Self {
1321 Self::mont_reduction(mul_6(&self.0.0, &b.0.0))
1324 /// Doubles `self` mod `m`.
1325 pub(super) fn double(&self) -> Self {
1326 let mut res = self.0.0;
1327 let overflow = double!(res);
1328 if overflow || !slice_greater_than(&M::PRIME.0, &res) {
1330 (res, underflow) = sub_6(&res, &M::PRIME.0);
1331 debug_assert_eq!(overflow, underflow);
1333 Self(U384(res), PhantomData)
1336 /// Multiplies `self` by 3 mod `m`.
1337 pub(super) fn times_three(&self) -> Self {
1338 // TODO: Optimize this a lot
1339 self.mul(&U384Mod::from_u384(U384::three()))
1342 /// Multiplies `self` by 4 mod `m`.
1343 pub(super) fn times_four(&self) -> Self {
1344 // TODO: Optimize this somewhat?
1345 self.double().double()
1348 /// Multiplies `self` by 8 mod `m`.
1349 pub(super) fn times_eight(&self) -> Self {
1350 // TODO: Optimize this somewhat?
1351 self.double().double().double()
1354 /// Multiplies `self` by 8 mod `m`.
1355 pub(super) fn square(&self) -> Self {
1356 Self::mont_reduction(sqr_6(&self.0.0))
1359 /// Subtracts `b` from `self` % `m`.
1360 pub(super) fn sub(&self, b: &Self) -> Self {
1361 let (mut val, underflow) = sub_6(&self.0.0, &b.0.0);
1364 (val, overflow) = add_6(&val, &M::PRIME.0);
1365 debug_assert_eq!(overflow, underflow);
1367 Self(U384(val), PhantomData)
1370 /// Adds `b` to `self` % `m`.
1371 pub(super) fn add(&self, b: &Self) -> Self {
1372 let (mut val, overflow) = add_6(&self.0.0, &b.0.0);
1373 if overflow || !slice_greater_than(&M::PRIME.0, &val) {
1375 (val, underflow) = sub_6(&val, &M::PRIME.0);
1376 debug_assert_eq!(overflow, underflow);
1378 Self(U384(val), PhantomData)
1381 /// Returns the underlying [`U384`].
1382 pub(super) fn into_u384(self) -> U384 {
1383 let mut expanded_self = [0; 12];
1384 expanded_self[6..].copy_from_slice(&self.0.0);
1385 Self::mont_reduction(expanded_self).0
1394 impl PrimeModulus<U256> for P256 {
1395 const PRIME: U256 = U256::from_32_be_bytes_panicking(&hex_lit::hex!(
1396 "ffffffff00000001000000000000000000000000ffffffffffffffffffffffff"));
1397 const R_SQUARED_MOD_PRIME: U256 = U256::from_32_be_bytes_panicking(&hex_lit::hex!(
1398 "00000004fffffffdfffffffffffffffefffffffbffffffff0000000000000003"));
1399 const NEGATIVE_PRIME_INV_MOD_R: U256 = U256::from_32_be_bytes_panicking(&hex_lit::hex!(
1400 "ffffffff00000002000000000000000000000001000000000000000000000001"));
1404 impl PrimeModulus<U384> for P384 {
1405 const PRIME: U384 = U384::from_48_be_bytes_panicking(&hex_lit::hex!(
1406 "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffeffffffff0000000000000000ffffffff"));
1407 const R_SQUARED_MOD_PRIME: U384 = U384::from_48_be_bytes_panicking(&hex_lit::hex!(
1408 "000000000000000000000000000000010000000200000000fffffffe000000000000000200000000fffffffe00000001"));
1409 const NEGATIVE_PRIME_INV_MOD_R: U384 = U384::from_48_be_bytes_panicking(&hex_lit::hex!(
1410 "00000014000000140000000c00000002fffffffcfffffffafffffffbfffffffe00000000000000010000000100000001"));
1417 /// Read some bytes and use them to test bigint math by comparing results against the `ibig` crate.
1418 pub fn fuzz_math(input: &[u8]) {
1419 if input.len() < 32 || input.len() % 16 != 0 { return; }
1420 let split = core::cmp::min(input.len() / 2, 512);
1421 let (a, b) = input.split_at(core::cmp::min(input.len() / 2, 512));
1422 let b = &b[..split];
1424 let ai = ibig::UBig::from_be_bytes(&a);
1425 let bi = ibig::UBig::from_be_bytes(&b);
1427 let mut a_u64s = Vec::with_capacity(split / 8);
1428 for chunk in a.chunks(8) {
1429 a_u64s.push(u64::from_be_bytes(chunk.try_into().unwrap()));
1431 let mut b_u64s = Vec::with_capacity(split / 8);
1432 for chunk in b.chunks(8) {
1433 b_u64s.push(u64::from_be_bytes(chunk.try_into().unwrap()));
1436 macro_rules! test { ($mul: ident, $sqr: ident, $add: ident, $sub: ident, $div_rem: ident, $mod_inv: ident) => {
1437 let res = $mul(&a_u64s, &b_u64s);
1438 let mut res_bytes = Vec::with_capacity(input.len() / 2);
1440 res_bytes.extend_from_slice(&i.to_be_bytes());
1442 assert_eq!(ibig::UBig::from_be_bytes(&res_bytes), ai.clone() * bi.clone());
1444 debug_assert_eq!($mul(&a_u64s, &a_u64s), $sqr(&a_u64s));
1445 debug_assert_eq!($mul(&b_u64s, &b_u64s), $sqr(&b_u64s));
1447 let (res, carry) = $add(&a_u64s, &b_u64s);
1448 let mut res_bytes = Vec::with_capacity(input.len() / 2 + 1);
1449 if carry { res_bytes.push(1); } else { res_bytes.push(0); }
1451 res_bytes.extend_from_slice(&i.to_be_bytes());
1453 assert_eq!(ibig::UBig::from_be_bytes(&res_bytes), ai.clone() + bi.clone());
1455 let mut add_u64s = a_u64s.clone();
1456 let carry = add_one!(add_u64s);
1457 let mut res_bytes = Vec::with_capacity(input.len() / 2 + 1);
1458 if carry { res_bytes.push(1); } else { res_bytes.push(0); }
1459 for i in &add_u64s {
1460 res_bytes.extend_from_slice(&i.to_be_bytes());
1462 assert_eq!(ibig::UBig::from_be_bytes(&res_bytes), ai.clone() + 1);
1464 let mut double_u64s = b_u64s.clone();
1465 let carry = double!(double_u64s);
1466 let mut res_bytes = Vec::with_capacity(input.len() / 2 + 1);
1467 if carry { res_bytes.push(1); } else { res_bytes.push(0); }
1468 for i in &double_u64s {
1469 res_bytes.extend_from_slice(&i.to_be_bytes());
1471 assert_eq!(ibig::UBig::from_be_bytes(&res_bytes), bi.clone() * 2);
1473 let (quot, rem) = if let Ok(res) =
1474 $div_rem(&a_u64s[..].try_into().unwrap(), &b_u64s[..].try_into().unwrap()) {
1477 let mut quot_bytes = Vec::with_capacity(input.len() / 2);
1479 quot_bytes.extend_from_slice(&i.to_be_bytes());
1481 let mut rem_bytes = Vec::with_capacity(input.len() / 2);
1483 rem_bytes.extend_from_slice(&i.to_be_bytes());
1485 let (quoti, remi) = ibig::ops::DivRem::div_rem(ai.clone(), &bi);
1486 assert_eq!(ibig::UBig::from_be_bytes("_bytes), quoti);
1487 assert_eq!(ibig::UBig::from_be_bytes(&rem_bytes), remi);
1489 if ai != ibig::UBig::from(0u32) { // ibig provides a spurious modular inverse for 0
1490 let ring = ibig::modular::ModuloRing::new(&bi);
1491 let ar = ring.from(ai.clone());
1492 let invi = ar.inverse().map(|i| i.residue());
1494 if let Ok(modinv) = $mod_inv(&a_u64s[..].try_into().unwrap(), &b_u64s[..].try_into().unwrap()) {
1495 let mut modinv_bytes = Vec::with_capacity(input.len() / 2);
1497 modinv_bytes.extend_from_slice(&i.to_be_bytes());
1499 assert_eq!(invi.unwrap(), ibig::UBig::from_be_bytes(&modinv_bytes));
1501 assert!(invi.is_none());
1506 macro_rules! test_mod { ($amodp: expr, $bmodp: expr, $PRIME: expr, $len: expr, $into: ident, $div_rem_double: ident, $div_rem: ident, $mul: ident, $add: ident, $sub: ident) => {
1507 // Test the U256/U384Mod wrapper, which operates in Montgomery representation
1508 let mut p_extended = [0; $len * 2];
1509 p_extended[$len..].copy_from_slice(&$PRIME);
1511 let amodp_squared = $div_rem_double(&$mul(&a_u64s, &a_u64s), &p_extended).unwrap().1;
1512 assert_eq!(&amodp_squared[..$len], &[0; $len]);
1513 assert_eq!(&$amodp.square().$into().0, &amodp_squared[$len..]);
1515 let abmodp = $div_rem_double(&$mul(&a_u64s, &b_u64s), &p_extended).unwrap().1;
1516 assert_eq!(&abmodp[..$len], &[0; $len]);
1517 assert_eq!(&$amodp.mul(&$bmodp).$into().0, &abmodp[$len..]);
1519 let (aplusb, aplusb_overflow) = $add(&a_u64s, &b_u64s);
1520 let mut aplusb_extended = [0; $len * 2];
1521 aplusb_extended[$len..].copy_from_slice(&aplusb);
1522 if aplusb_overflow { aplusb_extended[$len - 1] = 1; }
1523 let aplusbmodp = $div_rem_double(&aplusb_extended, &p_extended).unwrap().1;
1524 assert_eq!(&aplusbmodp[..$len], &[0; $len]);
1525 assert_eq!(&$amodp.add(&$bmodp).$into().0, &aplusbmodp[$len..]);
1527 let (mut aminusb, aminusb_underflow) = $sub(&a_u64s, &b_u64s);
1528 if aminusb_underflow {
1530 (aminusb, overflow) = $add(&aminusb, &$PRIME);
1532 (aminusb, overflow) = $add(&aminusb, &$PRIME);
1536 let aminusbmodp = $div_rem(&aminusb, &$PRIME).unwrap().1;
1537 assert_eq!(&$amodp.sub(&$bmodp).$into().0, &aminusbmodp);
1540 if a_u64s.len() == 2 {
1541 test!(mul_2, sqr_2, add_2, sub_2, div_rem_2, mod_inv_2);
1542 } else if a_u64s.len() == 4 {
1543 test!(mul_4, sqr_4, add_4, sub_4, div_rem_4, mod_inv_4);
1544 let amodp = U256Mod::<fuzz_moduli::P256>::from_u256(U256(a_u64s[..].try_into().unwrap()));
1545 let bmodp = U256Mod::<fuzz_moduli::P256>::from_u256(U256(b_u64s[..].try_into().unwrap()));
1546 test_mod!(amodp, bmodp, fuzz_moduli::P256::PRIME.0, 4, into_u256, div_rem_8, div_rem_4, mul_4, add_4, sub_4);
1547 } else if a_u64s.len() == 6 {
1548 test!(mul_6, sqr_6, add_6, sub_6, div_rem_6, mod_inv_6);
1549 let amodp = U384Mod::<fuzz_moduli::P384>::from_u384(U384(a_u64s[..].try_into().unwrap()));
1550 let bmodp = U384Mod::<fuzz_moduli::P384>::from_u384(U384(b_u64s[..].try_into().unwrap()));
1551 test_mod!(amodp, bmodp, fuzz_moduli::P384::PRIME.0, 6, into_u384, div_rem_12, div_rem_6, mul_6, add_6, sub_6);
1552 } else if a_u64s.len() == 8 {
1553 test!(mul_8, sqr_8, add_8, sub_8, div_rem_8, mod_inv_8);
1554 } else if input.len() == 512*2 + 4 {
1555 let mut e_bytes = [0; 4];
1556 e_bytes.copy_from_slice(&input[512 * 2..512 * 2 + 4]);
1557 let e = u32::from_le_bytes(e_bytes);
1558 let a = U4096::from_be_bytes(&a).unwrap();
1559 let b = U4096::from_be_bytes(&b).unwrap();
1561 let res = if let Ok(r) = a.expmod_odd_mod(e, &b) { r } else { return };
1562 let mut res_bytes = Vec::with_capacity(512);
1564 res_bytes.extend_from_slice(&i.to_be_bytes());
1567 let ring = ibig::modular::ModuloRing::new(&bi);
1568 let ar = ring.from(ai.clone());
1569 assert_eq!(ar.pow(&e.into()).residue(), ibig::UBig::from_be_bytes(&res_bytes));
1578 fn mul_min_simple_tests() {
1581 let res = mul_2(&a, &b);
1582 assert_eq!(res, [0, 3, 10, 8]);
1584 let a = [0x1bad_cafe_dead_beef, 2424];
1585 let b = [0x2bad_beef_dead_cafe, 4242];
1586 let res = mul_2(&a, &b);
1587 assert_eq!(res, [340296855556511776, 15015369169016130186, 4248480538569992542, 10282608]);
1589 let a = [0xf6d9_f8eb_8b60_7a6d, 0x4b93_833e_2194_fc2e];
1590 let b = [0xfdab_0000_6952_8ab4, 0xd302_0000_8282_0000];
1591 let res = mul_2(&a, &b);
1592 assert_eq!(res, [17625486516939878681, 18390748118453258282, 2695286104209847530, 1510594524414214144]);
1594 let a = [0x8b8b_8b8b_8b8b_8b8b, 0x8b8b_8b8b_8b8b_8b8b];
1595 let b = [0x8b8b_8b8b_8b8b_8b8b, 0x8b8b_8b8b_8b8b_8b8b];
1596 let res = mul_2(&a, &b);
1597 assert_eq!(res, [5481115605507762349, 8230042173354675923, 16737530186064798, 15714555036048702841]);
1599 let a = [0x0000_0000_0000_0020, 0x002d_362c_005b_7753];
1600 let b = [0x0900_0000_0030_0003, 0xb708_00fe_0000_00cd];
1601 let res = mul_2(&a, &b);
1602 assert_eq!(res, [1, 2306290405521702946, 17647397529888728169, 10271802099389861239]);
1604 let a = [0x0000_0000_7fff_ffff, 0xffff_ffff_0000_0000];
1605 let b = [0x0000_0800_0000_0000, 0x0000_1000_0000_00e1];
1606 let res = mul_2(&a, &b);
1607 assert_eq!(res, [1024, 0, 483183816703, 18446743107341910016]);
1609 let a = [0xf6d9_f8eb_ebeb_eb6d, 0x4b93_83a0_bb35_0680];
1610 let b = [0xfd02_b9b9_b9b9_b9b9, 0xb9b9_b9b9_b9b9_b9b9];
1611 let res = mul_2(&a, &b);
1612 assert_eq!(res, [17579814114991930107, 15033987447865175985, 488855932380801351, 5453318140933190272]);
1614 let a = [u64::MAX; 2];
1615 let b = [u64::MAX; 2];
1616 let res = mul_2(&a, &b);
1617 assert_eq!(res, [18446744073709551615, 18446744073709551614, 0, 1]);
1621 fn add_simple_tests() {
1622 let a = [u64::MAX; 2];
1623 let b = [u64::MAX; 2];
1624 assert_eq!(add_2(&a, &b), ([18446744073709551615, 18446744073709551614], true));
1626 let a = [0x1bad_cafe_dead_beef, 2424];
1627 let b = [0x2bad_beef_dead_cafe, 4242];
1628 assert_eq!(add_2(&a, &b), ([5141855058045667821, 6666], false));
1632 fn mul_4_simple_tests() {
1635 assert_eq!(mul_4(&a, &b),
1636 [0, 2, 4, 6, 8, 6, 4, 2]);
1638 let a = [0x1bad_cafe_dead_beef, 2424, 0x1bad_cafe_dead_beef, 2424];
1639 let b = [0x2bad_beef_dead_cafe, 4242, 0x2bad_beef_dead_cafe, 4242];
1640 assert_eq!(mul_4(&a, &b),
1641 [340296855556511776, 15015369169016130186, 4929074249683016095, 11583994264332991364,
1642 8837257932696496860, 15015369169036695402, 4248480538569992542, 10282608]);
1644 let a = [u64::MAX; 4];
1645 let b = [u64::MAX; 4];
1646 assert_eq!(mul_4(&a, &b),
1647 [18446744073709551615, 18446744073709551615, 18446744073709551615,
1648 18446744073709551614, 0, 0, 0, 1]);
1652 fn double_simple_tests() {
1653 let mut a = [0xfff5_b32d_01ff_0000, 0x00e7_e7e7_e7e7_e7e7];
1654 assert!(double!(a));
1655 assert_eq!(a, [18440945635998695424, 130551405668716494]);
1657 let mut a = [u64::MAX, u64::MAX];
1658 assert!(double!(a));
1659 assert_eq!(a, [18446744073709551615, 18446744073709551614]);