From 893cc013b5bafd8fda51f17c0e6bec6fb35f0e15 Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Sat, 21 Aug 2021 14:00:31 +0200 Subject: [PATCH] big: Add Montgomery reduction. --- core/math/big/prime.odin | 74 +++++++++++++++++++++++ core/math/big/private.odin | 119 +++++++++++++++++++++++++++++++++++++ 2 files changed, 193 insertions(+) diff --git a/core/math/big/prime.odin b/core/math/big/prime.odin index 7d33a4068..1d3a2980e 100644 --- a/core/math/big/prime.odin +++ b/core/math/big/prime.odin @@ -33,6 +33,80 @@ int_prime_is_divisible :: proc(a: ^Int, allocator := context.allocator) -> (res: return false, nil; } + +/* + Shifts with subtractions when the result is greater than b. + + The method is slightly modified to shift B unconditionally upto just under + the leading bit of b. This saves alot of multiple precision shifting. +*/ +/* +internal_int_montgomery_calc_normalization :: proc(a, b: ^Int) -> (err: Error) { + + int x, bits; + mp_err err; + + /* how many bits of last digit does b use */ + bits = mp_count_bits(b) % MP_DIGIT_BIT; + + if (b->used > 1) { + if ((err = mp_2expt(a, ((b->used - 1) * MP_DIGIT_BIT) + bits - 1)) != MP_OKAY) { + return err; + } + } else { + mp_set(a, 1uL); + bits = 1; + } + + /* now compute C = A * B mod b */ + for (x = bits - 1; x < (int)MP_DIGIT_BIT; x++) { + if ((err = mp_mul_2(a, a)) != MP_OKAY) { + return err; + } + if (mp_cmp_mag(a, b) != MP_LT) { + if ((err = s_mp_sub(a, b, a)) != MP_OKAY) { + return err; + } + } + } + + return nil; +} +*/ + +/* + Sets up the Montgomery reduction stuff. +*/ +internal_int_montgomery_setup :: proc(n: ^Int) -> (rho: DIGIT, err: Error) { + /* + Fast inversion mod 2**k + Based on the fact that: + + XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) + => 2*X*A - X*X*A*A = 1 + => 2*(1) - (1) = 1 + */ + b := n.digit[0]; + if b & 1 == 0 { return 0, .Invalid_Argument; } + + x := (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ + x *= 2 - (b * x); /* here x*a==1 mod 2**8 */ + x *= 2 - (b * x); /* here x*a==1 mod 2**16 */ + when _WORD_TYPE_BITS == 64 { + x *= 2 - (b * x); /* here x*a==1 mod 2**32 */ + x *= 2 - (b * x); /* here x*a==1 mod 2**64 */ + } + + /* + rho = -1/m mod b + */ + rho = DIGIT(((_WORD(1) << _WORD(_DIGIT_BITS)) - _WORD(x)) & _WORD(_MASK)); + return rho, nil; +} + +/* + Returns the number of Rabin-Miller trials needed for a given bit size. +*/ number_of_rabin_miller_trials :: proc(bit_size: int) -> (number_of_trials: int) { switch { case bit_size <= 80: diff --git a/core/math/big/private.odin b/core/math/big/private.odin index ca2d016de..89bd402f1 100644 --- a/core/math/big/private.odin +++ b/core/math/big/private.odin @@ -1542,6 +1542,125 @@ _private_int_log :: proc(a: ^Int, base: DIGIT, allocator := context.allocator) - +/* + Computes xR**-1 == x (mod N) via Montgomery Reduction. + This is an optimized implementation of `internal_montgomery_reduce` + which uses the comba method to quickly calculate the columns of the reduction. + Based on Algorithm 14.32 on pp.601 of HAC. +*/ +_private_montgomery_reduce_comba :: proc(x, n: ^Int, rho: DIGIT) -> (err: Error) { + W: [_WARRAY]_WORD = ---; + + if x.used > _WARRAY { return .Invalid_Argument; } + + /* + Get old used count. + */ + old_used := x.used; + + /* + Grow `x` as required. + */ + internal_grow(x, n.used + 1) or_return; + + /* + First we have to get the digits of the input into an array of double precision words W[...] + Copy the digits of `x` into W[0..`x.used` - 1] + */ + ix: int; + for ix = 0; ix < x.used; ix += 1 { + W[ix] = _WORD(x.digit[ix]); + } + + /* + Zero the high words of W[a->used..m->used*2]. + */ + zero_upper := (n.used * 2) + 1; + if ix < zero_upper { + for ix = x.used; ix < zero_upper; ix += 1 { + W[ix] = {}; + } + } + + /* + Now we proceed to zero successive digits from the least significant upwards. + */ + for ix = 0; ix < n.used; ix += 1 { + /* + `mu = ai * m' mod b` + + We avoid a double precision multiplication (which isn't required) + by casting the value down to a DIGIT. Note this requires + that W[ix-1] have the carry cleared (see after the inner loop) + */ + mu := ((W[ix] & _WORD(_MASK)) * _WORD(rho)) & _WORD(_MASK); + + /* + `a = a + mu * m * b**i` + + This is computed in place and on the fly. The multiplication + by b**i is handled by offseting which columns the results + are added to. + + Note the comba method normally doesn't handle carries in the + inner loop In this case we fix the carry from the previous + column since the Montgomery reduction requires digits of the + result (so far) [see above] to work. + + This is handled by fixing up one carry after the inner loop. + The carry fixups are done in order so after these loops the + first m->used words of W[] have the carries fixed. + */ + for iy := 0; iy < n.used; iy += 1 { + W[ix + iy] += mu * _WORD(n.digit[iy]); + } + + /* + Now fix carry for next digit, W[ix+1]. + */ + W[ix + 1] += (W[ix] >> _DIGIT_BITS); + } + + /* + Now we have to propagate the carries and shift the words downward + [all those least significant digits we zeroed]. + */ + + for ; ix < n.used * 2; ix += 1 { + W[ix + 1] += (W[ix] >> _DIGIT_BITS); + } + + /* copy out, A = A/b**n + * + * The result is A/b**n but instead of converting from an + * array of mp_word to mp_digit than calling mp_rshd + * we just copy them in the right order + */ + + for ix = 0; ix < (n.used + 1); ix += 1 { + x.digit[ix] = DIGIT(W[n.used + ix] & _WORD(_MASK)); + } + + /* + Set the max used. + */ + x.used = n.used + 1; + + /* + Zero old_used digits, if the input a was larger than m->used+1 we'll have to clear the digits. + */ + internal_zero_unused(x, old_used); + internal_clamp(x); + + /* + if A >= m then A = A - m + */ + if internal_cmp_mag(x, n) != -1 { + return internal_sub(x, x, n); + } + return nil; +} + /* hac 14.61, pp608 */