From 2cfd6b702473093b167aaa3075e71a36f3bc4eef Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Sat, 28 Aug 2021 14:59:13 +0200 Subject: [PATCH] big: Add `_private_int_mul_high`. --- core/math/big/build.bat | 4 +- core/math/big/private.odin | 123 +++++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 2 deletions(-) diff --git a/core/math/big/build.bat b/core/math/big/build.bat index 31480bcc8..e6049b2e1 100644 --- a/core/math/big/build.bat +++ b/core/math/big/build.bat @@ -1,9 +1,9 @@ @echo off -:odin run . -vet +odin run . -vet set TEST_ARGS=-fast-tests :odin build . -build-mode:shared -show-timings -o:minimal -no-bounds-check -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS% -odin build . -build-mode:shared -show-timings -o:size -no-bounds-check -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS% +:odin build . -build-mode:shared -show-timings -o:size -no-bounds-check -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS% :odin build . -build-mode:shared -show-timings -o:size -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS% :odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS% :odin build . -build-mode:shared -show-timings -o:speed -define:MATH_BIG_EXE=false && python test.py -fast-tests %TEST_ARGS% \ No newline at end of file diff --git a/core/math/big/private.odin b/core/math/big/private.odin index d4cced10a..5fda9f2b9 100644 --- a/core/math/big/private.odin +++ b/core/math/big/private.odin @@ -426,6 +426,129 @@ _private_int_mul_comba :: proc(dest, a, b: ^Int, digits: int, allocator := conte return internal_clamp(dest); } +/* + Multiplies |a| * |b| and does not compute the lower digs digits + [meant to get the higher part of the product] +*/ +_private_int_mul_high :: proc(dest, a, b: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + /* + Can we use the fast multiplier? + */ + if a.used + b.used + 1 < _WARRAY && min(a.used, b.used) < _MAX_COMBA { + return _private_int_mul_high_comba(dest, a, b, digits); + } + + internal_grow(dest, a.used + b.used + 1) or_return; + dest.used = a.used + b.used + 1; + + pa := a.used; + pb := b.used; + for ix := 0; ix < pa; ix += 1 { + carry := DIGIT(0); + + for iy := digits - ix; iy < pb; iy += 1 { + /* + Calculate the double precision result. + */ + r := _WORD(dest.digit[ix + iy]) + _WORD(a.digit[ix]) * _WORD(b.digit[iy]) + _WORD(carry); + + /* + Get the lower part. + */ + dest.digit[ix + iy] = DIGIT(r & _WORD(_MASK)); + + /* + Carry the carry. + */ + carry = DIGIT(r >> _WORD(_DIGIT_BITS)); + } + dest.digit[ix + pb] = carry; + } + return internal_clamp(dest); +} + +/* + This is a modified version of `_private_int_mul_comba` that only produces output digits *above* `digits`. + See the comments for `_private_int_mul_comba` to see how it works. + + This is used in the Barrett reduction since for one of the multiplications + only the higher digits were needed. This essentially halves the work. + + Based on Algorithm 14.12 on pp.595 of HAC. +*/ +_private_int_mul_high_comba :: proc(dest, a, b: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + W: [_WARRAY]DIGIT = ---; + _W: _WORD = 0; + + /* + Number of output digits to produce. Grow the destination as required. + */ + pa := a.used + b.used; + internal_grow(dest, pa) or_return; + + ix: int; + for ix = digits; ix < pa; ix += 1 { + /* + Get offsets into the two bignums. + */ + ty := min(b.used - 1, ix); + tx := ix - ty; + + /* + This is the number of times the loop will iterrate, essentially it's + while (tx++ < a->used && ty-- >= 0) { ... } + */ + iy := min(a.used - tx, ty + 1); + + /* + Execute loop. + */ + for iz := 0; iz < iy; iz += 1 { + _W += _WORD(a.digit[tx + iz]) * _WORD(b.digit[ty - iz]); + } + + /* + Store term. + */ + W[ix] = DIGIT(_W) & DIGIT(_MASK); + + /* + Make next carry. + */ + _W = _W >> _WORD(_DIGIT_BITS); + } + + /* + Setup dest + */ + old_used := dest.used; + dest.used = pa; + + for ix = digits; ix < pa; ix += 1 { + /* + Now extract the previous digit [below the carry]. + */ + dest.digit[ix] = W[ix]; + } + + /* + Zero remainder. + */ + internal_zero_unused(dest, old_used); + + /* + Adjust dest.used based on leading zeroes. + */ + return internal_clamp(dest); +} + + + + /* Low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 Assumes `dest` and `src` to not be `nil`, and `src` to have been initialized.