From dc02566a84f0ad98aaa4f88f0a4e4abdd3aa1f1a Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Fri, 13 Aug 2021 23:45:00 +0200 Subject: [PATCH] big: Add `_private_int_div_recursive`. --- core/math/big/example.odin | 15 +-- core/math/big/internal.odin | 16 ++- core/math/big/private.odin | 190 +++++++++++++++++++++++++++++++++++- 3 files changed, 209 insertions(+), 12 deletions(-) diff --git a/core/math/big/example.odin b/core/math/big/example.odin index ff4d71059..986580f7f 100644 --- a/core/math/big/example.odin +++ b/core/math/big/example.odin @@ -206,13 +206,16 @@ demo :: proc() { a, b, c, d, e, f := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}; defer destroy(a, b, c, d, e, f); - { - SCOPED_TIMING(.rm_trials); - for bits in 0..10242 { - _ = number_of_rabin_miller_trials(bits); - } + power_of_two(a, 14_500); + print("a: ", a); + + power_of_two(b, 10_500); + + if err := internal_int_divmod(c, d, a, b); err != nil { + fmt.printf("Error: %v\n", err); } - SCOPED_COUNT_ADD(.rm_trials, 10242); + print("c: ", c); + print("d: ", d); } main :: proc() { diff --git a/core/math/big/internal.odin b/core/math/big/internal.odin index 6610c8429..506c5b683 100644 --- a/core/math/big/internal.odin +++ b/core/math/big/internal.odin @@ -260,6 +260,12 @@ internal_int_add_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context } internal_add :: proc { internal_int_add_signed, internal_int_add_digit, }; + +internal_int_incr :: proc(dest: ^Int, allocator := context.allocator) -> (err: Error) { + return #force_inline internal_add(dest, dest, 1); +} +internal_incr :: proc { internal_int_incr, }; + /* Low-level subtraction, dest = number - decrease. Assumes |number| > |decrease|. Handbook of Applied Cryptography, algorithm 14.9. @@ -458,6 +464,11 @@ internal_int_sub_digit :: proc(dest, number: ^Int, digit: DIGIT, allocator := co internal_sub :: proc { internal_int_sub_signed, internal_int_sub_digit, }; +internal_int_decr :: proc(dest: ^Int, allocator := context.allocator) -> (err: Error) { + return #force_inline internal_sub(dest, dest, 1); +} +internal_decr :: proc { internal_int_decr, }; + /* dest = src / 2 dest = src >> 1 @@ -718,8 +729,9 @@ internal_int_divmod :: proc(quotient, remainder, numerator, denominator: ^Int, a return nil; } - if false && (denominator.used > 2 * MUL_KARATSUBA_CUTOFF) && (denominator.used <= (numerator.used/3) * 2) { - // err = _int_div_recursive(quotient, remainder, numerator, denominator); + if (denominator.used > 2 * MUL_KARATSUBA_CUTOFF) && (denominator.used <= (numerator.used/3) * 2) { + err = _private_int_div_recursive(quotient, remainder, numerator, denominator); + // err = #force_inline _private_int_div_school(quotient, remainder, numerator, denominator); } else { when true { err = #force_inline _private_int_div_school(quotient, remainder, numerator, denominator); diff --git a/core/math/big/private.odin b/core/math/big/private.odin index 20e9c2819..6094e6baf 100644 --- a/core/math/big/private.odin +++ b/core/math/big/private.odin @@ -430,7 +430,7 @@ _private_int_sqr_toom :: proc(dest, src: ^Int, allocator := context.allocator) - context.allocator = allocator; S0, a0, a1, a2 := &Int{}, &Int{}, &Int{}, &Int{}; - defer destroy(S0, a0, a1, a2); + defer internal_destroy(S0, a0, a1, a2); /* Init temps. @@ -752,6 +752,188 @@ _private_int_div_school :: proc(quotient, remainder, numerator, denominator: ^In return nil; } +/* + Direct implementation of algorithms 1.8 "RecursiveDivRem" and 1.9 "UnbalancedDivision" from: + + Brent, Richard P., and Paul Zimmermann. "Modern computer arithmetic" + Vol. 18. Cambridge University Press, 2010 + Available online at https://arxiv.org/pdf/1004.4710 + + pages 19ff. in the above online document. +*/ +_private_div_recursion :: proc(quotient, remainder, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + A1, A2, B1, B0, Q1, Q0, R1, R0, t := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}; + defer internal_destroy(A1, A2, B1, B0, Q1, Q0, R1, R0, t); + + m := a.used - b.used; + k := m / 2; + + if m < MUL_KARATSUBA_CUTOFF { return _private_int_div_school(quotient, remainder, a, b); } + + if err = internal_init_multi(A1, A2, B1, B0, Q1, Q0, R1, R0, t); err != nil { return err; } + + /* + `B1` = `b` / `beta`^`k`, `B0` = `b` % `beta`^`k` + */ + if err = internal_shrmod(B1, B0, b, k * _DIGIT_BITS); err != nil { return err; } + + /* + (Q1, R1) = RecursiveDivRem(A / beta^(2k), B1) + */ + if err = internal_shrmod(A1, t, a, 2 * k * _DIGIT_BITS); err != nil { return err; } + if err = _private_div_recursion(Q1, R1, A1, B1); err != nil { return err; } + + /* + A1 = (R1 * beta^(2k)) + (A % beta^(2k)) - (Q1 * B0 * beta^k) + */ + if err = internal_shl_digit(R1, 2 * k); err != nil { return err; } + if err = internal_add(A1, R1, t); err != nil { return err; } + if err = internal_mul(t, Q1, B0); err != nil { return err; } + + /* + While A1 < 0 do Q1 = Q1 - 1, A1 = A1 + (beta^k * B) + */ + if internal_cmp(A1, 0) == -1 { + if internal_shl(t, b, k * _DIGIT_BITS); err != nil { return err; } + + for { + if err = internal_decr(Q1); err != nil { return err; } + if err = internal_add(A1, A1, t); err != nil { return err; } + if internal_cmp(A1, 0) != -1 { break; } + } + } + + /* + (Q0, R0) = RecursiveDivRem(A1 / beta^(k), B1) + */ + if internal_shrmod(A1, t, A1, k * _DIGIT_BITS); err != nil { return err; } + if _private_div_recursion(Q0, R0, A1, B1); err != nil { return err; } + + /* + A2 = (R0*beta^k) + (A1 % beta^k) - (Q0*B0) + */ + if err = internal_shl_digit(R0, k); err != nil { return err; } + if err = internal_add(A2, R0, t); err != nil { return err; } + if err = internal_mul(t, Q0, B0); err != nil { return err; } + if err = internal_sub(A2, A2, t); err != nil { return err; } + + /* + While A2 < 0 do Q0 = Q0 - 1, A2 = A2 + B. + */ + for internal_cmp(A2, 0) == -1 { + if err = internal_decr(Q0); err != nil { return err; } + if err = internal_add(A2, A2, b); err != nil { return err; } + } + + /* + Return q = (Q1*beta^k) + Q0, r = A2. + */ + if err = internal_shl_digit(Q1, k); err != nil { return err; } + if err = internal_add(quotient, Q1, Q0); err != nil { return err; } + + return internal_copy(remainder, A2); +} + +_private_int_div_recursive :: proc(quotient, remainder, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + A, B, Q, Q1, R, A_div, A_mod := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}; + defer internal_destroy(A, B, Q, Q1, R, A_div, A_mod); + + if err = internal_init_multi(A, B, Q, Q1, R, A_div, A_mod); err != nil { return err; } + + /* + Most significant bit of a limb. + Assumes _DIGIT_MAX < (sizeof(DIGIT) * sizeof(u8)). + */ + msb := (_DIGIT_MAX + DIGIT(1)) >> 1; + sigma := 0; + msb_b := b.digit[b.used - 1]; + for msb_b < msb { + sigma += 1; + msb_b <<= 1; + } + + /* + Use that sigma to normalize B. + */ + if err = internal_shl(B, b, sigma); err != nil { return err; } + if err = internal_shl(A, a, sigma); err != nil { return err; } + + /* + Fix the sign. + */ + neg := a.sign != b.sign; + A.sign = .Zero_or_Positive; B.sign = .Zero_or_Positive; + + /* + If the magnitude of "A" is not more more than twice that of "B" we can work + on them directly, otherwise we need to work at "A" in chunks. + */ + n := B.used; + m := A.used - B.used; + + /* + Q = 0. We already ensured that when we called `internal_init_multi`. + */ + for m > n { + /* + (q, r) = RecursiveDivRem(A / (beta^(m-n)), B) + */ + j := (m - n) * _DIGIT_BITS; + if err = internal_shrmod(A_div, A_mod, A, j); err != nil { return err; } + if err = _private_div_recursion(Q1, R, A_div, B); err != nil { return err; } + + /* + Q = (Q*beta!(n)) + q + */ + if err = internal_shl(Q, Q, n * _DIGIT_BITS); err != nil { return err; } + if err = internal_add(Q, Q, Q1); err != nil { return err; } + + /* + A = (r * beta^(m-n)) + (A % beta^(m-n)) + */ + if err = internal_shl(R, R, (m - n) * _DIGIT_BITS); err != nil { return err; } + if err = internal_add(A, R, A_mod); err != nil { return err; } + + /* + m = m - n + */ + m -= n; + } + + /* + (q, r) = RecursiveDivRem(A, B) + */ + if err = _private_div_recursion(Q1, R, A, B); err != nil { return err; } + + /* + Q = (Q * beta^m) + q, R = r + */ + if err = internal_shl(Q, Q, m * _DIGIT_BITS); err != nil { return err; } + if err = internal_add(Q, Q, Q1); err != nil { return err; } + + /* + Get sign before writing to dest. + */ + R.sign = .Zero_or_Positive if internal_is_zero(Q) else a.sign; + + if quotient != nil { + swap(quotient, Q); + quotient.sign = .Negative if neg else .Zero_or_Positive; + } + if remainder != nil { + /* + De-normalize the remainder. + */ + if err = internal_shrmod(R, nil, R, sigma); err != nil { return err; } + swap(remainder, R); + } + return nil; +} + /* Slower bit-bang division... also smaller. */ @@ -1040,7 +1222,7 @@ _private_int_gcd_lcm :: proc(res_gcd, res_lcm, a, b: ^Int, allocator := context. */ _private_int_log :: proc(a: ^Int, base: DIGIT, allocator := context.allocator) -> (res: int, err: Error) { bracket_low, bracket_high, bracket_mid, t, bi_base := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}; - defer destroy(bracket_low, bracket_high, bracket_mid, t, bi_base); + defer internal_destroy(bracket_low, bracket_high, bracket_mid, t, bi_base); ic := #force_inline internal_cmp(a, base); if ic == -1 || ic == 0 { @@ -1107,7 +1289,7 @@ _private_int_log :: proc(a: ^Int, base: DIGIT, allocator := context.allocator) - _private_inverse_modulo :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { context.allocator = allocator; x, y, u, v, A, B, C, D := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}; - defer destroy(x, y, u, v, A, B, C, D); + defer internal_destroy(x, y, u, v, A, B, C, D); /* `b` cannot be negative. @@ -1254,7 +1436,7 @@ _private_inverse_modulo :: proc(dest, a, b: ^Int, allocator := context.allocator _private_inverse_modulo_odd :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { context.allocator = allocator; x, y, u, v, B, D := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}; - defer destroy(x, y, u, v, B, D); + defer internal_destroy(x, y, u, v, B, D); sign: Sign;