diff --git a/core/math/big/build.bat b/core/math/big/build.bat index 4a6aeeb3e..43ece1054 100644 --- a/core/math/big/build.bat +++ b/core/math/big/build.bat @@ -1,10 +1,10 @@ @echo off -:odin run . -vet +odin run . -vet -define:MATH_BIG_USE_FROBENIUS_TEST=true set TEST_ARGS=-fast-tests :set TEST_ARGS= :odin build . -build-mode:shared -show-timings -o:minimal -no-bounds-check -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS% -odin build . -build-mode:shared -show-timings -o:size -no-bounds-check -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS% +:odin build . -build-mode:shared -show-timings -o:size -no-bounds-check -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS% :odin build . -build-mode:shared -show-timings -o:size -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS% :odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check -define:MATH_BIG_EXE=false && python test.py %TEST_ARGS% :odin build . -build-mode:shared -show-timings -o:speed -define:MATH_BIG_EXE=false && python test.py -fast-tests %TEST_ARGS% \ No newline at end of file diff --git a/core/math/big/common.odin b/core/math/big/common.odin index 4171d25f3..4d8224cd6 100644 --- a/core/math/big/common.odin +++ b/core/math/big/common.odin @@ -75,6 +75,17 @@ FACTORIAL_MAX_N := 1_000_000; FACTORIAL_BINARY_SPLIT_CUTOFF := 6100; FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS := 100; +/* + `internal_int_is_prime` switchables. + + Use Frobenius-Underwood for primality testing, or use Lucas-Selfridge (default). +*/ +MATH_BIG_USE_FROBENIUS_TEST :: #config(MATH_BIG_USE_FROBENIUS_TEST, false); + +/* + Runtime tunable to use Miller-Rabin primality testing only and skip the above. 
+*/ +USE_MILLER_RABIN_ONLY := false; /* We don't allow these to be switched at runtime for two reasons: diff --git a/core/math/big/example.odin b/core/math/big/example.odin index 4da2ebbe9..fb1e51053 100644 --- a/core/math/big/example.odin +++ b/core/math/big/example.odin @@ -26,6 +26,7 @@ Configuration: _WARRAY %v _TAB_SIZE %v _MAX_WIN_SIZE %v + MATH_BIG_USE_FROBENIUS_TEST %v Runtime tunable: MUL_KARATSUBA_CUTOFF %v SQR_KARATSUBA_CUTOFF %v @@ -35,6 +36,7 @@ Runtime tunable: FACTORIAL_MAX_N %v FACTORIAL_BINARY_SPLIT_CUTOFF %v FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS %v + USE_MILLER_RABIN_ONLY %v `, _DIGIT_BITS, _LOW_MEMORY, @@ -45,6 +47,8 @@ _MAX_COMBA, _WARRAY, _TAB_SIZE, _MAX_WIN_SIZE, +MATH_BIG_USE_FROBENIUS_TEST, + MUL_KARATSUBA_CUTOFF, SQR_KARATSUBA_CUTOFF, MUL_TOOM_CUTOFF, @@ -53,6 +57,7 @@ MAX_ITERATIONS_ROOT_N, FACTORIAL_MAX_N, FACTORIAL_BINARY_SPLIT_CUTOFF, FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS, +USE_MILLER_RABIN_ONLY, ); } @@ -79,11 +84,24 @@ print :: proc(name: string, a: ^Int, base := i8(10), print_name := true, newline } } -// printf :: fmt.printf; +printf :: fmt.printf; demo :: proc() { a, b, c, d, e, f, res := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}; defer destroy(a, b, c, d, e, f, res); + + err: Error; + prime: bool; + + set(a, "3317044064679887385961981"); // Composite: 1287836182261 × 2575672364521 + trials := number_of_rabin_miller_trials(internal_count_bits(a)); + { + SCOPED_TIMING(.is_prime); + prime, err = internal_int_is_prime(a, trials); + } + print("Candidate prime: ", a); + fmt.printf("%v Miller-Rabin trials needed.\n", trials); + fmt.printf("Is prime: %v, Error: %v\n", prime, err); } main :: proc() { diff --git a/core/math/big/internal.odin b/core/math/big/internal.odin index 72ff1fe76..6ae2f4284 100644 --- a/core/math/big/internal.odin +++ b/core/math/big/internal.odin @@ -1871,7 +1871,7 @@ internal_int_set_from_integer :: proc(dest: ^Int, src: $T, minimize := false, al return nil; } -internal_set :: proc { 
internal_int_set_from_integer, internal_int_copy }; +internal_set :: proc { internal_int_set_from_integer, internal_int_copy, int_atoi }; internal_copy_digits :: #force_inline proc(dest, src: ^Int, digits: int, offset := int(0)) -> (err: Error) { #force_inline internal_error_if_immutable(dest) or_return; @@ -2019,8 +2019,18 @@ internal_invmod :: proc{ internal_int_inverse_modulo, }; /* Helpers to extract values from the `Int`. */ +internal_int_bitfield_extract_bool :: proc(a: ^Int, offset: int) -> (val: bool, err: Error) { + limb := offset / _DIGIT_BITS; + if offset < 0 || limb >= a.used { return false, .Invalid_Argument; } /* Test `offset` itself: `/` truncates toward zero, so `limb` is 0 for small negative offsets. */ + i := _WORD(1 << _WORD((offset % _DIGIT_BITS))); + return bool(_WORD(a.digit[limb]) & i), nil; +} + internal_int_bitfield_extract_single :: proc(a: ^Int, offset: int) -> (bit: _WORD, err: Error) { - return #force_inline int_bitfield_extract(a, offset, 1); + limb := offset / _DIGIT_BITS; + if offset < 0 || limb >= a.used { return 0, .Invalid_Argument; } /* Test `offset` itself: `/` truncates toward zero, so `limb` is 0 for small negative offsets. */ + i := _WORD(1 << _WORD((offset % _DIGIT_BITS))); + return 1 if ((_WORD(a.digit[limb]) & i) != 0) else 0, nil; } internal_int_bitfield_extract :: proc(a: ^Int, offset, count: int) -> (res: _WORD, err: Error) #no_bounds_check { diff --git a/core/math/big/prime.odin b/core/math/big/prime.odin index d6626ffbf..48c72de0d 100644 --- a/core/math/big/prime.odin +++ b/core/math/big/prime.odin @@ -10,6 +10,8 @@ */ package math_big +import rnd "core:math/rand"; + /* Determines if an Integer is divisible by one of the _PRIME_TABLE primes. Returns true if it is, false if not. @@ -204,13 +206,404 @@ internal_int_kronecker :: proc(a, p: ^Int, allocator := context.allocator) -> (k return; } +/* + Miller-Rabin test of "a" to the base of "b" as described in HAC pp. 139 Algorithm 4.24. + + Sets result to `false` if definitely composite or `true` if probably prime. + For a randomly chosen base the chance of error is no more than 1/4 and often very much lower. + + Assumes `a` and `b` not to be `nil` and to have been initialized. 
+*/ +internal_int_prime_miller_rabin :: proc(a, b: ^Int, allocator := context.allocator) -> (probably_prime: bool, err: Error) { + context.allocator = allocator; + + n1, y, r := &Int{}, &Int{}, &Int{}; + defer internal_destroy(n1, y, r); + + /* + Ensure `b` > 1. A base <= 1 is reported as `false` with a `nil` error. + */ + if internal_lte(b, 1) { return false, nil; } + + /* + Get `n1` = `a` - 1. + */ + internal_copy(n1, a) or_return; + internal_sub(n1, n1, 1) or_return; + + /* + Write `n1` as `2`**`s` * `r`; start from `r` = `n1`. + */ + internal_copy(r, n1) or_return; + + /* + Count the number of least significant bits which are zero. + */ + s := internal_count_lsb(r) or_return; + + /* + Now divide `r` (a copy of `n1`) by `2`**`s`. + */ + internal_shr(r, r, s) or_return; + + /* + Compute `y` = `b`**`r` mod `a`. + */ + internal_int_exponent_mod(y, b, r, a) or_return; + + /* + If `y` != 1 and `y` != `n1`, keep squaring `y` looking for `n1`. + */ + if !internal_eq(y, 1) && !internal_eq(y, n1) { + j := 1; + + /* + While `j` <= `s` - 1 and `y` != `n1`. + */ + for j <= (s - 1) && !internal_eq(y, n1) { + internal_sqrmod(y, y, a) or_return; + + /* + If `y` == 1 then composite. + */ + if internal_eq(y, 1) { + return false, nil; + } + + j += 1; + } + + /* + If `y` != `n1` then composite. + */ + if !internal_eq(y, n1) { + return false, nil; + } + } + + /* + Probably prime now. + */ + return true, nil; +} + +/* + `a` is the big Int to test for primality. + + `miller_rabin_trials` can be one of the following: + `< 0`: For `a` up to 3_317_044_064_679_887_385_961_981, set `miller_rabin_trials` to negative to run a predetermined + number of trials for a deterministic answer. + `= 0`: Run Miller-Rabin with bases 2, 3 and one random base < `a`. Non-deterministic. + `> 0`: Run Miller-Rabin with bases 2, 3 and `miller_rabin_trials` number of random bases. Non-deterministic. + + `miller_rabin_only`: + `false` Also use either Frobenius-Underwood or Lucas-Selfridge, depending on the compile-time `MATH_BIG_USE_FROBENIUS_TEST` choice. NOTE: the Lucas-Selfridge branch is currently commented out in the body, so only Frobenius-Underwood actually runs. + `true` Run Rabin-Miller trials but skip Frobenius-Underwood / Lucas-Selfridge. 
+ + `r` takes a pointer to an instance of `core:math/rand`'s `Rand` and may be `nil` to use the global one. + + Returns `is_prime` (bool), where: + `false` Definitively composite. + `true` Probably prime if `miller_rabin_trials` >= 0, with increasing certainty with more trials. + Deterministically prime if `miller_rabin_trials` < 0 for `a` below 3_317_044_064_679_887_385_961_981; a larger `a` yields `.Invalid_Argument` in that mode. + + Assumes `a` not to be `nil` and to have been initialized. +*/ +internal_int_is_prime :: proc(a: ^Int, miller_rabin_trials := int(-1), miller_rabin_only := USE_MILLER_RABIN_ONLY, r: ^rnd.Rand = nil, allocator := context.allocator) -> (is_prime: bool, err: Error) { + context.allocator = allocator; + miller_rabin_trials := miller_rabin_trials; + + // Default to `no`. + is_prime = false; + + b, res := &Int{}, &Int{}; + defer internal_destroy(b, res); + + // Some shortcuts + // `N` > 3 + if a.used == 1 { + if a.digit[0] == 0 || a.digit[0] == 1 { + return; + } + if a.digit[0] == 2 { + return true, nil; + } + } + + // `N` must be odd. + if internal_is_even(a) { + return; + } + + // `N` is not a perfect square: floor(sqrt(`N`))^2 != `N` + if internal_int_is_square(a) or_return { return; } + + // Is the input equal to one of the primes in the table? + for p in _private_prime_table { + if internal_eq(a, p) { + return true, nil; + } + } + + // First perform trial division + if internal_int_prime_is_divisible(a) or_return { return; } + + // Run the Miller-Rabin test with base 2 for the BPSW test. + internal_set(b, 2) or_return; + if !internal_int_prime_miller_rabin(a, b) or_return { return; } + + // Rumours have it that Mathematica does a second M-R test with base 3. + // Other rumours have it that their strong L-S test is slightly different. + // It does not hurt, though, beside a bit of extra runtime. 
+ + b.digit[0] += 1; + if !internal_int_prime_miller_rabin(a, b) or_return { return; } + + // Both the Frobenius-Underwood test and the Lucas-Selfridge test are quite + // slow so if speed is an issue, set `USE_MILLER_RABIN_ONLY` to use M-R tests with + // bases 2, 3 and `miller_rabin_trials` random bases. + + if !miller_rabin_only { + if miller_rabin_trials >= 0 { + when MATH_BIG_USE_FROBENIUS_TEST { + if !internal_int_prime_frobenius_underwood(a) or_return { return; } + } else { +// if ((err = mp_prime_strong_lucas_selfridge(a, &res)) != MP_OKAY) { +// goto LBL_B; +// } +// if (!res) { +// goto LBL_B; +// } + } + } + } + + // Run at least one Miller-Rabin test with a random base. + // Don't replace this with `max`, because we try known deterministic bases + // for certain sized inputs when `miller_rabin_trials` is negative. + if miller_rabin_trials == 0 { + miller_rabin_trials = 1; + } + + // Only recommended if the input range is known to be < 3_317_044_064_679_887_385_961_981 + // It uses the bases necessary for a deterministic M-R test if the input is smaller than 3_317_044_064_679_887_385_961_981 + // The caller has to check the size. + // TODO: can be made a bit finer grained but comparing is not free. + + if miller_rabin_trials < 0 { + p_max := 0; + + // Sorenson, Jonathan; Webster, Jonathan (2015), "Strong Pseudoprimes to Twelve Prime Bases". 
+ + // 0x437ae92817f9fc85b7e5 = 318_665_857_834_031_151_167_461 + atoi(b, "437ae92817f9fc85b7e5", 16) or_return; + if internal_lt(a, b) { + p_max = 12; + } else { + /* 0x2be6951adc5b22410a5fd = 3_317_044_064_679_887_385_961_981 */ + atoi(b, "2be6951adc5b22410a5fd", 16) or_return; + if internal_lt(a, b) { + p_max = 13; + } else { + return false, .Invalid_Argument; + } + } + + // We did bases 2 and 3 already, skip them + for ix := 2; ix < p_max; ix += 1 { + internal_set(b, _private_prime_table[ix]) or_return; + if !internal_int_prime_miller_rabin(a, b) or_return { return; } + } + } else if miller_rabin_trials > 0 { + // Perform `miller_rabin_trials` M-R tests with random bases between 3 and "a". + // See FIPS 186.4 p. 126ff + + // The DIGITs have a defined bit-size but the size of a.digit is a simple 'int', + // the size of which can depend on the platform. + size_a := internal_count_bits(a); + mask := (1 << uint(ilog2(size_a))) - 1; + + /* + Assuming the Generalized Riemann hypothesis (never thought to write that in a + comment) the upper bound can be lowered to 2*(log a)^2. + E. Bach, "Explicit bounds for primality testing and related problems," + Math. Comp. 55 (1990), 355-380. + + size_a = (size_a/10) * 7; + len = 2 * (size_a * size_a); + + E.g.: a number of size 2^2048 would be reduced to the upper limit + + floor(2048/10)*7 = 1428 + 2 * 1428^2 = 4078368 + + (would have been ~4030331.9962 with floats and natural log instead) + That number is smaller than 2^28, the default bit-size of DIGIT on 32-bit platforms. + */ + + /* + How many tests, you might ask? Dana Jacobsen of Math::Prime::Util fame + does exactly 1. In words: one. Look at the end of _GMP_is_prime() in + Math-Prime-Util-GMP-0.50/primality.c if you do not believe it. + + The function rand() goes to some length to use a cryptographically + good PRNG. That also means that the chance to always get the same base + in the loop is non-zero, although very low. 
+ -- NOTE(Jeroen): This is not yet true in Odin, but I have some ideas. + + If the BPSW test and/or the additional Frobenius test have been + performed instead of just the Miller-Rabin test with the bases 2 and 3, + a single extra test should suffice, so such a very unlikely event will not do much harm. + + To preemptively answer the dangling question: no, a witness does not need to be prime. + */ + for ix := 0; ix < miller_rabin_trials; ix += 1 { + + // rand() guarantees the first digit to be non-zero + internal_rand(b, _DIGIT_TYPE_BITS, r) or_return; + + // Reduce digit before casting because DIGIT might be bigger than + // an unsigned int and "mask" on the other side is most probably not. + l: int; + + fips_rand := (uint)(b.digit[0] & DIGIT(mask)); + if fips_rand > (uint)(max(int) - _DIGIT_BITS) { + l = max(int) / _DIGIT_BITS; + } else { + l = (int(fips_rand) + _DIGIT_BITS) / _DIGIT_BITS; + } + + // Unlikely. + if (l < 0) { + ix -= 1; + continue; + } + internal_rand(b, l, r) or_return; /* Use the caller-supplied `r` here as well. NOTE(review): `l` is computed as a digit count; confirm `internal_rand` does not expect a bit count. */ + + // That number might have gotten too big and the witness has to be smaller than "a" + l = internal_count_bits(b); + if l >= size_a { + l = (l - size_a) + 1; + internal_shr(b, b, l) or_return; + } + + // Although the chance for b <= 3 is minuscule, try again. + if internal_lte(b, 3) { + ix -= 1; + continue; + } + if !internal_int_prime_miller_rabin(a, b) or_return { return; } + } + } + + // Passed the test. + return true, nil; +} + +/* + * floor of positive solution of (2^31) - 1 = (a + 4) * (2 * a + 5) + * TODO: Both values are smaller than N^(1/4), would have to use a bigint + * for `a` instead, but any `a` bigger than about 120 are already so rare that + * it is possible to ignore them and still get enough pseudoprimes. + * But it is still a restriction of the set of available pseudoprimes + * which makes this implementation less secure if used stand-alone. 
+ */ +_FROBENIUS_UNDERWOOD_A :: 32764; + +internal_int_prime_frobenius_underwood :: proc(N: ^Int, allocator := context.allocator) -> (result: bool, err: Error) { + context.allocator = allocator; + + T1z, T2z, Np1z, sz, tz := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}; + defer internal_destroy(T1z, T2z, Np1z, sz, tz); + + internal_init_multi(T1z, T2z, Np1z, sz, tz) or_return; + + a, ap2: int; + + frob: for a = 0; a < _FROBENIUS_UNDERWOOD_A; a += 1 { + switch a { + case 2, 4, 7, 8, 10, 14, 18, 23, 26, 28: + continue frob; + } + + internal_set(T1z, i32((a * a) - 4)) or_return; + j := internal_int_kronecker(T1z, N) or_return; + + switch j { + case -1: break frob; + case 0: return false, nil; + } + } + + // No `a` with Kronecker symbol -1 was found: report `false` together with `.Max_Iterations_Reached`. + if a >= _FROBENIUS_UNDERWOOD_A { return false, .Max_Iterations_Reached; } + + // Composite if N and (a+4)*(2*a+5) are not coprime. + internal_set(T1z, u32((a + 4) * ((2 * a) + 5))) or_return; + internal_int_gcd_lcm(T1z, nil, T1z, N) or_return; + + if !(T1z.used == 1 && T1z.digit[0] == 1) { + // Composite. 
+ return false, nil; + } + + ap2 = a + 2; + internal_add(Np1z, N, 1) or_return; + + internal_set(sz, 1) or_return; + internal_set(tz, 2) or_return; + + for i := internal_count_bits(Np1z) - 2; i >= 0; i -= 1 { + // temp = (sz * (a * sz + 2 * tz)) % N; + // tz = ((tz - sz) * (tz + sz)) % N; + // sz = temp; + + internal_int_shl1(T2z, tz) or_return; + + // a = 0 at about 50% of the cases (non-square and odd input) + if a != 0 { + internal_mul(T1z, sz, DIGIT(a)) or_return; + internal_add(T2z, T2z, T1z) or_return; + } + + internal_mul(T1z, T2z, sz) or_return; + internal_sub(T2z, tz, sz) or_return; + internal_add(sz, sz, tz) or_return; + internal_mul(tz, sz, T2z) or_return; + internal_mod(tz, tz, N) or_return; + internal_mod(sz, T1z, N) or_return; + + if bit, _ := internal_int_bitfield_extract_bool(Np1z, i); bit { + // temp = (a+2) * sz + tz + // tz = 2 * tz - sz + // sz = temp + if a == 0 { + internal_int_shl1(T1z, sz) or_return; + } else { + internal_mul(T1z, sz, DIGIT(ap2)) or_return; + } + internal_add(T1z, T1z, tz) or_return; + internal_int_shl1(T2z, tz) or_return; + internal_sub(tz, T2z, sz) or_return; + internal_swap(sz, T1z); + } + } + + internal_set(T1z, u32((2 * a) + 5)) or_return; + internal_mod(T1z, T1z, N) or_return; + + result = internal_is_zero(sz) && internal_eq(tz, T1z); + + return; +} + /* Returns the number of Rabin-Miller trials needed for a given bit size. */ number_of_rabin_miller_trials :: proc(bit_size: int) -> (number_of_trials: int) { switch { case bit_size <= 80: - return - 1; /* Use deterministic algorithm for size <= 80 bits */ + return -1; /* Use deterministic algorithm for size <= 80 bits */ case bit_size >= 81 && bit_size < 96: return 37; /* max. 
error = 2^(-96) */ case bit_size >= 96 && bit_size < 128: diff --git a/core/math/big/private.odin b/core/math/big/private.odin index fc2fe69e8..002dbda09 100644 --- a/core/math/big/private.odin +++ b/core/math/big/private.odin @@ -1373,7 +1373,7 @@ _private_int_div_recursive :: proc(quotient, remainder, a, b: ^Int, allocator := _private_int_div_small :: proc(quotient, remainder, numerator, denominator: ^Int) -> (err: Error) { ta, tb, tq, q := &Int{}, &Int{}, &Int{}, &Int{}; - c: int; + defer internal_destroy(ta, tb, tq, q); for { diff --git a/core/math/big/radix.odin b/core/math/big/radix.odin index 8a7040158..76854e244 100644 --- a/core/math/big/radix.odin +++ b/core/math/big/radix.odin @@ -413,14 +413,14 @@ _log_bases :: [65]u32{ */ RADIX_TABLE := "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; RADIX_TABLE_REVERSE := [RADIX_TABLE_REVERSE_SIZE]u8{ - 0x3e, 0xff, 0xff, 0xff, 0x3f, 0x00, 0x01, 0x02, 0x03, 0x04, /* +,-./01234 */ - 0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, /* 56789:;<=> */ - 0xff, 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, /* ?@ABCDEFGH */ - 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, /* IJKLMNOPQR */ - 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0xff, 0xff, /* STUVWXYZ[\ */ - 0xff, 0xff, 0xff, 0xff, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, /* ]^_`abcdef */ - 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, /* ghijklmnop */ - 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, /* qrstuvwxyz */ + 0x3e, 0xff, 0xff, 0xff, 0x3f, 0x00, 0x01, 0x02, 0x03, 0x04, /* +,-./01234 */ + 0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, /* 56789:;<=> */ + 0xff, 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, /* ?@ABCDEFGH */ + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, /* IJKLMNOPQR */ + 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0xff, 0xff, /* STUVWXYZ[\ */ + 0xff, 0xff, 0xff, 0xff, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, /* ]^_`abcdef */ + 0x2a, 
0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, /* ghijklmnop */ + 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, /* qrstuvwxyz */ }; RADIX_TABLE_REVERSE_SIZE :: 80; diff --git a/core/math/big/tune.odin b/core/math/big/tune.odin index 3381065bb..ced8e5f5d 100644 --- a/core/math/big/tune.odin +++ b/core/math/big/tune.odin @@ -23,6 +23,7 @@ Category :: enum { sqr, bitfield_extract, rm_trials, + is_prime, }; Event :: struct {