Clean up core:math/big

- Deprecate the u64/u32 implementation so we can use fewer nails and have an easier time of maintaining and optimizing the package going forward. The remaining implementation still works on 32-bit targets, it's just a smidge less efficient.

- Use only 1 nail instead of 4. The tests now run 3.5% faster as a result.

Future optimizations may include using a fully packed backing (no nails), using `intrinsics.overflow_*` to handle borrow and carry safely.
This commit is contained in:
Jeroen van Rijn
2026-02-15 17:00:53 +01:00
parent 730e6ce0d4
commit 5c95a48bc7
6 changed files with 34 additions and 103 deletions

View File

@@ -14,21 +14,10 @@ import "base:runtime"
This allows benchmarking and/or setting optimized values for a certain CPU without recompiling.
*/
/*
There is a bug with DLL globals. They don't get set.
To allow tests to run we add `-define:MATH_BIG_EXE=false` to hardcode the cutoffs for now.
*/
when #config(MATH_BIG_EXE, true) {
MUL_KARATSUBA_CUTOFF := _DEFAULT_MUL_KARATSUBA_CUTOFF
SQR_KARATSUBA_CUTOFF := _DEFAULT_SQR_KARATSUBA_CUTOFF
MUL_TOOM_CUTOFF := _DEFAULT_MUL_TOOM_CUTOFF
SQR_TOOM_CUTOFF := _DEFAULT_SQR_TOOM_CUTOFF
} else {
MUL_KARATSUBA_CUTOFF := _DEFAULT_MUL_KARATSUBA_CUTOFF
SQR_KARATSUBA_CUTOFF := _DEFAULT_SQR_KARATSUBA_CUTOFF
MUL_TOOM_CUTOFF := _DEFAULT_MUL_TOOM_CUTOFF
SQR_TOOM_CUTOFF := _DEFAULT_SQR_TOOM_CUTOFF
}
MUL_KARATSUBA_CUTOFF := _DEFAULT_MUL_KARATSUBA_CUTOFF
SQR_KARATSUBA_CUTOFF := _DEFAULT_SQR_KARATSUBA_CUTOFF
MUL_TOOM_CUTOFF := _DEFAULT_MUL_TOOM_CUTOFF
SQR_TOOM_CUTOFF := _DEFAULT_SQR_TOOM_CUTOFF
/*
These defaults were tuned on an AMD A8-6600K (64-bit) using libTomMath's `make tune`.
@@ -38,9 +27,6 @@ when #config(MATH_BIG_EXE, true) {
It would also be cool if we collected some data across various processor families.
This would let us set reasonable defaults at runtime as this library initializes
itself by using `cpuid` or the ARM equivalent.
IMPORTANT: The 32_BIT path has largely gone untested. It needs to be tested and
debugged where necessary.
*/
_DEFAULT_MUL_KARATSUBA_CUTOFF :: #config(MATH_BIG_MUL_KARATSUBA_CUTOFF, 80)
@@ -54,7 +40,7 @@ MAX_ITERATIONS_ROOT_N := 500
/*
Largest `N` for which we'll compute `N!`
*/
FACTORIAL_MAX_N := 1_000_000
FACTORIAL_MAX_N := 1_000_000
/*
Cutoff to switch to int_factorial_binary_split, and its max recursion level.
@@ -86,22 +72,10 @@ MAX_ITERATIONS_RANDOM_PRIME := 1_000_000
*/
@thread_local RANDOM_PRIME_ITERATIONS_USED: int
/*
We don't allow these to be switched at runtime for two reasons:
1) 32-bit and 64-bit versions of procedures use different types for their storage,
so we'd have to double the number of procedures, and they couldn't interact.
2) Optimizations thanks to precomputed masks wouldn't work.
*/
MATH_BIG_FORCE_64_BIT :: #config(MATH_BIG_FORCE_64_BIT, false)
MATH_BIG_FORCE_32_BIT :: #config(MATH_BIG_FORCE_32_BIT, false)
when (MATH_BIG_FORCE_32_BIT && MATH_BIG_FORCE_64_BIT) { #panic("Cannot force 32-bit and 64-bit big backend simultaneously.") }
/*
Trade a smaller memory footprint for more processing overhead?
*/
_LOW_MEMORY :: #config(MATH_BIG_SMALL_MEMORY, false)
_LOW_MEMORY :: #config(MATH_BIG_SMALL_MEMORY, false)
when _LOW_MEMORY {
_DEFAULT_DIGIT_COUNT :: 8
_TAB_SIZE :: 32
@@ -217,28 +191,19 @@ _MIN_DIGIT_COUNT :: max(3, ((size_of(u128) + _DIGIT_BITS) - 1) / _DIGIT_BITS)
_MAX_BIT_COUNT :: (max(int) - 2)
_MAX_DIGIT_COUNT :: _MAX_BIT_COUNT / _DIGIT_BITS
when MATH_BIG_FORCE_64_BIT || (!MATH_BIG_FORCE_32_BIT && size_of(rawptr) == 8) {
/*
We can use u128 as an intermediary.
*/
DIGIT :: distinct u64
_WORD :: distinct u128
// Base 10 extraction constants
ITOA_DIVISOR :: DIGIT(1_000_000_000_000_000_000)
ITOA_COUNT :: 18
} else {
DIGIT :: distinct u32
_WORD :: distinct u64
// Base 10 extraction constants
ITOA_DIVISOR :: DIGIT(100_000_000)
ITOA_COUNT :: 8
}
// We use u128 as an intermediary.
DIGIT :: distinct u64
_WORD :: distinct u128
// Base 10 extraction constants
ITOA_DIVISOR :: DIGIT(1_000_000_000_000_000_000)
ITOA_COUNT :: 18
#assert(size_of(_WORD) == 2 * size_of(DIGIT))
_DIGIT_TYPE_BITS :: 8 * size_of(DIGIT)
_WORD_TYPE_BITS :: 8 * size_of(_WORD)
_DIGIT_NAILS :: 4
_DIGIT_NAILS :: 1
_DIGIT_BITS :: _DIGIT_TYPE_BITS - _DIGIT_NAILS
_WORD_BITS :: 2 * _DIGIT_BITS

View File

@@ -7,7 +7,6 @@ package math_big
import "base:intrinsics"
import "base:runtime"
import rnd "core:math/rand"
/*
TODO: Int.flags and Constants like ONE, NAN, etc, are not yet properly handled everywhere.
@@ -362,17 +361,7 @@ platform_count_lsb :: #force_inline proc(a: $T) -> (count: int)
count_lsb :: proc { int_count_lsb, platform_count_lsb, }
int_random_digit :: proc() -> (res: DIGIT) {
when _DIGIT_BITS == 60 { // DIGIT = u64
return DIGIT(rnd.uint64()) & _MASK
} else when _DIGIT_BITS == 28 { // DIGIT = u32
return DIGIT(rnd.uint32()) & _MASK
} else {
panic("Unsupported DIGIT size.")
}
return 0 // We shouldn't get here.
}
int_random_digit :: internal_int_random_digit
int_random :: proc(dest: ^Int, bits: int, allocator := context.allocator) -> (err: Error) {
/*

View File

@@ -30,7 +30,7 @@ package math_big
import "base:builtin"
import "base:intrinsics"
import "base:runtime"
import rnd "core:math/rand"
@(require) import rnd "core:math/rand"
/*
Low-level addition, unsigned. Handbook of Applied Cryptography, algorithm 14.7.
@@ -1178,6 +1178,8 @@ internal_cmp_digit :: internal_compare_digit
*/
internal_int_compare_magnitude :: #force_inline proc(a, b: ^Int) -> (comparison: int) {
assert_if_nil(a, b)
internal_clamp(a)
internal_clamp(b)
// Compare based on used digits.
if a.used != b.used {
@@ -1450,6 +1452,7 @@ internal_int_log :: proc(a: ^Int, base: DIGIT) -> (res: int, err: Error) {
/*
Fast path for `Int`s that fit within a single `DIGIT`.
*/
internal_clamp(a)
if a.used == 1 { return internal_log(a.digit[0], DIGIT(base)) }
return _private_int_log(a, base)
@@ -2820,9 +2823,9 @@ internal_platform_count_lsb :: #force_inline proc(a: $T) -> (count: int)
internal_count_lsb :: proc { internal_int_count_lsb, internal_platform_count_lsb, }
internal_int_random_digit :: proc() -> (res: DIGIT) {
when _DIGIT_BITS == 60 { // DIGIT = u64
when _DIGIT_TYPE_BITS == 64 { // DIGIT = u64
return DIGIT(rnd.uint64()) & _MASK
} else when _DIGIT_BITS == 28 { // DIGIT = u32
} else when _DIGIT_TYPE_BITS == 32 { // DIGIT = u32
return DIGIT(rnd.uint32()) & _MASK
} else {
panic("Unsupported DIGIT size.")

View File

@@ -3282,9 +3282,8 @@ _private_prime_table := [_PRIME_TAB_SIZE]DIGIT{
}
#assert(_PRIME_TAB_SIZE * size_of(DIGIT) == size_of(_private_prime_table))
when MATH_BIG_FORCE_64_BIT || (!MATH_BIG_FORCE_32_BIT && size_of(rawptr) == 8) {
@(rodata)
_factorial_table := [35]_WORD{
@(rodata)
_factorial_table := [35]_WORD{
/* f(00): */ 1,
/* f(01): */ 1,
/* f(02): */ 2,
@@ -3320,34 +3319,9 @@ when MATH_BIG_FORCE_64_BIT || (!MATH_BIG_FORCE_32_BIT && size_of(rawptr) == 8) {
/* f(32): */ 263_130_836_933_693_530_167_218_012_160_000_000,
/* f(33): */ 8_683_317_618_811_886_495_518_194_401_280_000_000,
/* f(34): */ 295_232_799_039_604_140_847_618_609_643_520_000_000,
}
} else {
@(rodata)
_factorial_table := [21]_WORD{
/* f(00): */ 1,
/* f(01): */ 1,
/* f(02): */ 2,
/* f(03): */ 6,
/* f(04): */ 24,
/* f(05): */ 120,
/* f(06): */ 720,
/* f(07): */ 5_040,
/* f(08): */ 40_320,
/* f(09): */ 362_880,
/* f(10): */ 3_628_800,
/* f(11): */ 39_916_800,
/* f(12): */ 479_001_600,
/* f(13): */ 6_227_020_800,
/* f(14): */ 87_178_291_200,
/* f(15): */ 1_307_674_368_000,
/* f(16): */ 20_922_789_888_000,
/* f(17): */ 355_687_428_096_000,
/* f(18): */ 6_402_373_705_728_000,
/* f(19): */ 121_645_100_408_832_000,
/* f(20): */ 2_432_902_008_176_640_000,
}
}
/*
========================= End of private tables ========================
*/

View File

@@ -226,7 +226,7 @@ int_itoa_raw :: proc(a: ^Int, radix: i8, buffer: []u8, size := int(-1), zero_ter
// If we optimize `itoa` further, this needs to be evaluated.
itoa_method := _itoa_raw_full
when !MATH_BIG_FORCE_32_BIT && ODIN_OPTIMIZATION_MODE >= .Size {
when ODIN_OPTIMIZATION_MODE >= .Size {
if count >= 32768 {
itoa_method = _itoa_raw_old
}

View File

@@ -151,7 +151,7 @@ test_big_math_vectors :: proc(t: ^testing.T) {
err := big.pow(res, a, int(power))
testing.expect(t, err == vec.err)
expect_ab(t, "Expected pow(%v, %v) to be '%v', got %v", a, b, expected, res, err)
expect_ab(t, "Expected pow(%v, %v) to be %q, got %q", a, b, expected, res, err)
case .Root:
n, n_ok := strconv.parse_i64_of_base(vec.b, 16)
@@ -160,7 +160,7 @@ test_big_math_vectors :: proc(t: ^testing.T) {
err := big.root_n(res, a, int(n))
testing.expect(t, err == vec.err)
expect_ab(t, "Expected root_n(%v, %v) to be '%v', got %v", a, b, expected, res, err)
expect_ab(t, "Expected root_n(%v, %v) to be %q, got %q", a, b, expected, res, err)
case .Shl:
bits, bits_ok := strconv.parse_i64_of_base(vec.b, 16)
@@ -169,7 +169,7 @@ test_big_math_vectors :: proc(t: ^testing.T) {
err := big.internal_int_shl(res, a, int(bits))
testing.expect(t, err == vec.err)
expect_ab(t, "Expected internal_int_shl(%v, %v) to be '%v', got %v", a, b, expected, res, err)
expect_ab(t, "Expected internal_int_shl(%v, %v) to be %q, got %q", a, b, expected, res, err)
case .Shr:
bits, bits_ok := strconv.parse_i64_of_base(vec.b, 16)
@@ -178,7 +178,7 @@ test_big_math_vectors :: proc(t: ^testing.T) {
err := big.internal_int_shr(res, a, int(bits))
testing.expect(t, err == vec.err)
expect_ab(t, "Expected internal_int_shr(%v, %v) to be '%v', got %v", a, b, expected, res, err)
expect_ab(t, "Expected internal_int_shr(%v, %v) to be %q, got %q", a, b, expected, res, err)
case .Shr_Signed:
bits, bits_ok := strconv.parse_i64_of_base(vec.b, 16)
@@ -188,7 +188,7 @@ test_big_math_vectors :: proc(t: ^testing.T) {
err := big.internal_int_shr_signed(res, res, int(bits))
testing.expect(t, err == vec.err)
expect_ab(t, "Expected internal_int_shr_signed(%v, %v) to be '%v', got %v", a, b, expected, res, err)
expect_ab(t, "Expected internal_int_shr_signed(%v, %v) to be %q, got %q", a, b, expected, res, err)
case .Factorial:
n, n_ok := strconv.parse_i64_of_base(vec.a, 16)
@@ -197,26 +197,26 @@ test_big_math_vectors :: proc(t: ^testing.T) {
err := big.factorial(res, int(n))
testing.expect(t, err == vec.err)
expect_a(t, "Expected factorial(%v) to be '%v', got %v", a, expected, res, err)
expect_a(t, "Expected factorial(%v) to be %q, got %q", a, expected, res, err)
case .Gcd:
err := big.internal_int_gcd_lcm(res, nil, a, b)
testing.expect(t, err == vec.err)
expect_ab(t, "Expected gcd(%v, %v) to be '%v', got %v", a, b, expected, res, err)
expect_ab(t, "Expected gcd(%v, %v) to be %q, got %q", a, b, expected, res, err)
case .Lcm:
err := big.internal_int_gcd_lcm(nil, res, a, b)
testing.expect(t, err == vec.err)
expect_ab(t, "Expected lcm(%v, %v) to be '%v', got %v", a, b, expected, res, err)
expect_ab(t, "Expected lcm(%v, %v) to be %q, got %q", a, b, expected, res, err)
case .Is_Square:
square, err := big.internal_int_is_square(a)
testing.expect(t, err == vec.err)
big.set(res, 1 if square else 0)
expect_a(t, "Expected is_square(%v) to be '%v', got %v", a, expected, res, err)
expect_a(t, "Expected is_square(%v) to be %q, got %q", a, expected, res, err)
case:
log.assertf(false, "Unhandled op: %v", vec.op)