Clean up core:math/big

- Deprecate the u64/u32 implementation so we can use fewer nails and have an easier time of maintaining and optimizing the package going forward. The remaining implementation still works on 32-bit targets, it's just a smidge less efficient.

- Use only 1 nail instead of 4. The tests now run 3.5% faster as a result.

Future optimizations may include using a fully packed backing (no nails), using `intrinsics.overflow_*` to handle borrow and carry safely.
This commit is contained in:
Jeroen van Rijn
2026-02-15 17:00:53 +01:00
parent 730e6ce0d4
commit 5c95a48bc7
6 changed files with 34 additions and 103 deletions

View File

@@ -14,21 +14,10 @@ import "base:runtime"
This allows benchmarking and/or setting optimized values for a certain CPU without recompiling.
*/
/*
There is a bug with DLL globals. They don't get set.
To allow tests to run we add `-define:MATH_BIG_EXE=false` to hardcode the cutoffs for now.
*/
when #config(MATH_BIG_EXE, true) {
MUL_KARATSUBA_CUTOFF := _DEFAULT_MUL_KARATSUBA_CUTOFF
SQR_KARATSUBA_CUTOFF := _DEFAULT_SQR_KARATSUBA_CUTOFF
MUL_TOOM_CUTOFF := _DEFAULT_MUL_TOOM_CUTOFF
SQR_TOOM_CUTOFF := _DEFAULT_SQR_TOOM_CUTOFF
} else {
MUL_KARATSUBA_CUTOFF := _DEFAULT_MUL_KARATSUBA_CUTOFF
SQR_KARATSUBA_CUTOFF := _DEFAULT_SQR_KARATSUBA_CUTOFF
MUL_TOOM_CUTOFF := _DEFAULT_MUL_TOOM_CUTOFF
SQR_TOOM_CUTOFF := _DEFAULT_SQR_TOOM_CUTOFF
}
MUL_KARATSUBA_CUTOFF := _DEFAULT_MUL_KARATSUBA_CUTOFF
SQR_KARATSUBA_CUTOFF := _DEFAULT_SQR_KARATSUBA_CUTOFF
MUL_TOOM_CUTOFF := _DEFAULT_MUL_TOOM_CUTOFF
SQR_TOOM_CUTOFF := _DEFAULT_SQR_TOOM_CUTOFF
/*
These defaults were tuned on an AMD A8-6600K (64-bit) using libTomMath's `make tune`.
@@ -38,9 +27,6 @@ when #config(MATH_BIG_EXE, true) {
It would also be cool if we collected some data across various processor families.
This would let us set reasonable defaults at runtime as this library initializes
itself by using `cpuid` or the ARM equivalent.
IMPORTANT: The 32_BIT path has largely gone untested. It needs to be tested and
debugged where necessary.
*/
_DEFAULT_MUL_KARATSUBA_CUTOFF :: #config(MATH_BIG_MUL_KARATSUBA_CUTOFF, 80)
@@ -54,7 +40,7 @@ MAX_ITERATIONS_ROOT_N := 500
/*
Largest `N` for which we'll compute `N!`
*/
FACTORIAL_MAX_N := 1_000_000
FACTORIAL_MAX_N := 1_000_000
/*
Cutoff to switch to int_factorial_binary_split, and its max recursion level.
@@ -86,22 +72,10 @@ MAX_ITERATIONS_RANDOM_PRIME := 1_000_000
*/
@thread_local RANDOM_PRIME_ITERATIONS_USED: int
/*
We don't allow these to be switched at runtime for two reasons:
1) 32-bit and 64-bit versions of procedures use different types for their storage,
so we'd have to double the number of procedures, and they couldn't interact.
2) Optimizations thanks to precomputed masks wouldn't work.
*/
MATH_BIG_FORCE_64_BIT :: #config(MATH_BIG_FORCE_64_BIT, false)
MATH_BIG_FORCE_32_BIT :: #config(MATH_BIG_FORCE_32_BIT, false)
when (MATH_BIG_FORCE_32_BIT && MATH_BIG_FORCE_64_BIT) { #panic("Cannot force 32-bit and 64-bit big backend simultaneously.") }
/*
Trade a smaller memory footprint for more processing overhead?
*/
_LOW_MEMORY :: #config(MATH_BIG_SMALL_MEMORY, false)
_LOW_MEMORY :: #config(MATH_BIG_SMALL_MEMORY, false)
when _LOW_MEMORY {
_DEFAULT_DIGIT_COUNT :: 8
_TAB_SIZE :: 32
@@ -217,28 +191,19 @@ _MIN_DIGIT_COUNT :: max(3, ((size_of(u128) + _DIGIT_BITS) - 1) / _DIGIT_BITS)
_MAX_BIT_COUNT :: (max(int) - 2)
_MAX_DIGIT_COUNT :: _MAX_BIT_COUNT / _DIGIT_BITS
when MATH_BIG_FORCE_64_BIT || (!MATH_BIG_FORCE_32_BIT && size_of(rawptr) == 8) {
/*
We can use u128 as an intermediary.
*/
DIGIT :: distinct u64
_WORD :: distinct u128
// Base 10 extraction constants
ITOA_DIVISOR :: DIGIT(1_000_000_000_000_000_000)
ITOA_COUNT :: 18
} else {
DIGIT :: distinct u32
_WORD :: distinct u64
// Base 10 extraction constants
ITOA_DIVISOR :: DIGIT(100_000_000)
ITOA_COUNT :: 8
}
// We use u128 as an intermediary.
DIGIT :: distinct u64
_WORD :: distinct u128
// Base 10 extraction constants
ITOA_DIVISOR :: DIGIT(1_000_000_000_000_000_000)
ITOA_COUNT :: 18
#assert(size_of(_WORD) == 2 * size_of(DIGIT))
_DIGIT_TYPE_BITS :: 8 * size_of(DIGIT)
_WORD_TYPE_BITS :: 8 * size_of(_WORD)
_DIGIT_NAILS :: 4
_DIGIT_NAILS :: 1
_DIGIT_BITS :: _DIGIT_TYPE_BITS - _DIGIT_NAILS
_WORD_BITS :: 2 * _DIGIT_BITS

View File

@@ -7,7 +7,6 @@ package math_big
import "base:intrinsics"
import "base:runtime"
import rnd "core:math/rand"
/*
TODO: Int.flags and Constants like ONE, NAN, etc, are not yet properly handled everywhere.
@@ -362,17 +361,7 @@ platform_count_lsb :: #force_inline proc(a: $T) -> (count: int)
count_lsb :: proc { int_count_lsb, platform_count_lsb, }
int_random_digit :: proc() -> (res: DIGIT) {
when _DIGIT_BITS == 60 { // DIGIT = u64
return DIGIT(rnd.uint64()) & _MASK
} else when _DIGIT_BITS == 28 { // DIGIT = u32
return DIGIT(rnd.uint32()) & _MASK
} else {
panic("Unsupported DIGIT size.")
}
return 0 // We shouldn't get here.
}
int_random_digit :: internal_int_random_digit
int_random :: proc(dest: ^Int, bits: int, allocator := context.allocator) -> (err: Error) {
/*

View File

@@ -30,7 +30,7 @@ package math_big
import "base:builtin"
import "base:intrinsics"
import "base:runtime"
import rnd "core:math/rand"
@(require) import rnd "core:math/rand"
/*
Low-level addition, unsigned. Handbook of Applied Cryptography, algorithm 14.7.
@@ -1178,6 +1178,8 @@ internal_cmp_digit :: internal_compare_digit
*/
internal_int_compare_magnitude :: #force_inline proc(a, b: ^Int) -> (comparison: int) {
assert_if_nil(a, b)
internal_clamp(a)
internal_clamp(b)
// Compare based on used digits.
if a.used != b.used {
@@ -1450,6 +1452,7 @@ internal_int_log :: proc(a: ^Int, base: DIGIT) -> (res: int, err: Error) {
/*
Fast path for `Int`s that fit within a single `DIGIT`.
*/
internal_clamp(a)
if a.used == 1 { return internal_log(a.digit[0], DIGIT(base)) }
return _private_int_log(a, base)
@@ -2820,9 +2823,9 @@ internal_platform_count_lsb :: #force_inline proc(a: $T) -> (count: int)
internal_count_lsb :: proc { internal_int_count_lsb, internal_platform_count_lsb, }
internal_int_random_digit :: proc() -> (res: DIGIT) {
when _DIGIT_BITS == 60 { // DIGIT = u64
when _DIGIT_TYPE_BITS == 64 { // DIGIT = u64
return DIGIT(rnd.uint64()) & _MASK
} else when _DIGIT_BITS == 28 { // DIGIT = u32
} else when _DIGIT_TYPE_BITS == 32 { // DIGIT = u32
return DIGIT(rnd.uint32()) & _MASK
} else {
panic("Unsupported DIGIT size.")

View File

@@ -3282,9 +3282,8 @@ _private_prime_table := [_PRIME_TAB_SIZE]DIGIT{
}
#assert(_PRIME_TAB_SIZE * size_of(DIGIT) == size_of(_private_prime_table))
when MATH_BIG_FORCE_64_BIT || (!MATH_BIG_FORCE_32_BIT && size_of(rawptr) == 8) {
@(rodata)
_factorial_table := [35]_WORD{
@(rodata)
_factorial_table := [35]_WORD{
/* f(00): */ 1,
/* f(01): */ 1,
/* f(02): */ 2,
@@ -3320,34 +3319,9 @@ when MATH_BIG_FORCE_64_BIT || (!MATH_BIG_FORCE_32_BIT && size_of(rawptr) == 8) {
/* f(32): */ 263_130_836_933_693_530_167_218_012_160_000_000,
/* f(33): */ 8_683_317_618_811_886_495_518_194_401_280_000_000,
/* f(34): */ 295_232_799_039_604_140_847_618_609_643_520_000_000,
}
} else {
@(rodata)
_factorial_table := [21]_WORD{
/* f(00): */ 1,
/* f(01): */ 1,
/* f(02): */ 2,
/* f(03): */ 6,
/* f(04): */ 24,
/* f(05): */ 120,
/* f(06): */ 720,
/* f(07): */ 5_040,
/* f(08): */ 40_320,
/* f(09): */ 362_880,
/* f(10): */ 3_628_800,
/* f(11): */ 39_916_800,
/* f(12): */ 479_001_600,
/* f(13): */ 6_227_020_800,
/* f(14): */ 87_178_291_200,
/* f(15): */ 1_307_674_368_000,
/* f(16): */ 20_922_789_888_000,
/* f(17): */ 355_687_428_096_000,
/* f(18): */ 6_402_373_705_728_000,
/* f(19): */ 121_645_100_408_832_000,
/* f(20): */ 2_432_902_008_176_640_000,
}
}
/*
========================= End of private tables ========================
*/

View File

@@ -226,7 +226,7 @@ int_itoa_raw :: proc(a: ^Int, radix: i8, buffer: []u8, size := int(-1), zero_ter
// If we optimize `itoa` further, this needs to be evaluated.
itoa_method := _itoa_raw_full
when !MATH_BIG_FORCE_32_BIT && ODIN_OPTIMIZATION_MODE >= .Size {
when ODIN_OPTIMIZATION_MODE >= .Size {
if count >= 32768 {
itoa_method = _itoa_raw_old
}

View File

@@ -151,7 +151,7 @@ test_big_math_vectors :: proc(t: ^testing.T) {
err := big.pow(res, a, int(power))
testing.expect(t, err == vec.err)
expect_ab(t, "Expected pow(%v, %v) to be '%v', got %v", a, b, expected, res, err)
expect_ab(t, "Expected pow(%v, %v) to be %q, got %q", a, b, expected, res, err)
case .Root:
n, n_ok := strconv.parse_i64_of_base(vec.b, 16)
@@ -160,7 +160,7 @@ test_big_math_vectors :: proc(t: ^testing.T) {
err := big.root_n(res, a, int(n))
testing.expect(t, err == vec.err)
expect_ab(t, "Expected root_n(%v, %v) to be '%v', got %v", a, b, expected, res, err)
expect_ab(t, "Expected root_n(%v, %v) to be %q, got %q", a, b, expected, res, err)
case .Shl:
bits, bits_ok := strconv.parse_i64_of_base(vec.b, 16)
@@ -169,7 +169,7 @@ test_big_math_vectors :: proc(t: ^testing.T) {
err := big.internal_int_shl(res, a, int(bits))
testing.expect(t, err == vec.err)
expect_ab(t, "Expected internal_int_shl(%v, %v) to be '%v', got %v", a, b, expected, res, err)
expect_ab(t, "Expected internal_int_shl(%v, %v) to be %q, got %q", a, b, expected, res, err)
case .Shr:
bits, bits_ok := strconv.parse_i64_of_base(vec.b, 16)
@@ -178,7 +178,7 @@ test_big_math_vectors :: proc(t: ^testing.T) {
err := big.internal_int_shr(res, a, int(bits))
testing.expect(t, err == vec.err)
expect_ab(t, "Expected internal_int_shr(%v, %v) to be '%v', got %v", a, b, expected, res, err)
expect_ab(t, "Expected internal_int_shr(%v, %v) to be %q, got %q", a, b, expected, res, err)
case .Shr_Signed:
bits, bits_ok := strconv.parse_i64_of_base(vec.b, 16)
@@ -188,7 +188,7 @@ test_big_math_vectors :: proc(t: ^testing.T) {
err := big.internal_int_shr_signed(res, res, int(bits))
testing.expect(t, err == vec.err)
expect_ab(t, "Expected internal_int_shr_signed(%v, %v) to be '%v', got %v", a, b, expected, res, err)
expect_ab(t, "Expected internal_int_shr_signed(%v, %v) to be %q, got %q", a, b, expected, res, err)
case .Factorial:
n, n_ok := strconv.parse_i64_of_base(vec.a, 16)
@@ -197,26 +197,26 @@ test_big_math_vectors :: proc(t: ^testing.T) {
err := big.factorial(res, int(n))
testing.expect(t, err == vec.err)
expect_a(t, "Expected factorial(%v) to be '%v', got %v", a, expected, res, err)
expect_a(t, "Expected factorial(%v) to be %q, got %q", a, expected, res, err)
case .Gcd:
err := big.internal_int_gcd_lcm(res, nil, a, b)
testing.expect(t, err == vec.err)
expect_ab(t, "Expected gcd(%v, %v) to be '%v', got %v", a, b, expected, res, err)
expect_ab(t, "Expected gcd(%v, %v) to be %q, got %q", a, b, expected, res, err)
case .Lcm:
err := big.internal_int_gcd_lcm(nil, res, a, b)
testing.expect(t, err == vec.err)
expect_ab(t, "Expected lcm(%v, %v) to be '%v', got %v", a, b, expected, res, err)
expect_ab(t, "Expected lcm(%v, %v) to be %q, got %q", a, b, expected, res, err)
case .Is_Square:
square, err := big.internal_int_is_square(a)
testing.expect(t, err == vec.err)
big.set(res, 1 if square else 0)
expect_a(t, "Expected is_square(%v) to be '%v', got %v", a, expected, res, err)
expect_a(t, "Expected is_square(%v) to be %q, got %q", a, expected, res, err)
case:
log.assertf(false, "Unhandled op: %v", vec.op)