big: Split up add and sub into public and internal parts.

2026-02-17 16:38:22 +00:00 · 2021-08-05 18:38:38 +02:00
parent 511057ca36
commit 9858989b1c
6 changed files with 556 additions and 508 deletions
--- a/core/math/big/basic.odin
+++ b/core/math/big/basic.odin
@@ -23,35 +23,12 @@ import "core:intrinsics"
 /*
 	High-level addition. Handles sign.
 */
-int_add :: proc(dest, a, b: ^Int) -> (err: Error) {
-	dest := dest; x := a; y := b;
-	if err = clear_if_uninitialized(x); err != nil { return err; }
-	if err = clear_if_uninitialized(y); err != nil { return err; }
-	if err = clear_if_uninitialized(dest); err != nil { return err; }
+int_add :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	if err = clear_if_uninitialized(dest, a, b); err != nil { return err; }
 	/*
 		All parameters have been initialized.
-		We can now safely ignore errors from comparison routines.
 	*/
-
-	/*
-		Handle both negative or both positive.
-	*/
-	if x.sign == y.sign {
-		dest.sign = x.sign;
-		return _int_add(dest, x, y);
-	}
-
-	/*
-		One positive, the other negative.
-		Subtract the one with the greater magnitude from the other.
-		The result gets the sign of the one with the greater magnitude.
-	*/
-	if c, _ := cmp_mag(a, b); c == -1 {
-		x, y = y, x;
-	}
-
-	dest.sign = x.sign;
-	return _int_sub(dest, x, y);
+	return #force_inline internal_int_add_signed(dest, a, b, allocator);
 }

 /*
@@ -60,178 +37,28 @@ int_add :: proc(dest, a, b: ^Int) -> (err: Error) {

 	dest = a + digit;
 */
-int_add_digit :: proc(dest, a: ^Int, digit: DIGIT) -> (err: Error) {
-	dest := dest; digit := digit;
-	if err = clear_if_uninitialized(a); err != nil {
-		return err;
-	}
+int_add_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context.allocator) -> (err: Error) {
+	if err = clear_if_uninitialized(a); err != nil { return err; }
 	/*
 		Grow destination as required.
 	*/
-	if err = grow(dest, a.used + 1); err != nil {
-		return err;
-	}
+	if err = grow(dest, a.used + 1, false, allocator); err != nil { return err; }

 	/*
 		All parameters have been initialized.
-		We can now safely ignore errors from comparison routines.
 	*/
-
-	/*
-		Fast paths for destination and input Int being the same.
-	*/
-	if dest == a {
-		/*
-			Fast path for dest.digit[0] + digit fits in dest.digit[0] without overflow.
-		*/
-		if p, _ := is_pos(dest); p && (dest.digit[0] + digit < _DIGIT_MAX) {
-			dest.digit[0] += digit;
-			dest.used += 1;
-			return clamp(dest);
-		}
-		/*
-			Can be subtracted from dest.digit[0] without underflow.
-		*/
-		if n, _ := is_neg(a); n && (dest.digit[0] > digit) {
-			dest.digit[0] -= digit;
-			dest.used += 1;
-			return clamp(dest);
-		}
-	}
-
-	/*
-		If `a` is negative and `|a|` >= `digit`, call `dest = |a| - digit`
-	*/
-	if n, _ := is_neg(a); n && (a.used > 1 || a.digit[0] >= digit) {
-		/*
-			Temporarily fix `a`'s sign.
-		*/
-		a.sign = .Zero_or_Positive;
-		/*
-			dest = |a| - digit
-		*/
-		if err = sub(dest, a, digit); err != nil {
-			/*
-				Restore a's sign.
-			*/
-			a.sign = .Negative;
-			return err;
-		}
-		/*
-			Restore sign and set `dest` sign.
-		*/
-		a.sign    = .Negative;
-		dest.sign = .Negative;
-
-		return clamp(dest);
-	}
-
-	/*
-		Remember the currently used number of digits in `dest`.
-	*/
-	old_used := dest.used;
-
-	/*
-		If `a` is positive
-	*/
-	if p, _ := is_pos(a); p {
-		/*
-			Add digits, use `carry`.
-		*/
-		i: int;
-		carry := digit;
-		for i = 0; i < a.used; i += 1 {
-			dest.digit[i] = a.digit[i] + carry;
-			carry = dest.digit[i] >> _DIGIT_BITS;
-			dest.digit[i] &= _MASK;
-		}
-		/*
-			Set final carry.
-		*/
-		dest.digit[i] = carry;
-		/*
-			Set `dest` size.
-		*/
-		dest.used = a.used + 1;
-	} else {
-		/*
-			`a` was negative and |a| < digit.
-		*/
-		dest.used = 1;
-		/*
-			The result is a single DIGIT.
-		*/
-		dest.digit[0] = digit - a.digit[0] if a.used == 1 else digit;
-	}
-	/*
-		Sign is always positive.
-	*/
-	dest.sign = .Zero_or_Positive;
-
-	zero_count := old_used - dest.used;
-	/*
-		Zero remainder.
-	*/
-	if zero_count > 0 {
-		mem.zero_slice(dest.digit[dest.used:][:zero_count]);
-	}
-	/*
-		Adjust dest.used based on leading zeroes.
-	*/
-	return clamp(dest);
+	return #force_inline internal_int_add_digit(dest, a, digit);
 }

 /*
 	High-level subtraction, dest = number - decrease. Handles signs.
 */
-int_sub :: proc(dest, number, decrease: ^Int) -> (err: Error) {
-	dest := dest; x := number; y := decrease;
-	if err = clear_if_uninitialized(dest); err != nil {
-		return err;
-	}
-	if err = clear_if_uninitialized(x); err != nil {
-		return err;
-	}
-	if err = clear_if_uninitialized(y); err != nil {
-		return err;
-	}
+int_sub :: proc(dest, number, decrease: ^Int, allocator := context.allocator) -> (err: Error) {
+	if err = clear_if_uninitialized(dest, number, decrease); err != nil { return err; }
 	/*
 		All parameters have been initialized.
-		We can now safely ignore errors from comparison routines.
 	*/
-
-	if x.sign != y.sign {
-		/*
-			Subtract a negative from a positive, OR subtract a positive from a negative.
-			In either case, ADD their magnitudes and use the sign of the first number.
-		*/
-		dest.sign = x.sign;
-		return _int_add(dest, x, y);
-	}
-
-	/*
-		Subtract a positive from a positive, OR negative from a negative.
-		First, take the difference between their magnitudes, then...
-	*/
-	if c, _ := cmp_mag(x, y); c == -1 {
-		/*
-			The second has a larger magnitude.
-			The result has the *opposite* sign from the first number.
-		*/
-		if p, _ := is_pos(x); p {
-			dest.sign = .Negative;
-		} else {
-			dest.sign = .Zero_or_Positive;
-		}
-		x, y = y, x;
-	} else {
-		/*
-			The first has a larger or equal magnitude.
-			Copy the sign from the first.
-		*/
-		dest.sign = x.sign;
-	}
-	return _int_sub(dest, x, y);
+	return #force_inline internal_int_sub_signed(dest, number, decrease, allocator);
 }

 /*
@@ -240,99 +67,19 @@ int_sub :: proc(dest, number, decrease: ^Int) -> (err: Error) {

 	dest = a - digit;
 */
-int_sub_digit :: proc(dest, a: ^Int, digit: DIGIT) -> (err: Error) {
-	dest := dest; digit := digit;
-	if err = clear_if_uninitialized(dest); err != nil {
-		return err;
-	}
+int_sub_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context.allocator) -> (err: Error) {
+	if err = clear_if_uninitialized(a); err != nil { return err; }
 	/*
 		Grow destination as required.
 	*/
-	if dest != a {
-		if err = grow(dest, a.used + 1); err != nil {
-			return err;
-		}
-	}
+	if err = grow(dest, a.used + 1, false, allocator); err != nil { return err; }
+
 	/*
 		All parameters have been initialized.
-		We can now safely ignore errors from comparison routines.
 	*/
-
-	/*
-		Fast paths for destination and input Int being the same.
-	*/
-	if dest == a {
-		/*
-			Fast path for `dest` is negative and unsigned addition doesn't overflow the lowest digit.
-		*/
-		if n, _ := is_neg(dest); n && (dest.digit[0] + digit < _DIGIT_MAX) {
-			dest.digit[0] += digit;
-			return nil;
-		}
-		/*
-			Can be subtracted from dest.digit[0] without underflow.
-		*/
-		if p, _ := is_pos(a); p && (dest.digit[0] > digit) {
-			dest.digit[0] -= digit;
-			return nil;
-		}
-	}
-
-	/*
-		If `a` is negative, just do an unsigned addition (with fudged signs).
-	*/
-	if n, _ := is_neg(a); n {
-		t := a;
-		t.sign = .Zero_or_Positive;
-
-		err = add(dest, t, digit);
-		dest.sign = .Negative;
-
-		clamp(dest);
-		return err;
-	}
-
-	old_used := dest.used;
-
-	/*
-		if `a`<= digit, simply fix the single digit.
-	*/
-	z, _ := is_zero(a);
-
-	if a.used == 1 && (a.digit[0] <= digit) || z {
-		dest.digit[0] = digit - a.digit[0] if a.used == 1 else digit;
-		dest.sign = .Negative;
-		dest.used = 1;
-	} else {
-		dest.sign = .Zero_or_Positive;
-		dest.used = a.used;
-
-		/*
-			Subtract with carry.
-		*/
-		carry := digit;
-
-		for i := 0; i < a.used; i += 1 {
-			dest.digit[i] = a.digit[i] - carry;
-			carry := dest.digit[i] >> ((size_of(DIGIT) * 8) - 1);
-			dest.digit[i] &= _MASK;
-		}
-	}
-
-	zero_count := old_used - dest.used;
-	/*
-		Zero remainder.
-	*/
-	if zero_count > 0 {
-		mem.zero_slice(dest.digit[dest.used:][:zero_count]);
-	}
-	/*
-		Adjust dest.used based on leading zeroes.
-	*/
-	return clamp(dest);
+	return #force_inline internal_int_sub_digit(dest, a, digit);
 }

-
 /*
 	dest = src  / 2
 	dest = src >> 1
@@ -870,167 +617,6 @@ int_choose_digit :: proc(res: ^Int, n, k: DIGIT) -> (err: Error) {
 }
 choose :: proc { int_choose_digit, };

-/*
-	==========================
-		Low-level routines    
-	==========================
-*/
-
-/*
-	Low-level addition, unsigned.
-	Handbook of Applied Cryptography, algorithm 14.7.
-*/
-_int_add :: proc(dest, a, b: ^Int) -> (err: Error) {
-	dest := dest; x := a; y := b;
-
-	old_used, min_used, max_used, i: int;
-
-	if x.used < y.used {
-		x, y = y, x;
-		assert(x.used >= y.used);
-	}
-
-	min_used = y.used;
-	max_used = x.used;
-	old_used = dest.used;
-
-	if err = grow(dest, max(max_used + 1, _DEFAULT_DIGIT_COUNT)); err != nil {
-		return err;
-	}
-	dest.used = max_used + 1;
-	/*
-		All parameters have been initialized.
-	*/
-
-	/* Zero the carry */
-	carry := DIGIT(0);
-
-	#no_bounds_check for i = 0; i < min_used; i += 1 {
-		/*
-			Compute the sum one _DIGIT at a time.
-			dest[i] = a[i] + b[i] + carry;
-		*/
-		dest.digit[i] = x.digit[i] + y.digit[i] + carry;
-
-		/*
-			Compute carry
-		*/
-		carry = dest.digit[i] >> _DIGIT_BITS;
-		/*
-			Mask away carry from result digit.
-		*/
-		dest.digit[i] &= _MASK;
-	}
-
-	if min_used != max_used {
-		/*
-			Now copy higher words, if any, in A+B.
-			If A or B has more digits, add those in.
-		*/
-		#no_bounds_check for ; i < max_used; i += 1 {
-			dest.digit[i] = x.digit[i] + carry;
-			/*
-				Compute carry
-			*/
-			carry = dest.digit[i] >> _DIGIT_BITS;
-			/*
-				Mask away carry from result digit.
-			*/
-			dest.digit[i] &= _MASK;
-		}
-	}
-	/*
-		Add remaining carry.
-	*/
-	dest.digit[i] = carry;
-	zero_count := old_used - dest.used;
-	/*
-		Zero remainder.
-	*/
-	if zero_count > 0 {
-		mem.zero_slice(dest.digit[dest.used:][:zero_count]);
-	}
-	/*
-		Adjust dest.used based on leading zeroes.
-	*/
-	return clamp(dest);
-}
-
-/*
-	Low-level subtraction, dest = number - decrease. Assumes |number| > |decrease|.
-	Handbook of Applied Cryptography, algorithm 14.9.
-*/
-_int_sub :: proc(dest, number, decrease: ^Int) -> (err: Error) {
-	dest := dest; x := number; y := decrease;
-	if err = clear_if_uninitialized(x); err != nil {
-		return err;
-	}
-	if err = clear_if_uninitialized(y); err != nil {
-		return err;
-	}
-
-	old_used := dest.used;
-	min_used := y.used;
-	max_used := x.used;
-	i: int;
-
-	if err = grow(dest, max(max_used, _DEFAULT_DIGIT_COUNT)); err != nil {
-		return err;
-	}
-	dest.used = max_used;
-	/*
-		All parameters have been initialized.
-	*/
-
-	borrow := DIGIT(0);
-
-	#no_bounds_check for i = 0; i < min_used; i += 1 {
-		dest.digit[i] = (x.digit[i] - y.digit[i] - borrow);
-		/*
-			borrow = carry bit of dest[i]
-			Note this saves performing an AND operation since if a carry does occur,
-			it will propagate all the way to the MSB.
-			As a result a single shift is enough to get the carry.
-		*/
-		borrow = dest.digit[i] >> ((size_of(DIGIT) * 8) - 1);
-		/*
-			Clear borrow from dest[i].
-		*/
-		dest.digit[i] &= _MASK;
-	}
-
-	/*
-		Now copy higher words if any, e.g. if A has more digits than B
-	*/
-	#no_bounds_check for ; i < max_used; i += 1 {
-		dest.digit[i] = x.digit[i] - borrow;
-		/*
-			borrow = carry bit of dest[i]
-			Note this saves performing an AND operation since if a carry does occur,
-			it will propagate all the way to the MSB.
-			As a result a single shift is enough to get the carry.
-		*/
-		borrow = dest.digit[i] >> ((size_of(DIGIT) * 8) - 1);
-		/*
-			Clear borrow from dest[i].
-		*/
-		dest.digit[i] &= _MASK;
-	}
-
-	zero_count := old_used - dest.used;
-	/*
-		Zero remainder.
-	*/
-	if zero_count > 0 {
-		mem.zero_slice(dest.digit[dest.used:][:zero_count]);
-	}
-	/*
-		Adjust dest.used based on leading zeroes.
-	*/
-	return clamp(dest);
-}
-
-
 /*
 	Multiplies |a| * |b| and only computes upto digs digits of result.
 	HAC pp. 595, Algorithm 14.12  Modified so you can control how
--- a/core/math/big/build.bat
+++ b/core/math/big/build.bat
@@ -1,10 +1,10 @@
@echo off
-odin run . -vet
+:odin run . -vet-more
 : -o:size -no-bounds-check
-:odin build . -build-mode:shared -show-timings -o:minimal -use-separate-modules
-:odin build . -build-mode:shared -show-timings -o:size -use-separate-modules -no-bounds-check
-:odin build . -build-mode:shared -show-timings -o:size -use-separate-modules
-:odin build . -build-mode:shared -show-timings -o:speed -use-separate-modules -no-bounds-check
-:odin build . -build-mode:shared -show-timings -o:speed -use-separate-modules
+:odin build . -build-mode:shared -show-timings -o:minimal -no-bounds-check
+:odin build . -build-mode:shared -show-timings -o:size -no-bounds-check
+:odin build . -build-mode:shared -show-timings -o:size
+odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check
+:odin build . -build-mode:shared -show-timings -o:speed

-:python test.py
+python test.py
--- a/core/math/big/example.odin
+++ b/core/math/big/example.odin
@@ -12,7 +12,6 @@ package big

 import "core:fmt"
 import "core:mem"
-import "core:time"

 print_configation :: proc() {
 	fmt.printf(
@@ -42,74 +41,6 @@ _SQR_TOOM_CUTOFF,

 }

-print_timings :: proc() {
-	fmt.printf("Timings:\n");
-	for v, i in Timings {
-		if v.count > 0 {
-			avg_ticks  := time.Duration(f64(v.ticks) / f64(v.count));
-			avg_cycles := f64(v.cycles) / f64(v.count);
-
-			avg_s: string;
-			switch {
-			case avg_ticks < time.Microsecond:
-				avg_s = fmt.tprintf("%v ns / %v cycles", time.duration_nanoseconds(avg_ticks), avg_cycles);
-			case avg_ticks < time.Millisecond:
-				avg_s = fmt.tprintf("%v µs / %v cycles", time.duration_microseconds(avg_ticks), avg_cycles);
-			case:
-				avg_s = fmt.tprintf("%v ms / %v cycles", time.duration_milliseconds(avg_ticks), avg_cycles);
-			}
-
-			total_s: string;
-			switch {
-			case v.ticks < time.Microsecond:
-				total_s = fmt.tprintf("%v ns / %v cycles", time.duration_nanoseconds(v.ticks), v.cycles);
-			case v.ticks < time.Millisecond:
-				total_s = fmt.tprintf("%v µs / %v cycles", time.duration_microseconds(v.ticks), v.cycles);
-			case:
-				total_s = fmt.tprintf("%v ms / %v cycles", time.duration_milliseconds(v.ticks), v.cycles);
-			}
-
-			fmt.printf("\t%v: %s (avg), %s (total, %v calls)\n", i, avg_s, total_s, v.count);
-		}
-	}
-}
-
-@(deferred_in_out=_SCOPE_END)
-SCOPED_TIMING :: #force_inline proc(c: Category) -> (ticks: time.Tick, cycles: u64) {
-	cycles = time.read_cycle_counter();
-	ticks  = time.tick_now();
-	return;
-}
-_SCOPE_END :: #force_inline proc(c: Category, ticks: time.Tick, cycles: u64) {
-	cycles_now := time.read_cycle_counter();
-	ticks_now  := time.tick_now();
-
-	Timings[c].ticks  = time.tick_diff(ticks, ticks_now);
-	Timings[c].cycles = cycles_now - cycles;
-	Timings[c].count += 1;
-}
-SCOPED_COUNT_ADD :: #force_inline proc(c: Category, count: int) {
-	Timings[c].count += count;
-}
-
-Category :: enum {
-	itoa,
-	atoi,
-	factorial,
-	factorial_bin,
-	choose,
-	lsb,
-	ctz,
-	bitfield_extract,
-};
-
-Event :: struct {
-	ticks:  time.Duration,
-	count:  int,
-	cycles: u64,
-}
-Timings := [Category]Event{};
-
 print :: proc(name: string, a: ^Int, base := i8(10), print_name := true, newline := true, print_extra_info := false) {
 	as, err := itoa(a, base);

--- a/core/math/big/internal.odin
+++ b/core/math/big/internal.odin
@@ -0,0 +1,446 @@
+//+ignore
+package big
+
+/*
+	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+	Made available under Odin's BSD-2 license.
+
+	A BigInt implementation in Odin.
+	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+
+	==========================    Low-level routines    ==========================
+
+	IMPORTANT: `internal_*` procedures make certain assumptions about their input.
+
+	The public functions that call them are expected to satisfy their sanity check requirements.
+	This allows `internal_*` procedures to call other `internal_*` procedures without paying this overhead multiple times.
+
+	Where errors can occur, they are of course still checked and returned as appropriate.
+
+	When importing `core:math/big` to implement an involved algorithm of your own, you are welcome
+	to use these procedures instead of their public counterparts.
+
+	Most inputs and outputs are expected to be passed an initialized `Int`, for example.
+	Exceptions include `quotient` and `remainder`, which are allowed to be `nil` when the calling code doesn't need them.
+
+	Check the comments above each `internal_*` implementation to see what constraints it expects to have met.
+*/
+
+import "core:mem"
+
+/*
+	Low-level addition, unsigned: dest = |a| + |b|. Handbook of Applied Cryptography, algorithm 14.7.
+	The `sign` fields are not read or assigned directly in this procedure.
+	Assumptions:
+		`dest`, `a` and `b` != `nil` and have been initialized.
+*/
+internal_int_add_unsigned :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	dest := dest; x := a; y := b;
+
+	old_used, min_used, max_used, i: int;
+
+	if x.used < y.used {
+		x, y = y, x;
+		assert(x.used >= y.used);
+	}
+
+	min_used = y.used;
+	max_used = x.used;
+	old_used = dest.used;
+
+	if err = grow(dest, max(max_used + 1, _DEFAULT_DIGIT_COUNT), false, allocator); err != nil { return err; }
+	dest.used = max_used + 1;
+	/*
+		The operand lengths were captured above, before `dest.used` was overwritten.
+	*/
+
+	/* Zero the carry */
+	carry := DIGIT(0);
+
+	#no_bounds_check for i = 0; i < min_used; i += 1 {
+		/*
+			Compute the sum one DIGIT at a time.
+			dest[i] = x[i] + y[i] + carry;
+		*/
+		dest.digit[i] = x.digit[i] + y.digit[i] + carry;
+
+		/*
+			The carry is whatever ended up above the low `_DIGIT_BITS` bits.
+		*/
+		carry = dest.digit[i] >> _DIGIT_BITS;
+		/*
+			Mask away carry from result digit.
+		*/
+		dest.digit[i] &= _MASK;
+	}
+
+	if min_used != max_used {
+		/*
+			`x` is the longer operand: copy its remaining digits,
+			adding in the carry as it propagates.
+		*/
+		#no_bounds_check for ; i < max_used; i += 1 {
+			dest.digit[i] = x.digit[i] + carry;
+			/*
+				The carry is whatever ended up above the low `_DIGIT_BITS` bits.
+			*/
+			carry = dest.digit[i] >> _DIGIT_BITS;
+			/*
+				Mask away carry from result digit.
+			*/
+			dest.digit[i] &= _MASK;
+		}
+	}
+	/*
+		Store the final carry in the top digit.
+	*/
+	dest.digit[i] = carry;
+	zero_count := old_used - dest.used;
+	/*
+		Zero the digits `dest` used before this call but no longer uses.
+	*/
+	if zero_count > 0 {
+		mem.zero_slice(dest.digit[dest.used:][:zero_count]);
+	}
+	/*
+		Adjust dest.used based on leading zeroes.
+	*/
+	return clamp(dest);
+}
+
+/*
+	Low-level addition, signed: dest = a + b. Handbook of Applied Cryptography, algorithm 14.7.
+
+	Assumptions:
+		`dest`, `a` and `b` != `nil` and have been initialized.
+*/
+internal_int_add_signed :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) {
+	/*
+		Equal signs: add the magnitudes and keep the shared sign.
+	*/
+	if a.sign == b.sign {
+		dest.sign = a.sign;
+		return #force_inline internal_int_add_unsigned(dest, a, b, allocator);
+	}
+
+	/*
+		Opposite signs: subtract the smaller magnitude from the larger one.
+		The operand with the larger magnitude also supplies the sign of the result.
+		The comparison's error value is discarded; the operands are assumed to be initialized.
+	*/
+	larger, smaller := a, b;
+	if order, _ := #force_inline cmp_mag(a, b); order == -1 {
+		larger, smaller = b, a;
+	}
+
+	dest.sign = larger.sign;
+	return #force_inline internal_int_sub_unsigned(dest, larger, smaller, allocator);
+}
+
+/*
+	Low-level addition Int+DIGIT, signed: dest = a + digit. Handbook of Applied Cryptography, algorithm 14.7.
+
+	Assumptions:
+		`dest` and `a` != `nil` and have been initialized.
+		`dest` is large enough (a.used + 1) to fit result.
+*/
+internal_int_add_digit :: proc(dest, a: ^Int, digit: DIGIT) -> (err: Error) {
+	/*
+		Fast paths for destination and input Int being the same.
+	*/
+	if dest == a {
+		/*
+			Fast path for dest.digit[0] + digit fits in dest.digit[0] without overflow.
+		*/
+		if dest.sign == .Zero_or_Positive && (dest.digit[0] + digit < _DIGIT_MAX) {
+			dest.digit[0] += digit;
+			dest.used += 1;
+			return clamp(dest);
+		}
+		/*
+			Can be subtracted from dest.digit[0] without underflow.
+		*/
+		if a.sign == .Negative && (dest.digit[0] > digit) {
+			dest.digit[0] -= digit;
+			dest.used += 1;
+			return clamp(dest);
+		}
+	}
+
+	/*
+		If `a` is negative and `|a|` >= `digit`, the result is -(|a| - digit).
+	*/
+	if a.sign == .Negative && (a.used > 1 || a.digit[0] >= digit) {
+		/*
+			Temporarily make `a` positive so the subtraction below sees |a|.
+		*/
+		a.sign = .Zero_or_Positive;
+		/*
+			dest = |a| - digit. Calling this procedure again here would compute |a| + digit instead.
+			Plain call: `internal_int_sub_digit` force-inlines this procedure in turn.
+		*/
+		if err = internal_int_sub_digit(dest, a, digit); err != nil {
+			/* Restore `a`'s sign before reporting the error. */
+			a.sign = .Negative;
+			return err;
+		}
+		/*
+			Restore `a`'s sign; the result is -(|a| - digit), so `dest` is marked negative.
+			`clamp` then adjusts dest.used based on leading zeroes.
+		*/
+		a.sign    = .Negative;
+		dest.sign = .Negative;
+
+		return clamp(dest);
+	}
+
+	/*
+		Remember the currently used number of digits in `dest`.
+	*/
+	old_used := dest.used;
+
+	/*
+		If `a` is positive
+	*/
+	if a.sign == .Zero_or_Positive {
+		/*
+			Add digits, use `carry`. The carry starts out as `digit` itself.
+		*/
+		i: int;
+		carry := digit;
+		#no_bounds_check for i = 0; i < a.used; i += 1 {
+			dest.digit[i] = a.digit[i] + carry;
+			carry = dest.digit[i] >> _DIGIT_BITS;
+			dest.digit[i] &= _MASK;
+		}
+		/*
+			Set final carry.
+		*/
+		dest.digit[i] = carry;
+		/*
+			Set `dest` size.
+		*/
+		dest.used = a.used + 1;
+	} else {
+		/*
+			`a` was negative and |a| < digit.
+		*/
+		dest.used = 1;
+		/*
+			The result is a single DIGIT.
+		*/
+		dest.digit[0] = digit - a.digit[0] if a.used == 1 else digit;
+	}
+	/*
+		Sign is always positive.
+	*/
+	dest.sign = .Zero_or_Positive;
+
+	zero_count := old_used - dest.used;
+	/*
+		Zero the digits `dest` used before this call but no longer uses.
+	*/
+	if zero_count > 0 {
+		mem.zero_slice(dest.digit[dest.used:][:zero_count]);
+	}
+	/*
+		Adjust dest.used based on leading zeroes.
+	*/
+	return clamp(dest);
+}
+
+internal_add :: proc { internal_int_add_signed, internal_int_add_digit, }; // Procedure group over the Int+Int and Int+DIGIT variants.
+
+/*
+	Low-level subtraction, unsigned: dest = |number| - |decrease|. Assumes |number| >= |decrease|.
+	Handbook of Applied Cryptography, algorithm 14.9.
+
+	Assumptions:
+		`dest`, `number` and `decrease` != `nil` and have been initialized.
+*/
+internal_int_sub_unsigned :: proc(dest, number, decrease: ^Int, allocator := context.allocator) -> (err: Error) {
+	dest := dest; x := number; y := decrease;
+	old_used := dest.used;
+	min_used := y.used;
+	max_used := x.used;
+	i: int;
+
+	if err = grow(dest, max(max_used, _DEFAULT_DIGIT_COUNT), false, allocator); err != nil { return err; }
+	dest.used = max_used;
+	/*
+		`old_used`, `min_used` and `max_used` were read above, before `dest.used` was overwritten.
+	*/
+
+	borrow := DIGIT(0);
+
+	#no_bounds_check for i = 0; i < min_used; i += 1 {
+		dest.digit[i] = (x.digit[i] - y.digit[i] - borrow);
+		/*
+			borrow = carry bit of dest[i]
+			Note this saves performing an AND operation since if a carry does occur,
+			it will propagate all the way to the MSB.
+			As a result a single shift is enough to get the carry.
+		*/
+		borrow = dest.digit[i] >> ((size_of(DIGIT) * 8) - 1);
+		/*
+			Clear borrow from dest[i].
+		*/
+		dest.digit[i] &= _MASK;
+	}
+
+	/*
+		Now copy the higher digits of `number`, if it has more than `decrease`, still applying the borrow.
+	*/
+	#no_bounds_check for ; i < max_used; i += 1 {
+		dest.digit[i] = x.digit[i] - borrow;
+		/*
+			borrow = carry bit of dest[i]
+			Note this saves performing an AND operation since if a carry does occur,
+			it will propagate all the way to the MSB.
+			As a result a single shift is enough to get the carry.
+		*/
+		borrow = dest.digit[i] >> ((size_of(DIGIT) * 8) - 1);
+		/*
+			Clear borrow from dest[i].
+		*/
+		dest.digit[i] &= _MASK;
+	}
+
+	zero_count := old_used - dest.used;
+	/*
+		Zero the digits `dest` used before this call but no longer uses.
+	*/
+	if zero_count > 0 {
+		mem.zero_slice(dest.digit[dest.used:][:zero_count]);
+	}
+	/*
+		Adjust dest.used based on leading zeroes.
+	*/
+	return clamp(dest);
+}
+
+/*
+	Low-level subtraction, signed: dest = number - decrease, for any combination of signs and magnitudes.
+	The magnitude work is delegated to `internal_int_add_unsigned` and `internal_int_sub_unsigned`.
+
+	Assumptions:
+		`dest`, `number` and `decrease` != `nil` and have been initialized.
+*/
+internal_int_sub_signed :: proc(dest, number, decrease: ^Int, allocator := context.allocator) -> (err: Error) {
+	/*
+		Opposite signs: the result has magnitude |number| + |decrease|
+		and takes the sign of `number`.
+	*/
+	if number.sign != decrease.sign {
+		dest.sign = number.sign;
+		return #force_inline internal_int_add_unsigned(dest, number, decrease, allocator);
+	}
+
+	/*
+		Same sign: the result is the difference between the magnitudes.
+		`minuend` has to end up being the operand with the larger (or equal) magnitude.
+	*/
+	minuend, subtrahend := number, decrease;
+	result_sign := number.sign;
+	if order, _ := #force_inline cmp_mag(number, decrease); order == -1 {
+		/*
+			`decrease` has the larger magnitude: swap the operands and flip the sign.
+		*/
+		minuend, subtrahend = decrease, number;
+		if result_sign == .Zero_or_Positive {
+			result_sign = .Negative;
+		} else {
+			result_sign = .Zero_or_Positive;
+		}
+	}
+
+	dest.sign = result_sign;
+	return #force_inline internal_int_sub_unsigned(dest, minuend, subtrahend, allocator);
+}
+
+/*
+	Low-level subtraction Int-DIGIT, signed: dest = number - digit.
+	Handbook of Applied Cryptography, algorithm 14.9.
+
+	Assumptions:
+		`dest`, `number` != `nil` and have been initialized.
+		`dest` is large enough (number.used + 1) to fit result.
+*/
+internal_int_sub_digit :: proc(dest, number: ^Int, digit: DIGIT) -> (err: Error) {
+	dest := dest; digit := digit;
+	/*
+		All parameters have been initialized.
+
+		Fast paths for destination and input Int being the same.
+	*/
+	if dest == number {
+		/*
+			Fast path for `dest` is negative and unsigned addition doesn't overflow the lowest digit.
+		*/
+		if dest.sign == .Negative && (dest.digit[0] + digit < _DIGIT_MAX) {
+			dest.digit[0] += digit;
+			return nil;
+		}
+		/*
+			Can be subtracted from dest.digit[0] without underflow.
+		*/
+		if number.sign == .Zero_or_Positive && (dest.digit[0] > digit) {
+			dest.digit[0] -= digit;
+			return nil;
+		}
+	}
+
+	/*
+		If `number` is negative, add the magnitudes instead: dest = -(|number| + digit).
+	*/
+	if number.sign == .Negative {
+		number.sign = .Zero_or_Positive;
+		err = #force_inline internal_int_add_digit(dest, number, digit);
+		/* Undo the temporary sign change, so `number` stays intact when it is not `dest`. */
+		number.sign = .Negative;
+		dest.sign   = .Negative;
+
+		clamp(dest);
+		return err;
+	}
+
+	old_used := dest.used;
+
+	/*
+		If `number` <= digit, the result is the single digit (digit - number), marked negative.
+	*/
+	if number.used == 1 && (number.digit[0] <= digit) || number.used == 0 {
+		dest.digit[0] = digit - number.digit[0] if number.used == 1 else digit;
+		dest.sign = .Negative;
+		dest.used = 1;
+	} else {
+		dest.sign = .Zero_or_Positive;
+		dest.used = number.used;
+
+		/*
+			Subtract `digit` from the lowest digit, then propagate the borrow upwards.
+		*/
+		carry := digit;
+
+		#no_bounds_check for i := 0; i < number.used; i += 1 {
+			dest.digit[i] = number.digit[i] - carry;
+			carry = dest.digit[i] >> (_DIGIT_TYPE_BITS - 1); // Assign, don't redeclare: the borrow has to reach the next digit.
+			dest.digit[i] &= _MASK;
+		}
+	}
+
+	zero_count := old_used - dest.used;
+	/*
+		Zero the digits `dest` used before this call but no longer uses.
+	*/
+	if zero_count > 0 {
+		mem.zero_slice(dest.digit[dest.used:][:zero_count]);
+	}
+	/*
+		Adjust dest.used based on leading zeroes.
+	*/
+	return clamp(dest);
+}
+
+internal_sub :: proc { internal_int_sub_signed, internal_int_sub_digit, }; // Procedure group over the Int-Int and Int-DIGIT variants.
--- a/core/math/big/test.odin
+++ b/core/math/big/test.odin
@@ -9,7 +9,11 @@ package big
 	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
 	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.

-	This file contains basic arithmetic operations like `add`, `sub`, `mul`, `div`, ...
+	This file exports procedures for use with the test.py test suite.
+*/
+
+/*
+	TODO: Write tests for `internal_*` and test reusing parameters with the public implementations.
 */

 import "core:runtime"
@@ -35,7 +39,11 @@ PyRes :: struct {

 	if err = atoi(aa, string(a), 16); err != nil { return PyRes{res=":add:atoi(a):", err=err}; }
 	if err = atoi(bb, string(b), 16); err != nil { return PyRes{res=":add:atoi(b):", err=err}; }
-	if err = add(sum, aa, bb);        err != nil { return PyRes{res=":add:add(sum,a,b):", err=err}; }
+	if bb.used == 1 {
+		if err = add(sum, aa, bb.digit[0]); err != nil { return PyRes{res=":add:add(sum,a,b):", err=err}; }	
+	} else {
+		if err = add(sum, aa, bb);          err != nil { return PyRes{res=":add:add(sum,a,b):", err=err}; }
+	}

 	r: cstring;
 	r, err = int_itoa_cstring(sum, 16, context.temp_allocator);
@@ -52,7 +60,11 @@ PyRes :: struct {

 	if err = atoi(aa, string(a), 16); err != nil { return PyRes{res=":sub:atoi(a):", err=err}; }
 	if err = atoi(bb, string(b), 16); err != nil { return PyRes{res=":sub:atoi(b):", err=err}; }
-	if err = sub(sum, aa, bb);        err != nil { return PyRes{res=":sub:sub(sum,a,b):", err=err}; }
+	if bb.used == 1 {
+		if err = sub(sum, aa, bb.digit[0]); err != nil { return PyRes{res=":sub:sub(sum,a,b):", err=err}; }
+	} else {
+		if err = sub(sum, aa, bb);          err != nil { return PyRes{res=":sub:sub(sum,a,b):", err=err}; }
+	}

 	r: cstring;
 	r, err = int_itoa_cstring(sum, 16, context.temp_allocator);
--- a/core/math/big/tune.odin
+++ b/core/math/big/tune.odin
@@ -0,0 +1,73 @@
+//+ignore
+package big
+
+/*
+	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+	Made available under Odin's BSD-2 license.
+
+	A BigInt implementation in Odin.
+	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+*/
+
+import "core:fmt"
+import "core:time"
+
+Category :: enum { // Timing categories; each value indexes `Timings`.
+	itoa,
+	atoi,
+	factorial,
+	factorial_bin,
+	choose,
+	lsb,
+	ctz,
+	bitfield_extract,
+};
+
+Event :: struct { // Measurements recorded for one `Category`.
+	ticks:  time.Duration, // Elapsed time, written by `_SCOPE_END`.
+	count:  int,           // Bumped by `_SCOPE_END` and `SCOPED_COUNT_ADD`.
+	cycles: u64,           // Cycle counter difference, written by `_SCOPE_END`.
+}
+Timings := [Category]Event{}; // One `Event` per `Category`, starting out empty.
+
+/* Prints the average and total time and cycle counts for every category that has a non-zero count. */
+print_timings :: proc() {
+	/* Formats `d` as nanoseconds, microseconds or milliseconds, picking the unit by magnitude. */
+	duration :: proc(d: time.Duration) -> (res: string) {
+		if d < time.Microsecond {
+			return fmt.tprintf("%v ns", time.duration_nanoseconds(d));
+		}
+		if d < time.Millisecond {
+			return fmt.tprintf("%v µs", time.duration_microseconds(d));
+		}
+		return fmt.tprintf("%v ms", time.duration_milliseconds(d));
+	}
+
+	fmt.println("\nTimings:");
+	for event, category in Timings {
+		/* Categories without any recorded calls are skipped, which also avoids dividing by zero. */
+		if event.count <= 0 { continue; }
+		avg_ticks  := time.Duration(f64(event.ticks) / f64(event.count));
+		avg_cycles := f64(event.cycles) / f64(event.count);
+		fmt.printf("\t%v: %s / %v cycles (avg), %s / %v cycles (total, %v calls)\n", category, duration(avg_ticks), avg_cycles, duration(event.ticks), event.cycles, event.count);
+	}
+}
+
+@(deferred_in_out=_SCOPE_END) // `_SCOPE_END` takes `c` plus the two values returned here.
+SCOPED_TIMING :: #force_inline proc(c: Category) -> (ticks: time.Tick, cycles: u64) {
+	cycles = time.read_cycle_counter(); // Start-of-scope cycle counter.
+	ticks  = time.tick_now();           // Start-of-scope tick.
+	return;
+}
+_SCOPE_END :: #force_inline proc(c: Category, ticks: time.Tick, cycles: u64) { // Named in `SCOPED_TIMING`'s `deferred_in_out` attribute.
+	cycles_now := time.read_cycle_counter();
+	ticks_now  := time.tick_now();
+
+	Timings[c].ticks  = time.tick_diff(ticks, ticks_now); // Overwrites the stored value; only `count` accumulates.
+	Timings[c].cycles = cycles_now - cycles;              // NOTE(review): `print_timings` divides these by `count`; confirm `=` rather than `+=` is intended.
+	Timings[c].count += 1;
+}
+SCOPED_COUNT_ADD :: #force_inline proc(c: Category, count: int) {
+	Timings[c].count = Timings[c].count + count; // Only the call count changes; `ticks` and `cycles` are left alone.
+}