bit: Improved bitfield extraction.

2026-02-19 01:18:22 +00:00 · 2021-08-04 21:25:38 +02:00
parent 85a2a8815e
commit 463003e86a
6 changed files with 158 additions and 102 deletions
--- a/core/math/big/basic.odin
+++ b/core/math/big/basic.odin
@@ -711,7 +711,12 @@ int_mod :: proc(remainder, numerator, denominator: ^Int) -> (err: Error) {
 	if z, err = is_zero(remainder); z || denominator.sign == remainder.sign { return nil; }
 	return add(remainder, remainder, numerator);
 }
-mod :: proc { int_mod, };
+
+int_mod_digit :: proc(numerator: ^Int, denominator: DIGIT) -> (remainder: DIGIT, err: Error) {
+	return _int_div_digit(nil, numerator, denominator);
+}
+
+mod :: proc { int_mod, int_mod_digit, };

 /*
 	remainder = (number + addend) % modulus.
@@ -1263,14 +1268,14 @@ _int_sqr :: proc(dest, src: ^Int) -> (err: Error) {
 /*
 	Divide by three (based on routine from MPI and the GMP manual).
 */
-_int_div_3 :: proc(quotient, numerator: ^Int) -> (remainder: int, err: Error) {
+_int_div_3 :: proc(quotient, numerator: ^Int) -> (remainder: DIGIT, err: Error) {
 	/*
 		b = 2**MP_DIGIT_BIT / 3
 	*/
 	b := _WORD(1) << _WORD(_DIGIT_BITS) / _WORD(3);

 	q := &Int{};
-	if err = grow(q, numerator.used); err != nil { return -1, err; }
+	if err = grow(q, numerator.used); err != nil { return 0, err; }
 	q.used = numerator.used;
 	q.sign = numerator.sign;

@@ -1300,8 +1305,7 @@ _int_div_3 :: proc(quotient, numerator: ^Int) -> (remainder: int, err: Error) {
 		}
 		q.digit[ix] = DIGIT(t);
 	}
-
-	remainder = int(w);
+	remainder = DIGIT(w);

 	/*
 		[optional] store the quotient.
@@ -1542,7 +1546,7 @@ _int_div_small :: proc(quotient, remainder, numerator, denominator: ^Int) -> (er
 /*
 	Single digit division (based on routine from MPI).
 */
-_int_div_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT) -> (remainder: int, err: Error) {
+_int_div_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT) -> (remainder: DIGIT, err: Error) {
 	q := &Int{};
 	ix: int;

@@ -1581,7 +1585,7 @@ _int_div_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT) -> (remain
 		for ix < _DIGIT_BITS && denominator != (1 << uint(ix)) {
 			ix += 1;
 		}
-		remainder = int(numerator.digit[0]) & ((1 << uint(ix)) - 1);
+		remainder = numerator.digit[0] & ((1 << uint(ix)) - 1);
 		if quotient == nil {
 			return remainder, nil;
 		}
@@ -1615,7 +1619,7 @@ _int_div_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT) -> (remain
 		}
 		q.digit[ix] = t;
 	}
-	remainder = int(w);
+	remainder = DIGIT(w);

 	if quotient != nil {
 		clamp(q);
--- a/core/math/big/example.odin
+++ b/core/math/big/example.odin
@@ -81,6 +81,8 @@ Category :: enum {
 	choose,
 	lsb,
 	ctz,
+	bitfield_extract_old,
+	bitfield_extract_new,
 };
 Event :: struct {
 	t: time.Duration,
@@ -118,39 +120,41 @@ demo :: proc() {
 	a, b, c, d, e, f := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
 	defer destroy(a, b, c, d, e, f);

-	nan(a);
-	print(" nan: ", a, 10, true, true, true);
-	fmt.println();
+	err = factorial(a, 1224);
+	count, _ := count_bits(a);

-	inf(a);
-	print(" inf: ", a, 10, true, true, true);
-	fmt.println();
+	bits   :=  101;
+	be1, be2: _WORD;

-	minus_inf(a);
-	print("-inf: ", a, 10, true, true, true);
-	fmt.println();
-
-
-	factorial(a, 128); // Untimed warmup.
-
-	N :: 128;
-
-	s := time.tick_now();
-	err = factorial(a, N);
-	Timings[.factorial].t += time.tick_since(s); Timings[.factorial].c += 1;
-
-	if err != nil {
-		fmt.printf("factorial(%v) returned %v\n", N, err);
+	/*
+		Sanity check loop.
+	*/
+	for o := 0; o < count - bits; o += 1 {
+		be1, _ = int_bitfield_extract(a, o, bits);
+		be2, _ = int_bitfield_extract_fast(a, o, bits);
+		if be1 != be2 {
+			fmt.printf("Offset: %v | Expected: %v | Got: %v\n", o, be1, be2);
+			assert(false);
+		}
 	}

-	s = time.tick_now();
-	as, err = itoa(a, 16);
-	Timings[.itoa].t += time.tick_since(s); Timings[.itoa].c += 1;
-	if err != nil {
-		fmt.printf("itoa(factorial(%v), 16) returned %v\n", N, err);
+	/*
+		Timing loop
+	*/
+	s_old := time.tick_now();
+	for o := 0; o < count - bits; o += 1 {
+		be1, _ = int_bitfield_extract(a, o, bits);
 	}
+	Timings[.bitfield_extract_old].t += time.tick_since(s_old);
+	Timings[.bitfield_extract_old].c += (count - bits);

-	fmt.printf("factorial(%v): %v (first 10 hex digits)\n", N, as[:10]);
+	s_new := time.tick_now();
+	for o := 0; o < count - bits; o += 1 {
+		be2, _ = int_bitfield_extract_fast(a, o, bits);
+	}
+	Timings[.bitfield_extract_new].t += time.tick_since(s_new);
+	Timings[.bitfield_extract_new].c += (count - bits);
+	assert(be1 == be2);
 }

 main :: proc() {
--- a/core/math/big/helpers.odin
+++ b/core/math/big/helpers.odin
@@ -13,6 +13,8 @@ import "core:mem"
 import "core:intrinsics"
 import rnd "core:math/rand"

+// import "core:fmt"
+
 /*
 	TODO: Int.flags and Constants like ONE, NAN, etc, are not yet properly handled everywhere.
 */
@@ -193,9 +195,7 @@ extract_bit :: proc(a: ^Int, bit_offset: int) -> (bit: DIGIT, err: Error) {
 	/*
 		Check that `a`is usable.
 	*/
-	if err = clear_if_uninitialized(a); err != nil {
-		return 0, err;
-	}
+	if err = clear_if_uninitialized(a); err != nil { return 0, err; }

 	limb := bit_offset / _DIGIT_BITS;
 	if limb < 0 || limb >= a.used {
@@ -207,72 +207,44 @@ extract_bit :: proc(a: ^Int, bit_offset: int) -> (bit: DIGIT, err: Error) {
 	return 1 if ((a.digit[limb] & i) != 0) else 0, nil;
 }

-/*
-	TODO: Optimize.
-*/
 int_bitfield_extract :: proc(a: ^Int, offset, count: int) -> (res: _WORD, err: Error) {
 	/*
-		Check that `a`is usable.
+		Check that `a` is usable.
 	*/
-	if err = clear_if_uninitialized(a); err != nil {
-		return 0, err;
-	}
+	if err = clear_if_uninitialized(a); err != nil { return 0, err; }
+	if count > _WORD_BITS || count < 1             { return 0, .Invalid_Argument; }

-	if count > _WORD_BITS || count < 1 {
-		return 0, .Invalid_Argument;
-	}
+	for shift := 0; shift < count; shift += 1 {
+		bit_offset := offset + shift;

-	when true {
-		v: DIGIT;
-		e: Error;
+		limb := bit_offset / _DIGIT_BITS;
+		mask := DIGIT(1 << DIGIT((bit_offset % _DIGIT_BITS)));

-		for shift := 0; shift < count; shift += 1 {
-			o   := offset + shift;
-			v, e = extract_bit(a, o);
-			if e != nil {
-				break;
-			}
-			res = res + _WORD(v) << uint(shift);
+		if (a.digit[limb] & mask) != 0 {
+			res += _WORD(1) << uint(shift);
 		}
-
-		return res, e;
-	} else {
-		limb_lo :=  offset          / _DIGIT_BITS;
-		bits_lo :=  offset          % _DIGIT_BITS;
-		limb_hi := (offset + count) / _DIGIT_BITS;
-		bits_hi := (offset + count) % _DIGIT_BITS;
-
-		if limb_lo < 0 || limb_lo >= a.used || limb_hi < 0 || limb_hi >= a.used {
-			return 0, .Invalid_Argument;
-		}
-
-		for i := limb_hi; i >= limb_lo; i -= 1 {
-			res <<= _DIGIT_BITS;
-
-			/*
-				Determine which bits to extract from each DIGIT. The whole DIGIT's worth by default.
-			*/
-			bit_count  := _DIGIT_BITS;
-			bit_offset := 0;
-			if i == limb_lo {
-				bit_count  -= bits_lo;
-				bit_offset = _DIGIT_BITS - bit_count;
-			} else if i == limb_hi {
-				bit_count  = bits_hi;
-				bit_offset = 0;
-			}
-
-			d := a.digit[i];
-
-			v := (d >> uint(bit_offset)) & DIGIT(1 << uint(bit_count - 1));
-			m := DIGIT(1 << uint(bit_count-1));
-			r := v & m;
-
-			res |= _WORD(r);
-		}
-		return res, nil;
-
 	}
+	return res, nil;
+}
+
+int_bitfield_extract_fast :: proc(a: ^Int, offset, count: int) -> (res: _WORD, err: Error) {
+	/*
+		Check that `a` is usable.
+	*/
+	if err = clear_if_uninitialized(a); err != nil { return 0, err; }
+	if count > _WORD_BITS || count < 1             { return 0, .Invalid_Argument; }
+
+	for shift := 0; shift < count; shift += 1 {
+		bit_offset := offset + shift;
+
+		limb := bit_offset / _DIGIT_BITS;
+		mask := DIGIT(1 << DIGIT((bit_offset % _DIGIT_BITS)));
+
+		if (a.digit[limb] & mask) != 0 {
+			res += _WORD(1) << uint(shift);
+		}
+	}
+	return res, nil;
 }

 /*
--- a/core/math/big/prime.odin
+++ b/core/math/big/prime.odin
@@ -0,0 +1,68 @@
+package big
+
+/*
+	Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
+	Made available under Odin's BSD-2 license.
+
+	An arbitrary precision mathematics implementation in Odin.
+	For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
+	The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
+
+	This file contains basic arithmetic operations like `add`, `sub`, `mul`, `div`, ...
+*/
+
+/*
+	Determines if an Integer is divisible by one of the _PRIME_TABLE primes.
+	Returns true if it is, false if not. 
+*/
+int_prime_is_divisible :: proc(a: ^Int) -> (res: bool, err: Error) {
+
+	rem: DIGIT;
+	for prime in _PRIME_TABLE {
+		if rem, err = mod(a, prime); err != nil { return false, err; }
+		if rem == 0 { return true, nil; }
+	}
+	/*
+		Default to not divisible.
+	*/
+	return false, nil;
+}
+
+
+_PRIME_TABLE := []DIGIT{
+	0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
+	0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
+	0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
+	0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
+	0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
+	0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
+	0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
+	0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
+
+	0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
+	0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
+	0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
+	0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
+	0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
+	0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
+	0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
+	0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
+
+	0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
+	0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
+	0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
+	0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
+	0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
+	0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
+	0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
+	0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
+
+	0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
+	0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
+	0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
+	0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
+	0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
+	0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
+	0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
+	0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653,
+};
--- a/core/math/big/radix.odin
+++ b/core/math/big/radix.odin
@@ -202,7 +202,9 @@ int_itoa_raw :: proc(a: ^Int, radix: i8, buffer: []u8, size := int(-1), zero_ter

 		for offset := 0; offset < count; offset += shift {
 			bits_to_get := int(min(count - offset, shift));
-			if digit, err = int_bitfield_extract(a, offset, bits_to_get); err != nil {
+
+			digit, err = int_bitfield_extract(a, offset, bits_to_get);
+			if err != nil {
 				return len(buffer) - available, .Invalid_Argument;
 			}
 			available -= 1;
@@ -448,7 +450,7 @@ _itoa_raw_full :: proc(a: ^Int, radix: i8, buffer: []u8, zero_terminate := false
 		temp.sign = .Zero_or_Positive;
 	}

-	remainder: int;
+	remainder: DIGIT;
 	for {
 		if remainder, err = _int_div_digit(temp, temp, DIGIT(radix)); err != nil {
 			destroy(temp, denominator);
--- a/core/math/big/test.py
+++ b/core/math/big/test.py
@@ -11,13 +11,13 @@ from enum import Enum
 # With EXIT_ON_FAIL set, we exit at the first fail.
 #
 EXIT_ON_FAIL = True
-EXIT_ON_FAIL = False
+#EXIT_ON_FAIL = False

 #
 # We skip randomized tests altogether if NO_RANDOM_TESTS is set.
 #
 NO_RANDOM_TESTS = True
-NO_RANDOM_TESTS = False
+#NO_RANDOM_TESTS = False

 #
 # If TIMED_TESTS == False and FAST_TESTS == True, we cut down the number of iterations.
@@ -197,7 +197,13 @@ def test_sub(a = 0, b = 0, expected_error = Error.Okay):

 def test_mul(a = 0, b = 0, expected_error = Error.Okay):
 	args = [arg_to_odin(a), arg_to_odin(b)]
-	res  = mul(*args)
+	try:
+		res  = mul(*args)
+	except OSError as e:
+		print("{} while trying to multiply {} x {}.".format(e, a, b))
+		if EXIT_ON_FAIL: exit(3)
+		return False
+
 	expected_result = None
 	if expected_error == Error.Okay:
 		expected_result = a * b
@@ -369,7 +375,7 @@ TESTS = {
 	],
 	test_mul: [
 		[ 1234,   5432],
-		[ 0xd3b4e926aaba3040e1c12b5ea553b5, 0x1a821e41257ed9281bee5bc7789ea7]
+		[ 0xd3b4e926aaba3040e1c12b5ea553b5, 0x1a821e41257ed9281bee5bc7789ea7],
 	],
 	test_div: [
 		[ 54321,	12345],