bit: Improved bitfield extraction.

This commit is contained in:
Jeroen van Rijn
2021-08-04 21:25:38 +02:00
parent 85a2a8815e
commit 463003e86a
6 changed files with 158 additions and 102 deletions

View File

@@ -711,7 +711,12 @@ int_mod :: proc(remainder, numerator, denominator: ^Int) -> (err: Error) {
if z, err = is_zero(remainder); z || denominator.sign == remainder.sign { return nil; }
return add(remainder, remainder, numerator);
}
mod :: proc { int_mod, };
int_mod_digit :: proc(numerator: ^Int, denominator: DIGIT) -> (remainder: DIGIT, err: Error) {
return _int_div_digit(nil, numerator, denominator);
}
mod :: proc { int_mod, int_mod_digit, };
/*
remainder = (number + addend) % modulus.
@@ -1263,14 +1268,14 @@ _int_sqr :: proc(dest, src: ^Int) -> (err: Error) {
/*
Divide by three (based on routine from MPI and the GMP manual).
*/
_int_div_3 :: proc(quotient, numerator: ^Int) -> (remainder: int, err: Error) {
_int_div_3 :: proc(quotient, numerator: ^Int) -> (remainder: DIGIT, err: Error) {
/*
b = 2**MP_DIGIT_BIT / 3
*/
b := _WORD(1) << _WORD(_DIGIT_BITS) / _WORD(3);
q := &Int{};
if err = grow(q, numerator.used); err != nil { return -1, err; }
if err = grow(q, numerator.used); err != nil { return 0, err; }
q.used = numerator.used;
q.sign = numerator.sign;
@@ -1300,8 +1305,7 @@ _int_div_3 :: proc(quotient, numerator: ^Int) -> (remainder: int, err: Error) {
}
q.digit[ix] = DIGIT(t);
}
remainder = int(w);
remainder = DIGIT(w);
/*
[optional] store the quotient.
@@ -1542,7 +1546,7 @@ _int_div_small :: proc(quotient, remainder, numerator, denominator: ^Int) -> (er
/*
Single digit division (based on routine from MPI).
*/
_int_div_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT) -> (remainder: int, err: Error) {
_int_div_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT) -> (remainder: DIGIT, err: Error) {
q := &Int{};
ix: int;
@@ -1581,7 +1585,7 @@ _int_div_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT) -> (remain
for ix < _DIGIT_BITS && denominator != (1 << uint(ix)) {
ix += 1;
}
remainder = int(numerator.digit[0]) & ((1 << uint(ix)) - 1);
remainder = numerator.digit[0] & ((1 << uint(ix)) - 1);
if quotient == nil {
return remainder, nil;
}
@@ -1615,7 +1619,7 @@ _int_div_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT) -> (remain
}
q.digit[ix] = t;
}
remainder = int(w);
remainder = DIGIT(w);
if quotient != nil {
clamp(q);

View File

@@ -81,6 +81,8 @@ Category :: enum {
choose,
lsb,
ctz,
bitfield_extract_old,
bitfield_extract_new,
};
Event :: struct {
t: time.Duration,
@@ -118,39 +120,41 @@ demo :: proc() {
a, b, c, d, e, f := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{};
defer destroy(a, b, c, d, e, f);
nan(a);
print(" nan: ", a, 10, true, true, true);
fmt.println();
err = factorial(a, 1224);
count, _ := count_bits(a);
inf(a);
print(" inf: ", a, 10, true, true, true);
fmt.println();
bits := 101;
be1, be2: _WORD;
minus_inf(a);
print("-inf: ", a, 10, true, true, true);
fmt.println();
factorial(a, 128); // Untimed warmup.
N :: 128;
s := time.tick_now();
err = factorial(a, N);
Timings[.factorial].t += time.tick_since(s); Timings[.factorial].c += 1;
if err != nil {
fmt.printf("factorial(%v) returned %v\n", N, err);
/*
Sanity check loop.
*/
for o := 0; o < count - bits; o += 1 {
be1, _ = int_bitfield_extract(a, o, bits);
be2, _ = int_bitfield_extract_fast(a, o, bits);
if be1 != be2 {
fmt.printf("Offset: %v | Expected: %v | Got: %v\n", o, be1, be2);
assert(false);
}
}
s = time.tick_now();
as, err = itoa(a, 16);
Timings[.itoa].t += time.tick_since(s); Timings[.itoa].c += 1;
if err != nil {
fmt.printf("itoa(factorial(%v), 16) returned %v\n", N, err);
/*
Timing loop
*/
s_old := time.tick_now();
for o := 0; o < count - bits; o += 1 {
be1, _ = int_bitfield_extract(a, o, bits);
}
Timings[.bitfield_extract_old].t += time.tick_since(s_old);
Timings[.bitfield_extract_old].c += (count - bits);
fmt.printf("factorial(%v): %v (first 10 hex digits)\n", N, as[:10]);
s_new := time.tick_now();
for o := 0; o < count - bits; o += 1 {
be2, _ = int_bitfield_extract_fast(a, o, bits);
}
Timings[.bitfield_extract_new].t += time.tick_since(s_new);
Timings[.bitfield_extract_new].c += (count - bits);
assert(be1 == be2);
}
main :: proc() {

View File

@@ -13,6 +13,8 @@ import "core:mem"
import "core:intrinsics"
import rnd "core:math/rand"
// import "core:fmt"
/*
TODO: Int.flags and Constants like ONE, NAN, etc, are not yet properly handled everywhere.
*/
@@ -193,9 +195,7 @@ extract_bit :: proc(a: ^Int, bit_offset: int) -> (bit: DIGIT, err: Error) {
/*
Check that `a`is usable.
*/
if err = clear_if_uninitialized(a); err != nil {
return 0, err;
}
if err = clear_if_uninitialized(a); err != nil { return 0, err; }
limb := bit_offset / _DIGIT_BITS;
if limb < 0 || limb >= a.used {
@@ -207,72 +207,44 @@ extract_bit :: proc(a: ^Int, bit_offset: int) -> (bit: DIGIT, err: Error) {
return 1 if ((a.digit[limb] & i) != 0) else 0, nil;
}
/*
TODO: Optimize.
*/
int_bitfield_extract :: proc(a: ^Int, offset, count: int) -> (res: _WORD, err: Error) {
/*
Check that `a`is usable.
Check that `a` is usable.
*/
if err = clear_if_uninitialized(a); err != nil {
return 0, err;
}
if err = clear_if_uninitialized(a); err != nil { return 0, err; }
if count > _WORD_BITS || count < 1 { return 0, .Invalid_Argument; }
if count > _WORD_BITS || count < 1 {
return 0, .Invalid_Argument;
}
for shift := 0; shift < count; shift += 1 {
bit_offset := offset + shift;
when true {
v: DIGIT;
e: Error;
limb := bit_offset / _DIGIT_BITS;
mask := DIGIT(1 << DIGIT((bit_offset % _DIGIT_BITS)));
for shift := 0; shift < count; shift += 1 {
o := offset + shift;
v, e = extract_bit(a, o);
if e != nil {
break;
}
res = res + _WORD(v) << uint(shift);
if (a.digit[limb] & mask) != 0 {
res += _WORD(1) << uint(shift);
}
return res, e;
} else {
limb_lo := offset / _DIGIT_BITS;
bits_lo := offset % _DIGIT_BITS;
limb_hi := (offset + count) / _DIGIT_BITS;
bits_hi := (offset + count) % _DIGIT_BITS;
if limb_lo < 0 || limb_lo >= a.used || limb_hi < 0 || limb_hi >= a.used {
return 0, .Invalid_Argument;
}
for i := limb_hi; i >= limb_lo; i -= 1 {
res <<= _DIGIT_BITS;
/*
Determine which bits to extract from each DIGIT. The whole DIGIT's worth by default.
*/
bit_count := _DIGIT_BITS;
bit_offset := 0;
if i == limb_lo {
bit_count -= bits_lo;
bit_offset = _DIGIT_BITS - bit_count;
} else if i == limb_hi {
bit_count = bits_hi;
bit_offset = 0;
}
d := a.digit[i];
v := (d >> uint(bit_offset)) & DIGIT(1 << uint(bit_count - 1));
m := DIGIT(1 << uint(bit_count-1));
r := v & m;
res |= _WORD(r);
}
return res, nil;
}
return res, nil;
}
int_bitfield_extract_fast :: proc(a: ^Int, offset, count: int) -> (res: _WORD, err: Error) {
/*
Check that `a` is usable.
*/
if err = clear_if_uninitialized(a); err != nil { return 0, err; }
if count > _WORD_BITS || count < 1 { return 0, .Invalid_Argument; }
for shift := 0; shift < count; shift += 1 {
bit_offset := offset + shift;
limb := bit_offset / _DIGIT_BITS;
mask := DIGIT(1 << DIGIT((bit_offset % _DIGIT_BITS)));
if (a.digit[limb] & mask) != 0 {
res += _WORD(1) << uint(shift);
}
}
return res, nil;
}
/*

68
core/math/big/prime.odin Normal file
View File

@@ -0,0 +1,68 @@
package big
/*
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-2 license.
An arbitrary precision mathematics implementation in Odin.
For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3.
The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks.
This file contains basic arithmetic operations like `add`, `sub`, `mul`, `div`, ...
*/
/*
Determines if an Integer is divisible by one of the _PRIME_TABLE primes.
Returns true if it is, false if not.
*/
int_prime_is_divisible :: proc(a: ^Int) -> (res: bool, err: Error) {
rem: DIGIT;
for prime in _PRIME_TABLE {
if rem, err = mod(a, prime); err != nil { return false, err; }
if rem == 0 { return true, nil; }
}
/*
Default to not divisible.
*/
return false, nil;
}
_PRIME_TABLE := []DIGIT{
0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653,
};

View File

@@ -202,7 +202,9 @@ int_itoa_raw :: proc(a: ^Int, radix: i8, buffer: []u8, size := int(-1), zero_ter
for offset := 0; offset < count; offset += shift {
bits_to_get := int(min(count - offset, shift));
if digit, err = int_bitfield_extract(a, offset, bits_to_get); err != nil {
digit, err = int_bitfield_extract(a, offset, bits_to_get);
if err != nil {
return len(buffer) - available, .Invalid_Argument;
}
available -= 1;
@@ -448,7 +450,7 @@ _itoa_raw_full :: proc(a: ^Int, radix: i8, buffer: []u8, zero_terminate := false
temp.sign = .Zero_or_Positive;
}
remainder: int;
remainder: DIGIT;
for {
if remainder, err = _int_div_digit(temp, temp, DIGIT(radix)); err != nil {
destroy(temp, denominator);

View File

@@ -11,13 +11,13 @@ from enum import Enum
# With EXIT_ON_FAIL set, we exit at the first fail.
#
EXIT_ON_FAIL = True
EXIT_ON_FAIL = False
#EXIT_ON_FAIL = False
#
# We skip randomized tests altogether if NO_RANDOM_TESTS is set.
#
NO_RANDOM_TESTS = True
NO_RANDOM_TESTS = False
#NO_RANDOM_TESTS = False
#
# If TIMED_TESTS == False and FAST_TESTS == True, we cut down the number of iterations.
@@ -197,7 +197,13 @@ def test_sub(a = 0, b = 0, expected_error = Error.Okay):
def test_mul(a = 0, b = 0, expected_error = Error.Okay):
args = [arg_to_odin(a), arg_to_odin(b)]
res = mul(*args)
try:
res = mul(*args)
except OSError as e:
print("{} while trying to multiply {} x {}.".format(e, a, b))
if EXIT_ON_FAIL: exit(3)
return False
expected_result = None
if expected_error == Error.Okay:
expected_result = a * b
@@ -369,7 +375,7 @@ TESTS = {
],
test_mul: [
[ 1234, 5432],
[ 0xd3b4e926aaba3040e1c12b5ea553b5, 0x1a821e41257ed9281bee5bc7789ea7]
[ 0xd3b4e926aaba3040e1c12b5ea553b5, 0x1a821e41257ed9281bee5bc7789ea7],
],
test_div: [
[ 54321, 12345],