diff --git a/core/math/big/api.odin b/core/math/big/api.odin new file mode 100644 index 000000000..e1b687c14 --- /dev/null +++ b/core/math/big/api.odin @@ -0,0 +1,154 @@ +package math_big + +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + An arbitrary precision mathematics implementation in Odin. + For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3. + The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks. + + This file collects public proc maps and their aliases. +/* + +*/ + === === === === === === === === === === === === === === === === === === === === === === === === + Basic arithmetic. + See `public.odin`. + === === === === === === === === === === === === === === === === === === === === === === === === +*/ + +/* + High-level addition. Handles sign. +*/ +add :: proc { + /* + int_add :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) + */ + int_add, + /* + Adds the unsigned `DIGIT` immediate to an `Int`, such that the + `DIGIT` doesn't have to be turned into an `Int` first. + + int_add_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context.allocator) -> (err: Error) + */ + int_add_digit, +}; + +/* + err = sub(dest, a, b); +*/ +sub :: proc { + /* + int_sub :: proc(dest, a, b: ^Int) -> (err: Error) + */ + int_sub, + /* + int_sub_digit :: proc(dest, a: ^Int, digit: DIGIT) -> (err: Error) + */ + int_sub_digit, +}; + +/* + === === === === === === === === === === === === === === === === === === === === === === === === + Comparisons. + See `compare.odin`. + === === === === === === === === === === === === === === === === === === === === === === === === +*/ + +is_initialized :: proc { + /* + int_is_initialized :: proc(a: ^Int) -> bool + */ + int_is_initialized, +}; + +is_zero :: proc { + /* + int_is_zero :: proc(a: ^Int) -> bool + */ + int_is_zero, +}; + +is_positive :: proc { + /* + int_is_positive :: proc(a: ^Int) -> bool + */ + int_is_positive, +}; +is_pos :: is_positive; + +is_negative :: proc { + /* + int_is_negative :: proc(a: ^Int) -> bool + */ + int_is_negative, +}; +is_neg :: is_negative; + +is_even :: proc { + /* + int_is_even :: proc(a: ^Int) -> bool + */ + int_is_even, +}; + +is_odd :: proc { + /* + int_is_odd :: proc(a: ^Int) -> bool + */ + int_is_odd, +}; + +is_power_of_two :: proc { + /* + platform_int_is_power_of_two :: proc(a: int) -> bool + */ + platform_int_is_power_of_two, + /* + int_is_power_of_two :: proc(a: ^Int) -> (res: bool) + */ + int_is_power_of_two, +}; + +compare :: proc { + /* + Compare two `Int`s, signed. + + int_compare :: proc(a, b: ^Int) -> Comparison_Flag + */ + int_compare, + /* + Compare an `Int` to an unsigned number upto the size of the backing type. + + int_compare_digit :: proc(a: ^Int, u: DIGIT) -> Comparison_Flag + */ + int_compare_digit, +}; +cmp :: compare; + +compare_magnitude :: proc { + /* + Compare the magnitude of two `Int`s, unsigned. + */ + int_compare_magnitude, +}; +cmp_mag :: compare_magnitude; + +/* + === === === === === === === === === === === === === === === === === === === === === === === === + Initialization and other helpers. + See `helpers.odin`. + === === === === === === === === === === === === === === === === === === === === === === === === +*/ + +destroy :: proc { + /* + Clears one or more `Int`s and dellocates their backing memory. + + int_destroy :: proc(integers: ..^Int) + */ + int_destroy, +}; + + diff --git a/core/math/big/build.bat b/core/math/big/build.bat new file mode 100644 index 000000000..b1ff7303a --- /dev/null +++ b/core/math/big/build.bat @@ -0,0 +1,8 @@ +@echo off +odin run . -vet +: -o:size +:odin build . -build-mode:shared -show-timings -o:minimal -no-bounds-check && python test.py -fast-tests +:odin build . -build-mode:shared -show-timings -o:size -no-bounds-check && python test.py -fast-tests +:odin build . -build-mode:shared -show-timings -o:size && python test.py -fast-tests +:odin build . -build-mode:shared -show-timings -o:speed -no-bounds-check && python test.py -fast-tests +:odin build . -build-mode:shared -show-timings -o:speed && python test.py -fast-tests \ No newline at end of file diff --git a/core/math/big/common.odin b/core/math/big/common.odin new file mode 100644 index 000000000..5e2fe4d8c --- /dev/null +++ b/core/math/big/common.odin @@ -0,0 +1,206 @@ +package math_big + +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + An arbitrary precision mathematics implementation in Odin. + For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3. + The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks. +*/ + +import "core:intrinsics" + +/* + TODO: Make the tunables runtime adjustable where practical. + + This allows to benchmark and/or setting optimized values for a certain CPU without recompiling. +*/ + +/* + ========================== TUNABLES ========================== + + `initialize_constants` returns `#config(MUL_KARATSUBA_CUTOFF, _DEFAULT_MUL_KARATSUBA_CUTOFF)` + and we initialize this cutoff that way so that the procedure is used and called, + because it handles initializing the constants ONE, ZERO, MINUS_ONE, NAN and INF. + + `initialize_constants` also replaces the other `_DEFAULT_*` cutoffs with custom compile-time values if so `#config`ured. + +*/ +MUL_KARATSUBA_CUTOFF := initialize_constants(); +SQR_KARATSUBA_CUTOFF := _DEFAULT_SQR_KARATSUBA_CUTOFF; +MUL_TOOM_CUTOFF := _DEFAULT_MUL_TOOM_CUTOFF; +SQR_TOOM_CUTOFF := _DEFAULT_SQR_TOOM_CUTOFF; + +/* + These defaults were tuned on an AMD A8-6600K (64-bit) using libTomMath's `make tune`. + + TODO(Jeroen): Port this tuning algorithm and tune them for more modern processors. + + It would also be cool if we collected some data across various processor families. + This would let uss set reasonable defaults at runtime as this library initializes + itself by using `cpuid` or the ARM equivalent. + + IMPORTANT: The 32_BIT path has largely gone untested. It needs to be tested and + debugged where necessary. +*/ + +_DEFAULT_MUL_KARATSUBA_CUTOFF :: #config(MUL_KARATSUBA_CUTOFF, 80); +_DEFAULT_SQR_KARATSUBA_CUTOFF :: #config(SQR_KARATSUBA_CUTOFF, 120); +_DEFAULT_MUL_TOOM_CUTOFF :: #config(MUL_TOOM_CUTOFF, 350); +_DEFAULT_SQR_TOOM_CUTOFF :: #config(SQR_TOOM_CUTOFF, 400); + + +MAX_ITERATIONS_ROOT_N := 500; + +/* + Largest `N` for which we'll compute `N!` +*/ +FACTORIAL_MAX_N := 1_000_000; + +/* + Cutoff to switch to int_factorial_binary_split, and its max recursion level. +*/ +FACTORIAL_BINARY_SPLIT_CUTOFF := 6100; +FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS := 100; + + +/* + We don't allow these to be switched at runtime for two reasons: + + 1) 32-bit and 64-bit versions of procedures use different types for their storage, + so we'd have to double the number of procedures, and they couldn't interact. + + 2) Optimizations thanks to precomputed masks wouldn't work. +*/ +MATH_BIG_FORCE_64_BIT :: #config(MATH_BIG_FORCE_64_BIT, false); +MATH_BIG_FORCE_32_BIT :: #config(MATH_BIG_FORCE_32_BIT, false); +when (MATH_BIG_FORCE_32_BIT && MATH_BIG_FORCE_64_BIT) { #panic("Cannot force 32-bit and 64-bit big backend simultaneously."); }; + +_LOW_MEMORY :: #config(BIGINT_SMALL_MEMORY, false); +when _LOW_MEMORY { + _DEFAULT_DIGIT_COUNT :: 8; +} else { + _DEFAULT_DIGIT_COUNT :: 32; +} + +/* + ======================= END OF TUNABLES ======================= +*/ + +Sign :: enum u8 { + Zero_or_Positive = 0, + Negative = 1, +}; + +Int :: struct { + used: int, + digit: [dynamic]DIGIT, + sign: Sign, + flags: Flags, +}; + +Flag :: enum u8 { + NaN, + Inf, + Immutable, +}; + +Flags :: bit_set[Flag; u8]; + +/* + Errors are a strict superset of runtime.Allocation_Error. +*/ +Error :: enum int { + Okay = 0, + Out_Of_Memory = 1, + Invalid_Pointer = 2, + Invalid_Argument = 3, + + Assignment_To_Immutable = 4, + Max_Iterations_Reached = 5, + Buffer_Overflow = 6, + Integer_Overflow = 7, + + Division_by_Zero = 8, + Math_Domain_Error = 9, + + Unimplemented = 127, +}; + +Error_String :: #partial [Error]string{ + .Out_Of_Memory = "Out of memory", + .Invalid_Pointer = "Invalid pointer", + .Invalid_Argument = "Invalid argument", + + .Assignment_To_Immutable = "Assignment to immutable", + .Max_Iterations_Reached = "Max iterations reached", + .Buffer_Overflow = "Buffer overflow", + .Integer_Overflow = "Integer overflow", + + .Division_by_Zero = "Division by zero", + .Math_Domain_Error = "Math domain error", + + .Unimplemented = "Unimplemented", +}; + +Primality_Flag :: enum u8 { + Blum_Blum_Shub = 0, /* BBS style prime */ + Safe = 1, /* Safe prime (p-1)/2 == prime */ + Second_MSB_On = 3, /* force 2nd MSB to 1 */ +}; +Primality_Flags :: bit_set[Primality_Flag; u8]; + +/* + How do we store the Ints? + + Minimum number of available digits in `Int`, `_DEFAULT_DIGIT_COUNT` >= `_MIN_DIGIT_COUNT` + - Must be at least 3 for `_div_school`. + - Must be large enough such that `init_integer` can store `u128` in the `Int` without growing. + */ + +_MIN_DIGIT_COUNT :: max(3, ((size_of(u128) + _DIGIT_BITS) - 1) / _DIGIT_BITS); +#assert(_DEFAULT_DIGIT_COUNT >= _MIN_DIGIT_COUNT); + +/* + Maximum number of digits. + - Must be small enough such that `_bit_count` does not overflow. + - Must be small enough such that `_radix_size` for base 2 does not overflow. + `_radix_size` needs two additional bytes for zero termination and sign. +*/ +_MAX_BIT_COUNT :: (max(int) - 2); +_MAX_DIGIT_COUNT :: _MAX_BIT_COUNT / _DIGIT_BITS; + +when MATH_BIG_FORCE_64_BIT || (!MATH_BIG_FORCE_32_BIT && size_of(rawptr) == 8) { + /* + We can use u128 as an intermediary. + */ + DIGIT :: distinct u64; + _WORD :: distinct u128; +} else { + DIGIT :: distinct u32; + _WORD :: distinct u64; +} +#assert(size_of(_WORD) == 2 * size_of(DIGIT)); + +_DIGIT_TYPE_BITS :: 8 * size_of(DIGIT); +_WORD_TYPE_BITS :: 8 * size_of(_WORD); + +_DIGIT_BITS :: _DIGIT_TYPE_BITS - 4; +_WORD_BITS :: 2 * _DIGIT_BITS; + +_MASK :: (DIGIT(1) << DIGIT(_DIGIT_BITS)) - DIGIT(1); +_DIGIT_MAX :: _MASK; +_MAX_COMBA :: 1 << (_WORD_TYPE_BITS - (2 * _DIGIT_BITS)) ; +_WARRAY :: 1 << ((_WORD_TYPE_BITS - (2 * _DIGIT_BITS)) + 1); + +Order :: enum i8 { + LSB_First = -1, + MSB_First = 1, +}; + +Endianness :: enum i8 { + Little = -1, + Platform = 0, + Big = 1, +}; \ No newline at end of file diff --git a/core/math/big/example.odin b/core/math/big/example.odin new file mode 100644 index 000000000..ae900d0c9 --- /dev/null +++ b/core/math/big/example.odin @@ -0,0 +1,246 @@ +//+ignore +package math_big + +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + A BigInt implementation in Odin. + For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3. + The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks. +*/ + +import "core:fmt" +import "core:mem" + +print_configation :: proc() { + fmt.printf( +` +Configuration: + _DIGIT_BITS %v + _MIN_DIGIT_COUNT %v + _MAX_DIGIT_COUNT %v + _DEFAULT_DIGIT_COUNT %v + _MAX_COMBA %v + _WARRAY %v +Runtime tunable: + MUL_KARATSUBA_CUTOFF %v + SQR_KARATSUBA_CUTOFF %v + MUL_TOOM_CUTOFF %v + SQR_TOOM_CUTOFF %v + MAX_ITERATIONS_ROOT_N %v + FACTORIAL_MAX_N %v + FACTORIAL_BINARY_SPLIT_CUTOFF %v + FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS %v + +`, _DIGIT_BITS, +_MIN_DIGIT_COUNT, +_MAX_DIGIT_COUNT, +_DEFAULT_DIGIT_COUNT, +_MAX_COMBA, +_WARRAY, +MUL_KARATSUBA_CUTOFF, +SQR_KARATSUBA_CUTOFF, +MUL_TOOM_CUTOFF, +SQR_TOOM_CUTOFF, +MAX_ITERATIONS_ROOT_N, +FACTORIAL_MAX_N, +FACTORIAL_BINARY_SPLIT_CUTOFF, +FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS, +); + +} + +print :: proc(name: string, a: ^Int, base := i8(10), print_name := true, newline := true, print_extra_info := false) { + assert_if_nil(a); + + as, err := itoa(a, base); + defer delete(as); + + cb := internal_count_bits(a); + if print_name { + fmt.printf("%v", name); + } + if err != nil { + fmt.printf("%v (error: %v | %v)", name, err, a); + } + fmt.printf("%v", as); + if print_extra_info { + fmt.printf(" (base: %v, bits: %v (digits: %v), flags: %v)", base, cb, a.used, a.flags); + } + if newline { + fmt.println(); + } +} + +int_to_byte :: proc(v: ^Int) { + err: Error; + size: int; + print("v: ", v); + fmt.println(); + + t := &Int{}; + defer destroy(t); + + if size, err = int_to_bytes_size(v); err != nil { + fmt.printf("int_to_bytes_size returned: %v\n", err); + return; + } + b1 := make([]u8, size, context.temp_allocator); + err = int_to_bytes_big(v, b1); + int_from_bytes_big(t, b1); + fmt.printf("big: %v | err: %v\n", b1, err); + + int_from_bytes_big(t, b1); + if internal_cmp_mag(t, v) != 0 { + print("\tError parsing t: ", t); + } + + if size, err = int_to_bytes_size(v); err != nil { + fmt.printf("int_to_bytes_size returned: %v\n", err); + return; + } + b2 := make([]u8, size, context.temp_allocator); + err = int_to_bytes_big_python(v, b2); + fmt.printf("big python: %v | err: %v\n", b2, err); + + if err == nil { + int_from_bytes_big_python(t, b2); + if internal_cmp_mag(t, v) != 0 { + print("\tError parsing t: ", t); + } + } + + if size, err = int_to_bytes_size(v, true); err != nil { + fmt.printf("int_to_bytes_size returned: %v\n", err); + return; + } + b3 := make([]u8, size, context.temp_allocator); + err = int_to_bytes_big(v, b3, true); + fmt.printf("big signed: %v | err: %v\n", b3, err); + + int_from_bytes_big(t, b3, true); + if internal_cmp(t, v) != 0 { + print("\tError parsing t: ", t); + } + + if size, err = int_to_bytes_size(v, true); err != nil { + fmt.printf("int_to_bytes_size returned: %v\n", err); + return; + } + b4 := make([]u8, size, context.temp_allocator); + err = int_to_bytes_big_python(v, b4, true); + fmt.printf("big signed python: %v | err: %v\n", b4, err); + + int_from_bytes_big_python(t, b4, true); + if internal_cmp(t, v) != 0 { + print("\tError parsing t: ", t); + } +} + +int_to_byte_little :: proc(v: ^Int) { + err: Error; + size: int; + print("v: ", v); + fmt.println(); + + t := &Int{}; + defer destroy(t); + + if size, err = int_to_bytes_size(v); err != nil { + fmt.printf("int_to_bytes_size returned: %v\n", err); + return; + } + b1 := make([]u8, size, context.temp_allocator); + err = int_to_bytes_little(v, b1); + fmt.printf("little: %v | err: %v\n", b1, err); + + int_from_bytes_little(t, b1); + if internal_cmp_mag(t, v) != 0 { + print("\tError parsing t: ", t); + } + + if size, err = int_to_bytes_size(v); err != nil { + fmt.printf("int_to_bytes_size returned: %v\n", err); + return; + } + b2 := make([]u8, size, context.temp_allocator); + err = int_to_bytes_little_python(v, b2); + fmt.printf("little python: %v | err: %v\n", b2, err); + + if err == nil { + int_from_bytes_little_python(t, b2); + if internal_cmp_mag(t, v) != 0 { + print("\tError parsing t: ", t); + } + } + + if size, err = int_to_bytes_size(v, true); err != nil { + fmt.printf("int_to_bytes_size returned: %v\n", err); + return; + } + b3 := make([]u8, size, context.temp_allocator); + err = int_to_bytes_little(v, b3, true); + fmt.printf("little signed: %v | err: %v\n", b3, err); + + int_from_bytes_little(t, b3, true); + if internal_cmp(t, v) != 0 { + print("\tError parsing t: ", t); + } + + if size, err = int_to_bytes_size(v, true); err != nil { + fmt.printf("int_to_bytes_size returned: %v\n", err); + return; + } + b4 := make([]u8, size, context.temp_allocator); + err = int_to_bytes_little_python(v, b4, true); + fmt.printf("little signed python: %v | err: %v\n", b4, err); + + int_from_bytes_little_python(t, b4, true); + if internal_cmp(t, v) != 0 { + print("\tError parsing t: ", t); + } +} + +demo :: proc() { + a, b, c, d, e, f := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}; + defer destroy(a, b, c, d, e, f); + + set(a, 64336); + fmt.println("--- --- --- ---"); + int_to_byte(a); + fmt.println("--- --- --- ---"); + int_to_byte_little(a); + fmt.println("--- --- --- ---"); + + set(b, -64336); + fmt.println("--- --- --- ---"); + int_to_byte(b); + fmt.println("--- --- --- ---"); + int_to_byte_little(b); + fmt.println("--- --- --- ---"); +} + +main :: proc() { + ta := mem.Tracking_Allocator{}; + mem.tracking_allocator_init(&ta, context.allocator); + context.allocator = mem.tracking_allocator(&ta); + + demo(); + + print_configation(); + + print_timings(); + + if len(ta.allocation_map) > 0 { + for _, v in ta.allocation_map { + fmt.printf("Leaked %v bytes @ %v\n", v.size, v.location); + } + } + if len(ta.bad_free_array) > 0 { + fmt.println("Bad frees:"); + for v in ta.bad_free_array { + fmt.println(v); + } + } +} \ No newline at end of file diff --git a/core/math/big/helpers.odin b/core/math/big/helpers.odin new file mode 100644 index 000000000..81a95a215 --- /dev/null +++ b/core/math/big/helpers.odin @@ -0,0 +1,806 @@ +package math_big + +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + An arbitrary precision mathematics implementation in Odin. + For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3. + The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks. +*/ + +import "core:intrinsics" +import rnd "core:math/rand" + +// import "core:fmt" + +/* + TODO: Int.flags and Constants like ONE, NAN, etc, are not yet properly handled everywhere. +*/ + +/* + Deallocates the backing memory of one or more `Int`s. +*/ +int_destroy :: proc(integers: ..^Int) { + integers := integers; + + for a in &integers { + assert_if_nil(a); + } + #force_inline internal_int_destroy(..integers); +} + +/* + Helpers to set an `Int` to a specific value. +*/ +int_set_from_integer :: proc(dest: ^Int, src: $T, minimize := false, allocator := context.allocator) -> (err: Error) + where intrinsics.type_is_integer(T) { + context.allocator = allocator; + src := src; + + /* + Check that `src` is usable and `dest` isn't immutable. + */ + assert_if_nil(dest); + if err = #force_inline internal_error_if_immutable(dest); err != nil { return err; } + + return #force_inline internal_int_set_from_integer(dest, src, minimize); +} + +set :: proc { int_set_from_integer, int_copy, int_atoi, }; + +/* + Copy one `Int` to another. +*/ +int_copy :: proc(dest, src: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) { + /* + If dest == src, do nothing + */ + if (dest == src) { return nil; } + + /* + Check that `src` is usable and `dest` isn't immutable. + */ + assert_if_nil(dest, src); + context.allocator = allocator; + + if err = #force_inline internal_clear_if_uninitialized(src); err != nil { return err; } + if err = #force_inline internal_error_if_immutable(dest); err != nil { return err; } + + return #force_inline internal_int_copy(dest, src, minimize); +} +copy :: proc { int_copy, }; + +/* + In normal code, you can also write `a, b = b, a`. + However, that only swaps within the current scope. + This helper swaps completely. +*/ +int_swap :: proc(a, b: ^Int) { + assert_if_nil(a, b); + #force_inline internal_swap(a, b); +} +swap :: proc { int_swap, }; + +/* + Set `dest` to |`src`|. +*/ +int_abs :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + /* + Check that `src` is usable and `dest` isn't immutable. + */ + assert_if_nil(dest, src); + context.allocator = allocator; + + if err = #force_inline internal_clear_if_uninitialized(src); err != nil { return err; } + if err = #force_inline internal_error_if_immutable(dest); err != nil { return err; } + + return #force_inline internal_int_abs(dest, src); +} + +platform_abs :: proc(n: $T) -> T where intrinsics.type_is_integer(T) { + return n if n >= 0 else -n; +} +abs :: proc{ int_abs, platform_abs, }; + +/* + Set `dest` to `-src`. +*/ +int_neg :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + /* + Check that `src` is usable and `dest` isn't immutable. + */ + assert_if_nil(dest, src); + context.allocator = allocator; + + if err = #force_inline internal_clear_if_uninitialized(src); err != nil { return err; } + if err = #force_inline internal_error_if_immutable(dest); err != nil { return err; } + + return #force_inline internal_int_neg(dest, src); +} +neg :: proc { int_neg, }; + +/* + Helpers to extract values from the `Int`. +*/ +int_bitfield_extract_single :: proc(a: ^Int, offset: int, allocator := context.allocator) -> (bit: _WORD, err: Error) { + return #force_inline int_bitfield_extract(a, offset, 1, allocator); +} + +int_bitfield_extract :: proc(a: ^Int, offset, count: int, allocator := context.allocator) -> (res: _WORD, err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + context.allocator = allocator; + + if err = #force_inline internal_clear_if_uninitialized(a); err != nil { return {}, err; } + return #force_inline internal_int_bitfield_extract(a, offset, count); +} + +/* + Resize backing store. +*/ +shrink :: proc(a: ^Int, allocator := context.allocator) -> (err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + context.allocator = allocator; + + if err = #force_inline internal_clear_if_uninitialized(a); err != nil { return err; } + return #force_inline internal_shrink(a); +} + +int_grow :: proc(a: ^Int, digits: int, allow_shrink := false, allocator := context.allocator) -> (err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + return #force_inline internal_int_grow(a, digits, allow_shrink, allocator); +} +grow :: proc { int_grow, }; + +/* + Clear `Int` and resize it to the default size. +*/ +int_clear :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + return #force_inline internal_int_clear(a, minimize, allocator); +} +clear :: proc { int_clear, }; +zero :: clear; + +/* + Set the `Int` to 1 and optionally shrink it to the minimum backing size. +*/ +int_one :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + return #force_inline internal_one(a, minimize, allocator); +} +one :: proc { int_one, }; + +/* + Set the `Int` to -1 and optionally shrink it to the minimum backing size. +*/ +int_minus_one :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + return #force_inline internal_minus_one(a, minimize, allocator); +} +minus_one :: proc { int_minus_one, }; + +/* + Set the `Int` to Inf and optionally shrink it to the minimum backing size. +*/ +int_inf :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + return #force_inline internal_inf(a, minimize, allocator); +} +inf :: proc { int_inf, }; + +/* + Set the `Int` to -Inf and optionally shrink it to the minimum backing size. +*/ +int_minus_inf :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + return #force_inline internal_minus_inf(a, minimize, allocator); +} +minus_inf :: proc { int_inf, }; + +/* + Set the `Int` to NaN and optionally shrink it to the minimum backing size. +*/ +int_nan :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + return #force_inline internal_nan(a, minimize, allocator); +} +nan :: proc { int_nan, }; + +power_of_two :: proc(a: ^Int, power: int, allocator := context.allocator) -> (err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + return #force_inline internal_int_power_of_two(a, power, allocator); +} + +int_get_u128 :: proc(a: ^Int, allocator := context.allocator) -> (res: u128, err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + return int_get(a, u128, allocator); +} +get_u128 :: proc { int_get_u128, }; + +int_get_i128 :: proc(a: ^Int, allocator := context.allocator) -> (res: i128, err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + return int_get(a, i128, allocator); +} +get_i128 :: proc { int_get_i128, }; + +int_get_u64 :: proc(a: ^Int, allocator := context.allocator) -> (res: u64, err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + return int_get(a, u64, allocator); +} +get_u64 :: proc { int_get_u64, }; + +int_get_i64 :: proc(a: ^Int, allocator := context.allocator) -> (res: i64, err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + return int_get(a, i64, allocator); +} +get_i64 :: proc { int_get_i64, }; + +int_get_u32 :: proc(a: ^Int, allocator := context.allocator) -> (res: u32, err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + return int_get(a, u32, allocator); +} +get_u32 :: proc { int_get_u32, }; + +int_get_i32 :: proc(a: ^Int, allocator := context.allocator) -> (res: i32, err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + return int_get(a, i32, allocator); +} +get_i32 :: proc { int_get_i32, }; + +/* + TODO: Think about using `count_bits` to check if the value could be returned completely, + and maybe return max(T), .Integer_Overflow if not? +*/ +int_get :: proc(a: ^Int, $T: typeid, allocator := context.allocator) -> (res: T, err: Error) where intrinsics.type_is_integer(T) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + if err = #force_inline internal_clear_if_uninitialized(a, allocator); err != nil { return T{}, err; } + return #force_inline internal_int_get(a, T); +} +get :: proc { int_get, }; + +int_get_float :: proc(a: ^Int, allocator := context.allocator) -> (res: f64, err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + if err = #force_inline internal_clear_if_uninitialized(a, allocator); err != nil { return 0, err; } + return #force_inline internal_int_get_float(a); +} + +/* + Count bits in an `Int`. +*/ +count_bits :: proc(a: ^Int, allocator := context.allocator) -> (count: int, err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + if err = #force_inline internal_clear_if_uninitialized(a, allocator); err != nil { return 0, err; } + return #force_inline internal_count_bits(a), nil; +} + +/* + Returns the number of trailing zeroes before the first one. + Differs from regular `ctz` in that 0 returns 0. +*/ +int_count_lsb :: proc(a: ^Int, allocator := context.allocator) -> (count: int, err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(a); + if err = #force_inline internal_clear_if_uninitialized(a, allocator); err != nil { return 0, err; } + return #force_inline internal_int_count_lsb(a); +} + +platform_count_lsb :: #force_inline proc(a: $T) -> (count: int) + where intrinsics.type_is_integer(T) && intrinsics.type_is_unsigned(T) { + return int(intrinsics.count_trailing_zeros(a)) if a > 0 else 0; +} + +count_lsb :: proc { int_count_lsb, platform_count_lsb, }; + +int_random_digit :: proc(r: ^rnd.Rand = nil) -> (res: DIGIT) { + when _DIGIT_BITS == 60 { // DIGIT = u64 + return DIGIT(rnd.uint64(r)) & _MASK; + } else when _DIGIT_BITS == 28 { // DIGIT = u32 + return DIGIT(rnd.uint32(r)) & _MASK; + } else { + panic("Unsupported DIGIT size."); + } + + return 0; // We shouldn't get here. +} + +int_rand :: proc(dest: ^Int, bits: int, r: ^rnd.Rand = nil, allocator := context.allocator) -> (err: Error) { + /* + Check that `a` is usable. + */ + assert_if_nil(dest); + return #force_inline internal_int_rand(dest, bits, r, allocator); + +} +rand :: proc { int_rand, }; + +/* + Internal helpers. +*/ +assert_initialized :: proc(a: ^Int, loc := #caller_location) { + assert_if_nil(a); + assert(is_initialized(a), "`Int` was not properly initialized.", loc); +} + +zero_unused :: proc(dest: ^Int, old_used := -1) { + assert_if_nil(dest); + if ! #force_inline is_initialized(dest) { return; } + + #force_inline internal_zero_unused(dest, old_used); +} + +clear_if_uninitialized_single :: proc(arg: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(arg); + return #force_inline internal_clear_if_uninitialized_single(arg, allocator); +} + +clear_if_uninitialized_multi :: proc(args: ..^Int, allocator := context.allocator) -> (err: Error) { + args := args; + assert_if_nil(..args); + + for i in &args { + if err = #force_inline internal_clear_if_uninitialized_single(i, allocator); err != nil { return nil; } + } + return err; +} +clear_if_uninitialized :: proc {clear_if_uninitialized_single, clear_if_uninitialized_multi, }; + +error_if_immutable_single :: proc(arg: ^Int) -> (err: Error) { + if arg != nil && .Immutable in arg.flags { return .Assignment_To_Immutable; } + return nil; +} + +error_if_immutable_multi :: proc(args: ..^Int) -> (err: Error) { + for i in args { + if i != nil && .Immutable in i.flags { return .Assignment_To_Immutable; } + } + return nil; +} +error_if_immutable :: proc {error_if_immutable_single, error_if_immutable_multi, }; + +/* + Allocates several `Int`s at once. +*/ +int_init_multi :: proc(integers: ..^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(..integers); + + integers := integers; + for a in &integers { + if err = #force_inline internal_clear(a, true, allocator); err != nil { return err; } + } + return nil; +} + +init_multi :: proc { int_init_multi, }; + +copy_digits :: proc(dest, src: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + digits := digits; + /* + Check that `src` is usable and `dest` isn't immutable. + */ + assert_if_nil(dest, src); + if err = #force_inline internal_clear_if_uninitialized(src); err != nil { return err; } + + digits = min(digits, len(src.digit), len(dest.digit)); + return #force_inline internal_copy_digits(dest, src, digits); +} + +/* + Trim unused digits. + + This is used to ensure that leading zero digits are trimmed and the leading "used" digit will be non-zero. + Typically very fast. Also fixes the sign if there are no more leading digits. +*/ +clamp :: proc(a: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(a); + if err = #force_inline internal_clear_if_uninitialized(a, allocator); err != nil { return err; } + + for a.used > 0 && a.digit[a.used - 1] == 0 { + a.used -= 1; + } + + if z, _ := is_zero(a); z { + a.sign = .Zero_or_Positive; + } + return nil; +} + + +/* + Size binary representation +*/ +int_to_bytes_size :: proc(a: ^Int, signed := false, allocator := context.allocator) -> (size_in_bytes: int, err: Error) { + assert_if_nil(a); + if err = #force_inline internal_clear_if_uninitialized(a, allocator); err != nil { return {}, err; } + + size_in_bits := internal_count_bits(a); + + size_in_bytes = (size_in_bits / 8); + size_in_bytes += 0 if size_in_bits % 8 == 0 else 1; + size_in_bytes += 1 if signed else 0; + return; +} + +/* + Return Little Endian binary representation of `a`, either signed or unsigned. + If `a` is negative and we ask for the default unsigned representation, we return abs(a). +*/ +int_to_bytes_little :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) { + assert_if_nil(a); + size_in_bytes: int; + + if size_in_bytes, err = int_to_bytes_size(a, signed, allocator); err != nil { return err; } + l := len(buf); + if size_in_bytes > l { return .Buffer_Overflow; } + + size_in_bits := internal_count_bits(a); + i := 0; + if signed { + buf[l - 1] = 1 if a.sign == .Negative else 0; + } + for offset := 0; offset < size_in_bits; offset += 8 { + bits, _ := internal_int_bitfield_extract(a, offset, 8); + buf[i] = u8(bits & 255); i += 1; + } + return; +} + +/* + Return Big Endian binary representation of `a`, either signed or unsigned. + If `a` is negative and we ask for the default unsigned representation, we return abs(a). +*/ +int_to_bytes_big :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) { + assert_if_nil(a); + size_in_bytes: int; + + if size_in_bytes, err = int_to_bytes_size(a, signed, allocator); err != nil { return err; } + l := len(buf); + if size_in_bytes > l { return .Buffer_Overflow; } + + size_in_bits := internal_count_bits(a); + i := l - 1; + + if signed { + buf[0] = 1 if a.sign == .Negative else 0; + } + for offset := 0; offset < size_in_bits; offset += 8 { + bits, _ := internal_int_bitfield_extract(a, offset, 8); + buf[i] = u8(bits & 255); i -= 1; + } + return; +} + +/* + Return Python 3.x compatible Little Endian binary representation of `a`, either signed or unsigned. + If `a` is negative when asking for an unsigned number, we return an error like Python does. +*/ +int_to_bytes_little_python :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) { + assert_if_nil(a); + size_in_bytes: int; + + if !signed && a.sign == .Negative { return .Invalid_Argument; } + + l := len(buf); + if size_in_bytes, err = int_to_bytes_size(a, signed, allocator); err != nil { return err; } + if size_in_bytes > l { return .Buffer_Overflow; } + + if a.sign == .Negative { + t := &Int{}; + defer destroy(t); + if err = internal_complement(t, a, allocator); err != nil { return err; } + + size_in_bits := internal_count_bits(t); + i := 0; + for offset := 0; offset < size_in_bits; offset += 8 { + bits, _ := internal_int_bitfield_extract(t, offset, 8); + buf[i] = 255 - u8(bits & 255); i += 1; + } + buf[l-1] = 255; + } else { + size_in_bits := internal_count_bits(a); + i := 0; + for offset := 0; offset < size_in_bits; offset += 8 { + bits, _ := internal_int_bitfield_extract(a, offset, 8); + buf[i] = u8(bits & 255); i += 1; + } + } + return; +} + +/* + Return Python 3.x compatible Big Endian binary representation of `a`, either signed or unsigned. + If `a` is negative when asking for an unsigned number, we return an error like Python does. +*/ +int_to_bytes_big_python :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) { + assert_if_nil(a); + size_in_bytes: int; + + if !signed && a.sign == .Negative { return .Invalid_Argument; } + if a.sign == .Zero_or_Positive { return int_to_bytes_big(a, buf, signed, allocator); } + + l := len(buf); + if size_in_bytes, err = int_to_bytes_size(a, signed, allocator); err != nil { return err; } + if size_in_bytes > l { return .Buffer_Overflow; } + + t := &Int{}; + defer destroy(t); + + if err = internal_complement(t, a, allocator); err != nil { return err; } + + size_in_bits := internal_count_bits(t); + i := l - 1; + for offset := 0; offset < size_in_bits; offset += 8 { + bits, _ := internal_int_bitfield_extract(t, offset, 8); + buf[i] = 255 - u8(bits & 255); i -= 1; + } + buf[0] = 255; + + return; +} + +/* + Read `Int` from a Big Endian binary representation. + Sign is detected from the first byte if `signed` is true. +*/ +int_from_bytes_big :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) { + assert_if_nil(a); + buf := buf; + l := len(buf); + if l == 0 { return .Invalid_Argument; } + + sign: Sign; + size_in_bits := l * 8; + if signed { + /* + First byte denotes the sign. + */ + size_in_bits -= 8; + } + size_in_digits := (size_in_bits + _DIGIT_BITS - 1) / _DIGIT_BITS; + size_in_digits += 0 if size_in_bits % 8 == 0 else 1; + if err = internal_zero(a, false, allocator); err != nil { return err; } + if err = internal_grow(a, size_in_digits, false, allocator); err != nil { return err; } + + if signed { + sign = .Zero_or_Positive if buf[0] == 0 else .Negative; + buf = buf[1:]; + } + + for v in buf { + if err = internal_shl(a, a, 8); err != nil { return err; } + a.digit[0] |= DIGIT(v); + } + a.sign = sign; + a.used = size_in_digits; + return internal_clamp(a); +} + +/* + Read `Int` from a Big Endian Python binary representation. + Sign is detected from the first byte if `signed` is true. +*/ +int_from_bytes_big_python :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) { + assert_if_nil(a); + buf := buf; + l := len(buf); + if l == 0 { return .Invalid_Argument; } + + sign: Sign; + size_in_bits := l * 8; + if signed { + /* + First byte denotes the sign. + */ + size_in_bits -= 8; + } + size_in_digits := (size_in_bits + _DIGIT_BITS - 1) / _DIGIT_BITS; + size_in_digits += 0 if size_in_bits % 8 == 0 else 1; + if err = internal_zero(a, false, allocator); err != nil { return err; } + if err = internal_grow(a, size_in_digits, false, allocator); err != nil { return err; } + + if signed { + sign = .Zero_or_Positive if buf[0] == 0 else .Negative; + buf = buf[1:]; + } + + for v in buf { + if err = internal_shl(a, a, 8); err != nil { return err; } + if signed && sign == .Negative { + a.digit[0] |= DIGIT(255 - v); + } else { + a.digit[0] |= DIGIT(v); + } + } + a.sign = sign; + a.used = size_in_digits; + if err = internal_clamp(a); err != nil { return err; } + + if signed && sign == .Negative { + return internal_sub(a, a, 1); + } + return nil; +} + +/* + Read `Int` from a Little Endian binary representation. + Sign is detected from the last byte if `signed` is true. +*/ +int_from_bytes_little :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) { + assert_if_nil(a); + buf := buf; + l := len(buf); + if l == 0 { return .Invalid_Argument; } + + sign: Sign; + size_in_bits := l * 8; + if signed { + /* + First byte denotes the sign. + */ + size_in_bits -= 8; + } + size_in_digits := (size_in_bits + _DIGIT_BITS - 1) / _DIGIT_BITS; + size_in_digits += 0 if size_in_bits % 8 == 0 else 1; + if err = internal_zero(a, false, allocator); err != nil { return err; } + if err = internal_grow(a, size_in_digits, false, allocator); err != nil { return err; } + + if signed { + sign = .Zero_or_Positive if buf[l-1] == 0 else .Negative; + buf = buf[:l-1]; + l -= 1; + } + + for _, i in buf { + if err = internal_shl(a, a, 8); err != nil { return err; } + a.digit[0] |= DIGIT(buf[l-i-1]); + } + a.sign = sign; + a.used = size_in_digits; + return internal_clamp(a); +} + +/* + Read `Int` from a Little Endian Python binary representation. + Sign is detected from the first byte if `signed` is true. +*/ +int_from_bytes_little_python :: proc(a: ^Int, buf: []u8, signed := false, allocator := context.allocator) -> (err: Error) { + assert_if_nil(a); + buf := buf; + l := len(buf); + if l == 0 { return .Invalid_Argument; } + + sign: Sign; + size_in_bits := l * 8; + if signed { + /* + First byte denotes the sign. + */ + size_in_bits -= 8; + } + size_in_digits := (size_in_bits + _DIGIT_BITS - 1) / _DIGIT_BITS; + size_in_digits += 0 if size_in_bits % 8 == 0 else 1; + if err = internal_zero(a, false, allocator); err != nil { return err; } + if err = internal_grow(a, size_in_digits, false, allocator); err != nil { return err; } + + if signed { + sign = .Zero_or_Positive if buf[l-1] == 0 else .Negative; + buf = buf[:l-1]; + l -= 1; + } + + for _, i in buf { + if err = internal_shl(a, a, 8); err != nil { return err; } + if signed && sign == .Negative { + a.digit[0] |= DIGIT(255 - buf[l-i-1]); + } else { + a.digit[0] |= DIGIT(buf[l-i-1]); + } + } + a.sign = sign; + a.used = size_in_digits; + if err = internal_clamp(a); err != nil { return err; } + + if signed && sign == .Negative { + return internal_sub(a, a, 1); + } + return nil; +} + +/* + Initialize constants. +*/ +INT_ONE, INT_ZERO, INT_MINUS_ONE, INT_INF, INT_MINUS_INF, INT_NAN := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}; + +initialize_constants :: proc() -> (res: int) { + internal_set( INT_ZERO, 0); INT_ZERO.flags = {.Immutable}; + internal_set( INT_ONE, 1); INT_ONE.flags = {.Immutable}; + internal_set(INT_MINUS_ONE, -1); INT_MINUS_ONE.flags = {.Immutable}; + + /* + We set these special values to -1 or 1 so they don't get mistake for zero accidentally. + This allows for shortcut tests of is_zero as .used == 0. + */ + internal_set( INT_NAN, 1); INT_NAN.flags = {.Immutable, .NaN}; + internal_set( INT_INF, 1); INT_INF.flags = {.Immutable, .Inf}; + internal_set( INT_INF, -1); INT_MINUS_INF.flags = {.Immutable, .Inf}; + + return _DEFAULT_MUL_KARATSUBA_CUTOFF; +} + +/* + Destroy constants. + Optional for an EXE, as this would be called at the very end of a process. +*/ +destroy_constants :: proc() { + internal_destroy(INT_ONE, INT_ZERO, INT_MINUS_ONE, INT_INF, INT_MINUS_INF, INT_NAN); +} + + +assert_if_nil :: #force_inline proc(integers: ..^Int, loc := #caller_location) { + integers := integers; + + for i in &integers { + assert(i != nil, "(nil)", loc); + } +} diff --git a/core/math/big/internal.odin b/core/math/big/internal.odin new file mode 100644 index 000000000..695dca04d --- /dev/null +++ b/core/math/big/internal.odin @@ -0,0 +1,2606 @@ +//+ignore +package math_big + +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + A BigInt implementation in Odin. + For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3. + The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks. + + ========================== Low-level routines ========================== + + IMPORTANT: `internal_*` procedures make certain assumptions about their input. + + The public functions that call them are expected to satisfy their sanity check requirements. + This allows `internal_*` call `internal_*` without paying this overhead multiple times. + + Where errors can occur, they are of course still checked and returned as appropriate. + + When importing `math:core/big` to implement an involved algorithm of your own, you are welcome + to use these procedures instead of their public counterparts. + + Most inputs and outputs are expected to be passed an initialized `Int`, for example. + Exceptions include `quotient` and `remainder`, which are allowed to be `nil` when the calling code doesn't need them. + + Check the comments above each `internal_*` implementation to see what constraints it expects to have met. + + We pass the custom allocator to procedures by default using the pattern `context.allocator = allocator`. + This way we don't have to add `, allocator` at the end of each call. + + TODO: Handle +/- Infinity and NaN. +*/ + +import "core:mem" +import "core:intrinsics" +import rnd "core:math/rand" + +/* + Low-level addition, unsigned. Handbook of Applied Cryptography, algorithm 14.7. + + Assumptions: + `dest`, `a` and `b` != `nil` and have been initalized. +*/ +internal_int_add_unsigned :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + dest := dest; x := a; y := b; + context.allocator = allocator; + + old_used, min_used, max_used, i: int; + + if x.used < y.used { + x, y = y, x; + } + + min_used = y.used; + max_used = x.used; + old_used = dest.used; + + if err = internal_grow(dest, max(max_used + 1, _DEFAULT_DIGIT_COUNT)); err != nil { return err; } + dest.used = max_used + 1; + /* + All parameters have been initialized. + */ + + /* Zero the carry */ + carry := DIGIT(0); + + #no_bounds_check for i = 0; i < min_used; i += 1 { + /* + Compute the sum one _DIGIT at a time. + dest[i] = a[i] + b[i] + carry; + */ + dest.digit[i] = x.digit[i] + y.digit[i] + carry; + + /* + Compute carry + */ + carry = dest.digit[i] >> _DIGIT_BITS; + /* + Mask away carry from result digit. + */ + dest.digit[i] &= _MASK; + } + + if min_used != max_used { + /* + Now copy higher words, if any, in A+B. + If A or B has more digits, add those in. + */ + #no_bounds_check for ; i < max_used; i += 1 { + dest.digit[i] = x.digit[i] + carry; + /* + Compute carry + */ + carry = dest.digit[i] >> _DIGIT_BITS; + /* + Mask away carry from result digit. + */ + dest.digit[i] &= _MASK; + } + } + /* + Add remaining carry. + */ + dest.digit[i] = carry; + + /* + Zero remainder. + */ + internal_zero_unused(dest, old_used); + /* + Adjust dest.used based on leading zeroes. + */ + return internal_clamp(dest); +} +internal_add_unsigned :: proc { internal_int_add_unsigned, }; + +/* + Low-level addition, signed. Handbook of Applied Cryptography, algorithm 14.7. + + Assumptions: + `dest`, `a` and `b` != `nil` and have been initalized. +*/ +internal_int_add_signed :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + x := a; y := b; + context.allocator = allocator; + /* + Handle both negative or both positive. + */ + if x.sign == y.sign { + dest.sign = x.sign; + return #force_inline internal_int_add_unsigned(dest, x, y); + } + + /* + One positive, the other negative. + Subtract the one with the greater magnitude from the other. + The result gets the sign of the one with the greater magnitude. + */ + if #force_inline internal_cmp_mag(a, b) == -1 { + x, y = y, x; + } + + dest.sign = x.sign; + return #force_inline internal_int_sub_unsigned(dest, x, y); +} +internal_add_signed :: proc { internal_int_add_signed, }; + +/* + Low-level addition Int+DIGIT, signed. Handbook of Applied Cryptography, algorithm 14.7. + + Assumptions: + `dest` and `a` != `nil` and have been initalized. + `dest` is large enough (a.used + 1) to fit result. +*/ +internal_int_add_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + if err = internal_grow(dest, a.used + 1); err != nil { return err; } + /* + Fast paths for destination and input Int being the same. + */ + if dest == a { + /* + Fast path for dest.digit[0] + digit fits in dest.digit[0] without overflow. + */ + if dest.sign == .Zero_or_Positive && (dest.digit[0] + digit < _DIGIT_MAX) { + dest.digit[0] += digit; + dest.used += 1; + return internal_clamp(dest); + } + /* + Can be subtracted from dest.digit[0] without underflow. + */ + if a.sign == .Negative && (dest.digit[0] > digit) { + dest.digit[0] -= digit; + dest.used += 1; + return internal_clamp(dest); + } + } + + /* + If `a` is negative and `|a|` >= `digit`, call `dest = |a| - digit` + */ + if a.sign == .Negative && (a.used > 1 || a.digit[0] >= digit) { + /* + Temporarily fix `a`'s sign. + */ + a.sign = .Zero_or_Positive; + /* + dest = |a| - digit + */ + if err = #force_inline internal_int_add_digit(dest, a, digit); err != nil { + /* + Restore a's sign. + */ + a.sign = .Negative; + return err; + } + /* + Restore sign and set `dest` sign. + */ + a.sign = .Negative; + dest.sign = .Negative; + + return internal_clamp(dest); + } + + /* + Remember the currently used number of digits in `dest`. + */ + old_used := dest.used; + + /* + If `a` is positive + */ + if a.sign == .Zero_or_Positive { + /* + Add digits, use `carry`. + */ + i: int; + carry := digit; + #no_bounds_check for i = 0; i < a.used; i += 1 { + dest.digit[i] = a.digit[i] + carry; + carry = dest.digit[i] >> _DIGIT_BITS; + dest.digit[i] &= _MASK; + } + /* + Set final carry. + */ + dest.digit[i] = carry; + /* + Set `dest` size. + */ + dest.used = a.used + 1; + } else { + /* + `a` was negative and |a| < digit. + */ + dest.used = 1; + /* + The result is a single DIGIT. + */ + dest.digit[0] = digit - a.digit[0] if a.used == 1 else digit; + } + /* + Sign is always positive. + */ + dest.sign = .Zero_or_Positive; + + /* + Zero remainder. + */ + internal_zero_unused(dest, old_used); + + /* + Adjust dest.used based on leading zeroes. + */ + return internal_clamp(dest); +} +internal_add :: proc { internal_int_add_signed, internal_int_add_digit, }; + +/* + Low-level subtraction, dest = number - decrease. Assumes |number| > |decrease|. + Handbook of Applied Cryptography, algorithm 14.9. + + Assumptions: + `dest`, `number` and `decrease` != `nil` and have been initalized. +*/ +internal_int_sub_unsigned :: proc(dest, number, decrease: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + dest := dest; x := number; y := decrease; + old_used := dest.used; + min_used := y.used; + max_used := x.used; + i: int; + + if err = grow(dest, max(max_used, _DEFAULT_DIGIT_COUNT)); err != nil { return err; } + dest.used = max_used; + /* + All parameters have been initialized. + */ + + borrow := DIGIT(0); + + #no_bounds_check for i = 0; i < min_used; i += 1 { + dest.digit[i] = (x.digit[i] - y.digit[i] - borrow); + /* + borrow = carry bit of dest[i] + Note this saves performing an AND operation since if a carry does occur, + it will propagate all the way to the MSB. + As a result a single shift is enough to get the carry. + */ + borrow = dest.digit[i] >> ((size_of(DIGIT) * 8) - 1); + /* + Clear borrow from dest[i]. + */ + dest.digit[i] &= _MASK; + } + + /* + Now copy higher words if any, e.g. if A has more digits than B + */ + #no_bounds_check for ; i < max_used; i += 1 { + dest.digit[i] = x.digit[i] - borrow; + /* + borrow = carry bit of dest[i] + Note this saves performing an AND operation since if a carry does occur, + it will propagate all the way to the MSB. + As a result a single shift is enough to get the carry. + */ + borrow = dest.digit[i] >> ((size_of(DIGIT) * 8) - 1); + /* + Clear borrow from dest[i]. + */ + dest.digit[i] &= _MASK; + } + + /* + Zero remainder. + */ + internal_zero_unused(dest, old_used); + + /* + Adjust dest.used based on leading zeroes. + */ + return internal_clamp(dest); +} +internal_sub_unsigned :: proc { internal_int_sub_unsigned, }; + +/* + Low-level subtraction, signed. Handbook of Applied Cryptography, algorithm 14.9. + dest = number - decrease. Assumes |number| > |decrease|. + + Assumptions: + `dest`, `number` and `decrease` != `nil` and have been initalized. +*/ +internal_int_sub_signed :: proc(dest, number, decrease: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + number := number; decrease := decrease; + if number.sign != decrease.sign { + /* + Subtract a negative from a positive, OR subtract a positive from a negative. + In either case, ADD their magnitudes and use the sign of the first number. + */ + dest.sign = number.sign; + return #force_inline internal_int_add_unsigned(dest, number, decrease); + } + + /* + Subtract a positive from a positive, OR negative from a negative. + First, take the difference between their magnitudes, then... + */ + if #force_inline internal_cmp_mag(number, decrease) == -1 { + /* + The second has a larger magnitude. + The result has the *opposite* sign from the first number. + */ + dest.sign = .Negative if number.sign == .Zero_or_Positive else .Zero_or_Positive; + number, decrease = decrease, number; + } else { + /* + The first has a larger or equal magnitude. + Copy the sign from the first. + */ + dest.sign = number.sign; + } + return #force_inline internal_int_sub_unsigned(dest, number, decrease); +} + +/* + Low-level subtraction, signed. Handbook of Applied Cryptography, algorithm 14.9. + dest = number - decrease. Assumes |number| > |decrease|. + + Assumptions: + `dest`, `number` != `nil` and have been initalized. + `dest` is large enough (number.used + 1) to fit result. +*/ +internal_int_sub_digit :: proc(dest, number: ^Int, digit: DIGIT, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + if err = internal_grow(dest, number.used + 1); err != nil { return err; } + + dest := dest; digit := digit; + /* + All parameters have been initialized. + + Fast paths for destination and input Int being the same. + */ + if dest == number { + /* + Fast path for `dest` is negative and unsigned addition doesn't overflow the lowest digit. + */ + if dest.sign == .Negative && (dest.digit[0] + digit < _DIGIT_MAX) { + dest.digit[0] += digit; + return nil; + } + /* + Can be subtracted from dest.digit[0] without underflow. + */ + if number.sign == .Zero_or_Positive && (dest.digit[0] > digit) { + dest.digit[0] -= digit; + return nil; + } + } + + /* + If `a` is negative, just do an unsigned addition (with fudged signs). + */ + if number.sign == .Negative { + t := number; + t.sign = .Zero_or_Positive; + + err = #force_inline internal_int_add_digit(dest, t, digit); + dest.sign = .Negative; + + internal_clamp(dest); + return err; + } + + old_used := dest.used; + + /* + if `a`<= digit, simply fix the single digit. + */ + if number.used == 1 && (number.digit[0] <= digit) || number.used == 0 { + dest.digit[0] = digit - number.digit[0] if number.used == 1 else digit; + dest.sign = .Negative; + dest.used = 1; + } else { + dest.sign = .Zero_or_Positive; + dest.used = number.used; + + /* + Subtract with carry. + */ + carry := digit; + + #no_bounds_check for i := 0; i < number.used; i += 1 { + dest.digit[i] = number.digit[i] - carry; + carry = dest.digit[i] >> (_DIGIT_TYPE_BITS - 1); + dest.digit[i] &= _MASK; + } + } + + /* + Zero remainder. + */ + internal_zero_unused(dest, old_used); + + /* + Adjust dest.used based on leading zeroes. + */ + return internal_clamp(dest); +} + +internal_sub :: proc { internal_int_sub_signed, internal_int_sub_digit, }; + +/* + dest = src / 2 + dest = src >> 1 + + Assumes `dest` and `src` not to be `nil` and have been initialized. + We make no allocations here. +*/ +internal_int_shr1 :: proc(dest, src: ^Int) -> (err: Error) { + old_used := dest.used; dest.used = src.used; + /* + Carry + */ + fwd_carry := DIGIT(0); + + #no_bounds_check for x := dest.used - 1; x >= 0; x -= 1 { + /* + Get the carry for the next iteration. + */ + src_digit := src.digit[x]; + carry := src_digit & 1; + /* + Shift the current digit, add in carry and store. + */ + dest.digit[x] = (src_digit >> 1) | (fwd_carry << (_DIGIT_BITS - 1)); + /* + Forward carry to next iteration. + */ + fwd_carry = carry; + } + + /* + Zero remainder. + */ + internal_zero_unused(dest, old_used); + + /* + Adjust dest.used based on leading zeroes. + */ + dest.sign = src.sign; + return internal_clamp(dest); +} + +/* + dest = src * 2 + dest = src << 1 +*/ +internal_int_shl1 :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + if err = internal_copy(dest, src); err != nil { return err; } + /* + Grow `dest` to accommodate the additional bits. + */ + digits_needed := dest.used + 1; + if err = internal_grow(dest, digits_needed); err != nil { return err; } + dest.used = digits_needed; + + mask := (DIGIT(1) << uint(1)) - DIGIT(1); + shift := DIGIT(_DIGIT_BITS - 1); + carry := DIGIT(0); + + #no_bounds_check for x:= 0; x < dest.used; x+= 1 { + fwd_carry := (dest.digit[x] >> shift) & mask; + dest.digit[x] = (dest.digit[x] << uint(1) | carry) & _MASK; + carry = fwd_carry; + } + /* + Use final carry. + */ + if carry != 0 { + dest.digit[dest.used] = carry; + dest.used += 1; + } + return internal_clamp(dest); +} + +/* + Multiply by a DIGIT. +*/ +internal_int_mul_digit :: proc(dest, src: ^Int, multiplier: DIGIT, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + assert_if_nil(dest, src); + + if multiplier == 0 { + return internal_zero(dest); + } + if multiplier == 1 { + return internal_copy(dest, src); + } + + /* + Power of two? + */ + if multiplier == 2 { + return #force_inline internal_int_shl1(dest, src); + } + if #force_inline platform_int_is_power_of_two(int(multiplier)) { + ix: int; + if ix, err = internal_log(multiplier, 2); err != nil { return err; } + return internal_shl(dest, src, ix); + } + + /* + Ensure `dest` is big enough to hold `src` * `multiplier`. + */ + if err = grow(dest, max(src.used + 1, _DEFAULT_DIGIT_COUNT)); err != nil { return err; } + + /* + Save the original used count. + */ + old_used := dest.used; + /* + Set the sign. + */ + dest.sign = src.sign; + /* + Set up carry. + */ + carry := _WORD(0); + /* + Compute columns. + */ + ix := 0; + #no_bounds_check for ; ix < src.used; ix += 1 { + /* + Compute product and carry sum for this term + */ + product := carry + _WORD(src.digit[ix]) * _WORD(multiplier); + /* + Mask off higher bits to get a single DIGIT. + */ + dest.digit[ix] = DIGIT(product & _WORD(_MASK)); + /* + Send carry into next iteration + */ + carry = product >> _DIGIT_BITS; + } + + /* + Store final carry [if any] and increment used. + */ + dest.digit[ix] = DIGIT(carry); + dest.used = src.used + 1; + + /* + Zero remainder. + */ + internal_zero_unused(dest, old_used); + + return internal_clamp(dest); +} + +/* + High level multiplication (handles sign). +*/ +internal_int_mul :: proc(dest, src, multiplier: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + /* + Early out for `multiplier` is zero; Set `dest` to zero. + */ + if multiplier.used == 0 || src.used == 0 { return internal_zero(dest); } + + if src == multiplier { + /* + Do we need to square? + */ + if src.used >= SQR_TOOM_CUTOFF { + /* + Use Toom-Cook? + */ + err = #force_inline _private_int_sqr_toom(dest, src); + } else if src.used >= SQR_KARATSUBA_CUTOFF { + /* + Karatsuba? + */ + err = #force_inline _private_int_sqr_karatsuba(dest, src); + } else if ((src.used * 2) + 1) < _WARRAY && src.used < (_MAX_COMBA / 2) { + /* + Fast comba? + */ + err = #force_inline _private_int_sqr_comba(dest, src); + //err = #force_inline _private_int_sqr(dest, src); + } else { + err = #force_inline _private_int_sqr(dest, src); + } + } else { + /* + Can we use the balance method? Check sizes. + * The smaller one needs to be larger than the Karatsuba cut-off. + * The bigger one needs to be at least about one `_MUL_KARATSUBA_CUTOFF` bigger + * to make some sense, but it depends on architecture, OS, position of the + * stars... so YMMV. + * Using it to cut the input into slices small enough for _mul_comba + * was actually slower on the author's machine, but YMMV. + */ + + min_used := min(src.used, multiplier.used); + max_used := max(src.used, multiplier.used); + digits := src.used + multiplier.used + 1; + + if false && min_used >= MUL_KARATSUBA_CUTOFF && + max_used / 2 >= MUL_KARATSUBA_CUTOFF && + /* + Not much effect was observed below a ratio of 1:2, but again: YMMV. + */ + max_used >= 2 * min_used { + // err = s_mp_mul_balance(a,b,c); + } else if false && min_used >= MUL_TOOM_CUTOFF { + // err = s_mp_mul_toom(a, b, c); + } else if false && min_used >= MUL_KARATSUBA_CUTOFF { + // err = s_mp_mul_karatsuba(a, b, c); + } else if digits < _WARRAY && min_used <= _MAX_COMBA { + /* + Can we use the fast multiplier? + * The fast multiplier can be used if the output will + * have less than MP_WARRAY digits and the number of + * digits won't affect carry propagation + */ + err = #force_inline _private_int_mul_comba(dest, src, multiplier, digits); + } else { + err = #force_inline _private_int_mul(dest, src, multiplier, digits); + } + } + neg := src.sign != multiplier.sign; + dest.sign = .Negative if dest.used > 0 && neg else .Zero_or_Positive; + return err; +} + +internal_mul :: proc { internal_int_mul, internal_int_mul_digit, }; + +internal_sqr :: proc (dest, src: ^Int, allocator := context.allocator) -> (res: Error) { + /* + We call `internal_mul` and not e.g. `_private_int_sqr` because the former + will dispatch to the optimal implementation depending on the source. + */ + return #force_inline internal_mul(dest, src, src, allocator); +} + +/* + divmod. + Both the quotient and remainder are optional and may be passed a nil. + `numerator` and `denominator` are expected not to be `nil` and have been initialized. +*/ +internal_int_divmod :: proc(quotient, remainder, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + if denominator.used == 0 { return .Division_by_Zero; } + /* + If numerator < denominator then quotient = 0, remainder = numerator. + */ + if #force_inline internal_cmp_mag(numerator, denominator) == -1 { + if remainder != nil { + if err = internal_copy(remainder, numerator); err != nil { return err; } + } + if quotient != nil { + internal_zero(quotient); + } + return nil; + } + + if false && (denominator.used > 2 * MUL_KARATSUBA_CUTOFF) && (denominator.used <= (numerator.used/3) * 2) { + // err = _int_div_recursive(quotient, remainder, numerator, denominator); + } else { + when true { + err = #force_inline _private_int_div_school(quotient, remainder, numerator, denominator); + } else { + /* + NOTE(Jeroen): We no longer need or use `_private_int_div_small`. + We'll keep it around for a bit until we're reasonably certain div_school is bug free. + */ + err = _private_int_div_small(quotient, remainder, numerator, denominator); + } + } + return; +} + +/* + Single digit division (based on routine from MPI). + The quotient is optional and may be passed a nil. +*/ +internal_int_divmod_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT, allocator := context.allocator) -> (remainder: DIGIT, err: Error) { + context.allocator = allocator; + + /* + Cannot divide by zero. + */ + if denominator == 0 { return 0, .Division_by_Zero; } + + /* + Quick outs. + */ + if denominator == 1 || numerator.used == 0 { + if quotient != nil { + return 0, internal_copy(quotient, numerator); + } + return 0, err; + } + /* + Power of two? + */ + if denominator == 2 { + if numerator.used > 0 && numerator.digit[0] & 1 != 0 { + // Remainder is 1 if numerator is odd. + remainder = 1; + } + if quotient == nil { + return remainder, nil; + } + return remainder, internal_shr(quotient, numerator, 1); + } + + ix: int; + if platform_int_is_power_of_two(int(denominator)) { + ix = 1; + for ix < _DIGIT_BITS && denominator != (1 << uint(ix)) { + ix += 1; + } + remainder = numerator.digit[0] & ((1 << uint(ix)) - 1); + if quotient == nil { + return remainder, nil; + } + + return remainder, internal_shr(quotient, numerator, int(ix)); + } + + /* + Three? + */ + if denominator == 3 { + return _private_int_div_3(quotient, numerator); + } + + /* + No easy answer [c'est la vie]. Just division. + */ + q := &Int{}; + + if err = internal_grow(q, numerator.used); err != nil { return 0, err; } + + q.used = numerator.used; + q.sign = numerator.sign; + + w := _WORD(0); + + for ix = numerator.used - 1; ix >= 0; ix -= 1 { + t := DIGIT(0); + w = (w << _WORD(_DIGIT_BITS) | _WORD(numerator.digit[ix])); + if w >= _WORD(denominator) { + t = DIGIT(w / _WORD(denominator)); + w -= _WORD(t) * _WORD(denominator); + } + q.digit[ix] = t; + } + remainder = DIGIT(w); + + if quotient != nil { + internal_clamp(q); + internal_swap(q, quotient); + } + internal_destroy(q); + return remainder, nil; +} + +internal_divmod :: proc { internal_int_divmod, internal_int_divmod_digit, }; + +/* + Asssumes quotient, numerator and denominator to have been initialized and not to be nil. +*/ +internal_int_div :: proc(quotient, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) { + return #force_inline internal_int_divmod(quotient, nil, numerator, denominator, allocator); +} +internal_div :: proc { internal_int_div, }; + +/* + remainder = numerator % denominator. + 0 <= remainder < denominator if denominator > 0 + denominator < remainder <= 0 if denominator < 0 + + Asssumes quotient, numerator and denominator to have been initialized and not to be nil. +*/ +internal_int_mod :: proc(remainder, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) { + if err = #force_inline internal_int_divmod(nil, remainder, numerator, denominator, allocator); err != nil { return err; } + + if remainder.used == 0 || denominator.sign == remainder.sign { return nil; } + + return #force_inline internal_add(remainder, remainder, numerator, allocator); +} +internal_mod :: proc{ internal_int_mod, }; + +/* + remainder = (number + addend) % modulus. +*/ +internal_int_addmod :: proc(remainder, number, addend, modulus: ^Int, allocator := context.allocator) -> (err: Error) { + if err = #force_inline internal_add(remainder, number, addend, allocator); err != nil { return err; } + return #force_inline internal_mod(remainder, remainder, modulus, allocator); +} +internal_addmod :: proc { internal_int_addmod, }; + +/* + remainder = (number - decrease) % modulus. +*/ +internal_int_submod :: proc(remainder, number, decrease, modulus: ^Int, allocator := context.allocator) -> (err: Error) { + if err = #force_inline internal_sub(remainder, number, decrease, allocator); err != nil { return err; } + return #force_inline internal_mod(remainder, remainder, modulus, allocator); +} +internal_submod :: proc { internal_int_submod, }; + +/* + remainder = (number * multiplicand) % modulus. +*/ +internal_int_mulmod :: proc(remainder, number, multiplicand, modulus: ^Int, allocator := context.allocator) -> (err: Error) { + if err = #force_inline internal_mul(remainder, number, multiplicand, allocator); err != nil { return err; } + return #force_inline internal_mod(remainder, remainder, modulus, allocator); +} +internal_mulmod :: proc { internal_int_mulmod, }; + +/* + remainder = (number * number) % modulus. +*/ +internal_int_sqrmod :: proc(remainder, number, modulus: ^Int, allocator := context.allocator) -> (err: Error) { + if err = #force_inline internal_sqr(remainder, number, allocator); err != nil { return err; } + return #force_inline internal_mod(remainder, remainder, modulus, allocator); +} +internal_sqrmod :: proc { internal_int_sqrmod, }; + + + +/* + TODO: Use Sterling's Approximation to estimate log2(N!) to size the result. + This way we'll have to reallocate less, possibly not at all. +*/ +internal_int_factorial :: proc(res: ^Int, n: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + if n >= FACTORIAL_BINARY_SPLIT_CUTOFF { + return #force_inline _private_int_factorial_binary_split(res, n); + } + + i := len(_factorial_table); + if n < i { + return #force_inline internal_set(res, _factorial_table[n]); + } + + if err = #force_inline internal_set(res, _factorial_table[i - 1]); err != nil { return err; } + for { + if err = #force_inline internal_mul(res, res, DIGIT(i)); err != nil || i == n { return err; } + i += 1; + } + + return nil; +} + +/* + Returns GCD, LCM or both. + + Assumes `a` and `b` to have been initialized. + `res_gcd` and `res_lcm` can be nil or ^Int depending on which results are desired. +*/ +internal_int_gcd_lcm :: proc(res_gcd, res_lcm, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + if res_gcd == nil && res_lcm == nil { return nil; } + + return #force_inline _private_int_gcd_lcm(res_gcd, res_lcm, a, b, allocator); +} + +/* + remainder = numerator % (1 << bits) + + Assumes `remainder` and `numerator` both not to be `nil` and `bits` to be >= 0. +*/ +internal_int_mod_bits :: proc(remainder, numerator: ^Int, bits: int, allocator := context.allocator) -> (err: Error) { + /* + Everything is divisible by 1 << 0 == 1, so this returns 0. + */ + if bits == 0 { return internal_zero(remainder); } + + /* + If the modulus is larger than the value, return the value. + */ + err = internal_copy(remainder, numerator); + if bits >= (numerator.used * _DIGIT_BITS) || err != nil { + return; + } + + /* + Zero digits above the last digit of the modulus. + */ + zero_count := (bits / _DIGIT_BITS); + zero_count += 0 if (bits % _DIGIT_BITS == 0) else 1; + + /* + Zero remainder. Special case, can't use `internal_zero_unused`. + */ + if zero_count > 0 { + mem.zero_slice(remainder.digit[zero_count:]); + } + + /* + Clear the digit that is not completely outside/inside the modulus. + */ + remainder.digit[bits / _DIGIT_BITS] &= DIGIT(1 << DIGIT(bits % _DIGIT_BITS)) - DIGIT(1); + return internal_clamp(remainder); +} + +/* + ============================= Low-level helpers ============================= + + + `internal_*` helpers don't return an `Error` like their public counterparts do, + because they expect not to be passed `nil` or uninitialized inputs. + + This makes them more suitable for `internal_*` functions and some of the + public ones that have already satisfied these constraints. +*/ + +/* + This procedure will return `true` if the `Int` is initialized, `false` if not. + Assumes `a` not to be `nil`. +*/ +internal_int_is_initialized :: #force_inline proc(a: ^Int) -> (initialized: bool) { + raw := transmute(mem.Raw_Dynamic_Array)a.digit; + return raw.cap >= _MIN_DIGIT_COUNT; +} +internal_is_initialized :: proc { internal_int_is_initialized, }; + +/* + This procedure will return `true` if the `Int` is zero, `false` if not. + Assumes `a` not to be `nil`. +*/ +internal_int_is_zero :: #force_inline proc(a: ^Int) -> (zero: bool) { + return a.used == 0; +} +internal_is_zero :: proc { internal_int_is_zero, }; + +/* + This procedure will return `true` if the `Int` is positive, `false` if not. + Assumes `a` not to be `nil`. +*/ +internal_int_is_positive :: #force_inline proc(a: ^Int) -> (positive: bool) { + return a.sign == .Zero_or_Positive; +} +internal_is_positive :: proc { internal_int_is_positive, }; + +/* + This procedure will return `true` if the `Int` is negative, `false` if not. + Assumes `a` not to be `nil`. +*/ +internal_int_is_negative :: #force_inline proc(a: ^Int) -> (negative: bool) { + return a.sign == .Negative; +} +internal_is_negative :: proc { internal_int_is_negative, }; + +/* + This procedure will return `true` if the `Int` is even, `false` if not. + Assumes `a` not to be `nil`. +*/ +internal_int_is_even :: #force_inline proc(a: ^Int) -> (even: bool) { + if internal_is_zero(a) { return true; } + + /* + `a.used` > 0 here, because the above handled `is_zero`. + We don't need to explicitly test it. + */ + return a.digit[0] & 1 == 0; +} +internal_is_even :: proc { internal_int_is_even, }; + +/* + This procedure will return `true` if the `Int` is even, `false` if not. + Assumes `a` not to be `nil`. +*/ +internal_int_is_odd :: #force_inline proc(a: ^Int) -> (odd: bool) { + return !internal_int_is_even(a); +} +internal_is_odd :: proc { internal_int_is_odd, }; + + +/* + This procedure will return `true` if the `Int` is a power of two, `false` if not. + Assumes `a` not to be `nil`. +*/ +internal_int_is_power_of_two :: #force_inline proc(a: ^Int) -> (power_of_two: bool) { + /* + Early out for Int == 0. + */ + if #force_inline internal_is_zero(a) { return true; } + + /* + For an `Int` to be a power of two, its bottom limb has to be a power of two. + */ + if ! #force_inline platform_int_is_power_of_two(int(a.digit[a.used - 1])) { return false; } + + /* + We've established that the bottom limb is a power of two. + If it's the only limb, that makes the entire Int a power of two. + */ + if a.used == 1 { return true; } + + /* + For an `Int` to be a power of two, all limbs except the top one have to be zero. + */ + for i := 1; i < a.used && a.digit[i - 1] != 0; i += 1 { return false; } + + return true; +} +internal_is_power_of_two :: proc { internal_int_is_power_of_two, }; + +/* + Compare two `Int`s, signed. + Returns -1 if `a` < `b`, 0 if `a` == `b` and 1 if `b` > `a`. + + Expects `a` and `b` both to be valid `Int`s, i.e. initialized and not `nil`. +*/ +internal_int_compare :: #force_inline proc(a, b: ^Int) -> (comparison: int) { + a_is_negative := #force_inline internal_is_negative(a); + + /* + Compare based on sign. + */ + if a.sign != b.sign { return -1 if a_is_negative else +1; } + + /* + If `a` is negative, compare in the opposite direction */ + if a_is_negative { return #force_inline internal_compare_magnitude(b, a); } + + return #force_inline internal_compare_magnitude(a, b); +} +internal_compare :: proc { internal_int_compare, internal_int_compare_digit, }; +internal_cmp :: internal_compare; + +/* + Compare an `Int` to an unsigned number upto `DIGIT & _MASK`. + Returns -1 if `a` < `b`, 0 if `a` == `b` and 1 if `b` > `a`. + + Expects: `a` and `b` both to be valid `Int`s, i.e. initialized and not `nil`. +*/ +internal_int_compare_digit :: #force_inline proc(a: ^Int, b: DIGIT) -> (comparison: int) { + a_is_negative := #force_inline internal_is_negative(a); + + switch { + /* + Compare based on sign first. + */ + case a_is_negative: return -1; + /* + Then compare on magnitude. + */ + case a.used > 1: return +1; + /* + We have only one digit. Compare it against `b`. + */ + case a.digit[0] < b: return -1; + case a.digit[0] == b: return 0; + case a.digit[0] > b: return +1; + /* + Unreachable. + Just here because Odin complains about a missing return value at the bottom of the proc otherwise. + */ + case: return; + } +} +internal_compare_digit :: proc { internal_int_compare_digit, }; +internal_cmp_digit :: internal_compare_digit; + +/* + Compare the magnitude of two `Int`s, unsigned. +*/ +internal_int_compare_magnitude :: #force_inline proc(a, b: ^Int) -> (comparison: int) { + /* + Compare based on used digits. + */ + if a.used != b.used { + if a.used > b.used { + return +1; + } + return -1; + } + + /* + Same number of used digits, compare based on their value. + */ + #no_bounds_check for n := a.used - 1; n >= 0; n -= 1 { + if a.digit[n] != b.digit[n] { + if a.digit[n] > b.digit[n] { + return +1; + } + return -1; + } + } + + return 0; +} +internal_compare_magnitude :: proc { internal_int_compare_magnitude, }; +internal_cmp_mag :: internal_compare_magnitude; + + +/* + ========================= Logs, powers and roots ============================ +*/ + +/* + Returns log_base(a). + Assumes `a` to not be `nil` and have been iniialized. +*/ +internal_int_log :: proc(a: ^Int, base: DIGIT) -> (res: int, err: Error) { + if base < 2 || DIGIT(base) > _DIGIT_MAX { return -1, .Invalid_Argument; } + + if internal_is_negative(a) { return -1, .Math_Domain_Error; } + if internal_is_zero(a) { return -1, .Math_Domain_Error; } + + /* + Fast path for bases that are a power of two. + */ + if platform_int_is_power_of_two(int(base)) { return _private_log_power_of_two(a, base); } + + /* + Fast path for `Int`s that fit within a single `DIGIT`. + */ + if a.used == 1 { return internal_log(a.digit[0], DIGIT(base)); } + + return _private_int_log(a, base); + +} + +/* + Returns log_base(a), where `a` is a DIGIT. +*/ +internal_digit_log :: proc(a: DIGIT, base: DIGIT) -> (log: int, err: Error) { + /* + If the number is smaller than the base, it fits within a fraction. + Therefore, we return 0. + */ + if a < base { return 0, nil; } + + /* + If a number equals the base, the log is 1. + */ + if a == base { return 1, nil; } + + N := _WORD(a); + bracket_low := _WORD(1); + bracket_high := _WORD(base); + high := 1; + low := 0; + + for bracket_high < N { + low = high; + bracket_low = bracket_high; + high <<= 1; + bracket_high *= bracket_high; + } + + for high - low > 1 { + mid := (low + high) >> 1; + bracket_mid := bracket_low * #force_inline internal_small_pow(_WORD(base), _WORD(mid - low)); + + if N < bracket_mid { + high = mid; + bracket_high = bracket_mid; + } + if N > bracket_mid { + low = mid; + bracket_low = bracket_mid; + } + if N == bracket_mid { + return mid, nil; + } + } + + if bracket_high == N { + return high, nil; + } else { + return low, nil; + } +} +internal_log :: proc { internal_int_log, internal_digit_log, }; + +/* + Calculate dest = base^power using a square-multiply algorithm. + Assumes `dest` and `base` not to be `nil` and to have been initialized. +*/ +internal_int_pow :: proc(dest, base: ^Int, power: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + power := power; + /* + Early outs. + */ + if #force_inline internal_is_zero(base) { + /* + A zero base is a special case. + */ + if power < 0 { + if err = internal_zero(dest); err != nil { return err; } + return .Math_Domain_Error; + } + if power == 0 { return internal_one(dest); } + if power > 0 { return internal_zero(dest); } + + } + if power < 0 { + /* + Fraction, so we'll return zero. + */ + return internal_zero(dest); + } + switch(power) { + case 0: + /* + Any base to the power zero is one. + */ + return #force_inline internal_one(dest); + case 1: + /* + Any base to the power one is itself. + */ + return copy(dest, base); + case 2: + return #force_inline internal_sqr(dest, base); + } + + g := &Int{}; + if err = internal_copy(g, base); err != nil { return err; } + + /* + Set initial result. + */ + if err = internal_one(dest); err != nil { return err; } + + loop: for power > 0 { + /* + If the bit is set, multiply. + */ + if power & 1 != 0 { + if err = internal_mul(dest, g, dest); err != nil { + break loop; + } + } + /* + Square. + */ + if power > 1 { + if err = #force_inline internal_sqr(g, g); err != nil { + break loop; + } + } + + /* shift to next bit */ + power >>= 1; + } + + internal_destroy(g); + return err; +} + +/* + Calculate `dest = base^power`. + Assumes `dest` not to be `nil` and to have been initialized. +*/ +internal_int_pow_int :: proc(dest: ^Int, base, power: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + base_t := &Int{}; + defer internal_destroy(base_t); + + if err = internal_set(base_t, base); err != nil { return err; } + + return #force_inline internal_int_pow(dest, base_t, power); +} + +internal_pow :: proc { internal_int_pow, internal_int_pow_int, }; +internal_exp :: pow; + +/* + +*/ +internal_small_pow :: proc(base: _WORD, exponent: _WORD) -> (result: _WORD) { + exponent := exponent; base := base; + result = _WORD(1); + + for exponent != 0 { + if exponent & 1 == 1 { + result *= base; + } + exponent >>= 1; + base *= base; + } + return result; +} + +/* + This function is less generic than `root_n`, simpler and faster. + Assumes `dest` and `src` not to be `nil` and to have been initialized. +*/ +internal_int_sqrt :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + /* + Must be positive. + */ + if #force_inline internal_is_negative(src) { return .Invalid_Argument; } + + /* + Easy out. If src is zero, so is dest. + */ + if #force_inline internal_is_zero(src) { return internal_zero(dest); } + + /* + Set up temporaries. + */ + x, y, t1, t2 := &Int{}, &Int{}, &Int{}, &Int{}; + defer internal_destroy(x, y, t1, t2); + + count := #force_inline internal_count_bits(src); + + a, b := count >> 1, count & 1; + if err = internal_int_power_of_two(x, a+b, allocator); err != nil { return err; } + + for { + /* + y = (x + n // x) // 2 + */ + if err = internal_div(t1, src, x); err != nil { return err; } + if err = internal_add(t2, t1, x); err != nil { return err; } + if err = internal_shr(y, t2, 1); err != nil { return err; } + + if c := internal_cmp(y, x); c == 0 || c == 1 { + internal_swap(dest, x); + return nil; + } + internal_swap(x, y); + } + + internal_swap(dest, x); + return err; +} +internal_sqrt :: proc { internal_int_sqrt, }; + + +/* + Find the nth root of an Integer. + Result found such that `(dest)**n <= src` and `(dest+1)**n > src` + + This algorithm uses Newton's approximation `x[i+1] = x[i] - f(x[i])/f'(x[i])`, + which will find the root in `log(n)` time where each step involves a fair bit. + + Assumes `dest` and `src` not to be `nil` and have been initialized. +*/ +internal_int_root_n :: proc(dest, src: ^Int, n: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + /* + Fast path for n == 2 + */ + if n == 2 { return #force_inline internal_sqrt(dest, src); } + + if n < 0 || n > int(_DIGIT_MAX) { return .Invalid_Argument; } + + if n & 1 == 0 && #force_inline internal_is_negative(src) { return .Invalid_Argument; } + + /* + Set up temporaries. + */ + t1, t2, t3, a := &Int{}, &Int{}, &Int{}, &Int{}; + defer internal_destroy(t1, t2, t3); + + /* + If `src` is negative fudge the sign but keep track. + */ + a.sign = .Zero_or_Positive; + a.used = src.used; + a.digit = src.digit; + + /* + If "n" is larger than INT_MAX it is also larger than + log_2(src) because the bit-length of the "src" is measured + with an int and hence the root is always < 2 (two). + */ + if n > max(int) / 2 { + err = set(dest, 1); + dest.sign = a.sign; + return err; + } + + /* + Compute seed: 2^(log_2(src)/n + 2) + */ + ilog2 := internal_count_bits(src); + + /* + "src" is smaller than max(int), we can cast safely. + */ + if ilog2 < n { + err = internal_one(dest); + dest.sign = a.sign; + return err; + } + + ilog2 /= n; + if ilog2 == 0 { + err = internal_one(dest); + dest.sign = a.sign; + return err; + } + + /* + Start value must be larger than root. + */ + ilog2 += 2; + if err = internal_int_power_of_two(t2, ilog2); err != nil { return err; } + + c: int; + iterations := 0; + for { + /* t1 = t2 */ + if err = internal_copy(t1, t2); err != nil { return err; } + + /* t2 = t1 - ((t1**b - a) / (b * t1**(b-1))) */ + + /* t3 = t1**(b-1) */ + if err = internal_pow(t3, t1, n-1); err != nil { return err; } + + /* numerator */ + /* t2 = t1**b */ + if err = internal_mul(t2, t1, t3); err != nil { return err; } + + /* t2 = t1**b - a */ + if err = internal_sub(t2, t2, a); err != nil { return err; } + + /* denominator */ + /* t3 = t1**(b-1) * b */ + if err = internal_mul(t3, t3, DIGIT(n)); err != nil { return err; } + + /* t3 = (t1**b - a)/(b * t1**(b-1)) */ + if err = internal_div(t3, t2, t3); err != nil { return err; } + if err = internal_sub(t2, t1, t3); err != nil { return err; } + + /* + Number of rounds is at most log_2(root). If it is more it + got stuck, so break out of the loop and do the rest manually. + */ + if ilog2 -= 1; ilog2 == 0 { break; } + if internal_cmp(t1, t2) == 0 { break; } + + iterations += 1; + if iterations == MAX_ITERATIONS_ROOT_N { + return .Max_Iterations_Reached; + } + } + + /* Result can be off by a few so check. */ + /* Loop beneath can overshoot by one if found root is smaller than actual root. */ + + iterations = 0; + for { + if err = internal_pow(t2, t1, n); err != nil { return err; } + + c = internal_cmp(t2, a); + if c == 0 { + swap(dest, t1); + return nil; + } else if c == -1 { + if err = internal_add(t1, t1, DIGIT(1)); err != nil { return err; } + } else { + break; + } + + iterations += 1; + if iterations == MAX_ITERATIONS_ROOT_N { + return .Max_Iterations_Reached; + } + } + + iterations = 0; + /* + Correct overshoot from above or from recurrence. + */ + for { + if err = internal_pow(t2, t1, n); err != nil { return err; } + + if internal_cmp(t2, a) != 1 { break; } + + if err = internal_sub(t1, t1, DIGIT(1)); err != nil { return err; } + + iterations += 1; + if iterations == MAX_ITERATIONS_ROOT_N { + return .Max_Iterations_Reached; + } + } + + /* + Set the result. + */ + internal_swap(dest, t1); + + /* + Set the sign of the result. + */ + dest.sign = src.sign; + + return err; +} +internal_root_n :: proc { internal_int_root_n, }; + +/* + Other internal helpers +*/ + +/* + Deallocates the backing memory of one or more `Int`s. + Asssumes none of the `integers` to be a `nil`. +*/ +internal_int_destroy :: proc(integers: ..^Int) { + integers := integers; + + for a in &integers { + raw := transmute(mem.Raw_Dynamic_Array)a.digit; + if raw.cap > 0 { + mem.zero_slice(a.digit[:]); + free(&a.digit[0]); + } + a = &Int{}; + } +} +internal_destroy :: proc{ internal_int_destroy, }; + +/* + Helpers to set an `Int` to a specific value. +*/ +internal_int_set_from_integer :: proc(dest: ^Int, src: $T, minimize := false, allocator := context.allocator) -> (err: Error) + where intrinsics.type_is_integer(T) { + context.allocator = allocator; + + src := src; + + if err = internal_error_if_immutable(dest); err != nil { return err; } + /* + Most internal procs asssume an Int to have already been initialize, + but as this is one of the procs that initializes, we have to check the following. + */ + if err = internal_clear_if_uninitialized_single(dest); err != nil { return err; } + + dest.flags = {}; // We're not -Inf, Inf, NaN or Immutable. + + dest.used = 0; + dest.sign = .Zero_or_Positive if src >= 0 else .Negative; + src = internal_abs(src); + + #no_bounds_check for src != 0 { + dest.digit[dest.used] = DIGIT(src) & _MASK; + dest.used += 1; + src >>= _DIGIT_BITS; + } + internal_zero_unused(dest); + return nil; +} + +internal_set :: proc { internal_int_set_from_integer, internal_int_copy }; + +internal_copy_digits :: #force_inline proc(dest, src: ^Int, digits: int) -> (err: Error) { + if err = #force_inline internal_error_if_immutable(dest); err != nil { return err; } + + /* + If dest == src, do nothing + */ + if (dest == src) { return nil; } + + #force_inline mem.copy_non_overlapping(&dest.digit[0], &src.digit[0], size_of(DIGIT) * digits); + return nil; +} + +/* + Copy one `Int` to another. +*/ +internal_int_copy :: proc(dest, src: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + /* + If dest == src, do nothing + */ + if (dest == src) { return nil; } + + if err = internal_error_if_immutable(dest); err != nil { return err; } + + /* + Grow `dest` to fit `src`. + If `dest` is not yet initialized, it will be using `allocator`. + */ + needed := src.used if minimize else max(src.used, _DEFAULT_DIGIT_COUNT); + + if err = internal_grow(dest, needed, minimize); err != nil { return err; } + + /* + Copy everything over and zero high digits. + */ + internal_copy_digits(dest, src, src.used); + + dest.used = src.used; + dest.sign = src.sign; + dest.flags = src.flags &~ {.Immutable}; + + internal_zero_unused(dest); + return nil; +} +internal_copy :: proc { internal_int_copy, }; + +/* + In normal code, you can also write `a, b = b, a`. + However, that only swaps within the current scope. + This helper swaps completely. +*/ +internal_int_swap :: #force_inline proc(a, b: ^Int) { + a := a; b := b; + + a.used, b.used = b.used, a.used; + a.sign, b.sign = b.sign, a.sign; + a.digit, b.digit = b.digit, a.digit; +} +internal_swap :: proc { internal_int_swap, }; + +/* + Set `dest` to |`src`|. +*/ +internal_int_abs :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + /* + If `dest == src`, just fix `dest`'s sign. + */ + if (dest == src) { + dest.sign = .Zero_or_Positive; + return nil; + } + + /* + Copy `src` to `dest` + */ + if err = internal_copy(dest, src); err != nil { + return err; + } + + /* + Fix sign. + */ + dest.sign = .Zero_or_Positive; + return nil; +} + +internal_platform_abs :: proc(n: $T) -> T where intrinsics.type_is_integer(T) { + return n if n >= 0 else -n; +} +internal_abs :: proc{ internal_int_abs, internal_platform_abs, }; + +/* + Set `dest` to `-src`. +*/ +internal_int_neg :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + /* + If `dest == src`, just fix `dest`'s sign. + */ + sign := Sign.Negative; + if #force_inline internal_is_zero(src) || #force_inline internal_is_negative(src) { + sign = .Zero_or_Positive; + } + if dest == src { + dest.sign = sign; + return nil; + } + /* + Copy `src` to `dest` + */ + if err = internal_copy(dest, src); err != nil { return err; } + + /* + Fix sign. + */ + dest.sign = sign; + return nil; +} +internal_neg :: proc { internal_int_neg, }; + + +/* + Helpers to extract values from the `Int`. +*/ +internal_int_bitfield_extract_single :: proc(a: ^Int, offset: int) -> (bit: _WORD, err: Error) { + return #force_inline int_bitfield_extract(a, offset, 1); +} + +internal_int_bitfield_extract :: proc(a: ^Int, offset, count: int) -> (res: _WORD, err: Error) #no_bounds_check { + /* + Early out for single bit. + */ + if count == 1 { + limb := offset / _DIGIT_BITS; + if limb < 0 || limb >= a.used { return 0, .Invalid_Argument; } + i := _WORD(1 << _WORD((offset % _DIGIT_BITS))); + return 1 if ((_WORD(a.digit[limb]) & i) != 0) else 0, nil; + } + + if count > _WORD_BITS || count < 1 { return 0, .Invalid_Argument; } + + /* + There are 3 possible cases. + - [offset:][:count] covers 1 DIGIT, + e.g. offset: 0, count: 60 = bits 0..59 + - [offset:][:count] covers 2 DIGITS, + e.g. offset: 5, count: 60 = bits 5..59, 0..4 + e.g. offset: 0, count: 120 = bits 0..59, 60..119 + - [offset:][:count] covers 3 DIGITS, + e.g. offset: 40, count: 100 = bits 40..59, 0..59, 0..19 + e.g. offset: 40, count: 120 = bits 40..59, 0..59, 0..39 + */ + + limb := offset / _DIGIT_BITS; + bits_left := count; + bits_offset := offset % _DIGIT_BITS; + + num_bits := min(bits_left, _DIGIT_BITS - bits_offset); + + shift := offset % _DIGIT_BITS; + mask := (_WORD(1) << uint(num_bits)) - 1; + res = (_WORD(a.digit[limb]) >> uint(shift)) & mask; + + bits_left -= num_bits; + if bits_left == 0 { return res, nil; } + + res_shift := num_bits; + num_bits = min(bits_left, _DIGIT_BITS); + mask = (1 << uint(num_bits)) - 1; + + res |= (_WORD(a.digit[limb + 1]) & mask) << uint(res_shift); + + bits_left -= num_bits; + if bits_left == 0 { return res, nil; } + + mask = (1 << uint(bits_left)) - 1; + res_shift += _DIGIT_BITS; + + res |= (_WORD(a.digit[limb + 2]) & mask) << uint(res_shift); + + return res, nil; +} + +/* + Resize backing store. + We don't need to pass the allocator, because the storage itself stores it. + + Assumes `a` not to be `nil`, and to have already been initialized. +*/ +internal_int_shrink :: proc(a: ^Int) -> (err: Error) { + needed := max(_MIN_DIGIT_COUNT, a.used); + + if a.used != needed { return internal_grow(a, needed, true); } + return nil; +} +internal_shrink :: proc { internal_int_shrink, }; + +internal_int_grow :: proc(a: ^Int, digits: int, allow_shrink := false, allocator := context.allocator) -> (err: Error) { + raw := transmute(mem.Raw_Dynamic_Array)a.digit; + + /* + We need at least _MIN_DIGIT_COUNT or a.used digits, whichever is bigger. + The caller is asking for `digits`. Let's be accomodating. + */ + needed := max(_MIN_DIGIT_COUNT, a.used, digits); + if !allow_shrink { + needed = max(needed, raw.cap); + } + + /* + If not yet iniialized, initialize the `digit` backing with the allocator we were passed. + */ + if raw.cap == 0 { + a.digit = mem.make_dynamic_array_len_cap([dynamic]DIGIT, needed, needed, allocator); + } else if raw.cap != needed { + /* + `[dynamic]DIGIT` already knows what allocator was used for it, so resize will do the right thing. + */ + resize(&a.digit, needed); + } + /* + Let's see if the allocation/resize worked as expected. + */ + if len(a.digit) != needed { + return .Out_Of_Memory; + } + return nil; +} +internal_grow :: proc { internal_int_grow, }; + +/* + Clear `Int` and resize it to the default size. + Assumes `a` not to be `nil`. +*/ +internal_int_clear :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) { + raw := transmute(mem.Raw_Dynamic_Array)a.digit; + if raw.cap != 0 { + mem.zero_slice(a.digit[:a.used]); + } + a.sign = .Zero_or_Positive; + a.used = 0; + + return #force_inline internal_grow(a, a.used, minimize, allocator); +} +internal_clear :: proc { internal_int_clear, }; +internal_zero :: internal_clear; + +/* + Set the `Int` to 1 and optionally shrink it to the minimum backing size. +*/ +internal_int_one :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) { + return internal_copy(a, INT_ONE, minimize, allocator); +} +internal_one :: proc { internal_int_one, }; + +/* + Set the `Int` to -1 and optionally shrink it to the minimum backing size. +*/ +internal_int_minus_one :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) { + return internal_copy(a, INT_MINUS_ONE, minimize, allocator); +} +internal_minus_one :: proc { internal_int_minus_one, }; + +/* + Set the `Int` to Inf and optionally shrink it to the minimum backing size. +*/ +internal_int_inf :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) { + return internal_copy(a, INT_INF, minimize, allocator); +} +internal_inf :: proc { internal_int_inf, }; + +/* + Set the `Int` to -Inf and optionally shrink it to the minimum backing size. +*/ +internal_int_minus_inf :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) { + return internal_copy(a, INT_MINUS_INF, minimize, allocator); +} +internal_minus_inf :: proc { internal_int_inf, }; + +/* + Set the `Int` to NaN and optionally shrink it to the minimum backing size. +*/ +internal_int_nan :: proc(a: ^Int, minimize := false, allocator := context.allocator) -> (err: Error) { + return internal_copy(a, INT_NAN, minimize, allocator); +} +internal_nan :: proc { internal_int_nan, }; + +internal_int_power_of_two :: proc(a: ^Int, power: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + if power < 0 || power > _MAX_BIT_COUNT { return .Invalid_Argument; } + + /* + Grow to accomodate the single bit. + */ + a.used = (power / _DIGIT_BITS) + 1; + if err = internal_grow(a, a.used); err != nil { return err; } + /* + Zero the entirety. + */ + mem.zero_slice(a.digit[:]); + + /* + Set the bit. + */ + a.digit[power / _DIGIT_BITS] = 1 << uint((power % _DIGIT_BITS)); + return nil; +} + +internal_int_get_u128 :: proc(a: ^Int) -> (res: u128, err: Error) { + return internal_int_get(a, u128); +} +internal_get_u128 :: proc { internal_int_get_u128, }; + +internal_int_get_i128 :: proc(a: ^Int) -> (res: i128, err: Error) { + return internal_int_get(a, i128); +} +internal_get_i128 :: proc { internal_int_get_i128, }; + +internal_int_get_u64 :: proc(a: ^Int) -> (res: u64, err: Error) { + return internal_int_get(a, u64); +} +internal_get_u64 :: proc { internal_int_get_u64, }; + +internal_int_get_i64 :: proc(a: ^Int) -> (res: i64, err: Error) { + return internal_int_get(a, i64); +} +internal_get_i64 :: proc { internal_int_get_i64, }; + +internal_int_get_u32 :: proc(a: ^Int) -> (res: u32, err: Error) { + return internal_int_get(a, u32); +} +internal_get_u32 :: proc { internal_int_get_u32, }; + +internal_int_get_i32 :: proc(a: ^Int) -> (res: i32, err: Error) { + return internal_int_get(a, i32); +} +internal_get_i32 :: proc { internal_int_get_i32, }; + +/* + TODO: Think about using `count_bits` to check if the value could be returned completely, + and maybe return max(T), .Integer_Overflow if not? +*/ +internal_int_get :: proc(a: ^Int, $T: typeid) -> (res: T, err: Error) where intrinsics.type_is_integer(T) { + size_in_bits := int(size_of(T) * 8); + i := int((size_in_bits + _DIGIT_BITS - 1) / _DIGIT_BITS); + i = min(int(a.used), i); + + #no_bounds_check for ; i >= 0; i -= 1 { + res <<= uint(0) if size_in_bits <= _DIGIT_BITS else _DIGIT_BITS; + res |= T(a.digit[i]); + if size_in_bits <= _DIGIT_BITS { + break; + }; + } + + when !intrinsics.type_is_unsigned(T) { + /* + Mask off sign bit. + */ + res ~= 1 << uint(size_in_bits - 1); + /* + Set the sign. + */ + if a.sign == .Negative { res = -res; } + } + return; +} +internal_get :: proc { internal_int_get, }; + +internal_int_get_float :: proc(a: ^Int) -> (res: f64, err: Error) { + l := min(a.used, 17); // log2(max(f64)) is approximately 1020, or 17 legs. + fac := f64(1 << _DIGIT_BITS); + d := 0.0; + + #no_bounds_check for i := l; i >= 0; i -= 1 { + d = (d * fac) + f64(a.digit[i]); + } + + res = -d if a.sign == .Negative else d; + return; +} + +/* + The `and`, `or` and `xor` binops differ in two lines only. + We could handle those with a switch, but that adds overhead. + + TODO: Implement versions that take a DIGIT immediate. +*/ + +/* + 2's complement `and`, returns `dest = a & b;` +*/ +internal_int_and :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + used := max(a.used, b.used) + 1; + /* + Grow the destination to accomodate the result. + */ + if err = internal_grow(dest, used); err != nil { return err; } + + neg_a := #force_inline internal_is_negative(a); + neg_b := #force_inline internal_is_negative(b); + neg := neg_a && neg_b; + + ac, bc, cc := DIGIT(1), DIGIT(1), DIGIT(1); + + #no_bounds_check for i := 0; i < used; i += 1 { + x, y: DIGIT; + + /* + Convert to 2's complement if negative. + */ + if neg_a { + ac += _MASK if i >= a.used else (~a.digit[i] & _MASK); + x = ac & _MASK; + ac >>= _DIGIT_BITS; + } else { + x = 0 if i >= a.used else a.digit[i]; + } + + /* + Convert to 2's complement if negative. + */ + if neg_b { + bc += _MASK if i >= b.used else (~b.digit[i] & _MASK); + y = bc & _MASK; + bc >>= _DIGIT_BITS; + } else { + y = 0 if i >= b.used else b.digit[i]; + } + + dest.digit[i] = x & y; + + /* + Convert to to sign-magnitude if negative. + */ + if neg { + cc += ~dest.digit[i] & _MASK; + dest.digit[i] = cc & _MASK; + cc >>= _DIGIT_BITS; + } + } + + dest.used = used; + dest.sign = .Negative if neg else .Zero_or_Positive; + return internal_clamp(dest); +} +internal_and :: proc { internal_int_and, }; + +/* + 2's complement `or`, returns `dest = a | b;` +*/ +internal_int_or :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + used := max(a.used, b.used) + 1; + /* + Grow the destination to accomodate the result. + */ + if err = internal_grow(dest, used); err != nil { return err; } + + neg_a := #force_inline internal_is_negative(a); + neg_b := #force_inline internal_is_negative(b); + neg := neg_a || neg_b; + + ac, bc, cc := DIGIT(1), DIGIT(1), DIGIT(1); + + #no_bounds_check for i := 0; i < used; i += 1 { + x, y: DIGIT; + + /* + Convert to 2's complement if negative. + */ + if neg_a { + ac += _MASK if i >= a.used else (~a.digit[i] & _MASK); + x = ac & _MASK; + ac >>= _DIGIT_BITS; + } else { + x = 0 if i >= a.used else a.digit[i]; + } + + /* + Convert to 2's complement if negative. + */ + if neg_b { + bc += _MASK if i >= b.used else (~b.digit[i] & _MASK); + y = bc & _MASK; + bc >>= _DIGIT_BITS; + } else { + y = 0 if i >= b.used else b.digit[i]; + } + + dest.digit[i] = x | y; + + /* + Convert to to sign-magnitude if negative. + */ + if neg { + cc += ~dest.digit[i] & _MASK; + dest.digit[i] = cc & _MASK; + cc >>= _DIGIT_BITS; + } + } + + dest.used = used; + dest.sign = .Negative if neg else .Zero_or_Positive; + return internal_clamp(dest); +} +internal_or :: proc { internal_int_or, }; + +/* + 2's complement `xor`, returns `dest = a ~ b;` +*/ +internal_int_xor :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + used := max(a.used, b.used) + 1; + /* + Grow the destination to accomodate the result. + */ + if err = internal_grow(dest, used); err != nil { return err; } + + neg_a := #force_inline internal_is_negative(a); + neg_b := #force_inline internal_is_negative(b); + neg := neg_a != neg_b; + + ac, bc, cc := DIGIT(1), DIGIT(1), DIGIT(1); + + #no_bounds_check for i := 0; i < used; i += 1 { + x, y: DIGIT; + + /* + Convert to 2's complement if negative. + */ + if neg_a { + ac += _MASK if i >= a.used else (~a.digit[i] & _MASK); + x = ac & _MASK; + ac >>= _DIGIT_BITS; + } else { + x = 0 if i >= a.used else a.digit[i]; + } + + /* + Convert to 2's complement if negative. + */ + if neg_b { + bc += _MASK if i >= b.used else (~b.digit[i] & _MASK); + y = bc & _MASK; + bc >>= _DIGIT_BITS; + } else { + y = 0 if i >= b.used else b.digit[i]; + } + + dest.digit[i] = x ~ y; + + /* + Convert to to sign-magnitude if negative. + */ + if neg { + cc += ~dest.digit[i] & _MASK; + dest.digit[i] = cc & _MASK; + cc >>= _DIGIT_BITS; + } + } + + dest.used = used; + dest.sign = .Negative if neg else .Zero_or_Positive; + return internal_clamp(dest); +} +internal_xor :: proc { internal_int_xor, }; + +/* + dest = ~src +*/ +internal_int_complement :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + /* + Temporarily fix sign. + */ + old_sign := src.sign; + + neg := #force_inline internal_is_zero(src) || #force_inline internal_is_positive(src); + + src.sign = .Negative if neg else .Zero_or_Positive; + + err = #force_inline internal_sub(dest, src, 1); + /* + Restore sign. + */ + src.sign = old_sign; + + return err; +} +internal_complement :: proc { internal_int_complement, }; + +/* + quotient, remainder := numerator >> bits; + `remainder` is allowed to be passed a `nil`, in which case `mod` won't be computed. +*/ +internal_int_shrmod :: proc(quotient, remainder, numerator: ^Int, bits: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + bits := bits; + if bits < 0 { return .Invalid_Argument; } + + if err = internal_copy(quotient, numerator); err != nil { return err; } + + /* + Shift right by a certain bit count (store quotient and optional remainder.) + `numerator` should not be used after this. + */ + if remainder != nil { + if err = internal_int_mod_bits(remainder, numerator, bits); err != nil { return err; } + } + + /* + Shift by as many digits in the bit count. + */ + if bits >= _DIGIT_BITS { + if err = internal_shr_digit(quotient, bits / _DIGIT_BITS); err != nil { return err; } + } + + /* + Shift any bit count < _DIGIT_BITS. + */ + bits %= _DIGIT_BITS; + if bits != 0 { + mask := DIGIT(1 << uint(bits)) - 1; + shift := DIGIT(_DIGIT_BITS - bits); + carry := DIGIT(0); + + #no_bounds_check for x := quotient.used - 1; x >= 0; x -= 1 { + /* + Get the lower bits of this word in a temp. + */ + fwd_carry := quotient.digit[x] & mask; + + /* + Shift the current word and mix in the carry bits from the previous word. + */ + quotient.digit[x] = (quotient.digit[x] >> uint(bits)) | (carry << shift); + + /* + Update carry from forward carry. + */ + carry = fwd_carry; + } + + } + return internal_clamp(numerator); +} +internal_shrmod :: proc { internal_int_shrmod, }; + +internal_int_shr :: proc(dest, source: ^Int, bits: int, allocator := context.allocator) -> (err: Error) { + return #force_inline internal_shrmod(dest, nil, source, bits, allocator); +} +internal_shr :: proc { internal_int_shr, }; + +/* + Shift right by `digits` * _DIGIT_BITS bits. +*/ +internal_int_shr_digit :: proc(quotient: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + if digits <= 0 { return nil; } + + /* + If digits > used simply zero and return. + */ + if digits > quotient.used { return internal_zero(quotient); } + + /* + Much like `int_shl_digit`, this is implemented using a sliding window, + except the window goes the other way around. + + b-2 | b-1 | b0 | b1 | b2 | ... | bb | ----> + /\ | ----> + \-------------------/ ----> + */ + + #no_bounds_check for x := 0; x < (quotient.used - digits); x += 1 { + quotient.digit[x] = quotient.digit[x + digits]; + } + quotient.used -= digits; + internal_zero_unused(quotient); + return internal_clamp(quotient); +} +internal_shr_digit :: proc { internal_int_shr_digit, }; + +/* + Shift right by a certain bit count with sign extension. +*/ +internal_int_shr_signed :: proc(dest, src: ^Int, bits: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + if src.sign == .Zero_or_Positive { + return internal_shr(dest, src, bits); + } + if err = internal_int_add_digit(dest, src, DIGIT(1)); err != nil { return err; } + + if err = internal_shr(dest, dest, bits); err != nil { return err; } + return internal_sub(dest, src, DIGIT(1)); +} + +internal_shr_signed :: proc { internal_int_shr_signed, }; + +/* + Shift left by a certain bit count. +*/ +internal_int_shl :: proc(dest, src: ^Int, bits: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + bits := bits; + + if bits < 0 { return .Invalid_Argument; } + + if err = internal_copy(dest, src); err != nil { return err; } + + /* + Grow `dest` to accommodate the additional bits. + */ + digits_needed := dest.used + (bits / _DIGIT_BITS) + 1; + if err = internal_grow(dest, digits_needed); err != nil { return err; } + dest.used = digits_needed; + /* + Shift by as many digits in the bit count as we have. + */ + if bits >= _DIGIT_BITS { + if err = internal_shl_digit(dest, bits / _DIGIT_BITS); err != nil { return err; } + } + + /* + Shift any remaining bit count < _DIGIT_BITS + */ + bits %= _DIGIT_BITS; + if bits != 0 { + mask := (DIGIT(1) << uint(bits)) - DIGIT(1); + shift := DIGIT(_DIGIT_BITS - bits); + carry := DIGIT(0); + + #no_bounds_check for x:= 0; x < dest.used; x+= 1 { + fwd_carry := (dest.digit[x] >> shift) & mask; + dest.digit[x] = (dest.digit[x] << uint(bits) | carry) & _MASK; + carry = fwd_carry; + } + + /* + Use final carry. + */ + if carry != 0 { + dest.digit[dest.used] = carry; + dest.used += 1; + } + } + return internal_clamp(dest); +} +internal_shl :: proc { internal_int_shl, }; + + +/* + Shift left by `digits` * _DIGIT_BITS bits. +*/ +internal_int_shl_digit :: proc(quotient: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + if digits <= 0 { return nil; } + + /* + No need to shift a zero. + */ + if #force_inline internal_is_zero(quotient) { return {}; } + + /* + Resize `quotient` to accomodate extra digits. + */ + if err = #force_inline internal_grow(quotient, quotient.used + digits); err != nil { return err; } + + /* + Increment the used by the shift amount then copy upwards. + */ + + /* + Much like `int_shr_digit`, this is implemented using a sliding window, + except the window goes the other way around. + */ + #no_bounds_check for x := quotient.used; x > 0; x -= 1 { + quotient.digit[x+digits-1] = quotient.digit[x-1]; + } + + quotient.used += digits; + mem.zero_slice(quotient.digit[:digits]); + return nil; +} +internal_shl_digit :: proc { internal_int_shl_digit, }; + +/* + Count bits in an `Int`. + Assumes `a` not to be `nil` and to have been initialized. +*/ +internal_count_bits :: proc(a: ^Int) -> (count: int) { + /* + Fast path for zero. + */ + if #force_inline internal_is_zero(a) { return {}; } + /* + Get the number of DIGITs and use it. + */ + count = (a.used - 1) * _DIGIT_BITS; + /* + Take the last DIGIT and count the bits in it. + */ + clz := int(intrinsics.count_leading_zeros(a.digit[a.used - 1])); + count += (_DIGIT_TYPE_BITS - clz); + return; +} + +/* + Returns the number of trailing zeroes before the first one. + Differs from regular `ctz` in that 0 returns 0. + + Assumes `a` not to be `nil` and have been initialized. +*/ +internal_int_count_lsb :: proc(a: ^Int) -> (count: int, err: Error) { + /* + Easy out. + */ + if #force_inline internal_is_zero(a) { return {}, nil; } + + /* + Scan lower digits until non-zero. + */ + x: int; + #no_bounds_check for x = 0; x < a.used && a.digit[x] == 0; x += 1 {} + + q := a.digit[x]; + x *= _DIGIT_BITS; + x += internal_count_lsb(q); + return x, nil; +} + +internal_platform_count_lsb :: #force_inline proc(a: $T) -> (count: int) + where intrinsics.type_is_integer(T) && intrinsics.type_is_unsigned(T) { + return int(intrinsics.count_trailing_zeros(a)) if a > 0 else 0; +} + +internal_count_lsb :: proc { internal_int_count_lsb, internal_platform_count_lsb, }; + +internal_int_random_digit :: proc(r: ^rnd.Rand = nil) -> (res: DIGIT) { + when _DIGIT_BITS == 60 { // DIGIT = u64 + return DIGIT(rnd.uint64(r)) & _MASK; + } else when _DIGIT_BITS == 28 { // DIGIT = u32 + return DIGIT(rnd.uint32(r)) & _MASK; + } else { + panic("Unsupported DIGIT size."); + } + + return 0; // We shouldn't get here. +} + +internal_int_rand :: proc(dest: ^Int, bits: int, r: ^rnd.Rand = nil, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + bits := bits; + + if bits <= 0 { return .Invalid_Argument; } + + digits := bits / _DIGIT_BITS; + bits %= _DIGIT_BITS; + + if bits > 0 { + digits += 1; + } + + if err = #force_inline internal_grow(dest, digits); err != nil { return err; } + + for i := 0; i < digits; i += 1 { + dest.digit[i] = int_random_digit(r) & _MASK; + } + if bits > 0 { + dest.digit[digits - 1] &= ((1 << uint(bits)) - 1); + } + dest.used = digits; + return nil; +} +internal_rand :: proc { internal_int_rand, }; + +/* + Internal helpers. +*/ +internal_assert_initialized :: proc(a: ^Int, loc := #caller_location) { + assert(internal_is_initialized(a), "`Int` was not properly initialized.", loc); +} + +internal_clear_if_uninitialized_single :: proc(arg: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + if ! #force_inline internal_is_initialized(arg) { + return #force_inline internal_grow(arg, _DEFAULT_DIGIT_COUNT); + } + return err; +} + +internal_clear_if_uninitialized_multi :: proc(args: ..^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + for i in args { + if ! #force_inline internal_is_initialized(i) { + e := #force_inline internal_grow(i, _DEFAULT_DIGIT_COUNT); + if e != nil { err = e; } + } + } + return err; +} +internal_clear_if_uninitialized :: proc {internal_clear_if_uninitialized_single, internal_clear_if_uninitialized_multi, }; + +internal_error_if_immutable_single :: proc(arg: ^Int) -> (err: Error) { + if arg != nil && .Immutable in arg.flags { return .Assignment_To_Immutable; } + return nil; +} + +internal_error_if_immutable_multi :: proc(args: ..^Int) -> (err: Error) { + for i in args { + if i != nil && .Immutable in i.flags { return .Assignment_To_Immutable; } + } + return nil; +} +internal_error_if_immutable :: proc {internal_error_if_immutable_single, internal_error_if_immutable_multi, }; + +/* + Allocates several `Int`s at once. +*/ +internal_int_init_multi :: proc(integers: ..^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + integers := integers; + for a in &integers { + if err = internal_clear(a); err != nil { return err; } + } + return nil; +} + +internal_init_multi :: proc { internal_int_init_multi, }; + +/* + Trim unused digits. + + This is used to ensure that leading zero digits are trimmed and the leading "used" digit will be non-zero. + Typically very fast. Also fixes the sign if there are no more leading digits. +*/ +internal_clamp :: proc(a: ^Int) -> (err: Error) { + for a.used > 0 && a.digit[a.used - 1] == 0 { a.used -= 1; } + + if #force_inline internal_is_zero(a) { a.sign = .Zero_or_Positive; } + + return nil; +} + + +internal_int_zero_unused :: #force_inline proc(dest: ^Int, old_used := -1) { + /* + If we don't pass the number of previously used DIGITs, we zero all remaining ones. + */ + zero_count: int; + if old_used == -1 { + zero_count = len(dest.digit) - dest.used; + } else { + zero_count = old_used - dest.used; + } + + /* + Zero remainder. + */ + if zero_count > 0 && dest.used < len(dest.digit) { + mem.zero_slice(dest.digit[dest.used:][:zero_count]); + } +} +internal_zero_unused :: proc { internal_int_zero_unused, }; + +/* + ========================== End of low-level routines ========================== +*/ \ No newline at end of file diff --git a/core/math/big/logical.odin b/core/math/big/logical.odin new file mode 100644 index 000000000..1eb26332a --- /dev/null +++ b/core/math/big/logical.odin @@ -0,0 +1,144 @@ +package math_big + +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + An arbitrary precision mathematics implementation in Odin. + For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3. + The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks. + + This file contains logical operations like `and`, `or` and `xor`. +*/ + +/* + The `and`, `or` and `xor` binops differ in two lines only. + We could handle those with a switch, but that adds overhead. + + TODO: Implement versions that take a DIGIT immediate. +*/ + +/* + 2's complement `and`, returns `dest = a & b;` +*/ +int_and :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest, a, b); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a, b); err != nil { return err; } + return #force_inline internal_int_and(dest, a, b); +} +and :: proc { int_and, }; + +/* + 2's complement `or`, returns `dest = a | b;` +*/ +int_or :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest, a, b); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a, b); err != nil { return err; } + return #force_inline internal_int_or(dest, a, b); +} +or :: proc { int_or, }; + +/* + 2's complement `xor`, returns `dest = a ^ b;` +*/ +int_xor :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest, a, b); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a, b); err != nil { return err; } + return #force_inline internal_int_xor(dest, a, b); +} +xor :: proc { int_xor, }; + +/* + dest = ~src +*/ +int_complement :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + /* + Check that `src` and `dest` are usable. + */ + assert_if_nil(dest, src); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(dest, src); err != nil { return err; } + return #force_inline internal_int_complement(dest, src); +} +complement :: proc { int_complement, }; + +/* + quotient, remainder := numerator >> bits; + `remainder` is allowed to be passed a `nil`, in which case `mod` won't be computed. +*/ +int_shrmod :: proc(quotient, remainder, numerator: ^Int, bits: int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(quotient, numerator); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(quotient, numerator); err != nil { return err; } + return #force_inline internal_int_shrmod(quotient, remainder, numerator, bits); +} +shrmod :: proc { int_shrmod, }; + +int_shr :: proc(dest, source: ^Int, bits: int, allocator := context.allocator) -> (err: Error) { + return #force_inline shrmod(dest, nil, source, bits, allocator); +} +shr :: proc { int_shr, }; + +/* + Shift right by `digits` * _DIGIT_BITS bits. +*/ +int_shr_digit :: proc(quotient: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { + /* + Check that `quotient` is usable. + */ + assert_if_nil(quotient); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(quotient); err != nil { return err; } + return #force_inline internal_int_shr_digit(quotient, digits); +} +shr_digit :: proc { int_shr_digit, }; + +/* + Shift right by a certain bit count with sign extension. +*/ +int_shr_signed :: proc(dest, src: ^Int, bits: int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest, src); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(dest, src); err != nil { return err; } + return #force_inline internal_int_shr_signed(dest, src, bits); +} + +shr_signed :: proc { int_shr_signed, }; + +/* + Shift left by a certain bit count. +*/ +int_shl :: proc(dest, src: ^Int, bits: int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest, src); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(dest, src); err != nil { return err; } + return #force_inline internal_int_shl(dest, src, bits); +} +shl :: proc { int_shl, }; + + +/* + Shift left by `digits` * _DIGIT_BITS bits. +*/ +int_shl_digit :: proc(quotient: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { + /* + Check that `quotient` is usable. + */ + assert_if_nil(quotient); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(quotient); err != nil { return err; } + return #force_inline internal_int_shl_digit(quotient, digits); +} +shl_digit :: proc { int_shl_digit, }; \ No newline at end of file diff --git a/core/math/big/prime.odin b/core/math/big/prime.odin new file mode 100644 index 000000000..388ac3f98 --- /dev/null +++ b/core/math/big/prime.odin @@ -0,0 +1,33 @@ +package math_big + +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + An arbitrary precision mathematics implementation in Odin. + For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3. + The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks. + + This file contains prime finding operations. +*/ + +/* + Determines if an Integer is divisible by one of the _PRIME_TABLE primes. + Returns true if it is, false if not. +*/ +int_prime_is_divisible :: proc(a: ^Int, allocator := context.allocator) -> (res: bool, err: Error) { + assert_if_nil(a); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a); err != nil { return {}, err; } + + rem: DIGIT; + for prime in _private_prime_table { + if rem, err = #force_inline int_mod_digit(a, prime); err != nil { return false, err; } + if rem == 0 { return true, nil; } + } + /* + Default to not divisible. + */ + return false, nil; +} diff --git a/core/math/big/private.odin b/core/math/big/private.odin new file mode 100644 index 000000000..0a5cd6163 --- /dev/null +++ b/core/math/big/private.odin @@ -0,0 +1,1247 @@ +package math_big + +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + An arbitrary precision mathematics implementation in Odin. + For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3. + The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks. + + ============================= Private procedures ============================= + + Private procedures used by the above low-level routines follow. + + Don't call these yourself unless you really know what you're doing. + They include implementations that are optimimal for certain ranges of input only. + + These aren't exported for the same reasons. +*/ + +import "core:intrinsics" +import "core:mem" + +/* + Multiplies |a| * |b| and only computes upto digs digits of result. + HAC pp. 595, Algorithm 14.12 Modified so you can control how + many digits of output are created. +*/ +_private_int_mul :: proc(dest, a, b: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + /* + Can we use the fast multiplier? + */ + if digits < _WARRAY && min(a.used, b.used) < _MAX_COMBA { + return #force_inline _private_int_mul_comba(dest, a, b, digits); + } + + /* + Set up temporary output `Int`, which we'll swap for `dest` when done. + */ + + t := &Int{}; + + if err = internal_grow(t, max(digits, _DEFAULT_DIGIT_COUNT)); err != nil { return err; } + t.used = digits; + + /* + Compute the digits of the product directly. + */ + pa := a.used; + for ix := 0; ix < pa; ix += 1 { + /* + Limit ourselves to `digits` DIGITs of output. + */ + pb := min(b.used, digits - ix); + carry := _WORD(0); + iy := 0; + + /* + Compute the column of the output and propagate the carry. + */ + #no_bounds_check for iy = 0; iy < pb; iy += 1 { + /* + Compute the column as a _WORD. + */ + column := _WORD(t.digit[ix + iy]) + _WORD(a.digit[ix]) * _WORD(b.digit[iy]) + carry; + + /* + The new column is the lower part of the result. + */ + t.digit[ix + iy] = DIGIT(column & _WORD(_MASK)); + + /* + Get the carry word from the result. + */ + carry = column >> _DIGIT_BITS; + } + /* + Set carry if it is placed below digits + */ + if ix + iy < digits { + t.digit[ix + pb] = DIGIT(carry); + } + } + + internal_swap(dest, t); + internal_destroy(t); + return internal_clamp(dest); +} + +/* + Fast (comba) multiplier + + This is the fast column-array [comba] multiplier. It is + designed to compute the columns of the product first + then handle the carries afterwards. This has the effect + of making the nested loops that compute the columns very + simple and schedulable on super-scalar processors. + + This has been modified to produce a variable number of + digits of output so if say only a half-product is required + you don't have to compute the upper half (a feature + required for fast Barrett reduction). + + Based on Algorithm 14.12 on pp.595 of HAC. +*/ +_private_int_mul_comba :: proc(dest, a, b: ^Int, digits: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + /* + Set up array. + */ + W: [_WARRAY]DIGIT = ---; + + /* + Grow the destination as required. + */ + if err = internal_grow(dest, digits); err != nil { return err; } + + /* + Number of output digits to produce. + */ + pa := min(digits, a.used + b.used); + + /* + Clear the carry + */ + _W := _WORD(0); + + ix: int; + for ix = 0; ix < pa; ix += 1 { + tx, ty, iy, iz: int; + + /* + Get offsets into the two bignums. + */ + ty = min(b.used - 1, ix); + tx = ix - ty; + + /* + This is the number of times the loop will iterate, essentially. + while (tx++ < a->used && ty-- >= 0) { ... } + */ + + iy = min(a.used - tx, ty + 1); + + /* + Execute loop. + */ + #no_bounds_check for iz = 0; iz < iy; iz += 1 { + _W += _WORD(a.digit[tx + iz]) * _WORD(b.digit[ty - iz]); + } + + /* + Store term. + */ + W[ix] = DIGIT(_W) & _MASK; + + /* + Make next carry. + */ + _W = _W >> _WORD(_DIGIT_BITS); + } + + /* + Setup dest. + */ + old_used := dest.used; + dest.used = pa; + + /* + Now extract the previous digit [below the carry]. + */ + copy_slice(dest.digit[0:], W[:pa]); + + /* + Clear unused digits [that existed in the old copy of dest]. + */ + internal_zero_unused(dest, old_used); + + /* + Adjust dest.used based on leading zeroes. + */ + + return internal_clamp(dest); +} + +/* + Low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 + Assumes `dest` and `src` to not be `nil`, and `src` to have been initialized. +*/ +_private_int_sqr :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + pa := src.used; + + t := &Int{}; ix, iy: int; + /* + Grow `t` to maximum needed size, or `_DEFAULT_DIGIT_COUNT`, whichever is bigger. + */ + if err = internal_grow(t, max((2 * pa) + 1, _DEFAULT_DIGIT_COUNT)); err != nil { return err; } + t.used = (2 * pa) + 1; + + #no_bounds_check for ix = 0; ix < pa; ix += 1 { + carry := DIGIT(0); + /* + First calculate the digit at 2*ix; calculate double precision result. + */ + r := _WORD(t.digit[ix+ix]) + (_WORD(src.digit[ix]) * _WORD(src.digit[ix])); + + /* + Store lower part in result. + */ + t.digit[ix+ix] = DIGIT(r & _WORD(_MASK)); + /* + Get the carry. + */ + carry = DIGIT(r >> _DIGIT_BITS); + + #no_bounds_check for iy = ix + 1; iy < pa; iy += 1 { + /* + First calculate the product. + */ + r = _WORD(src.digit[ix]) * _WORD(src.digit[iy]); + + /* Now calculate the double precision result. Nóte we use + * addition instead of *2 since it's easier to optimize + */ + r = _WORD(t.digit[ix+iy]) + r + r + _WORD(carry); + + /* + Store lower part. + */ + t.digit[ix+iy] = DIGIT(r & _WORD(_MASK)); + + /* + Get carry. + */ + carry = DIGIT(r >> _DIGIT_BITS); + } + /* + Propagate upwards. + */ + #no_bounds_check for carry != 0 { + r = _WORD(t.digit[ix+iy]) + _WORD(carry); + t.digit[ix+iy] = DIGIT(r & _WORD(_MASK)); + carry = DIGIT(r >> _WORD(_DIGIT_BITS)); + iy += 1; + } + } + + err = internal_clamp(t); + internal_swap(dest, t); + internal_destroy(t); + return err; +} + +/* + The jist of squaring... + You do like mult except the offset of the tmpx [one that starts closer to zero] can't equal the offset of tmpy. + So basically you set up iy like before then you min it with (ty-tx) so that it never happens. + You double all those you add in the inner loop. After that loop you do the squares and add them in. + + Assumes `dest` and `src` not to be `nil` and `src` to have been initialized. +*/ +_private_int_sqr_comba :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + W: [_WARRAY]DIGIT = ---; + + /* + Grow the destination as required. + */ + pa := uint(src.used) + uint(src.used); + if err = internal_grow(dest, int(pa)); err != nil { return err; } + + /* + Number of output digits to produce. + */ + W1 := _WORD(0); + _W : _WORD = ---; + ix := uint(0); + + #no_bounds_check for ; ix < pa; ix += 1 { + /* + Clear counter. + */ + _W = {}; + + /* + Get offsets into the two bignums. + */ + ty := min(uint(src.used) - 1, ix); + tx := ix - ty; + + /* + This is the number of times the loop will iterate, + essentially while (tx++ < a->used && ty-- >= 0) { ... } + */ + iy := min(uint(src.used) - tx, ty + 1); + + /* + Now for squaring, tx can never equal ty. + We halve the distance since they approach at a rate of 2x, + and we have to round because odd cases need to be executed. + */ + iy = min(iy, ((ty - tx) + 1) >> 1 ); + + /* + Execute loop. + */ + #no_bounds_check for iz := uint(0); iz < iy; iz += 1 { + _W += _WORD(src.digit[tx + iz]) * _WORD(src.digit[ty - iz]); + } + + /* + Double the inner product and add carry. + */ + _W = _W + _W + W1; + + /* + Even columns have the square term in them. + */ + if ix & 1 == 0 { + _W += _WORD(src.digit[ix >> 1]) * _WORD(src.digit[ix >> 1]); + } + + /* + Store it. + */ + W[ix] = DIGIT(_W & _WORD(_MASK)); + + /* + Make next carry. + */ + W1 = _W >> _DIGIT_BITS; + } + + /* + Setup dest. + */ + old_used := dest.used; + dest.used = src.used + src.used; + + #no_bounds_check for ix = 0; ix < pa; ix += 1 { + dest.digit[ix] = W[ix] & _MASK; + } + + /* + Clear unused digits [that existed in the old copy of dest]. + */ + internal_zero_unused(dest, old_used); + + return internal_clamp(dest); +} + +/* + Karatsuba squaring, computes `dest` = `src` * `src` using three half-size squarings. + + See comments of `_private_int_mul_karatsuba` for details. + It is essentially the same algorithm but merely tuned to perform recursive squarings. +*/ +_private_int_sqr_karatsuba :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + x0, x1, t1, t2, x0x0, x1x1 := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}, &Int{}; + defer internal_destroy(x0, x1, t1, t2, x0x0, x1x1); + + /* + Min # of digits, divided by two. + */ + B := src.used >> 1; + + /* + Init temps. + */ + if err = internal_grow(x0, B); err != nil { return err; } + if err = internal_grow(x1, src.used - B); err != nil { return err; } + if err = internal_grow(t1, src.used * 2); err != nil { return err; } + if err = internal_grow(t2, src.used * 2); err != nil { return err; } + if err = internal_grow(x0x0, B * 2 ); err != nil { return err; } + if err = internal_grow(x1x1, (src.used - B) * 2); err != nil { return err; } + + /* + Now shift the digits. + */ + x0.used = B; + x1.used = src.used - B; + + #force_inline internal_copy_digits(x0, src, x0.used); + #force_inline mem.copy_non_overlapping(&x1.digit[0], &src.digit[B], size_of(DIGIT) * x1.used); + #force_inline internal_clamp(x0); + + /* + Now calc the products x0*x0 and x1*x1. + */ + if err = internal_sqr(x0x0, x0); err != nil { return err; } + if err = internal_sqr(x1x1, x1); err != nil { return err; } + + /* + Now calc (x1+x0)^2 + */ + if err = internal_add(t1, x0, x1); err != nil { return err; } + if err = internal_sqr(t1, t1); err != nil { return err; } + + /* + Add x0y0 + */ + if err = internal_add(t2, x0x0, x1x1); err != nil { return err; } + if err = internal_sub(t1, t1, t2); err != nil { return err; } + + /* + Shift by B. + */ + if err = internal_shl_digit(t1, B); err != nil { return err; } + if err = internal_shl_digit(x1x1, B * 2); err != nil { return err; } + if err = internal_add(t1, t1, x0x0); err != nil { return err; } + if err = internal_add(dest, t1, x1x1); err != nil { return err; } + + return #force_inline internal_clamp(dest); +} + +/* + Squaring using Toom-Cook 3-way algorithm. + + Setup and interpolation from algorithm SQR_3 in Chung, Jaewook, and M. Anwar Hasan. "Asymmetric squaring formulae." + 18th IEEE Symposium on Computer Arithmetic (ARITH'07). IEEE, 2007. +*/ +_private_int_sqr_toom :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + S0, a0, a1, a2 := &Int{}, &Int{}, &Int{}, &Int{}; + defer destroy(S0, a0, a1, a2); + + /* + Init temps. + */ + if err = internal_zero(S0); err != nil { return err; } + + /* + B + */ + B := src.used / 3; + + /* + a = a2 * x^2 + a1 * x + a0; + */ + if err = internal_grow(a0, B); err != nil { return err; } + if err = internal_grow(a1, B); err != nil { return err; } + if err = internal_grow(a2, src.used - (2 * B)); err != nil { return err; } + + a0.used = B; + a1.used = B; + a2.used = src.used - 2 * B; + + #force_inline mem.copy_non_overlapping(&a0.digit[0], &src.digit[ 0], size_of(DIGIT) * a0.used); + #force_inline mem.copy_non_overlapping(&a1.digit[0], &src.digit[ B], size_of(DIGIT) * a1.used); + #force_inline mem.copy_non_overlapping(&a2.digit[0], &src.digit[2 * B], size_of(DIGIT) * a2.used); + + internal_clamp(a0); + internal_clamp(a1); + internal_clamp(a2); + + /** S0 = a0^2; */ + if err = internal_sqr(S0, a0); err != nil { return err; } + + /** \\S1 = (a2 + a1 + a0)^2 */ + /** \\S2 = (a2 - a1 + a0)^2 */ + /** \\S1 = a0 + a2; */ + /** a0 = a0 + a2; */ + if err = internal_add(a0, a0, a2); err != nil { return err; } + /** \\S2 = S1 - a1; */ + /** b = a0 - a1; */ + if err = internal_sub(dest, a0, a1); err != nil { return err; } + /** \\S1 = S1 + a1; */ + /** a0 = a0 + a1; */ + if err = internal_add(a0, a0, a1); err != nil { return err; } + /** \\S1 = S1^2; */ + /** a0 = a0^2; */ + if err = internal_sqr(a0, a0); err != nil { return err; } + /** \\S2 = S2^2; */ + /** b = b^2; */ + if err = internal_sqr(dest, dest); err != nil { return err; } + /** \\ S3 = 2 * a1 * a2 */ + /** \\S3 = a1 * a2; */ + /** a1 = a1 * a2; */ + if err = internal_mul(a1, a1, a2); err != nil { return err; } + /** \\S3 = S3 << 1; */ + /** a1 = a1 << 1; */ + if err = internal_shl(a1, a1, 1); err != nil { return err; } + /** \\S4 = a2^2; */ + /** a2 = a2^2; */ + if err = internal_sqr(a2, a2); err != nil { return err; } + /** \\ tmp = (S1 + S2)/2 */ + /** \\tmp = S1 + S2; */ + /** b = a0 + b; */ + if err = internal_add(dest, a0, dest); err != nil { return err; } + /** \\tmp = tmp >> 1; */ + /** b = b >> 1; */ + if err = internal_shr(dest, dest, 1); err != nil { return err; } + /** \\ S1 = S1 - tmp - S3 */ + /** \\S1 = S1 - tmp; */ + /** a0 = a0 - b; */ + if err = internal_sub(a0, a0, dest); err != nil { return err; } + /** \\S1 = S1 - S3; */ + /** a0 = a0 - a1; */ + if err = internal_sub(a0, a0, a1); err != nil { return err; } + /** \\S2 = tmp - S4 -S0 */ + /** \\S2 = tmp - S4; */ + /** b = b - a2; */ + if err = internal_sub(dest, dest, a2); err != nil { return err; } + /** \\S2 = S2 - S0; */ + /** b = b - S0; */ + if err = internal_sub(dest, dest, S0); err != nil { return err; } + /** \\P = S4*x^4 + S3*x^3 + S2*x^2 + S1*x + S0; */ + /** P = a2*x^4 + a1*x^3 + b*x^2 + a0*x + S0; */ + if err = internal_shl_digit( a2, 4 * B); err != nil { return err; } + if err = internal_shl_digit( a1, 3 * B); err != nil { return err; } + if err = internal_shl_digit(dest, 2 * B); err != nil { return err; } + if err = internal_shl_digit( a0, 1 * B); err != nil { return err; } + + if err = internal_add(a2, a2, a1); err != nil { return err; } + if err = internal_add(dest, dest, a2); err != nil { return err; } + if err = internal_add(dest, dest, a0); err != nil { return err; } + if err = internal_add(dest, dest, S0); err != nil { return err; } + /** a^2 - P */ + + return #force_inline internal_clamp(dest); +} + +/* + Divide by three (based on routine from MPI and the GMP manual). +*/ +_private_int_div_3 :: proc(quotient, numerator: ^Int, allocator := context.allocator) -> (remainder: DIGIT, err: Error) { + context.allocator = allocator; + + /* + b = 2^_DIGIT_BITS / 3 + */ + b := _WORD(1) << _WORD(_DIGIT_BITS) / _WORD(3); + + q := &Int{}; + if err = internal_grow(q, numerator.used); err != nil { return 0, err; } + q.used = numerator.used; + q.sign = numerator.sign; + + w, t: _WORD; + #no_bounds_check for ix := numerator.used; ix >= 0; ix -= 1 { + w = (w << _WORD(_DIGIT_BITS)) | _WORD(numerator.digit[ix]); + if w >= 3 { + /* + Multiply w by [1/3]. + */ + t = (w * b) >> _WORD(_DIGIT_BITS); + + /* + Now subtract 3 * [w/3] from w, to get the remainder. + */ + w -= t+t+t; + + /* + Fixup the remainder as required since the optimization is not exact. + */ + for w >= 3 { + t += 1; + w -= 3; + } + } else { + t = 0; + } + q.digit[ix] = DIGIT(t); + } + remainder = DIGIT(w); + + /* + [optional] store the quotient. + */ + if quotient != nil { + err = clamp(q); + internal_swap(q, quotient); + } + internal_destroy(q); + return remainder, nil; +} + +/* + Signed Integer Division + + c*b + d == a [i.e. a/b, c=quotient, d=remainder], HAC pp.598 Algorithm 14.20 + + Note that the description in HAC is horribly incomplete. + For example, it doesn't consider the case where digits are removed from 'x' in + the inner loop. + + It also doesn't consider the case that y has fewer than three digits, etc. + The overall algorithm is as described as 14.20 from HAC but fixed to treat these cases. +*/ +_private_int_div_school :: proc(quotient, remainder, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + if err = error_if_immutable(quotient, remainder); err != nil { return err; } + + q, x, y, t1, t2 := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}; + defer internal_destroy(q, x, y, t1, t2); + + if err = internal_grow(q, numerator.used + 2); err != nil { return err; } + q.used = numerator.used + 2; + + if err = internal_init_multi(t1, t2); err != nil { return err; } + if err = internal_copy(x, numerator); err != nil { return err; } + if err = internal_copy(y, denominator); err != nil { return err; } + + /* + Fix the sign. + */ + neg := numerator.sign != denominator.sign; + x.sign = .Zero_or_Positive; + y.sign = .Zero_or_Positive; + + /* + Normalize both x and y, ensure that y >= b/2, [b == 2**MP_DIGIT_BIT] + */ + norm := internal_count_bits(y) % _DIGIT_BITS; + + if norm < _DIGIT_BITS - 1 { + norm = (_DIGIT_BITS - 1) - norm; + if err = internal_shl(x, x, norm); err != nil { return err; } + if err = internal_shl(y, y, norm); err != nil { return err; } + } else { + norm = 0; + } + + /* + Note: HAC does 0 based, so if used==5 then it's 0,1,2,3,4, i.e. use 4 + */ + n := x.used - 1; + t := y.used - 1; + + /* + while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } + y = y*b**{n-t} + */ + + if err = internal_shl_digit(y, n - t); err != nil { return err; } + + c := internal_cmp(x, y); + for c != -1 { + q.digit[n - t] += 1; + if err = internal_sub(x, x, y); err != nil { return err; } + c = internal_cmp(x, y); + } + + /* + Reset y by shifting it back down. + */ + internal_shr_digit(y, n - t); + + /* + Step 3. for i from n down to (t + 1). + */ + #no_bounds_check for i := n; i >= (t + 1); i -= 1 { + if (i > x.used) { continue; } + + /* + step 3.1 if xi == yt then set q{i-t-1} to b-1, otherwise set q{i-t-1} to (xi*b + x{i-1})/yt + */ + if x.digit[i] == y.digit[t] { + q.digit[(i - t) - 1] = 1 << (_DIGIT_BITS - 1); + } else { + + tmp := _WORD(x.digit[i]) << _DIGIT_BITS; + tmp |= _WORD(x.digit[i - 1]); + tmp /= _WORD(y.digit[t]); + if tmp > _WORD(_MASK) { + tmp = _WORD(_MASK); + } + q.digit[(i - t) - 1] = DIGIT(tmp & _WORD(_MASK)); + } + + /* while (q{i-t-1} * (yt * b + y{t-1})) > + xi * b**2 + xi-1 * b + xi-2 + + do q{i-t-1} -= 1; + */ + + iter := 0; + + q.digit[(i - t) - 1] = (q.digit[(i - t) - 1] + 1) & _MASK; + #no_bounds_check for { + q.digit[(i - t) - 1] = (q.digit[(i - t) - 1] - 1) & _MASK; + + /* + Find left hand. + */ + internal_zero(t1); + t1.digit[0] = ((t - 1) < 0) ? 0 : y.digit[t - 1]; + t1.digit[1] = y.digit[t]; + t1.used = 2; + if err = internal_mul(t1, t1, q.digit[(i - t) - 1]); err != nil { return err; } + + /* + Find right hand. + */ + t2.digit[0] = ((i - 2) < 0) ? 0 : x.digit[i - 2]; + t2.digit[1] = x.digit[i - 1]; /* i >= 1 always holds */ + t2.digit[2] = x.digit[i]; + t2.used = 3; + + if t1_t2 := internal_cmp_mag(t1, t2); t1_t2 != 1 { + break; + } + iter += 1; if iter > 100 { return .Max_Iterations_Reached; } + } + + /* + Step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} + */ + if err = int_mul_digit(t1, y, q.digit[(i - t) - 1]); err != nil { return err; } + if err = internal_shl_digit(t1, (i - t) - 1); err != nil { return err; } + if err = internal_sub(x, x, t1); err != nil { return err; } + + /* + if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } + */ + if x.sign == .Negative { + if err = internal_copy(t1, y); err != nil { return err; } + if err = internal_shl_digit(t1, (i - t) - 1); err != nil { return err; } + if err = internal_add(x, x, t1); err != nil { return err; } + + q.digit[(i - t) - 1] = (q.digit[(i - t) - 1] - 1) & _MASK; + } + } + + /* + Now q is the quotient and x is the remainder, [which we have to normalize] + Get sign before writing to c. + */ + z, _ := is_zero(x); + x.sign = .Zero_or_Positive if z else numerator.sign; + + if quotient != nil { + internal_clamp(q); + internal_swap(q, quotient); + quotient.sign = .Negative if neg else .Zero_or_Positive; + } + + if remainder != nil { + if err = internal_shr(x, x, norm); err != nil { return err; } + internal_swap(x, remainder); + } + + return nil; +} + +/* + Slower bit-bang division... also smaller. +*/ +@(deprecated="Use `_int_div_school`, it's 3.5x faster.") +_private_int_div_small :: proc(quotient, remainder, numerator, denominator: ^Int) -> (err: Error) { + + ta, tb, tq, q := &Int{}, &Int{}, &Int{}, &Int{}; + c: int; + + goto_end: for { + if err = internal_one(tq); err != nil { break goto_end; } + + num_bits, _ := count_bits(numerator); + den_bits, _ := count_bits(denominator); + n := num_bits - den_bits; + + if err = abs(ta, numerator); err != nil { break goto_end; } + if err = abs(tb, denominator); err != nil { break goto_end; } + if err = shl(tb, tb, n); err != nil { break goto_end; } + if err = shl(tq, tq, n); err != nil { break goto_end; } + + for n >= 0 { + if c, _ = cmp_mag(ta, tb); c == 0 || c == 1 { + // ta -= tb + if err = sub(ta, ta, tb); err != nil { break goto_end; } + // q += tq + if err = add( q, q, tq); err != nil { break goto_end; } + } + if err = shr1(tb, tb); err != nil { break goto_end; } + if err = shr1(tq, tq); err != nil { break goto_end; } + + n -= 1; + } + + /* + Now q == quotient and ta == remainder. + */ + neg := numerator.sign != denominator.sign; + if quotient != nil { + swap(quotient, q); + z, _ := is_zero(quotient); + quotient.sign = .Negative if neg && !z else .Zero_or_Positive; + } + if remainder != nil { + swap(remainder, ta); + z, _ := is_zero(numerator); + remainder.sign = .Zero_or_Positive if z else numerator.sign; + } + + break goto_end; + } + destroy(ta, tb, tq, q); + return err; +} + + + +/* + Binary split factorial algo due to: http://www.luschny.de/math/factorial/binarysplitfact.html +*/ +_private_int_factorial_binary_split :: proc(res: ^Int, n: int, allocator := context.allocator) -> (err: Error) { + + inner, outer, start, stop, temp := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}; + defer internal_destroy(inner, outer, start, stop, temp); + + if err = internal_one(inner, false, allocator); err != nil { return err; } + if err = internal_one(outer, false, allocator); err != nil { return err; } + + bits_used := int(_DIGIT_TYPE_BITS - intrinsics.count_leading_zeros(n)); + + for i := bits_used; i >= 0; i -= 1 { + start := (n >> (uint(i) + 1)) + 1 | 1; + stop := (n >> uint(i)) + 1 | 1; + if err = _private_int_recursive_product(temp, start, stop, 0, allocator); err != nil { return err; } + if err = internal_mul(inner, inner, temp, allocator); err != nil { return err; } + if err = internal_mul(outer, outer, inner, allocator); err != nil { return err; } + } + shift := n - intrinsics.count_ones(n); + + return internal_shl(res, outer, int(shift), allocator); +} + +/* + Recursive product used by binary split factorial algorithm. +*/ +_private_int_recursive_product :: proc(res: ^Int, start, stop: int, level := int(0), allocator := context.allocator) -> (err: Error) { + t1, t2 := &Int{}, &Int{}; + defer internal_destroy(t1, t2); + + if level > FACTORIAL_BINARY_SPLIT_MAX_RECURSIONS { return .Max_Iterations_Reached; } + + num_factors := (stop - start) >> 1; + if num_factors == 2 { + if err = internal_set(t1, start, false, allocator); err != nil { return err; } + when true { + if err = internal_grow(t2, t1.used + 1, false, allocator); err != nil { return err; } + if err = internal_add(t2, t1, 2, allocator); err != nil { return err; } + } else { + if err = add(t2, t1, 2); err != nil { return err; } + } + return internal_mul(res, t1, t2, allocator); + } + + if num_factors > 1 { + mid := (start + num_factors) | 1; + if err = _private_int_recursive_product(t1, start, mid, level + 1, allocator); err != nil { return err; } + if err = _private_int_recursive_product(t2, mid, stop, level + 1, allocator); err != nil { return err; } + return internal_mul(res, t1, t2, allocator); + } + + if num_factors == 1 { return #force_inline internal_set(res, start, true, allocator); } + + return #force_inline internal_one(res, true, allocator); +} + +/* + Internal function computing both GCD using the binary method, + and, if target isn't `nil`, also LCM. + + Expects the `a` and `b` to have been initialized + and one or both of `res_gcd` or `res_lcm` not to be `nil`. + + If both `a` and `b` are zero, return zero. + If either `a` or `b`, return the other one. + + The `gcd` and `lcm` wrappers have already done this test, + but `gcd_lcm` wouldn't have, so we still need to perform it. + + If neither result is wanted, we have nothing to do. +*/ +_private_int_gcd_lcm :: proc(res_gcd, res_lcm, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + if res_gcd == nil && res_lcm == nil { return nil; } + + /* + We need a temporary because `res_gcd` is allowed to be `nil`. + */ + if a.used == 0 && b.used == 0 { + /* + GCD(0, 0) and LCM(0, 0) are both 0. + */ + if res_gcd != nil { + if err = internal_zero(res_gcd); err != nil { return err; } + } + if res_lcm != nil { + if err = internal_zero(res_lcm); err != nil { return err; } + } + return nil; + } else if a.used == 0 { + /* + We can early out with GCD = B and LCM = 0 + */ + if res_gcd != nil { + if err = internal_abs(res_gcd, b); err != nil { return err; } + } + if res_lcm != nil { + if err = internal_zero(res_lcm); err != nil { return err; } + } + return nil; + } else if b.used == 0 { + /* + We can early out with GCD = A and LCM = 0 + */ + if res_gcd != nil { + if err = internal_abs(res_gcd, a); err != nil { return err; } + } + if res_lcm != nil { + if err = internal_zero(res_lcm); err != nil { return err; } + } + return nil; + } + + temp_gcd_res := &Int{}; + defer internal_destroy(temp_gcd_res); + + /* + If neither `a` or `b` was zero, we need to compute `gcd`. + Get copies of `a` and `b` we can modify. + */ + u, v := &Int{}, &Int{}; + defer internal_destroy(u, v); + if err = internal_copy(u, a); err != nil { return err; } + if err = internal_copy(v, b); err != nil { return err; } + + /* + Must be positive for the remainder of the algorithm. + */ + u.sign = .Zero_or_Positive; v.sign = .Zero_or_Positive; + + /* + B1. Find the common power of two for `u` and `v`. + */ + u_lsb, _ := internal_count_lsb(u); + v_lsb, _ := internal_count_lsb(v); + k := min(u_lsb, v_lsb); + + if k > 0 { + /* + Divide the power of two out. + */ + if err = internal_shr(u, u, k); err != nil { return err; } + if err = internal_shr(v, v, k); err != nil { return err; } + } + + /* + Divide any remaining factors of two out. + */ + if u_lsb != k { + if err = internal_shr(u, u, u_lsb - k); err != nil { return err; } + } + if v_lsb != k { + if err = internal_shr(v, v, v_lsb - k); err != nil { return err; } + } + + for v.used != 0 { + /* + Make sure `v` is the largest. + */ + if internal_cmp_mag(u, v) == 1 { + /* + Swap `u` and `v` to make sure `v` is >= `u`. + */ + internal_swap(u, v); + } + + /* + Subtract smallest from largest. + */ + if err = internal_sub(v, v, u); err != nil { return err; } + + /* + Divide out all factors of two. + */ + b, _ := internal_count_lsb(v); + if err = internal_shr(v, v, b); err != nil { return err; } + } + + /* + Multiply by 2**k which we divided out at the beginning. + */ + if err = internal_shl(temp_gcd_res, u, k); err != nil { return err; } + temp_gcd_res.sign = .Zero_or_Positive; + + /* + We've computed `gcd`, either the long way, or because one of the inputs was zero. + If we don't want `lcm`, we're done. + */ + if res_lcm == nil { + internal_swap(temp_gcd_res, res_gcd); + return nil; + } + + /* + Computes least common multiple as `|a*b|/gcd(a,b)` + Divide the smallest by the GCD. + */ + if internal_cmp_mag(a, b) == -1 { + /* + Store quotient in `t2` such that `t2 * b` is the LCM. + */ + if err = internal_div(res_lcm, a, temp_gcd_res); err != nil { return err; } + err = internal_mul(res_lcm, res_lcm, b); + } else { + /* + Store quotient in `t2` such that `t2 * a` is the LCM. + */ + if err = internal_div(res_lcm, a, temp_gcd_res); err != nil { return err; } + err = internal_mul(res_lcm, res_lcm, b); + } + + if res_gcd != nil { + internal_swap(temp_gcd_res, res_gcd); + } + + /* + Fix the sign to positive and return. + */ + res_lcm.sign = .Zero_or_Positive; + return err; +} + +/* + Internal implementation of log. + Assumes `a` not to be `nil` and to have been initialized. +*/ +_private_int_log :: proc(a: ^Int, base: DIGIT, allocator := context.allocator) -> (res: int, err: Error) { + bracket_low, bracket_high, bracket_mid, t, bi_base := &Int{}, &Int{}, &Int{}, &Int{}, &Int{}; + defer destroy(bracket_low, bracket_high, bracket_mid, t, bi_base); + + ic := #force_inline internal_cmp(a, base); + if ic == -1 || ic == 0 { + return 1 if ic == 0 else 0, nil; + } + + if err = internal_set(bi_base, base, true, allocator); err != nil { return -1, err; } + if err = internal_clear(bracket_mid, false, allocator); err != nil { return -1, err; } + if err = internal_clear(t, false, allocator); err != nil { return -1, err; } + if err = internal_one(bracket_low, false, allocator); err != nil { return -1, err; } + if err = internal_set(bracket_high, base, false, allocator); err != nil { return -1, err; } + + low := 0; high := 1; + + /* + A kind of Giant-step/baby-step algorithm. + Idea shamelessly stolen from https://programmingpraxis.com/2010/05/07/integer-logarithms/2/ + The effect is asymptotic, hence needs benchmarks to test if the Giant-step should be skipped + for small n. + */ + + for { + /* + Iterate until `a` is bracketed between low + high. + */ + if #force_inline internal_cmp(bracket_high, a) != -1 { break; } + + low = high; + if err = #force_inline internal_copy(bracket_low, bracket_high); err != nil { return -1, err; } + high <<= 1; + if err = #force_inline internal_sqr(bracket_high, bracket_high); err != nil { return -1, err; } + } + + for (high - low) > 1 { + mid := (high + low) >> 1; + + if err = #force_inline internal_pow(t, bi_base, mid - low); err != nil { return -1, err; } + + if err = #force_inline internal_mul(bracket_mid, bracket_low, t); err != nil { return -1, err; } + + mc := #force_inline internal_cmp(a, bracket_mid); + switch mc { + case -1: + high = mid; + internal_swap(bracket_mid, bracket_high); + case 0: + return mid, nil; + case 1: + low = mid; + internal_swap(bracket_mid, bracket_low); + } + } + + fc := #force_inline internal_cmp(bracket_high, a); + res = high if fc == 0 else low; + + return; +} + +/* + Returns the log2 of an `Int`. + Assumes `a` not to be `nil` and to have been initialized. + Also assumes `base` is a power of two. +*/ +_private_log_power_of_two :: proc(a: ^Int, base: DIGIT) -> (log: int, err: Error) { + base := base; + y: int; + for y = 0; base & 1 == 0; { + y += 1; + base >>= 1; + } + log = internal_count_bits(a); + return (log - 1) / y, err; +} + +/* + Copies DIGITs from `src` to `dest`. + Assumes `src` and `dest` to not be `nil` and have been initialized. +*/ +_private_copy_digits :: proc(dest, src: ^Int, digits: int) -> (err: Error) { + digits := digits; + /* + If dest == src, do nothing + */ + if dest == src { return nil; } + + digits = min(digits, len(src.digit), len(dest.digit)); + mem.copy_non_overlapping(&dest.digit[0], &src.digit[0], size_of(DIGIT) * digits); + return nil; +} + +/* + ======================== End of private procedures ======================= + + =============================== Private tables =============================== + + Tables used by `internal_*` and `_*`. +*/ + +_private_prime_table := []DIGIT{ + 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, + 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, + 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, + 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083, + 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, + 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, + 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, + 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, + + 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, + 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, + 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, + 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, + 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, + 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, + 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, + 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, + + 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, + 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, + 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, + 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, + 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, + 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, + 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, + 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, + + 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, + 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, + 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, + 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, + 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, + 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, + 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, + 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653, +}; + +when MATH_BIG_FORCE_64_BIT || (!MATH_BIG_FORCE_32_BIT && size_of(rawptr) == 8) { + _factorial_table := [35]_WORD{ +/* f(00): */ 1, +/* f(01): */ 1, +/* f(02): */ 2, +/* f(03): */ 6, +/* f(04): */ 24, +/* f(05): */ 120, +/* f(06): */ 720, +/* f(07): */ 5_040, +/* f(08): */ 40_320, +/* f(09): */ 362_880, +/* f(10): */ 3_628_800, +/* f(11): */ 39_916_800, +/* f(12): */ 479_001_600, +/* f(13): */ 6_227_020_800, +/* f(14): */ 87_178_291_200, +/* f(15): */ 1_307_674_368_000, +/* f(16): */ 20_922_789_888_000, +/* f(17): */ 355_687_428_096_000, +/* f(18): */ 6_402_373_705_728_000, +/* f(19): */ 121_645_100_408_832_000, +/* f(20): */ 2_432_902_008_176_640_000, +/* f(21): */ 51_090_942_171_709_440_000, +/* f(22): */ 1_124_000_727_777_607_680_000, +/* f(23): */ 25_852_016_738_884_976_640_000, +/* f(24): */ 620_448_401_733_239_439_360_000, +/* f(25): */ 15_511_210_043_330_985_984_000_000, +/* f(26): */ 403_291_461_126_605_635_584_000_000, +/* f(27): */ 10_888_869_450_418_352_160_768_000_000, +/* f(28): */ 304_888_344_611_713_860_501_504_000_000, +/* f(29): */ 8_841_761_993_739_701_954_543_616_000_000, +/* f(30): */ 265_252_859_812_191_058_636_308_480_000_000, +/* f(31): */ 8_222_838_654_177_922_817_725_562_880_000_000, +/* f(32): */ 263_130_836_933_693_530_167_218_012_160_000_000, +/* f(33): */ 8_683_317_618_811_886_495_518_194_401_280_000_000, +/* f(34): */ 295_232_799_039_604_140_847_618_609_643_520_000_000, + }; +} else { + _factorial_table := [21]_WORD{ +/* f(00): */ 1, +/* f(01): */ 1, +/* f(02): */ 2, +/* f(03): */ 6, +/* f(04): */ 24, +/* f(05): */ 120, +/* f(06): */ 720, +/* f(07): */ 5_040, +/* f(08): */ 40_320, +/* f(09): */ 362_880, +/* f(10): */ 3_628_800, +/* f(11): */ 39_916_800, +/* f(12): */ 479_001_600, +/* f(13): */ 6_227_020_800, +/* f(14): */ 87_178_291_200, +/* f(15): */ 1_307_674_368_000, +/* f(16): */ 20_922_789_888_000, +/* f(17): */ 355_687_428_096_000, +/* f(18): */ 6_402_373_705_728_000, +/* f(19): */ 121_645_100_408_832_000, +/* f(20): */ 2_432_902_008_176_640_000, + }; +}; + +/* + ========================= End of private tables ======================== +*/ \ No newline at end of file diff --git a/core/math/big/public.odin b/core/math/big/public.odin new file mode 100644 index 000000000..b7a00fbc0 --- /dev/null +++ b/core/math/big/public.odin @@ -0,0 +1,559 @@ +package math_big + +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + An arbitrary precision mathematics implementation in Odin. + For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3. + The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks. + + This file contains basic arithmetic operations like `add`, `sub`, `mul`, `div`, ... +*/ + +/* + =========================== + User-level routines + =========================== +*/ + +/* + High-level addition. Handles sign. +*/ +int_add :: proc(dest, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest, a, b); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(dest, a, b); err != nil { return err; } + /* + All parameters have been initialized. + */ + return #force_inline internal_int_add_signed(dest, a, b); +} + +/* + Adds the unsigned `DIGIT` immediate to an `Int`, + such that the `DIGIT` doesn't have to be turned into an `Int` first. + + dest = a + digit; +*/ +int_add_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest, a); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a); err != nil { return err; } + /* + Grow destination as required. + */ + if err = grow(dest, a.used + 1); err != nil { return err; } + + /* + All parameters have been initialized. + */ + return #force_inline internal_int_add_digit(dest, a, digit); +} + +/* + High-level subtraction, dest = number - decrease. Handles signs. +*/ +int_sub :: proc(dest, number, decrease: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest, number, decrease); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(dest, number, decrease); err != nil { return err; } + /* + All parameters have been initialized. + */ + return #force_inline internal_int_sub_signed(dest, number, decrease); +} + +/* + Adds the unsigned `DIGIT` immediate to an `Int`, + such that the `DIGIT` doesn't have to be turned into an `Int` first. + + dest = a - digit; +*/ +int_sub_digit :: proc(dest, a: ^Int, digit: DIGIT, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest, a); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a); err != nil { return err; } + /* + Grow destination as required. + */ + if err = grow(dest, a.used + 1); err != nil { return err; } + + /* + All parameters have been initialized. + */ + return #force_inline internal_int_sub_digit(dest, a, digit); +} + +/* + dest = src / 2 + dest = src >> 1 +*/ +int_halve :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest, src); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(dest, src); err != nil { return err; } + /* + Grow destination as required. + */ + if dest != src { if err = grow(dest, src.used + 1); err != nil { return err; } } + + return #force_inline internal_int_shr1(dest, src); +} +halve :: proc { int_halve, }; +shr1 :: halve; + +/* + dest = src * 2 + dest = src << 1 +*/ +int_double :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest, src); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(dest, src); err != nil { return err; } + /* + Grow destination as required. + */ + if dest != src { if err = grow(dest, src.used + 1); err != nil { return err; } } + + return #force_inline internal_int_shl1(dest, src); +} +double :: proc { int_double, }; +shl1 :: double; + +/* + Multiply by a DIGIT. +*/ +int_mul_digit :: proc(dest, src: ^Int, multiplier: DIGIT, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest, src); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(src, dest); err != nil { return err; } + + return #force_inline internal_int_mul_digit(dest, src, multiplier); +} + +/* + High level multiplication (handles sign). +*/ +int_mul :: proc(dest, src, multiplier: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest, src, multiplier); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(dest, src, multiplier); err != nil { return err; } + + return #force_inline internal_int_mul(dest, src, multiplier); +} + +mul :: proc { int_mul, int_mul_digit, }; + +sqr :: proc(dest, src: ^Int) -> (err: Error) { return mul(dest, src, src); } + +/* + divmod. + Both the quotient and remainder are optional and may be passed a nil. +*/ +int_divmod :: proc(quotient, remainder, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + /* + Early out if neither of the results is wanted. + */ + if quotient == nil && remainder == nil { return nil; } + if err = internal_clear_if_uninitialized(numerator, denominator); err != nil { return err; } + + return #force_inline internal_divmod(quotient, remainder, numerator, denominator); +} + +int_divmod_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT, allocator := context.allocator) -> (remainder: DIGIT, err: Error) { + assert_if_nil(quotient, numerator); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(numerator); err != nil { return 0, err; } + + return #force_inline internal_divmod(quotient, numerator, denominator); +} +divmod :: proc{ int_divmod, int_divmod_digit, }; + +int_div :: proc(quotient, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(quotient, numerator, denominator); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(numerator, denominator); err != nil { return err; } + + return #force_inline internal_divmod(quotient, nil, numerator, denominator); +} + +int_div_digit :: proc(quotient, numerator: ^Int, denominator: DIGIT, allocator := context.allocator) -> (err: Error) { + assert_if_nil(quotient, numerator); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(numerator); err != nil { return err; } + + remainder: DIGIT; + remainder, err = #force_inline internal_divmod(quotient, numerator, denominator); + return err; +} +div :: proc { int_div, int_div_digit, }; + +/* + remainder = numerator % denominator. + 0 <= remainder < denominator if denominator > 0 + denominator < remainder <= 0 if denominator < 0 +*/ +int_mod :: proc(remainder, numerator, denominator: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(remainder, numerator, denominator); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(numerator, denominator); err != nil { return err; } + + return #force_inline internal_int_mod(remainder, numerator, denominator); +} + +int_mod_digit :: proc(numerator: ^Int, denominator: DIGIT, allocator := context.allocator) -> (remainder: DIGIT, err: Error) { + return #force_inline internal_divmod(nil, numerator, denominator, allocator); +} + +mod :: proc { int_mod, int_mod_digit, }; + +/* + remainder = (number + addend) % modulus. +*/ +int_addmod :: proc(remainder, number, addend, modulus: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(remainder, number, addend); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(number, addend, modulus); err != nil { return err; } + + return #force_inline internal_addmod(remainder, number, addend, modulus); +} +addmod :: proc { int_addmod, }; + +/* + remainder = (number - decrease) % modulus. +*/ +int_submod :: proc(remainder, number, decrease, modulus: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(remainder, number, decrease); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(number, decrease, modulus); err != nil { return err; } + + return #force_inline internal_submod(remainder, number, decrease, modulus); +} +submod :: proc { int_submod, }; + +/* + remainder = (number * multiplicand) % modulus. +*/ +int_mulmod :: proc(remainder, number, multiplicand, modulus: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(remainder, number, multiplicand); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(number, multiplicand, modulus); err != nil { return err; } + + return #force_inline internal_mulmod(remainder, number, multiplicand, modulus); +} +mulmod :: proc { int_mulmod, }; + +/* + remainder = (number * number) % modulus. +*/ +int_sqrmod :: proc(remainder, number, modulus: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(remainder, number, modulus); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(number, modulus); err != nil { return err; } + + return #force_inline internal_sqrmod(remainder, number, modulus); +} +sqrmod :: proc { int_sqrmod, }; + + +int_factorial :: proc(res: ^Int, n: int, allocator := context.allocator) -> (err: Error) { + if n < 0 || n > FACTORIAL_MAX_N { return .Invalid_Argument; } + assert_if_nil(res); + + return #force_inline internal_int_factorial(res, n, allocator); +} +factorial :: proc { int_factorial, }; + + +/* + Number of ways to choose `k` items from `n` items. + Also known as the binomial coefficient. + + TODO: Speed up. + + Could be done faster by reusing code from factorial and reusing the common "prefix" results for n!, k! and n-k! + We know that n >= k, otherwise we early out with res = 0. + + So: + n-k, keep result + n, start from previous result + k, start from previous result + +*/ +int_choose_digit :: proc(res: ^Int, n, k: int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(res); + context.allocator = allocator; + + if n < 0 || n > FACTORIAL_MAX_N { return .Invalid_Argument; } + if k > n { return internal_zero(res); } + + /* + res = n! / (k! * (n - k)!) + */ + n_fac, k_fac, n_minus_k_fac := &Int{}, &Int{}, &Int{}; + defer internal_destroy(n_fac, k_fac, n_minus_k_fac); + + if err = #force_inline internal_int_factorial(n_minus_k_fac, n - k); err != nil { return err; } + if err = #force_inline internal_int_factorial(k_fac, k); err != nil { return err; } + if err = #force_inline internal_mul(k_fac, k_fac, n_minus_k_fac); err != nil { return err; } + + if err = #force_inline internal_int_factorial(n_fac, n); err != nil { return err; } + if err = #force_inline internal_div(res, n_fac, k_fac); err != nil { return err; } + + return err; +} +choose :: proc { int_choose_digit, }; + +/* + Function computing both GCD and (if target isn't `nil`) also LCM. +*/ +int_gcd_lcm :: proc(res_gcd, res_lcm, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + if res_gcd == nil && res_lcm == nil { return nil; } + assert_if_nil(a, b); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a, b); err != nil { return err; } + return #force_inline internal_int_gcd_lcm(res_gcd, res_lcm, a, b); +} +gcd_lcm :: proc { int_gcd_lcm, }; + +/* + Greatest Common Divisor. +*/ +int_gcd :: proc(res, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + return #force_inline int_gcd_lcm(res, nil, a, b, allocator); +} +gcd :: proc { int_gcd, }; + +/* + Least Common Multiple. +*/ +int_lcm :: proc(res, a, b: ^Int, allocator := context.allocator) -> (err: Error) { + return #force_inline int_gcd_lcm(nil, res, a, b, allocator); +} +lcm :: proc { int_lcm, }; + +/* + remainder = numerator % (1 << bits) +*/ +int_mod_bits :: proc(remainder, numerator: ^Int, bits: int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(remainder, numerator); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(remainder, numerator); err != nil { return err; } + if bits < 0 { return .Invalid_Argument; } + + return #force_inline internal_int_mod_bits(remainder, numerator, bits); +} + +mod_bits :: proc { int_mod_bits, }; + + +/* + Logs and roots and such. +*/ +int_log :: proc(a: ^Int, base: DIGIT, allocator := context.allocator) -> (res: int, err: Error) { + assert_if_nil(a); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a); err != nil { return 0, err; } + + return #force_inline internal_int_log(a, base); +} + +digit_log :: proc(a: DIGIT, base: DIGIT) -> (log: int, err: Error) { + return #force_inline internal_digit_log(a, base); +} +log :: proc { int_log, digit_log, }; + +/* + Calculate `dest = base^power` using a square-multiply algorithm. +*/ +int_pow :: proc(dest, base: ^Int, power: int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest, base); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(dest, base); err != nil { return err; } + + return #force_inline internal_int_pow(dest, base, power); +} + +/* + Calculate `dest = base^power` using a square-multiply algorithm. +*/ +int_pow_int :: proc(dest: ^Int, base, power: int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest); + + return #force_inline internal_pow(dest, base, power, allocator); +} + +pow :: proc { int_pow, int_pow_int, small_pow, }; +exp :: pow; + +small_pow :: proc(base: _WORD, exponent: _WORD) -> (result: _WORD) { + return #force_inline internal_small_pow(base, exponent); +} + +/* + This function is less generic than `root_n`, simpler and faster. +*/ +int_sqrt :: proc(dest, src: ^Int, allocator := context.allocator) -> (err: Error) { + assert_if_nil(dest, src); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(dest, src); err != nil { return err; } + + return #force_inline internal_int_sqrt(dest, src); +} +sqrt :: proc { int_sqrt, }; + + +/* + Find the nth root of an Integer. + Result found such that `(dest)**n <= src` and `(dest+1)**n > src` + + This algorithm uses Newton's approximation `x[i+1] = x[i] - f(x[i])/f'(x[i])`, + which will find the root in `log(n)` time where each step involves a fair bit. +*/ +int_root_n :: proc(dest, src: ^Int, n: int, allocator := context.allocator) -> (err: Error) { + context.allocator = allocator; + + /* + Fast path for n == 2. + */ + if n == 2 { return sqrt(dest, src); } + + assert_if_nil(dest, src); + /* + Initialize dest + src if needed. + */ + if err = internal_clear_if_uninitialized(dest, src); err != nil { return err; } + + return #force_inline internal_int_root_n(dest, src, n); +} +root_n :: proc { int_root_n, }; + +/* + Comparison routines. +*/ + +int_is_initialized :: proc(a: ^Int) -> bool { + if a == nil { return false; } + + return #force_inline internal_int_is_initialized(a); +} + +int_is_zero :: proc(a: ^Int, allocator := context.allocator) -> (zero: bool, err: Error) { + assert_if_nil(a); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a); err != nil { return false, err; } + + return #force_inline internal_is_zero(a), nil; +} + +int_is_positive :: proc(a: ^Int, allocator := context.allocator) -> (positive: bool, err: Error) { + assert_if_nil(a); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a); err != nil { return false, err; } + + return #force_inline internal_is_positive(a), nil; +} + +int_is_negative :: proc(a: ^Int, allocator := context.allocator) -> (negative: bool, err: Error) { + assert_if_nil(a); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a); err != nil { return false, err; } + + return #force_inline internal_is_negative(a), nil; +} + +int_is_even :: proc(a: ^Int, allocator := context.allocator) -> (even: bool, err: Error) { + assert_if_nil(a); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a); err != nil { return false, err; } + + return #force_inline internal_is_even(a), nil; +} + +int_is_odd :: proc(a: ^Int, allocator := context.allocator) -> (odd: bool, err: Error) { + assert_if_nil(a); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a); err != nil { return false, err; } + + return #force_inline internal_is_odd(a), nil; +} + +platform_int_is_power_of_two :: #force_inline proc(a: int) -> bool { + return ((a) != 0) && (((a) & ((a) - 1)) == 0); +} + +int_is_power_of_two :: proc(a: ^Int, allocator := context.allocator) -> (res: bool, err: Error) { + assert_if_nil(a); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a); err != nil { return false, err; } + + return #force_inline internal_is_power_of_two(a), nil; +} + +/* + Compare two `Int`s, signed. +*/ +int_compare :: proc(a, b: ^Int, allocator := context.allocator) -> (comparison: int, err: Error) { + assert_if_nil(a, b); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a, b); err != nil { return 0, err; } + + return #force_inline internal_cmp(a, b), nil; +} +int_cmp :: int_compare; + +/* + Compare an `Int` to an unsigned number upto the size of the backing type. +*/ +int_compare_digit :: proc(a: ^Int, b: DIGIT, allocator := context.allocator) -> (comparison: int, err: Error) { + assert_if_nil(a); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a); err != nil { return 0, err; } + + return #force_inline internal_cmp_digit(a, b), nil; +} +int_cmp_digit :: int_compare_digit; + +/* + Compare the magnitude of two `Int`s, unsigned. +*/ +int_compare_magnitude :: proc(a, b: ^Int, allocator := context.allocator) -> (res: int, err: Error) { + assert_if_nil(a, b); + context.allocator = allocator; + + if err = internal_clear_if_uninitialized(a, b); err != nil { return 0, err; } + + return #force_inline internal_cmp_mag(a, b), nil; +} \ No newline at end of file diff --git a/core/math/big/radix.odin b/core/math/big/radix.odin new file mode 100644 index 000000000..01c0489d8 --- /dev/null +++ b/core/math/big/radix.odin @@ -0,0 +1,485 @@ +package math_big + +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + An arbitrary precision mathematics implementation in Odin. + For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3. + The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks. + + This file contains radix conversions, `string_to_int` (atoi) and `int_to_string` (itoa). + + TODO: + - Use Barrett reduction for non-powers-of-two. + - Also look at extracting and splatting several digits at once. +*/ + +import "core:intrinsics" +import "core:mem" + +/* + This version of `itoa` allocates one behalf of the caller. The caller must free the string. +*/ +int_itoa_string :: proc(a: ^Int, radix := i8(-1), zero_terminate := false, allocator := context.allocator) -> (res: string, err: Error) { + assert_if_nil(a); + context.allocator = allocator; + + a := a; radix := radix; + if err = clear_if_uninitialized(a); err != nil { return "", err; } + /* + Radix defaults to 10. + */ + radix = radix if radix > 0 else 10; + + /* + TODO: If we want to write a prefix for some of the radixes, we can oversize the buffer. + Then after the digits are written and the string is reversed + */ + + /* + Calculate the size of the buffer we need, and + */ + size: int; + /* + Exit if calculating the size returned an error. + */ + if size, err = radix_size(a, radix, zero_terminate); err != nil { + return "", err; + } + + /* + Allocate the buffer we need. + */ + buffer := make([]u8, size); + + /* + Write the digits out into the buffer. + */ + written: int; + written, err = int_itoa_raw(a, radix, buffer, size, zero_terminate); + + return string(buffer[:written]), err; +} + +/* + This version of `itoa` allocates one behalf of the caller. The caller must free the string. +*/ +int_itoa_cstring :: proc(a: ^Int, radix := i8(-1), allocator := context.allocator) -> (res: cstring, err: Error) { + assert_if_nil(a); + context.allocator = allocator; + + a := a; radix := radix; + if err = clear_if_uninitialized(a); err != nil { return "", err; } + /* + Radix defaults to 10. + */ + radix = radix if radix > 0 else 10; + + s: string; + s, err = int_itoa_string(a, radix, true); + return cstring(raw_data(s)), err; +} + +/* + A low-level `itoa` using a caller-provided buffer. `itoa_string` and `itoa_cstring` use this. + You can use also use it if you want to pre-allocate a buffer and optionally reuse it. + + Use `radix_size` or `radix_size_estimate` to determine a buffer size big enough. + + You can pass the output of `radix_size` to `size` if you've previously called it to size + the output buffer. If you haven't, this routine will call it. This way it knows if the buffer + is the appropriate size, and we can write directly in place without a reverse step at the end. + + === === === IMPORTANT === === === + + If you determined the buffer size using `radix_size_estimate`, or have a buffer + that you reuse that you know is large enough, don't pass this size unless you know what you are doing, + because we will always write backwards starting at last byte of the buffer. + + Keep in mind that if you set `size` yourself and it's smaller than the buffer, + it'll result in buffer overflows, as we use it to avoid reversing at the end + and having to perform a buffer overflow check each character. +*/ +int_itoa_raw :: proc(a: ^Int, radix: i8, buffer: []u8, size := int(-1), zero_terminate := false) -> (written: int, err: Error) { + assert_if_nil(a); + a := a; radix := radix; size := size; + if err = clear_if_uninitialized(a); err != nil { return 0, err; } + /* + Radix defaults to 10. + */ + radix = radix if radix > 0 else 10; + if radix < 2 || radix > 64 { + return 0, .Invalid_Argument; + } + + /* + We weren't given a size. Let's compute it. + */ + if size == -1 { + if size, err = radix_size(a, radix, zero_terminate); err != nil { + return 0, err; + } + } + + /* + Early exit if the buffer we were given is too small. + */ + available := len(buffer); + if available < size { + return 0, .Buffer_Overflow; + } + /* + Fast path for when `Int` == 0 or the entire `Int` fits in a single radix digit. + */ + z, _ := is_zero(a); + if z || (a.used == 1 && a.digit[0] < DIGIT(radix)) { + if zero_terminate { + available -= 1; + buffer[available] = 0; + } + available -= 1; + buffer[available] = RADIX_TABLE[a.digit[0]]; + + if n, _ := is_neg(a); n { + available -= 1; + buffer[available] = '-'; + } + + /* + If we overestimated the size, we need to move the buffer left. + */ + written = len(buffer) - available; + if written < size { + diff := size - written; + mem.copy(&buffer[0], &buffer[diff], written); + } + return written, nil; + } + + /* + Fast path for when `Int` fits within a `_WORD`. + */ + if a.used == 1 || a.used == 2 { + if zero_terminate { + available -= 1; + buffer[available] = 0; + } + + val := _WORD(a.digit[1]) << _DIGIT_BITS + _WORD(a.digit[0]); + for val > 0 { + q := val / _WORD(radix); + available -= 1; + buffer[available] = RADIX_TABLE[val - (q * _WORD(radix))]; + + val = q; + } + if n, _ := is_neg(a); n { + available -= 1; + buffer[available] = '-'; + } + + /* + If we overestimated the size, we need to move the buffer left. + */ + written = len(buffer) - available; + if written < size { + diff := size - written; + mem.copy(&buffer[0], &buffer[diff], written); + } + return written, nil; + } + + /* + Fast path for radixes that are a power of two. + */ + if is_power_of_two(int(radix)) { + if zero_terminate { + available -= 1; + buffer[available] = 0; + } + + shift, count: int; + // mask := _WORD(radix - 1); + shift, err = log(DIGIT(radix), 2); + count, err = count_bits(a); + digit: _WORD; + + for offset := 0; offset < count; offset += shift { + bits_to_get := int(min(count - offset, shift)); + + digit, err = int_bitfield_extract(a, offset, bits_to_get); + if err != nil { + return len(buffer) - available, .Invalid_Argument; + } + available -= 1; + buffer[available] = RADIX_TABLE[digit]; + } + + if n, _ := is_neg(a); n { + available -= 1; + buffer[available] = '-'; + } + + /* + If we overestimated the size, we need to move the buffer left. + */ + written = len(buffer) - available; + if written < size { + diff := size - written; + mem.copy(&buffer[0], &buffer[diff], written); + } + return written, nil; + } + + return _itoa_raw_full(a, radix, buffer, zero_terminate); +} + +itoa :: proc{int_itoa_string, int_itoa_raw}; +int_to_string :: int_itoa_string; +int_to_cstring :: int_itoa_cstring; + +/* + Read a string [ASCII] in a given radix. +*/ +int_atoi :: proc(res: ^Int, input: string, radix: i8, allocator := context.allocator) -> (err: Error) { + assert_if_nil(res); + input := input; + context.allocator = allocator; + + /* + Make sure the radix is ok. + */ + + if radix < 2 || radix > 64 { return .Invalid_Argument; } + + /* + Set the integer to the default of zero. + */ + if err = internal_zero(res); err != nil { return err; } + + /* + We'll interpret an empty string as zero. + */ + if len(input) == 0 { return nil; } + + /* + If the leading digit is a minus set the sign to negative. + Given the above early out, the length should be at least 1. + */ + sign := Sign.Zero_or_Positive; + if input[0] == '-' { + input = input[1:]; + sign = .Negative; + } + + /* + Process each digit of the string. + */ + ch: rune; + for len(input) > 0 { + /* if the radix <= 36 the conversion is case insensitive + * this allows numbers like 1AB and 1ab to represent the same value + * [e.g. in hex] + */ + + ch = rune(input[0]); + if radix <= 36 && ch >= 'a' && ch <= 'z' { + ch -= 32; // 'a' - 'A' + } + + pos := ch - '+'; + if RADIX_TABLE_REVERSE_SIZE <= pos { + break; + } + y := RADIX_TABLE_REVERSE[pos]; + /* if the char was found in the map + * and is less than the given radix add it + * to the number, otherwise exit the loop. + */ + if y >= u8(radix) { + break; + } + + if err = internal_mul(res, res, DIGIT(radix)); err != nil { return err; } + if err = internal_add(res, res, DIGIT(y)); err != nil { return err; } + + input = input[1:]; + } + /* + If an illegal character was found, fail. + */ + if len(input) > 0 && ch != 0 && ch != '\r' && ch != '\n' { + return .Invalid_Argument; + } + /* + Set the sign only if res != 0. + */ + if res.used > 0 { + res.sign = sign; + } + + return nil; +} + + +atoi :: proc { int_atoi, }; + +/* + We size for `string` by default. +*/ +radix_size :: proc(a: ^Int, radix: i8, zero_terminate := false, allocator := context.allocator) -> (size: int, err: Error) { + a := a; + assert_if_nil(a); + + if radix < 2 || radix > 64 { return -1, .Invalid_Argument; } + if err = clear_if_uninitialized(a); err != nil { return {}, err; } + + if internal_is_zero(a) { + if zero_terminate { + return 2, nil; + } + return 1, nil; + } + + if internal_is_power_of_two(a) { + /* + Calculate `log` on a temporary "copy" with its sign set to positive. + */ + t := &Int{ + used = a.used, + sign = .Zero_or_Positive, + digit = a.digit, + }; + + if size, err = internal_log(t, DIGIT(radix)); err != nil { return {}, err; } + } else { + la, k := &Int{}, &Int{}; + defer internal_destroy(la, k); + + /* la = floor(log_2(a)) + 1 */ + bit_count := internal_count_bits(a); + if err = internal_set(la, bit_count); err != nil { return {}, err; } + + /* k = floor(2^29/log_2(radix)) + 1 */ + lb := _log_bases; + if err = internal_set(k, lb[radix]); err != nil { return {}, err; } + + /* n = floor((la * k) / 2^29) + 1 */ + if err = internal_mul(k, la, k); err != nil { return 0, err; } + if err = internal_shr(k, k, _RADIX_SIZE_SCALE); err != nil { return {}, err; } + + /* The "+1" here is the "+1" in "floor((la * k) / 2^29) + 1" */ + /* n = n + 1 + EOS + sign */ + size_, _ := internal_get(k, u128); + size = int(size_); + } + + /* + log truncates to zero, so we need to add one more, and one for `-` if negative. + */ + size += 2 if a.sign == .Negative else 1; + size += 1 if zero_terminate else 0; + return size, nil; +} + +/* + Overestimate the size needed for the bigint to string conversion by a very small amount. + The error is about 10^-8; it will overestimate the result by at most 11 elements for + a number of the size 2^(2^31)-1 which is currently the largest possible in this library. + Some short tests gave no results larger than 5 (plus 2 for sign and EOS). + */ + +/* + Table of {0, INT(log_2([1..64])*2^p)+1 } where p is the scale + factor defined in MP_RADIX_SIZE_SCALE and INT() extracts the integer part (truncating). + Good for 32 bit "int". Set MP_RADIX_SIZE_SCALE = 61 and recompute values + for 64 bit "int". + */ + +_RADIX_SIZE_SCALE :: 29; +_log_bases :: [65]u32{ + 0, 0, 0x20000001, 0x14309399, 0x10000001, + 0xdc81a35, 0xc611924, 0xb660c9e, 0xaaaaaab, 0xa1849cd, + 0x9a209a9, 0x94004e1, 0x8ed19c2, 0x8a5ca7d, 0x867a000, + 0x830cee3, 0x8000001, 0x7d42d60, 0x7ac8b32, 0x7887847, + 0x7677349, 0x749131f, 0x72d0163, 0x712f657, 0x6fab5db, + 0x6e40d1b, 0x6ced0d0, 0x6badbde, 0x6a80e3b, 0x6964c19, + 0x6857d31, 0x6758c38, 0x6666667, 0x657fb21, 0x64a3b9f, + 0x63d1ab4, 0x6308c92, 0x624869e, 0x618ff47, 0x60dedea, + 0x6034ab0, 0x5f90e7b, 0x5ef32cb, 0x5e5b1b2, 0x5dc85c3, + 0x5d3aa02, 0x5cb19d9, 0x5c2d10f, 0x5bacbbf, 0x5b3064f, + 0x5ab7d68, 0x5a42df0, 0x59d1506, 0x5962ffe, 0x58f7c57, + 0x588f7bc, 0x582a000, 0x57c7319, 0x5766f1d, 0x5709243, + 0x56adad9, 0x565474d, 0x55fd61f, 0x55a85e8, 0x5555556, +}; + +/* + Characters used in radix conversions. +*/ +RADIX_TABLE := "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; +RADIX_TABLE_REVERSE := [RADIX_TABLE_REVERSE_SIZE]u8{ + 0x3e, 0xff, 0xff, 0xff, 0x3f, 0x00, 0x01, 0x02, 0x03, 0x04, /* +,-./01234 */ + 0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, /* 56789:;<=> */ + 0xff, 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, /* ?@ABCDEFGH */ + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, /* IJKLMNOPQR */ + 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0xff, 0xff, /* STUVWXYZ[\ */ + 0xff, 0xff, 0xff, 0xff, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, /* ]^_`abcdef */ + 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, /* ghijklmnop */ + 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, /* qrstuvwxyz */ +}; +RADIX_TABLE_REVERSE_SIZE :: 80; + +/* + Stores a bignum as a ASCII string in a given radix (2..64) + The buffer must be appropriately sized. This routine doesn't check. +*/ +_itoa_raw_full :: proc(a: ^Int, radix: i8, buffer: []u8, zero_terminate := false, allocator := context.allocator) -> (written: int, err: Error) { + assert_if_nil(a); + context.allocator = allocator; + + temp, denominator := &Int{}, &Int{}; + + if err = internal_copy(temp, a); err != nil { return 0, err; } + if err = internal_set(denominator, radix); err != nil { return 0, err; } + + available := len(buffer); + if zero_terminate { + available -= 1; + buffer[available] = 0; + } + + if a.sign == .Negative { + temp.sign = .Zero_or_Positive; + } + + remainder: DIGIT; + for { + if remainder, err = #force_inline internal_divmod(temp, temp, DIGIT(radix)); err != nil { + internal_destroy(temp, denominator); + return len(buffer) - available, err; + } + available -= 1; + buffer[available] = RADIX_TABLE[remainder]; + if temp.used == 0 { + break; + } + } + + if a.sign == .Negative { + available -= 1; + buffer[available] = '-'; + } + + internal_destroy(temp, denominator); + + /* + If we overestimated the size, we need to move the buffer left. + */ + written = len(buffer) - available; + if written < len(buffer) { + diff := len(buffer) - written; + mem.copy(&buffer[0], &buffer[diff], written); + } + return written, nil; +} \ No newline at end of file diff --git a/core/math/big/test.odin b/core/math/big/test.odin new file mode 100644 index 000000000..2846dac6d --- /dev/null +++ b/core/math/big/test.odin @@ -0,0 +1,369 @@ +//+ignore +package math_big + +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + An arbitrary precision mathematics implementation in Odin. + For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3. + The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks. + + This file exports procedures for use with the test.py test suite. +*/ + +/* + TODO: Write tests for `internal_*` and test reusing parameters with the public implementations. +*/ + +import "core:runtime" +import "core:strings" + +PyRes :: struct { + res: cstring, + err: Error, +} + +@export test_initialize_constants :: proc "c" () -> (res: u64) { + context = runtime.default_context(); + return u64(initialize_constants()); +} + +@export test_error_string :: proc "c" (err: Error) -> (res: cstring) { + context = runtime.default_context(); + es := Error_String; + return strings.clone_to_cstring(es[err], context.temp_allocator); +} + +@export test_add :: proc "c" (a, b: cstring) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + aa, bb, sum := &Int{}, &Int{}, &Int{}; + defer internal_destroy(aa, bb, sum); + + if err = atoi(aa, string(a), 16); err != nil { return PyRes{res=":add:atoi(a):", err=err}; } + if err = atoi(bb, string(b), 16); err != nil { return PyRes{res=":add:atoi(b):", err=err}; } + if bb.used == 1 { + if err = #force_inline internal_add(sum, aa, bb.digit[0]); err != nil { return PyRes{res=":add:add(sum,a,b):", err=err}; } + } else { + if err = #force_inline internal_add(sum, aa, bb); err != nil { return PyRes{res=":add:add(sum,a,b):", err=err}; } + } + + r: cstring; + r, err = int_itoa_cstring(sum, 16, context.temp_allocator); + if err != nil { return PyRes{res=":add:itoa(sum):", err=err}; } + return PyRes{res = r, err = nil}; +} + +@export test_sub :: proc "c" (a, b: cstring) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + aa, bb, sum := &Int{}, &Int{}, &Int{}; + defer internal_destroy(aa, bb, sum); + + if err = atoi(aa, string(a), 16); err != nil { return PyRes{res=":sub:atoi(a):", err=err}; } + if err = atoi(bb, string(b), 16); err != nil { return PyRes{res=":sub:atoi(b):", err=err}; } + if bb.used == 1 { + if err = #force_inline internal_sub(sum, aa, bb.digit[0]); err != nil { return PyRes{res=":sub:sub(sum,a,b):", err=err}; } + } else { + if err = #force_inline internal_sub(sum, aa, bb); err != nil { return PyRes{res=":sub:sub(sum,a,b):", err=err}; } + } + + r: cstring; + r, err = int_itoa_cstring(sum, 16, context.temp_allocator); + if err != nil { return PyRes{res=":sub:itoa(sum):", err=err}; } + return PyRes{res = r, err = nil}; +} + +@export test_mul :: proc "c" (a, b: cstring) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + aa, bb, product := &Int{}, &Int{}, &Int{}; + defer internal_destroy(aa, bb, product); + + if err = atoi(aa, string(a), 16); err != nil { return PyRes{res=":mul:atoi(a):", err=err}; } + if err = atoi(bb, string(b), 16); err != nil { return PyRes{res=":mul:atoi(b):", err=err}; } + if err = #force_inline internal_mul(product, aa, bb); err != nil { return PyRes{res=":mul:mul(product,a,b):", err=err}; } + + r: cstring; + r, err = int_itoa_cstring(product, 16, context.temp_allocator); + if err != nil { return PyRes{res=":mul:itoa(product):", err=err}; } + return PyRes{res = r, err = nil}; +} + +@export test_sqr :: proc "c" (a: cstring) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + aa, square := &Int{}, &Int{}; + defer internal_destroy(aa, square); + + if err = atoi(aa, string(a), 16); err != nil { return PyRes{res=":sqr:atoi(a):", err=err}; } + if err = #force_inline internal_sqr(square, aa); err != nil { return PyRes{res=":sqr:sqr(square,a):", err=err}; } + + r: cstring; + r, err = int_itoa_cstring(square, 16, context.temp_allocator); + if err != nil { return PyRes{res=":sqr:itoa(square):", err=err}; } + return PyRes{res = r, err = nil}; +} + +/* + NOTE(Jeroen): For simplicity, we don't return the quotient and the remainder, just the quotient. +*/ +@export test_div :: proc "c" (a, b: cstring) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + aa, bb, quotient := &Int{}, &Int{}, &Int{}; + defer internal_destroy(aa, bb, quotient); + + if err = atoi(aa, string(a), 16); err != nil { return PyRes{res=":div:atoi(a):", err=err}; } + if err = atoi(bb, string(b), 16); err != nil { return PyRes{res=":div:atoi(b):", err=err}; } + if err = #force_inline internal_div(quotient, aa, bb); err != nil { return PyRes{res=":div:div(quotient,a,b):", err=err}; } + + r: cstring; + r, err = int_itoa_cstring(quotient, 16, context.temp_allocator); + if err != nil { return PyRes{res=":div:itoa(quotient):", err=err}; } + return PyRes{res = r, err = nil}; +} + + +/* + res = log(a, base) +*/ +@export test_log :: proc "c" (a: cstring, base := DIGIT(2)) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + l: int; + + aa := &Int{}; + defer internal_destroy(aa); + + if err = atoi(aa, string(a), 16); err != nil { return PyRes{res=":log:atoi(a):", err=err}; } + if l, err = #force_inline internal_log(aa, base); err != nil { return PyRes{res=":log:log(a, base):", err=err}; } + + #force_inline internal_zero(aa); + aa.digit[0] = DIGIT(l) & _MASK; + aa.digit[1] = DIGIT(l) >> _DIGIT_BITS; + aa.used = 2; + clamp(aa); + + r: cstring; + r, err = int_itoa_cstring(aa, 16, context.temp_allocator); + if err != nil { return PyRes{res=":log:itoa(res):", err=err}; } + return PyRes{res = r, err = nil}; +} + +/* + dest = base^power +*/ +@export test_pow :: proc "c" (base: cstring, power := int(2)) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + dest, bb := &Int{}, &Int{}; + defer internal_destroy(dest, bb); + + if err = atoi(bb, string(base), 16); err != nil { return PyRes{res=":pow:atoi(base):", err=err}; } + if err = #force_inline internal_pow(dest, bb, power); err != nil { return PyRes{res=":pow:pow(dest, base, power):", err=err}; } + + r: cstring; + r, err = int_itoa_cstring(dest, 16, context.temp_allocator); + if err != nil { return PyRes{res=":log:itoa(res):", err=err}; } + return PyRes{res = r, err = nil}; +} + +/* + dest = sqrt(src) +*/ +@export test_sqrt :: proc "c" (source: cstring) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + src := &Int{}; + defer internal_destroy(src); + + if err = atoi(src, string(source), 16); err != nil { return PyRes{res=":sqrt:atoi(src):", err=err}; } + if err = #force_inline internal_sqrt(src, src); err != nil { return PyRes{res=":sqrt:sqrt(src):", err=err}; } + + r: cstring; + r, err = int_itoa_cstring(src, 16, context.temp_allocator); + if err != nil { return PyRes{res=":log:itoa(res):", err=err}; } + return PyRes{res = r, err = nil}; +} + +/* + dest = root_n(src, power) +*/ +@export test_root_n :: proc "c" (source: cstring, power: int) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + src := &Int{}; + defer internal_destroy(src); + + if err = atoi(src, string(source), 16); err != nil { return PyRes{res=":root_n:atoi(src):", err=err}; } + if err = #force_inline internal_root_n(src, src, power); err != nil { return PyRes{res=":root_n:root_n(src):", err=err}; } + + r: cstring; + r, err = int_itoa_cstring(src, 16, context.temp_allocator); + if err != nil { return PyRes{res=":root_n:itoa(res):", err=err}; } + return PyRes{res = r, err = nil}; +} + +/* + dest = shr_digit(src, digits) +*/ +@export test_shr_digit :: proc "c" (source: cstring, digits: int) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + src := &Int{}; + defer internal_destroy(src); + + if err = atoi(src, string(source), 16); err != nil { return PyRes{res=":shr_digit:atoi(src):", err=err}; } + if err = #force_inline internal_shr_digit(src, digits); err != nil { return PyRes{res=":shr_digit:shr_digit(src):", err=err}; } + + r: cstring; + r, err = int_itoa_cstring(src, 16, context.temp_allocator); + if err != nil { return PyRes{res=":shr_digit:itoa(res):", err=err}; } + return PyRes{res = r, err = nil}; +} + +/* + dest = shl_digit(src, digits) +*/ +@export test_shl_digit :: proc "c" (source: cstring, digits: int) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + src := &Int{}; + defer internal_destroy(src); + + if err = atoi(src, string(source), 16); err != nil { return PyRes{res=":shl_digit:atoi(src):", err=err}; } + if err = #force_inline internal_shl_digit(src, digits); err != nil { return PyRes{res=":shl_digit:shr_digit(src):", err=err}; } + + r: cstring; + r, err = int_itoa_cstring(src, 16, context.temp_allocator); + if err != nil { return PyRes{res=":shl_digit:itoa(res):", err=err}; } + return PyRes{res = r, err = nil}; +} + +/* + dest = shr(src, bits) +*/ +@export test_shr :: proc "c" (source: cstring, bits: int) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + src := &Int{}; + defer internal_destroy(src); + + if err = atoi(src, string(source), 16); err != nil { return PyRes{res=":shr:atoi(src):", err=err}; } + if err = #force_inline internal_shr(src, src, bits); err != nil { return PyRes{res=":shr:shr(src, bits):", err=err}; } + + r: cstring; + r, err = int_itoa_cstring(src, 16, context.temp_allocator); + if err != nil { return PyRes{res=":shr:itoa(res):", err=err}; } + return PyRes{res = r, err = nil}; +} + +/* + dest = shr_signed(src, bits) +*/ +@export test_shr_signed :: proc "c" (source: cstring, bits: int) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + src := &Int{}; + defer internal_destroy(src); + + if err = atoi(src, string(source), 16); err != nil { return PyRes{res=":shr_signed:atoi(src):", err=err}; } + if err = #force_inline internal_shr_signed(src, src, bits); err != nil { return PyRes{res=":shr_signed:shr_signed(src, bits):", err=err}; } + + r: cstring; + r, err = int_itoa_cstring(src, 16, context.temp_allocator); + if err != nil { return PyRes{res=":shr_signed:itoa(res):", err=err}; } + return PyRes{res = r, err = nil}; +} + +/* + dest = shl(src, bits) +*/ +@export test_shl :: proc "c" (source: cstring, bits: int) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + src := &Int{}; + defer internal_destroy(src); + + if err = atoi(src, string(source), 16); err != nil { return PyRes{res=":shl:atoi(src):", err=err}; } + if err = #force_inline internal_shl(src, src, bits); err != nil { return PyRes{res=":shl:shl(src, bits):", err=err}; } + + r: cstring; + r, err = int_itoa_cstring(src, 16, context.temp_allocator); + if err != nil { return PyRes{res=":shl:itoa(res):", err=err}; } + return PyRes{res = r, err = nil}; +} + +/* + dest = factorial(n) +*/ +@export test_factorial :: proc "c" (n: int) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + dest := &Int{}; + defer internal_destroy(dest); + + if err = #force_inline internal_int_factorial(dest, n); err != nil { return PyRes{res=":factorial:factorial(n):", err=err}; } + + r: cstring; + r, err = int_itoa_cstring(dest, 16, context.temp_allocator); + if err != nil { return PyRes{res=":factorial:itoa(res):", err=err}; } + return PyRes{res = r, err = nil}; +} + +/* + dest = gcd(a, b) +*/ +@export test_gcd :: proc "c" (a, b: cstring) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + ai, bi, dest := &Int{}, &Int{}, &Int{}; + defer internal_destroy(ai, bi, dest); + + if err = atoi(ai, string(a), 16); err != nil { return PyRes{res=":gcd:atoi(a):", err=err}; } + if err = atoi(bi, string(b), 16); err != nil { return PyRes{res=":gcd:atoi(b):", err=err}; } + if err = #force_inline internal_int_gcd_lcm(dest, nil, ai, bi); err != nil { return PyRes{res=":gcd:gcd(a, b):", err=err}; } + + r: cstring; + r, err = int_itoa_cstring(dest, 16, context.temp_allocator); + if err != nil { return PyRes{res=":gcd:itoa(res):", err=err}; } + return PyRes{res = r, err = nil}; +} + +/* + dest = lcm(a, b) +*/ +@export test_lcm :: proc "c" (a, b: cstring) -> (res: PyRes) { + context = runtime.default_context(); + err: Error; + + ai, bi, dest := &Int{}, &Int{}, &Int{}; + defer internal_destroy(ai, bi, dest); + + if err = atoi(ai, string(a), 16); err != nil { return PyRes{res=":lcm:atoi(a):", err=err}; } + if err = atoi(bi, string(b), 16); err != nil { return PyRes{res=":lcm:atoi(b):", err=err}; } + if err = #force_inline internal_int_gcd_lcm(nil, dest, ai, bi); err != nil { return PyRes{res=":lcm:lcm(a, b):", err=err}; } + + r: cstring; + r, err = int_itoa_cstring(dest, 16, context.temp_allocator); + if err != nil { return PyRes{res=":lcm:itoa(res):", err=err}; } + return PyRes{res = r, err = nil}; +} + diff --git a/core/math/big/test.py b/core/math/big/test.py new file mode 100644 index 000000000..f9398389c --- /dev/null +++ b/core/math/big/test.py @@ -0,0 +1,668 @@ +from ctypes import * +from random import * +import math +import os +import platform +import time +import gc +from enum import Enum +import argparse + + +parser = argparse.ArgumentParser( + description = "Odin core:math/big test suite", + epilog = "By default we run regression and random tests with preset parameters.", + formatter_class = argparse.ArgumentDefaultsHelpFormatter, +) +# +# Normally, we report the number of passes and fails. With this option set, we exit at first fail. +# +parser.add_argument( + "-exit-on-fail", + help = "Exit when a test fails", + action = "store_true", +) +# +# We skip randomized tests altogether if this is set. +# + +no_random = parser.add_mutually_exclusive_group() + +no_random.add_argument( + "-no-random", + help = "No random tests", + action = "store_true", +) +# +# Normally we run a given number of cycles on each test. +# Timed tests budget 1 second per 20_000 bits instead. +# +# For timed tests we budget a second per `n` bits and iterate until we hit that time. +# + +timed_or_fast = no_random.add_mutually_exclusive_group() + +timed_or_fast.add_argument( + "-timed", + type = bool, + default = False, + help = "Timed tests instead of a preset number of iterations.", +) +parser.add_argument( + "-timed-bits", + type = int, + metavar = "BITS", + default = 20_000, + help = "Timed tests. Every `BITS` worth of input is given a second of running time.", +) +# +# For normal tests (non-timed), `-fast-tests` cuts down on the number of iterations. +# +timed_or_fast.add_argument( + "-fast-tests", + help = "Cut down on the number of iterations of each test", + action = "store_true", +) + +args = parser.parse_args() + +# +# How many iterations of each random test do we want to run? +# +BITS_AND_ITERATIONS = [ + ( 120, 10_000), + ( 1_200, 1_000), + ( 4_096, 100), + (12_000, 10), +] + +if args.fast_tests: + for k in range(len(BITS_AND_ITERATIONS)): + b, i = BITS_AND_ITERATIONS[k] + BITS_AND_ITERATIONS[k] = (b, i // 10 if i >= 100 else 5) + +if args.no_random: + BITS_AND_ITERATIONS = [] + +# +# Where is the DLL? If missing, build using: `odin build . -build-mode:shared` +# +if platform.system() == "Windows": + LIB_PATH = os.getcwd() + os.sep + "big.dll" +elif platform.system() == "Linux": + LIB_PATH = os.getcwd() + os.sep + "big.so" +elif platform.system() == "Darwin": + LIB_PATH = os.getcwd() + os.sep + "big.dylib" +else: + print("Platform is unsupported.") + exit(1) + + +TOTAL_TIME = 0 +UNTIL_TIME = 0 +UNTIL_ITERS = 0 + +def we_iterate(): + if args.timed: + return TOTAL_TIME < UNTIL_TIME + else: + global UNTIL_ITERS + UNTIL_ITERS -= 1 + return UNTIL_ITERS != -1 + +# +# Error enum values +# +class Error(Enum): + Okay = 0 + Out_Of_Memory = 1 + Invalid_Pointer = 2 + Invalid_Argument = 3 + Unknown_Error = 4 + Max_Iterations_Reached = 5 + Buffer_Overflow = 6 + Integer_Overflow = 7 + Division_by_Zero = 8 + Math_Domain_Error = 9 + Unimplemented = 127 + +# +# Disable garbage collection +# +gc.disable() + +# +# Set up exported procedures +# + +try: + l = cdll.LoadLibrary(LIB_PATH) +except: + print("Couldn't find or load " + LIB_PATH + ".") + exit(1) + +def load(export_name, args, res): + export_name.argtypes = args + export_name.restype = res + return export_name + +# +# Result values will be passed in a struct { res: cstring, err: Error } +# +class Res(Structure): + _fields_ = [("res", c_char_p), ("err", c_uint64)] + +initialize_constants = load(l.test_initialize_constants, [], c_uint64) +initialize_constants() + +error_string = load(l.test_error_string, [c_byte], c_char_p) + +add = load(l.test_add, [c_char_p, c_char_p], Res) +sub = load(l.test_sub, [c_char_p, c_char_p], Res) +mul = load(l.test_mul, [c_char_p, c_char_p], Res) +sqr = load(l.test_sqr, [c_char_p ], Res) +div = load(l.test_div, [c_char_p, c_char_p], Res) + +# Powers and such +int_log = load(l.test_log, [c_char_p, c_longlong], Res) +int_pow = load(l.test_pow, [c_char_p, c_longlong], Res) +int_sqrt = load(l.test_sqrt, [c_char_p ], Res) +int_root_n = load(l.test_root_n, [c_char_p, c_longlong], Res) + +# Logical operations + +int_shl_digit = load(l.test_shl_digit, [c_char_p, c_longlong], Res) +int_shr_digit = load(l.test_shr_digit, [c_char_p, c_longlong], Res) +int_shl = load(l.test_shl, [c_char_p, c_longlong], Res) +int_shr = load(l.test_shr, [c_char_p, c_longlong], Res) +int_shr_signed = load(l.test_shr_signed, [c_char_p, c_longlong], Res) + +int_factorial = load(l.test_factorial, [c_uint64], Res) +int_gcd = load(l.test_gcd, [c_char_p, c_char_p], Res) +int_lcm = load(l.test_lcm, [c_char_p, c_char_p], Res) + +def test(test_name: "", res: Res, param=[], expected_error = Error.Okay, expected_result = "", radix=16): + passed = True + r = None + err = Error(res.err) + + if err != expected_error: + error_loc = res.res.decode('utf-8') + error = "{}: {} in '{}'".format(test_name, err, error_loc) + + if len(param): + error += " with params {}".format(param) + + print(error, flush=True) + passed = False + elif err == Error.Okay: + r = None + try: + r = res.res.decode('utf-8') + r = int(res.res, radix) + except: + pass + + if r != expected_result: + error = "{}: Result was '{}', expected '{}'".format(test_name, r, expected_result) + if len(param): + error += " with params {}".format(param) + + print(error, flush=True) + passed = False + + if args.exit_on_fail and not passed: exit(res.err) + + return passed + +def arg_to_odin(a): + if a >= 0: + s = hex(a)[2:] + else: + s = '-' + hex(a)[3:] + return s.encode('utf-8') + +def test_add(a = 0, b = 0, expected_error = Error.Okay): + args = [arg_to_odin(a), arg_to_odin(b)] + res = add(*args) + expected_result = None + if expected_error == Error.Okay: + expected_result = a + b + return test("test_add", res, [a, b], expected_error, expected_result) + +def test_sub(a = 0, b = 0, expected_error = Error.Okay): + args = [arg_to_odin(a), arg_to_odin(b)] + res = sub(*args) + expected_result = None + if expected_error == Error.Okay: + expected_result = a - b + return test("test_sub", res, [a, b], expected_error, expected_result) + +def test_mul(a = 0, b = 0, expected_error = Error.Okay): + args = [arg_to_odin(a), arg_to_odin(b)] + try: + res = mul(*args) + except OSError as e: + print("{} while trying to multiply {} x {}.".format(e, a, b)) + if EXIT_ON_FAIL: exit(3) + return False + + expected_result = None + if expected_error == Error.Okay: + expected_result = a * b + return test("test_mul", res, [a, b], expected_error, expected_result) + +def test_sqr(a = 0, b = 0, expected_error = Error.Okay): + args = [arg_to_odin(a)] + try: + res = sqr(*args) + except OSError as e: + print("{} while trying to square {} x {}.".format(e, a)) + if EXIT_ON_FAIL: exit(3) + return False + + expected_result = None + if expected_error == Error.Okay: + expected_result = a * a + return test("test_sqr", res, [a], expected_error, expected_result) + +def test_div(a = 0, b = 0, expected_error = Error.Okay): + args = [arg_to_odin(a), arg_to_odin(b)] + res = div(*args) + expected_result = None + if expected_error == Error.Okay: + # + # We don't round the division results, so if one component is negative, we're off by one. + # + if a < 0 and b > 0: + expected_result = int(-(abs(a) // b)) + elif b < 0 and a > 0: + expected_result = int(-(a // abs((b)))) + else: + expected_result = a // b if b != 0 else None + return test("test_div", res, [a, b], expected_error, expected_result) + + +def test_log(a = 0, base = 0, expected_error = Error.Okay): + args = [arg_to_odin(a), base] + res = int_log(*args) + + expected_result = None + if expected_error == Error.Okay: + expected_result = int(math.log(a, base)) + return test("test_log", res, [a, base], expected_error, expected_result) + +def test_pow(base = 0, power = 0, expected_error = Error.Okay): + args = [arg_to_odin(base), power] + res = int_pow(*args) + + expected_result = None + if expected_error == Error.Okay: + if power < 0: + expected_result = 0 + else: + # NOTE(Jeroen): Don't use `math.pow`, it's a floating point approximation. + # Use built-in `pow` or `a**b` instead. + expected_result = pow(base, power) + return test("test_pow", res, [base, power], expected_error, expected_result) + +def test_sqrt(number = 0, expected_error = Error.Okay): + args = [arg_to_odin(number)] + try: + res = int_sqrt(*args) + except OSError as e: + print("{} while trying to sqrt {}.".format(e, number)) + if EXIT_ON_FAIL: exit(3) + return False + + expected_result = None + if expected_error == Error.Okay: + if number < 0: + expected_result = 0 + else: + expected_result = int(math.isqrt(number)) + return test("test_sqrt", res, [number], expected_error, expected_result) + +def root_n(number, root): + u, s = number, number + 1 + while u < s: + s = u + t = (root-1) * s + number // pow(s, root - 1) + u = t // root + return s + +def test_root_n(number = 0, root = 0, expected_error = Error.Okay): + args = [arg_to_odin(number), root] + res = int_root_n(*args) + expected_result = None + if expected_error == Error.Okay: + if number < 0: + expected_result = 0 + else: + expected_result = root_n(number, root) + + return test("test_root_n", res, [number, root], expected_error, expected_result) + +def test_shl_digit(a = 0, digits = 0, expected_error = Error.Okay): + args = [arg_to_odin(a), digits] + res = int_shl_digit(*args) + expected_result = None + if expected_error == Error.Okay: + expected_result = a << (digits * 60) + return test("test_shl_digit", res, [a, digits], expected_error, expected_result) + +def test_shr_digit(a = 0, digits = 0, expected_error = Error.Okay): + args = [arg_to_odin(a), digits] + res = int_shr_digit(*args) + expected_result = None + if expected_error == Error.Okay: + if a < 0: + # Don't pass negative numbers. We have a shr_signed. + return False + else: + expected_result = a >> (digits * 60) + + return test("test_shr_digit", res, [a, digits], expected_error, expected_result) + +def test_shl(a = 0, bits = 0, expected_error = Error.Okay): + args = [arg_to_odin(a), bits] + res = int_shl(*args) + expected_result = None + if expected_error == Error.Okay: + expected_result = a << bits + return test("test_shl", res, [a, bits], expected_error, expected_result) + +def test_shr(a = 0, bits = 0, expected_error = Error.Okay): + args = [arg_to_odin(a), bits] + res = int_shr(*args) + expected_result = None + if expected_error == Error.Okay: + if a < 0: + # Don't pass negative numbers. We have a shr_signed. + return False + else: + expected_result = a >> bits + + return test("test_shr", res, [a, bits], expected_error, expected_result) + +def test_shr_signed(a = 0, bits = 0, expected_error = Error.Okay): + args = [arg_to_odin(a), bits] + res = int_shr_signed(*args) + expected_result = None + if expected_error == Error.Okay: + expected_result = a >> bits + + return test("test_shr_signed", res, [a, bits], expected_error, expected_result) + +def test_factorial(n = 0, expected_error = Error.Okay): + args = [n] + res = int_factorial(*args) + expected_result = None + if expected_error == Error.Okay: + expected_result = math.factorial(n) + + return test("test_factorial", res, [n], expected_error, expected_result) + +def test_gcd(a = 0, b = 0, expected_error = Error.Okay): + args = [arg_to_odin(a), arg_to_odin(b)] + res = int_gcd(*args) + expected_result = None + if expected_error == Error.Okay: + expected_result = math.gcd(a, b) + + return test("test_gcd", res, [a, b], expected_error, expected_result) + +def test_lcm(a = 0, b = 0, expected_error = Error.Okay): + args = [arg_to_odin(a), arg_to_odin(b)] + res = int_lcm(*args) + expected_result = None + if expected_error == Error.Okay: + expected_result = math.lcm(a, b) + + return test("test_lcm", res, [a, b], expected_error, expected_result) + +# TODO(Jeroen): Make sure tests cover edge cases, fast paths, and so on. +# +# The last two arguments in tests are the expected error and expected result. +# +# The expected error defaults to None. +# By default the Odin implementation will be tested against the Python one. +# You can override that by supplying an expected result as the last argument instead. + +TESTS = { + test_add: [ + [ 1234, 5432], + ], + test_sub: [ + [ 1234, 5432], + ], + test_mul: [ + [ 1234, 5432], + [ 0xd3b4e926aaba3040e1c12b5ea553b5, 0x1a821e41257ed9281bee5bc7789ea7 ], + ], + test_sqr: [ + [ 5432], + [ 0xd3b4e926aaba3040e1c12b5ea553b5 ], + ], + test_div: [ + [ 54321, 12345], + [ 55431, 0, Error.Division_by_Zero], + [ 12980742146337069150589594264770969721, 4611686018427387904 ], + [ 831956404029821402159719858789932422, 243087903122332132 ], + ], + test_log: [ + [ 3192, 1, Error.Invalid_Argument], + [ -1234, 2, Error.Math_Domain_Error], + [ 0, 2, Error.Math_Domain_Error], + [ 1024, 2], + ], + test_pow: [ + [ 0, -1, Error.Math_Domain_Error ], # Math + [ 0, 0 ], # 1 + [ 0, 2 ], # 0 + [ 42, -1,], # 0 + [ 42, 1 ], # 1 + [ 42, 0 ], # 42 + [ 42, 2 ], # 42*42 + ], + test_sqrt: [ + [ -1, Error.Invalid_Argument, ], + [ 42, Error.Okay, ], + [ 12345678901234567890, Error.Okay, ], + [ 1298074214633706907132624082305024, Error.Okay, ], + [ 686885735734829009541949746871140768343076607029752932751182108475420900392874228486622313727012705619148037570309621219533087263900443932890792804879473795673302686046941536636874184361869252299636701671980034458333859202703255467709267777184095435235980845369829397344182319113372092844648570818726316581751114346501124871729572474923695509057166373026411194094493240101036672016770945150422252961487398124677567028263059046193391737576836378376192651849283925197438927999526058932679219572030021792914065825542626400207956134072247020690107136531852625253942429167557531123651471221455967386267137846791963149859804549891438562641323068751514370656287452006867713758971418043865298618635213551059471668293725548570452377976322899027050925842868079489675596835389444833567439058609775325447891875359487104691935576723532407937236505941186660707032433807075470656782452889754501872408562496805517394619388777930253411467941214807849472083814447498068636264021405175653742244368865090604940094889189800007448083930490871954101880815781177612910234741529950538835837693870921008635195545246771593130784786737543736434086434015200264933536294884482218945403958647118802574342840790536176272341586020230110889699633073513016344826709214, Error.Okay, ], + ], + test_root_n: [ + [ 1298074214633706907132624082305024, 2, Error.Okay, ], + ], + test_shl_digit: [ + [ 3192, 1 ], + [ 1298074214633706907132624082305024, 2 ], + [ 1024, 3 ], + ], + test_shr_digit: [ + [ 3680125442705055547392, 1 ], + [ 1725436586697640946858688965569256363112777243042596638790631055949824, 2 ], + [ 219504133884436710204395031992179571, 2 ], + ], + test_shl: [ + [ 3192, 1 ], + [ 1298074214633706907132624082305024, 2 ], + [ 1024, 3 ], + ], + test_shr: [ + [ 3680125442705055547392, 1 ], + [ 1725436586697640946858688965569256363112777243042596638790631055949824, 2 ], + [ 219504133884436710204395031992179571, 2 ], + ], + test_shr_signed: [ + [ -611105530635358368578155082258244262, 12 ], + [ -149195686190273039203651143129455, 12 ], + [ 611105530635358368578155082258244262, 12 ], + [ 149195686190273039203651143129455, 12 ], + ], + test_factorial: [ + [ 6_000 ], # Regular factorial, see cutoff in common.odin. + [ 12_345 ], # Binary split factorial + ], + test_gcd: [ + [ 23, 25, ], + [ 125, 25, ], + [ 125, 0, ], + [ 0, 0, ], + [ 0, 125,], + ], + test_lcm: [ + [ 23, 25,], + [ 125, 25, ], + [ 125, 0, ], + [ 0, 0, ], + [ 0, 125,], + ], +} + +if not args.fast_tests: + TESTS[test_factorial].append( + # This one on its own takes around 800ms, so we exclude it for FAST_TESTS + [ 100_000 ], + ) + +total_passes = 0 +total_failures = 0 + +# +# test_shr_signed also tests shr, so we're not going to test shr randomly. +# +RANDOM_TESTS = [ + test_add, test_sub, test_mul, test_sqr, test_div, + test_log, test_pow, test_sqrt, test_root_n, + test_shl_digit, test_shr_digit, test_shl, test_shr_signed, + test_gcd, test_lcm, +] +SKIP_LARGE = [ + test_pow, test_root_n, # test_gcd, +] +SKIP_LARGEST = [] + +# Untimed warmup. +for test_proc in TESTS: + for t in TESTS[test_proc]: + res = test_proc(*t) + +if __name__ == '__main__': + print("\n---- math/big tests ----") + print() + + for test_proc in TESTS: + count_pass = 0 + count_fail = 0 + TIMINGS = {} + for t in TESTS[test_proc]: + start = time.perf_counter() + res = test_proc(*t) + diff = time.perf_counter() - start + TOTAL_TIME += diff + + if test_proc not in TIMINGS: + TIMINGS[test_proc] = diff + else: + TIMINGS[test_proc] += diff + + if res: + count_pass += 1 + total_passes += 1 + else: + count_fail += 1 + total_failures += 1 + + print("{name}: {count_pass:,} passes and {count_fail:,} failures in {timing:.3f} ms.".format(name=test_proc.__name__, count_pass=count_pass, count_fail=count_fail, timing=TIMINGS[test_proc] * 1_000)) + + for BITS, ITERATIONS in BITS_AND_ITERATIONS: + print() + print("---- math/big with two random {bits:,} bit numbers ----".format(bits=BITS)) + print() + + # + # We've already tested up to the 10th root. + # + TEST_ROOT_N_PARAMS = [2, 3, 4, 5, 6] + + for test_proc in RANDOM_TESTS: + if BITS > 1_200 and test_proc in SKIP_LARGE: continue + if BITS > 4_096 and test_proc in SKIP_LARGEST: continue + + count_pass = 0 + count_fail = 0 + TIMINGS = {} + + UNTIL_ITERS = ITERATIONS + if test_proc == test_root_n and BITS == 1_200: + UNTIL_ITERS /= 10 + + UNTIL_TIME = TOTAL_TIME + BITS / args.timed_bits + # We run each test for a second per 20k bits + + index = 0 + + while we_iterate(): + a = randint(-(1 << BITS), 1 << BITS) + b = randint(-(1 << BITS), 1 << BITS) + + if test_proc == test_div: + # We've already tested division by zero above. + bits = int(BITS * 0.6) + b = randint(-(1 << bits), 1 << bits) + if b == 0: + b == 42 + elif test_proc == test_log: + # We've already tested log's domain errors. + a = randint(1, 1 << BITS) + b = randint(2, 1 << 60) + elif test_proc == test_pow: + b = randint(1, 10) + elif test_proc == test_sqrt: + a = randint(1, 1 << BITS) + b = Error.Okay + elif test_proc == test_root_n: + a = randint(1, 1 << BITS) + b = TEST_ROOT_N_PARAMS[index] + index = (index + 1) % len(TEST_ROOT_N_PARAMS) + elif test_proc == test_shl_digit: + b = randint(0, 10); + elif test_proc == test_shr_digit: + a = abs(a) + b = randint(0, 10); + elif test_proc == test_shl: + b = randint(0, min(BITS, 120)); + elif test_proc == test_shr_signed: + b = randint(0, min(BITS, 120)); + else: + b = randint(0, 1 << BITS) + + res = None + + start = time.perf_counter() + res = test_proc(a, b) + diff = time.perf_counter() - start + + TOTAL_TIME += diff + + if test_proc not in TIMINGS: + TIMINGS[test_proc] = diff + else: + TIMINGS[test_proc] += diff + + if res: + count_pass += 1; total_passes += 1 + else: + count_fail += 1; total_failures += 1 + + print("{name}: {count_pass:,} passes and {count_fail:,} failures in {timing:.3f} ms.".format(name=test_proc.__name__, count_pass=count_pass, count_fail=count_fail, timing=TIMINGS[test_proc] * 1_000)) + + print() + print("---- THE END ----") + print() + print("total: {count_pass:,} passes and {count_fail:,} failures in {timing:.3f} ms.".format(count_pass=total_passes, count_fail=total_failures, timing=TOTAL_TIME * 1_000)) + + if total_failures: + exit(1) \ No newline at end of file diff --git a/core/math/big/tune.odin b/core/math/big/tune.odin new file mode 100644 index 000000000..a51c04a78 --- /dev/null +++ b/core/math/big/tune.odin @@ -0,0 +1,80 @@ +//+ignore +package math_big + +/* + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-2 license. + + A BigInt implementation in Odin. + For the theoretical underpinnings, see Knuth's The Art of Computer Programming, Volume 2, section 4.3. + The code started out as an idiomatic source port of libTomMath, which is in the public domain, with thanks. +*/ + +import "core:fmt" +import "core:time" + +Category :: enum { + itoa, + atoi, + factorial, + factorial_bin, + choose, + lsb, + ctz, + sqr, + bitfield_extract, +}; + +Event :: struct { + ticks: time.Duration, + count: int, + cycles: u64, +} +Timings := [Category]Event{}; + +print_timings :: proc() { + duration :: proc(d: time.Duration) -> (res: string) { + switch { + case d < time.Microsecond: + return fmt.tprintf("%v ns", time.duration_nanoseconds(d)); + case d < time.Millisecond: + return fmt.tprintf("%v µs", time.duration_microseconds(d)); + case: + return fmt.tprintf("%v ms", time.duration_milliseconds(d)); + } + } + + for v in Timings { + if v.count > 0 { + fmt.println("\nTimings:"); + break; + } + } + + for v, i in Timings { + if v.count > 0 { + avg_ticks := time.Duration(f64(v.ticks) / f64(v.count)); + avg_cycles := f64(v.cycles) / f64(v.count); + + fmt.printf("\t%v: %s / %v cycles (avg), %s / %v cycles (total, %v calls)\n", i, duration(avg_ticks), avg_cycles, duration(v.ticks), v.cycles, v.count); + } + } +} + +@(deferred_in_out=_SCOPE_END) +SCOPED_TIMING :: #force_inline proc(c: Category) -> (ticks: time.Tick, cycles: u64) { + cycles = time.read_cycle_counter(); + ticks = time.tick_now(); + return; +} +_SCOPE_END :: #force_inline proc(c: Category, ticks: time.Tick, cycles: u64) { + cycles_now := time.read_cycle_counter(); + ticks_now := time.tick_now(); + + Timings[c].ticks = time.tick_diff(ticks, ticks_now); + Timings[c].cycles = cycles_now - cycles; + Timings[c].count += 1; +} +SCOPED_COUNT_ADD :: #force_inline proc(c: Category, count: int) { + Timings[c].count += count; +} diff --git a/core/math/rand/rand.odin b/core/math/rand/rand.odin index f5558bb8c..812cdc53d 100644 --- a/core/math/rand/rand.odin +++ b/core/math/rand/rand.odin @@ -95,7 +95,7 @@ int63_max :: proc(n: i64, r: ^Rand = nil) -> i64 { int127_max :: proc(n: i128, r: ^Rand = nil) -> i128 { if n <= 0 { - panic("Invalid argument to int63_max"); + panic("Invalid argument to int127_max"); } if n&(n-1) == 0 { return int127(r) & (n-1);