Merge branch 'master' into syscall-fix

gingerBill committed on 2024-07-19 11:43:56 +01:00 (via GitHub)
653 changed files with 66514 additions and 22263 deletions


@@ -2,147 +2,163 @@ name: CI
on: [push, pull_request, workflow_dispatch]
jobs:
build_linux:
name: Ubuntu Build, Check, and Test
build_netbsd:
name: NetBSD Build, Check, and Test
runs-on: ubuntu-latest
env:
PKGSRC_BRANCH: 2024Q1
steps:
- uses: actions/checkout@v4
- name: Build, Check, and Test
timeout-minutes: 15
uses: vmactions/netbsd-vm@v1
with:
release: "10.0"
envs: PKGSRC_BRANCH
usesh: true
copyback: false
prepare: |
PKG_PATH="https://cdn.NetBSD.org/pub/pkgsrc/packages/NetBSD/$(uname -p)/$(uname -r | cut -d_ -f1)_${PKGSRC_BRANCH}/All" /usr/sbin/pkg_add pkgin
pkgin -y in gmake git bash python311
pkgin -y in libxml2 perl zstd
/usr/sbin/pkg_add https://github.com/andreas-jonsson/llvm17-netbsd-bin/releases/download/pkgsrc-current/llvm-17.0.6.tgz
/usr/sbin/pkg_add https://github.com/andreas-jonsson/llvm17-netbsd-bin/releases/download/pkgsrc-current/clang-17.0.6.tgz
ln -s /usr/pkg/bin/python3.11 /usr/bin/python3
run: |
git config --global --add safe.directory $(pwd)
gmake release
./odin version
./odin report
gmake -C vendor/stb/src
gmake -C vendor/cgltf/src
gmake -C vendor/miniaudio/src
./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_amd64
./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_arm64
./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
./odin test tests/core/speed.odin -file -all-packages -o:speed -define:ODIN_TEST_FANCY=false
./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
(cd tests/issues; ./run.sh)
build_freebsd:
name: FreeBSD Build, Check, and Test
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Download LLVM
- uses: actions/checkout@v4
- name: Build, Check, and Test
timeout-minutes: 15
uses: vmactions/freebsd-vm@v1
with:
usesh: true
copyback: false
prepare: |
pkg install -y gmake git bash python3 libxml2 llvm17
run: |
# `set -e` is needed for test failures to register. https://github.com/vmactions/freebsd-vm/issues/72
set -e -x
git config --global --add safe.directory $(pwd)
gmake release
./odin version
./odin report
gmake -C vendor/stb/src
gmake -C vendor/cgltf/src
gmake -C vendor/miniaudio/src
./odin check examples/all -vet -strict-style -disallow-do -target:freebsd_amd64
./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
./odin test tests/core/speed.odin -file -all-packages -o:speed -define:ODIN_TEST_FANCY=false
./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
(cd tests/issues; ./run.sh)
ci:
strategy:
fail-fast: false
matrix:
# MacOS 13 runs on Intel, 14 runs on ARM
os: [ubuntu-latest, macos-13, macos-14]
runs-on: ${{ matrix.os }}
name: ${{ matrix.os == 'macos-14' && 'MacOS ARM' || (matrix.os == 'macos-13' && 'MacOS Intel' || 'Ubuntu') }} Build, Check, and Test
timeout-minutes: 15
steps:
- uses: actions/checkout@v4
- name: Download LLVM (Linux)
if: matrix.os == 'ubuntu-latest'
run: |
wget https://apt.llvm.org/llvm.sh
chmod +x llvm.sh
sudo ./llvm.sh 17
echo "/usr/lib/llvm-17/bin" >> $GITHUB_PATH
- name: build odin
run: ./build_odin.sh release
- name: Odin version
run: ./odin version
timeout-minutes: 1
- name: Odin report
run: ./odin report
timeout-minutes: 1
- name: Odin check
run: ./odin check examples/demo -vet
timeout-minutes: 10
- name: Odin run
run: ./odin run examples/demo
timeout-minutes: 10
- name: Odin run -debug
run: ./odin run examples/demo -debug
timeout-minutes: 10
- name: Odin check examples/all
run: ./odin check examples/all -strict-style
timeout-minutes: 10
- name: Core library tests
run: |
cd tests/core
make
timeout-minutes: 10
- name: Vendor library tests
run: |
cd tests/vendor
make
timeout-minutes: 10
- name: Odin internals tests
run: |
cd tests/internal
make
timeout-minutes: 10
- name: Odin check examples/all for Linux i386
run: ./odin check examples/all -vet -strict-style -target:linux_i386
timeout-minutes: 10
- name: Odin check examples/all for Linux arm64
run: ./odin check examples/all -vet -strict-style -target:linux_arm64
timeout-minutes: 10
- name: Odin check examples/all for FreeBSD amd64
run: ./odin check examples/all -vet -strict-style -target:freebsd_amd64
timeout-minutes: 10
- name: Odin check examples/all for OpenBSD amd64
run: ./odin check examples/all -vet -strict-style -target:openbsd_amd64
timeout-minutes: 10
build_macOS:
name: MacOS Build, Check, and Test
runs-on: macos-latest
steps:
- uses: actions/checkout@v1
- name: Download LLVM, and setup PATH
- name: Download LLVM (MacOS Intel)
if: matrix.os == 'macos-13'
run: |
brew install llvm@17
echo "/usr/local/opt/llvm@17/bin" >> $GITHUB_PATH
- name: build odin
run: ./build_odin.sh release
- name: Odin version
run: ./odin version
timeout-minutes: 1
- name: Odin report
run: ./odin report
timeout-minutes: 1
- name: Odin check
run: ./odin check examples/demo -vet
timeout-minutes: 10
- name: Odin run
run: ./odin run examples/demo
timeout-minutes: 10
- name: Odin run -debug
run: ./odin run examples/demo -debug
timeout-minutes: 10
- name: Odin check examples/all
run: ./odin check examples/all -strict-style
timeout-minutes: 10
- name: Core library tests
- name: Download LLVM (MacOS ARM)
if: matrix.os == 'macos-14'
run: |
cd tests/core
make
timeout-minutes: 10
- name: Odin internals tests
run: |
cd tests/internal
make
timeout-minutes: 10
build_macOS_arm:
name: MacOS ARM Build, Check, and Test
runs-on: macos-14 # This is an arm/m1 runner.
steps:
- uses: actions/checkout@v1
- name: Download LLVM and setup PATH
run: |
brew install llvm@17
brew install llvm@17 wasmtime
echo "/opt/homebrew/opt/llvm@17/bin" >> $GITHUB_PATH
- name: build odin
- name: Build Odin
run: ./build_odin.sh release
- name: Odin version
run: ./odin version
timeout-minutes: 1
- name: Odin report
run: ./odin report
timeout-minutes: 1
- name: Compile needed Vendor
run: |
make -C vendor/stb/src
make -C vendor/cgltf/src
make -C vendor/miniaudio/src
- name: Odin check
run: ./odin check examples/demo -vet
timeout-minutes: 10
- name: Odin run
run: ./odin run examples/demo
timeout-minutes: 10
- name: Odin run -debug
run: ./odin run examples/demo -debug
timeout-minutes: 10
- name: Odin check examples/all
run: ./odin check examples/all -strict-style
timeout-minutes: 10
- name: Core library tests
- name: Normal Core library tests
run: ./odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
- name: Optimized Core library tests
run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -define:ODIN_TEST_FANCY=false
- name: Vendor library tests
run: ./odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
- name: Internals tests
run: ./odin test tests/internal -all-packages -define:ODIN_TEST_FANCY=false
- name: Core library benchmarks
run: ./odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
- name: GitHub Issue tests
run: |
cd tests/core
make
timeout-minutes: 10
- name: Odin internals tests
cd tests/issues
./run.sh
- name: Odin check examples/all for Linux i386
run: ./odin check examples/all -vet -strict-style -disallow-do -target:linux_i386
if: matrix.os == 'ubuntu-latest'
- name: Odin check examples/all for Linux arm64
run: ./odin check examples/all -vet -strict-style -disallow-do -target:linux_arm64
if: matrix.os == 'ubuntu-latest'
- name: Odin check examples/all for FreeBSD amd64
run: ./odin check examples/all -vet -strict-style -disallow-do -target:freebsd_amd64
if: matrix.os == 'ubuntu-latest'
- name: Odin check examples/all for OpenBSD amd64
run: ./odin check examples/all -vet -strict-style -disallow-do -target:openbsd_amd64
if: matrix.os == 'ubuntu-latest'
- name: Run demo on WASI WASM32
run: |
cd tests/internal
make
timeout-minutes: 10
./odin build examples/demo -target:wasi_wasm32 -vet -strict-style -disallow-do -out:demo.wasm
wasmtime ./demo.wasm
if: matrix.os == 'macos-14'
build_windows:
name: Windows Build, Check, and Test
runs-on: windows-2022
timeout-minutes: 15
steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v4
- name: build Odin
shell: cmd
run: |
@@ -150,72 +166,67 @@ jobs:
./build.bat 1
- name: Odin version
run: ./odin version
timeout-minutes: 1
- name: Odin report
run: ./odin report
timeout-minutes: 1
- name: Odin check
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin check examples/demo -vet
timeout-minutes: 10
- name: Odin run
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin run examples/demo
timeout-minutes: 10
- name: Odin run -debug
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin run examples/demo -debug
timeout-minutes: 10
- name: Odin check examples/all
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin check examples/all -strict-style
timeout-minutes: 10
- name: Core library tests
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
cd tests\core
call build.bat
timeout-minutes: 10
odin test tests/core/normal.odin -file -all-packages -define:ODIN_TEST_FANCY=false
- name: Optimized core library tests
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin test tests/core/speed.odin -o:speed -file -all-packages -define:ODIN_TEST_FANCY=false
- name: Core library benchmarks
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin test tests/benchmark -all-packages -define:ODIN_TEST_FANCY=false
- name: Vendor library tests
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
cd tests\vendor
call build.bat
timeout-minutes: 10
odin test tests/vendor -all-packages -define:ODIN_TEST_FANCY=false
- name: Odin internals tests
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
cd tests\internal
call build.bat
timeout-minutes: 10
odin test tests/internal -all-packages -define:ODIN_TEST_FANCY=false
- name: Odin documentation tests
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
cd tests\documentation
rem call build.bat
timeout-minutes: 10
call build.bat
- name: core:math/big tests
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
cd tests\core\math\big
call build.bat
timeout-minutes: 10
- name: Odin check examples/all for Windows 32bits
shell: cmd
run: |
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
odin check examples/all -strict-style -target:windows_i386
timeout-minutes: 10


@@ -11,7 +11,7 @@ jobs:
if: github.repository == 'odin-lang/Odin'
runs-on: windows-2022
steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v4
- name: build Odin
shell: cmd
run: |
@@ -45,13 +45,13 @@ jobs:
if: github.repository == 'odin-lang/Odin'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v4
- name: (Linux) Download LLVM
run: |
wget https://apt.llvm.org/llvm.sh
chmod +x llvm.sh
sudo ./llvm.sh 17
echo "/usr/lib/llvm-17/bin" >> $GITHUB_PATH
sudo ./llvm.sh 18
echo "/usr/lib/llvm-18/bin" >> $GITHUB_PATH
- name: build odin
run: make nightly
- name: Odin run
@@ -77,13 +77,13 @@ jobs:
build_macos:
name: MacOS Build
if: github.repository == 'odin-lang/Odin'
runs-on: macos-latest
runs-on: macos-13
steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v4
- name: Download LLVM and setup PATH
run: |
brew install llvm@17 dylibbundler
echo "/usr/local/opt/llvm@17/bin" >> $GITHUB_PATH
brew install llvm@18 dylibbundler
echo "/usr/local/opt/llvm@18/bin" >> $GITHUB_PATH
- name: build odin
# These -L flags make the linker prioritize system libraries over LLVM libraries; this is mainly to
# avoid linking against the libunwind bundled with LLVM and instead link against the system libunwind.
@@ -113,11 +113,11 @@ jobs:
if: github.repository == 'odin-lang/Odin'
runs-on: macos-14 # ARM machine
steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v4
- name: Download LLVM and setup PATH
run: |
brew install llvm@17 dylibbundler
echo "/opt/homebrew/opt/llvm@17/bin" >> $GITHUB_PATH
brew install llvm@18 dylibbundler
echo "/opt/homebrew/opt/llvm@18/bin" >> $GITHUB_PATH
- name: build odin
# These -L flags make the linker prioritize system libraries over LLVM libraries; this is mainly to
# avoid linking against the libunwind bundled with LLVM and instead link against the system libunwind.
@@ -146,16 +146,16 @@ jobs:
runs-on: [ubuntu-latest]
needs: [build_windows, build_macos, build_macos_arm, build_ubuntu]
steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v4
- uses: actions/setup-python@v2
with:
python-version: '3.8.x'
- name: Install B2 CLI
- name: Install B2 SDK
shell: bash
run: |
python -m pip install --upgrade pip
pip install --upgrade b2
pip install --upgrade b2sdk
- name: Display Python version
run: python -c "import sys; print(sys.version)"
@@ -188,24 +188,9 @@ jobs:
BUCKET: ${{ secrets.B2_BUCKET }}
DAYS_TO_KEEP: ${{ secrets.B2_DAYS_TO_KEEP }}
run: |
echo Authorizing B2 account
b2 authorize-account "$APPID" "$APPKEY"
echo Uploading artifacts to B2
chmod +x ./ci/upload_create_nightly.sh
./ci/upload_create_nightly.sh "$BUCKET" windows-amd64 windows_artifacts/
./ci/upload_create_nightly.sh "$BUCKET" ubuntu-amd64 ubuntu_artifacts/dist.zip
./ci/upload_create_nightly.sh "$BUCKET" macos-amd64 macos_artifacts/dist.zip
./ci/upload_create_nightly.sh "$BUCKET" macos-arm64 macos_arm_artifacts/dist.zip
echo Deleting old artifacts in B2
python3 ci/delete_old_binaries.py "$BUCKET" "$DAYS_TO_KEEP"
echo Creating nightly.json
python3 ci/create_nightly_json.py "$BUCKET" > nightly.json
echo Uploading nightly.json
b2 upload-file "$BUCKET" nightly.json nightly.json
echo Clear B2 account info
b2 clear-account
python3 ci/nightly.py artifact windows-amd64 windows_artifacts/
python3 ci/nightly.py artifact ubuntu-amd64 ubuntu_artifacts/dist.zip
python3 ci/nightly.py artifact macos-amd64 macos_artifacts/dist.zip
python3 ci/nightly.py artifact macos-arm64 macos_arm_artifacts/dist.zip
python3 ci/nightly.py prune
python3 ci/nightly.py json

.gitignore

@@ -24,34 +24,6 @@ bld/
![Cc]ore/[Ll]og/
tests/documentation/verify/
tests/documentation/all.odin-doc
tests/internal/test_map
tests/internal/test_pow
tests/internal/test_rtti
tests/core/test_core_compress
tests/core/test_core_container
tests/core/test_core_filepath
tests/core/test_core_fmt
tests/core/test_core_i18n
tests/core/test_core_image
tests/core/test_core_libc
tests/core/test_core_match
tests/core/test_core_math
tests/core/test_core_net
tests/core/test_core_os_exit
tests/core/test_core_reflect
tests/core/test_core_strings
tests/core/test_crypto
tests/core/test_hash
tests/core/test_hxa
tests/core/test_json
tests/core/test_linalg_glsl_math
tests/core/test_noise
tests/core/test_varint
tests/core/test_xml
tests/core/test_core_slice
tests/core/test_core_thread
tests/core/test_core_runtime
tests/vendor/vendor_botan
# Visual Studio 2015 cache/options directory
.vs/
# Visual Studio Code options directory
@@ -59,6 +31,7 @@ tests/vendor/vendor_botan
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
demo
benchmark
# MSTest test Results
[Tt]est[Rr]esult*/
@@ -299,7 +272,7 @@ bin/
# - Linux/MacOS
odin
!odin/
odin.dSYM
**/*.dSYM
*.bin
demo.bin
libLLVM*.so*
@@ -318,4 +291,6 @@ build.sh
!core/debug/
# RAD debugger project file
*.raddbg
*.raddbg
misc/featuregen/featuregen

Binary file not shown.


@@ -126,3 +126,5 @@ clamp :: proc(value, minimum, maximum: T) -> T ---
soa_zip :: proc(slices: ...) -> #soa[]Struct ---
soa_unzip :: proc(value: $S/#soa[]$E) -> (slices: ...) ---
unreachable :: proc() -> ! ---
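
As a quick illustration of the `soa_zip` and `soa_unzip` builtins listed above, here is a minimal sketch; the slice names and contents are made up for the example:

package soa_zip_example

example :: proc() {
	positions := []f32{1, 2, 3}
	healths   := []int{10, 20, 30}
	// soa_zip combines parallel slices into a single #soa slice of an anonymous struct
	entities := soa_zip(pos = positions, hp = healths)
	for e in entities {
		assert(e.hp > 0) // each element exposes the zipped fields by name
	}
	// soa_unzip recovers the original component slices
	pos, hp := soa_unzip(entities)
	assert(len(pos) == len(hp))
}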


@@ -38,9 +38,12 @@ count_leading_zeros :: proc(x: $T) -> T where type_is_integer(T) || type_is_sim
reverse_bits :: proc(x: $T) -> T where type_is_integer(T) || type_is_simd_vector(T) ---
byte_swap :: proc(x: $T) -> T where type_is_integer(T) || type_is_float(T) ---
overflow_add :: proc(lhs, rhs: $T) -> (T, bool) ---
overflow_sub :: proc(lhs, rhs: $T) -> (T, bool) ---
overflow_mul :: proc(lhs, rhs: $T) -> (T, bool) ---
overflow_add :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #optional_ok ---
overflow_sub :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #optional_ok ---
overflow_mul :: proc(lhs, rhs: $T) -> (T, bool) where type_is_integer(T) #optional_ok ---
add_sat :: proc(lhs, rhs: $T) -> T where type_is_integer(T) ---
sub_sat :: proc(lhs, rhs: $T) -> T where type_is_integer(T) ---
sqrt :: proc(x: $T) -> T where type_is_float(T) || (type_is_simd_vector(T) && type_is_float(type_elem_type(T))) ---
@@ -73,6 +76,8 @@ expect :: proc(val, expected_val: T) -> T ---
// Linux and Darwin Only
syscall :: proc(id: uintptr, args: ..uintptr) -> uintptr ---
// FreeBSD, NetBSD, et cetera
syscall_bsd :: proc(id: uintptr, args: ..uintptr) -> (uintptr, bool) ---
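A minimal sketch of calling these intrinsics directly; the syscall numbers below are assumptions for linux_amd64 (SYS_getpid = 39) and freebsd_amd64 (SYS_getpid = 20), not something this commit defines:

package syscall_example

import "base:intrinsics"

getpid_example :: proc() {
	when ODIN_OS == .Linux && ODIN_ARCH == .amd64 {
		SYS_getpid :: 39 // assumed Linux x86-64 syscall number
		pid := intrinsics.syscall(SYS_getpid)
		_ = pid
	} else when ODIN_OS == .FreeBSD && ODIN_ARCH == .amd64 {
		SYS_getpid :: 20 // assumed FreeBSD syscall number
		// the BSD variant also reports whether the kernel flagged an error
		pid, ok := intrinsics.syscall_bsd(SYS_getpid)
		_ = pid
		_ = ok
	}
}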
// Atomics
@@ -167,17 +172,23 @@ type_is_matrix :: proc($T: typeid) -> bool ---
type_has_nil :: proc($T: typeid) -> bool ---
type_is_matrix_row_major :: proc($T: typeid) -> bool where type_is_matrix(T) ---
type_is_matrix_column_major :: proc($T: typeid) -> bool where type_is_matrix(T) ---
type_is_specialization_of :: proc($T, $S: typeid) -> bool ---
type_is_variant_of :: proc($U, $V: typeid) -> bool where type_is_union(U) ---
type_union_tag_type :: proc($T: typeid) -> typeid where type_is_union(T) ---
type_union_tag_offset :: proc($T: typeid) -> uintptr where type_is_union(T) ---
type_union_base_tag_value :: proc($T: typeid) -> int where type_is_union(U) ---
type_union_variant_count :: proc($T: typeid) -> int where type_is_union(T) ---
type_variant_type_of :: proc($T: typeid, $index: int) -> typeid where type_is_union(T) ---
type_variant_index_of :: proc($U, $V: typeid) -> int where type_is_union(U) ---
type_is_variant_of :: proc($U, $V: typeid) -> bool where type_is_union(U) ---
type_union_tag_type :: proc($T: typeid) -> typeid where type_is_union(T) ---
type_union_tag_offset :: proc($T: typeid) -> uintptr where type_is_union(T) ---
type_union_base_tag_value :: proc($T: typeid) -> int where type_is_union(U) ---
type_union_variant_count :: proc($T: typeid) -> int where type_is_union(T) ---
type_variant_type_of :: proc($T: typeid, $index: int) -> typeid where type_is_union(T) ---
type_variant_index_of :: proc($U, $V: typeid) -> int where type_is_union(U) ---
type_has_field :: proc($T: typeid, $name: string) -> bool ---
type_bit_set_elem_type :: proc($T: typeid) -> typeid where type_is_bit_set(T) ---
type_bit_set_underlying_type :: proc($T: typeid) -> typeid where type_is_bit_set(T) ---
type_has_field :: proc($T: typeid, $name: string) -> bool ---
type_field_type :: proc($T: typeid, $name: string) -> typeid ---
type_proc_parameter_count :: proc($T: typeid) -> int where type_is_proc(T) ---
@@ -186,7 +197,8 @@ type_proc_return_count :: proc($T: typeid) -> int where type_is_proc(T) ---
type_proc_parameter_type :: proc($T: typeid, index: int) -> typeid where type_is_proc(T) ---
type_proc_return_type :: proc($T: typeid, index: int) -> typeid where type_is_proc(T) ---
type_struct_field_count :: proc($T: typeid) -> int where type_is_struct(T) ---
type_struct_field_count :: proc($T: typeid) -> int where type_is_struct(T) ---
type_struct_has_implicit_padding :: proc($T: typeid) -> bool where type_is_struct(T) ---
type_polymorphic_record_parameter_count :: proc($T: typeid) -> typeid ---
type_polymorphic_record_parameter_value :: proc($T: typeid, index: int) -> $V ---
@@ -282,6 +294,16 @@ simd_reverse :: proc(a: #simd[N]T) -> #simd[N]T ---
simd_rotate_left :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
simd_rotate_right :: proc(a: #simd[N]T, $offset: int) -> #simd[N]T ---
// Checks if the current target supports the given target features.
//
// Takes a constant comma-separated string (e.g. "sha512,sse4.1"), or a procedure type which has either
// `@(require_target_feature)` or `@(enable_target_feature)` as its input and returns a boolean indicating
// if all listed features are supported.
has_target_feature :: proc($test: $T) -> bool where type_is_string(T) || type_is_proc(T) ---
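A minimal sketch of the constant-string form described in the comment above; the feature list is the one from the comment's own example:

package feature_example

import "base:intrinsics"

uses_fast_path :: proc() -> bool {
	// true only if every listed feature is available on the current target
	if intrinsics.has_target_feature("sha512,sse4.1") {
		return true
	}
	return false
}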
// Returns the value of the procedure where `x` must be a call expression
procedure_of :: proc(x: $T) -> T where type_is_proc(T) ---
// WASM targets only
wasm_memory_grow :: proc(index, delta: uintptr) -> int ---
@@ -293,9 +315,9 @@ wasm_memory_size :: proc(index: uintptr) -> int ---
// 0 - indicates that the thread blocked and then was woken up
// 1 - the loaded value from `ptr` did not match `expected`, the thread did not block
// 2 - the thread blocked, but the timeout expired
@(enable_target_feature="atomics")
@(require_target_feature="atomics")
wasm_memory_atomic_wait32 :: proc(ptr: ^u32, expected: u32, timeout_ns: i64) -> u32 ---
@(enable_target_feature="atomics")
@(require_target_feature="atomics")
wasm_memory_atomic_notify32 :: proc(ptr: ^u32, waiters: u32) -> (waiters_woken_up: u32) ---
// x86 Targets (i386, amd64)


@@ -66,7 +66,7 @@ Type_Info_Named :: struct {
name: string,
base: ^Type_Info,
pkg: string,
loc: Source_Code_Location,
loc: ^Source_Code_Location,
}
Type_Info_Integer :: struct {signed: bool, endianness: Platform_Endianness}
Type_Info_Rune :: struct {}
@@ -112,23 +112,32 @@ Type_Info_Parameters :: struct { // Only used for procedures parameters and resu
}
Type_Info_Tuple :: Type_Info_Parameters // Will be removed eventually
Type_Info_Struct :: struct {
types: []^Type_Info,
names: []string,
offsets: []uintptr,
usings: []bool,
tags: []string,
is_packed: bool,
is_raw_union: bool,
is_no_copy: bool,
custom_align: bool,
Type_Info_Struct_Flags :: distinct bit_set[Type_Info_Struct_Flag; u8]
Type_Info_Struct_Flag :: enum u8 {
packed = 0,
raw_union = 1,
no_copy = 2,
align = 3,
}
equal: Equal_Proc, // set only when the struct has .Comparable set but does not have .Simple_Compare set
Type_Info_Struct :: struct {
// Slice these with `field_count`
types: [^]^Type_Info `fmt:"v,field_count"`,
names: [^]string `fmt:"v,field_count"`,
offsets: [^]uintptr `fmt:"v,field_count"`,
usings: [^]bool `fmt:"v,field_count"`,
tags: [^]string `fmt:"v,field_count"`,
field_count: i32,
flags: Type_Info_Struct_Flags,
// These are only set iff this structure is an SOA structure
soa_kind: Type_Info_Struct_Soa_Kind,
soa_len: i32,
soa_base_type: ^Type_Info,
soa_len: int,
equal: Equal_Proc, // set only when the struct has .Comparable set but does not have .Simple_Compare set
}
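Because the field arrays are now multi-pointers, callers are expected to slice them with `field_count`, as the comment above says; a minimal sketch using `base:runtime` (the `Foo` struct and procedure name are made up for the example):

package type_info_example

import "base:runtime"

Foo :: struct {a: int, b: f32}

walk_field_names :: proc() {
	ti := runtime.type_info_base(type_info_of(Foo))
	if s, ok := ti.variant.(runtime.Type_Info_Struct); ok {
		// slice the multi-pointers with field_count before iterating
		names := s.names[:int(s.field_count)]
		types := s.types[:int(s.field_count)]
		for name, i in names {
			_ = name     // "a", "b"
			_ = types[i] // ^Type_Info for int, f32
		}
	}
}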
Type_Info_Union :: struct {
variants: []^Type_Info,
@@ -142,9 +151,9 @@ Type_Info_Union :: struct {
shared_nil: bool,
}
Type_Info_Enum :: struct {
base: ^Type_Info,
names: []string,
values: []Type_Info_Enum_Value,
base: ^Type_Info,
names: []string,
values: []Type_Info_Enum_Value,
}
Type_Info_Map :: struct {
key: ^Type_Info,
@@ -187,11 +196,12 @@ Type_Info_Soa_Pointer :: struct {
}
Type_Info_Bit_Field :: struct {
backing_type: ^Type_Info,
names: []string,
types: []^Type_Info,
bit_sizes: []uintptr,
bit_offsets: []uintptr,
tags: []string,
names: [^]string `fmt:"v,field_count"`,
types: [^]^Type_Info `fmt:"v,field_count"`,
bit_sizes: [^]uintptr `fmt:"v,field_count"`,
bit_offsets: [^]uintptr `fmt:"v,field_count"`,
tags: [^]string `fmt:"v,field_count"`,
field_count: int,
}
Type_Info_Flag :: enum u8 {
@@ -273,14 +283,14 @@ Typeid_Kind :: enum u8 {
}
#assert(len(Typeid_Kind) < 32)
// Typeid_Bit_Field :: bit_field #align(align_of(uintptr)) {
// index: 8*size_of(uintptr) - 8,
// kind: 5, // Typeid_Kind
// named: 1,
// special: 1, // signed, cstring, etc
// reserved: 1,
// }
// #assert(size_of(Typeid_Bit_Field) == size_of(uintptr));
Typeid_Bit_Field :: bit_field uintptr {
index: uintptr | 8*size_of(uintptr) - 8,
kind: Typeid_Kind | 5, // Typeid_Kind
named: bool | 1,
special: bool | 1, // signed, cstring, etc
reserved: bool | 1,
}
#assert(size_of(Typeid_Bit_Field) == size_of(uintptr))
// NOTE(bill): only the ones that are needed (not all types)
// This will be set by the compiler
@@ -299,6 +309,8 @@ when ODIN_OS == .Windows {
Thread_Detach = 3,
}
dll_forward_reason: DLL_Forward_Reason
dll_instance: rawptr
}
// IMPORTANT NOTE(bill): Must be in this order (as the compiler relies upon it)
@@ -397,11 +409,34 @@ Logger :: struct {
options: Logger_Options,
}
Random_Generator_Mode :: enum {
Read,
Reset,
Query_Info,
}
Random_Generator_Query_Info_Flag :: enum u32 {
Cryptographic,
Uniform,
External_Entropy,
Resettable,
}
Random_Generator_Query_Info :: distinct bit_set[Random_Generator_Query_Info_Flag; u32]
Random_Generator_Proc :: #type proc(data: rawptr, mode: Random_Generator_Mode, p: []byte)
Random_Generator :: struct {
procedure: Random_Generator_Proc,
data: rawptr,
}
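A minimal sketch of plugging a custom generator into the new `context.random_generator` field; the xorshift state, constants, and procedure names are illustrative and not part of this commit:

package rng_example

import "base:runtime"

// tiny xorshift64, used purely for illustration
xorshift_proc :: proc(data: rawptr, mode: runtime.Random_Generator_Mode, p: []byte) {
	state := (^u64)(data)
	switch mode {
	case .Read:
		for i in 0..<len(p) {
			x := state^
			x ~= x << 13
			x ~= x >> 7
			x ~= x << 17
			state^ = x
			p[i] = byte(x)
		}
	case .Reset:
		state^ = 0x9E3779B97F4A7C15 // re-seed with a fixed illustrative value
	case .Query_Info:
		// this toy generator advertises no particular capabilities
	}
}

use_custom_rng :: proc() {
	state: u64 = 0x12345678
	context.random_generator = runtime.Random_Generator{
		procedure = xorshift_proc,
		data      = &state,
	}
	// code run in this scope now draws randomness from the xorshift state
}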
Context :: struct {
allocator: Allocator,
temp_allocator: Allocator,
assertion_failure_proc: Assertion_Failure_Proc,
logger: Logger,
random_generator: Random_Generator,
user_ptr: rawptr,
user_index: int,
@@ -470,6 +505,15 @@ Raw_Soa_Pointer :: struct {
index: int,
}
Raw_Complex32 :: struct {real, imag: f16}
Raw_Complex64 :: struct {real, imag: f32}
Raw_Complex128 :: struct {real, imag: f64}
Raw_Quaternion64 :: struct {imag, jmag, kmag: f16, real: f16}
Raw_Quaternion128 :: struct {imag, jmag, kmag: f32, real: f32}
Raw_Quaternion256 :: struct {imag, jmag, kmag: f64, real: f64}
Raw_Quaternion64_Vector_Scalar :: struct {vector: [3]f16, scalar: f16}
Raw_Quaternion128_Vector_Scalar :: struct {vector: [3]f32, scalar: f32}
Raw_Quaternion256_Vector_Scalar :: struct {vector: [3]f64, scalar: f64}
/*
@@ -481,7 +525,9 @@ Raw_Soa_Pointer :: struct {
Linux,
Essence,
FreeBSD,
Haiku,
OpenBSD,
NetBSD,
WASI,
JS,
Freestanding,
@@ -508,6 +554,7 @@ Odin_Arch_Type :: type_of(ODIN_ARCH)
Odin_Build_Mode_Type :: enum int {
Executable,
Dynamic,
Static,
Object,
Assembly,
LLVM_IR,
@@ -548,6 +595,19 @@ Odin_Platform_Subtarget_Type :: type_of(ODIN_PLATFORM_SUBTARGET)
*/
Odin_Sanitizer_Flags :: type_of(ODIN_SANITIZER_FLAGS)
/*
// Defined internally by the compiler
Odin_Optimization_Mode :: enum int {
None = -1,
Minimal = 0,
Size = 1,
Speed = 2,
Aggressive = 3,
}
ODIN_OPTIMIZATION_MODE // is a constant
*/
Odin_Optimization_Mode :: type_of(ODIN_OPTIMIZATION_MODE)
/////////////////////////////
// Init Startup Procedures //
@@ -683,13 +743,16 @@ __init_context :: proc "contextless" (c: ^Context) {
c.logger.procedure = default_logger_proc
c.logger.data = nil
c.random_generator.procedure = default_random_generator_proc
c.random_generator.data = nil
}
default_assertion_failure_proc :: proc(prefix, message: string, loc: Source_Code_Location) -> ! {
when ODIN_OS == .Freestanding {
// Do nothing
} else {
when !ODIN_DISABLE_ASSERT {
when ODIN_OS != .Orca && !ODIN_DISABLE_ASSERT {
print_caller_location(loc)
print_string(" ")
}
@@ -698,7 +761,18 @@ default_assertion_failure_proc :: proc(prefix, message: string, loc: Source_Code
print_string(": ")
print_string(message)
}
print_byte('\n')
when ODIN_OS == .Orca {
assert_fail(
cstring(raw_data(loc.file_path)),
cstring(raw_data(loc.procedure)),
loc.line,
"",
cstring(raw_data(orca_stderr_buffer[:orca_stderr_buffer_idx])),
)
} else {
print_byte('\n')
}
}
trap()
}


@@ -65,7 +65,7 @@ copy :: proc{copy_slice, copy_from_string}
// with the old value, and reducing the length of the dynamic array by 1.
//
// Note: This is an O(1) operation.
// Note: If you the elements to remain in their order, use `ordered_remove`.
// Note: If you want the elements to remain in their order, use `ordered_remove`.
// Note: If the index is out of bounds, this procedure will panic.
@builtin
unordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_location) #no_bounds_check {
@@ -79,7 +79,7 @@ unordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_loca
// `ordered_remove` removes the element at the specified `index` whilst keeping the order of the other elements.
//
// Note: This is an O(N) operation.
// Note: If you the elements do not have to remain in their order, prefer `unordered_remove`.
// Note: If the elements do not have to remain in their order, prefer `unordered_remove`.
// Note: If the index is out of bounds, this procedure will panic.
@builtin
ordered_remove :: proc(array: ^$D/[dynamic]$T, index: int, loc := #caller_location) #no_bounds_check {
@@ -163,21 +163,43 @@ pop_front_safe :: proc "contextless" (array: ^$T/[dynamic]$E) -> (res: E, ok: bo
// `clear` will set the length of a passed dynamic array or map to `0`
@builtin
clear :: proc{clear_dynamic_array, clear_map}
clear :: proc{
clear_dynamic_array,
clear_map,
clear_soa_dynamic_array,
}
// `reserve` will try to reserve memory of a passed dynamic array or map to the requested element count (setting the `cap`).
@builtin
reserve :: proc{reserve_dynamic_array, reserve_map}
reserve :: proc{
reserve_dynamic_array,
reserve_map,
reserve_soa,
}
@builtin
non_zero_reserve :: proc{non_zero_reserve_dynamic_array}
non_zero_reserve :: proc{
non_zero_reserve_dynamic_array,
non_zero_reserve_soa,
}
// `resize` will try to resize memory of a passed dynamic array to the requested element count (setting the `len`, and possibly `cap`).
@builtin
resize :: proc{resize_dynamic_array}
resize :: proc{
resize_dynamic_array,
resize_soa,
}
@builtin
non_zero_resize :: proc{non_zero_resize_dynamic_array}
non_zero_resize :: proc{
non_zero_resize_dynamic_array,
non_zero_resize_soa,
}
// Shrinks the capacity of a dynamic array or map down to the current length, or the given capacity.
@builtin
@@ -268,7 +290,7 @@ new_clone :: proc(data: $T, allocator := context.allocator, loc := #caller_locat
return
}
DEFAULT_RESERVE_CAPACITY :: 16
DEFAULT_DYNAMIC_ARRAY_CAPACITY :: 8
@(require_results)
make_aligned :: proc($T: typeid/[]$E, #any_int len: int, alignment: int, allocator := context.allocator, loc := #caller_location) -> (T, Allocator_Error) #optional_allocator_error {
@@ -295,7 +317,7 @@ make_slice :: proc($T: typeid/[]$E, #any_int len: int, allocator := context.allo
// Note: Prefer using the procedure group `make`.
@(builtin, require_results)
make_dynamic_array :: proc($T: typeid/[dynamic]$E, allocator := context.allocator, loc := #caller_location) -> (T, Allocator_Error) #optional_allocator_error {
return make_dynamic_array_len_cap(T, 0, DEFAULT_RESERVE_CAPACITY, allocator, loc)
return make_dynamic_array_len_cap(T, 0, 0, allocator, loc)
}
// `make_dynamic_array_len` allocates and initializes a dynamic array. Like `new`, the first argument is a type, not a value.
// Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
@@ -311,16 +333,23 @@ make_dynamic_array_len :: proc($T: typeid/[dynamic]$E, #any_int len: int, alloca
// Note: Prefer using the procedure group `make`.
@(builtin, require_results)
make_dynamic_array_len_cap :: proc($T: typeid/[dynamic]$E, #any_int len: int, #any_int cap: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
make_dynamic_array_error_loc(loc, len, cap)
array.allocator = allocator // initialize allocator before just in case it fails to allocate any memory
data := mem_alloc_bytes(size_of(E)*cap, align_of(E), allocator, loc) or_return
s := Raw_Dynamic_Array{raw_data(data), len, cap, allocator}
if data == nil && size_of(E) != 0 {
s.len, s.cap = 0, 0
}
array = transmute(T)s
err = _make_dynamic_array_len_cap((^Raw_Dynamic_Array)(&array), size_of(E), align_of(E), len, cap, allocator, loc)
return
}
@(require_results)
_make_dynamic_array_len_cap :: proc(array: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, #any_int len: int, #any_int cap: int, allocator := context.allocator, loc := #caller_location) -> (err: Allocator_Error) {
make_dynamic_array_error_loc(loc, len, cap)
array.allocator = allocator // initialize allocator before just in case it fails to allocate any memory
data := mem_alloc_bytes(size_of_elem*cap, align_of_elem, allocator, loc) or_return
use_zero := data == nil && size_of_elem != 0
array.data = raw_data(data)
array.len = 0 if use_zero else len
array.cap = 0 if use_zero else cap
array.allocator = allocator
return
}
// `make_map` allocates and initializes a dynamic array. Like `new`, the first argument is a type, not a value.
// Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
//
@@ -364,6 +393,11 @@ make :: proc{
make_dynamic_array_len_cap,
make_map,
make_multi_pointer,
make_soa_slice,
make_soa_dynamic_array,
make_soa_dynamic_array_len,
make_soa_dynamic_array_len_cap,
}
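With these overloads registered, #soa dynamic arrays go through the same `make`/`append`/`clear` entry points as regular dynamic arrays; a minimal sketch (the `Particle` struct and procedure name are made up for the example):

package soa_dyn_example

Particle :: struct {
	pos:  [3]f32,
	life: f32,
}

particle_example :: proc() {
	// make now also accepts #soa dynamic array types via make_soa_dynamic_array
	particles, err := make(#soa[dynamic]Particle)
	assert(err == .None)
	defer delete_soa(particles)
	// append resolves to append_soa_elem / append_soa_elems for #soa arrays
	append(&particles, Particle{pos = {0, 0, 0}, life = 1.0})
	append(&particles, Particle{life = 0.5}, Particle{life = 0.25})
	// clear resolves to clear_soa_dynamic_array
	clear(&particles)
}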
@@ -383,7 +417,7 @@ clear_map :: proc "contextless" (m: ^$T/map[$K]$V) {
//
// Note: Prefer the procedure group `reserve`
@builtin
reserve_map :: proc(m: ^$T/map[$K]$V, capacity: int, loc := #caller_location) -> Allocator_Error {
reserve_map :: proc(m: ^$T/map[$K]$V, #any_int capacity: int, loc := #caller_location) -> Allocator_Error {
return __dynamic_map_reserve((^Raw_Map)(m), map_info(T), uint(capacity), loc) if m != nil else nil
}
@@ -413,106 +447,103 @@ delete_key :: proc(m: ^$T/map[$K]$V, key: K) -> (deleted_key: K, deleted_value:
return
}
_append_elem :: #force_inline proc(array: ^$T/[dynamic]$E, arg: E, should_zero: bool, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
_append_elem :: #force_inline proc(array: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, arg_ptr: rawptr, should_zero: bool, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
if array == nil {
return 0, nil
return
}
when size_of(E) == 0 {
array := (^Raw_Dynamic_Array)(array)
array.len += 1
return 1, nil
} else {
if cap(array) < len(array)+1 {
cap := 2 * cap(array) + max(8, 1)
// do not 'or_return' here as it could be a partial success
if should_zero {
err = reserve(array, cap, loc)
} else {
err = non_zero_reserve(array, cap, loc)
}
}
if cap(array)-len(array) > 0 {
a := (^Raw_Dynamic_Array)(array)
when size_of(E) != 0 {
data := ([^]E)(a.data)
assert(data != nil, loc=loc)
data[a.len] = arg
}
a.len += 1
return 1, err
}
return 0, err
if array.cap < array.len+1 {
// Same behavior as _append_elems but there's only one arg, so we always just add DEFAULT_DYNAMIC_ARRAY_CAPACITY.
cap := 2 * array.cap + DEFAULT_DYNAMIC_ARRAY_CAPACITY
// do not 'or_return' here as it could be a partial success
err = _reserve_dynamic_array(array, size_of_elem, align_of_elem, cap, should_zero, loc)
}
if array.cap-array.len > 0 {
data := ([^]byte)(array.data)
assert(data != nil, loc=loc)
data = data[array.len*size_of_elem:]
intrinsics.mem_copy_non_overlapping(data, arg_ptr, size_of_elem)
array.len += 1
n = 1
}
return
}
@builtin
append_elem :: proc(array: ^$T/[dynamic]$E, #no_broadcast arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
return _append_elem(array, arg, true, loc=loc)
when size_of(E) == 0 {
(^Raw_Dynamic_Array)(array).len += 1
return 1, nil
} else {
arg := arg
return _append_elem((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), &arg, true, loc=loc)
}
}
@builtin
non_zero_append_elem :: proc(array: ^$T/[dynamic]$E, #no_broadcast arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
return _append_elem(array, arg, false, loc=loc)
when size_of(E) == 0 {
(^Raw_Dynamic_Array)(array).len += 1
return 1, nil
} else {
arg := arg
return _append_elem((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), &arg, false, loc=loc)
}
}
_append_elems :: #force_inline proc(array: ^$T/[dynamic]$E, should_zero: bool, loc := #caller_location, args: ..E) -> (n: int, err: Allocator_Error) #optional_allocator_error {
_append_elems :: #force_inline proc(array: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, should_zero: bool, loc := #caller_location, args: rawptr, arg_len: int) -> (n: int, err: Allocator_Error) #optional_allocator_error {
if array == nil {
return 0, nil
}
arg_len := len(args)
if arg_len <= 0 {
return 0, nil
}
when size_of(E) == 0 {
array := (^Raw_Dynamic_Array)(array)
array.len += arg_len
return arg_len, nil
} else {
if cap(array) < len(array)+arg_len {
cap := 2 * cap(array) + max(8, arg_len)
if array.cap < array.len+arg_len {
cap := 2 * array.cap + max(DEFAULT_DYNAMIC_ARRAY_CAPACITY, arg_len)
// do not 'or_return' here as it could be a partial success
if should_zero {
err = reserve(array, cap, loc)
} else {
err = non_zero_reserve(array, cap, loc)
}
}
arg_len = min(cap(array)-len(array), arg_len)
if arg_len > 0 {
a := (^Raw_Dynamic_Array)(array)
when size_of(E) != 0 {
data := ([^]E)(a.data)
assert(data != nil, loc=loc)
intrinsics.mem_copy(&data[a.len], raw_data(args), size_of(E) * arg_len)
}
a.len += arg_len
}
return arg_len, err
// do not 'or_return' here as it could be a partial success
err = _reserve_dynamic_array(array, size_of_elem, align_of_elem, cap, should_zero, loc)
}
arg_len := arg_len
arg_len = min(array.cap-array.len, arg_len)
if arg_len > 0 {
data := ([^]byte)(array.data)
assert(data != nil, loc=loc)
data = data[array.len*size_of_elem:]
intrinsics.mem_copy(data, args, size_of_elem * arg_len) // must be mem_copy (overlapping)
array.len += arg_len
}
return arg_len, err
}
@builtin
append_elems :: proc(array: ^$T/[dynamic]$E, #no_broadcast args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
return _append_elems(array, true, loc, ..args)
when size_of(E) == 0 {
a := (^Raw_Dynamic_Array)(array)
a.len += len(args)
return len(args), nil
} else {
return _append_elems((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), true, loc, raw_data(args), len(args))
}
}
@builtin
non_zero_append_elems :: proc(array: ^$T/[dynamic]$E, #no_broadcast args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
return _append_elems(array, false, loc, ..args)
when size_of(E) == 0 {
a := (^Raw_Dynamic_Array)(array)
a.len += len(args)
return len(args), nil
} else {
return _append_elems((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), false, loc, raw_data(args), len(args))
}
}
// The append_string built-in procedure appends a string to the end of a [dynamic]u8 like type
_append_elem_string :: proc(array: ^$T/[dynamic]$E/u8, arg: $A/string, should_zero: bool, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
args := transmute([]E)arg
if should_zero {
return append_elems(array, ..args, loc=loc)
} else {
return non_zero_append_elems(array, ..args, loc=loc)
}
return _append_elems((^Raw_Dynamic_Array)(array), 1, 1, should_zero, loc, raw_data(arg), len(arg))
}
@builtin
@@ -540,8 +571,23 @@ append_string :: proc(array: ^$T/[dynamic]$E/u8, args: ..string, loc := #caller_
}
// The append built-in procedure appends elements to the end of a dynamic array
@builtin append :: proc{append_elem, append_elems, append_elem_string}
@builtin non_zero_append :: proc{non_zero_append_elem, non_zero_append_elems, non_zero_append_elem_string}
@builtin append :: proc{
append_elem,
append_elems,
append_elem_string,
append_soa_elem,
append_soa_elems,
}
@builtin non_zero_append :: proc{
non_zero_append_elem,
non_zero_append_elems,
non_zero_append_elem_string,
non_zero_append_soa_elem,
non_zero_append_soa_elems,
}
@builtin
@@ -636,7 +682,7 @@ assign_at_elem :: proc(array: ^$T/[dynamic]$E, index: int, arg: E, loc := #calle
@builtin
assign_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
assign_at_elems :: proc(array: ^$T/[dynamic]$E, index: int, #no_broadcast args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
new_size := index + len(args)
if len(args) == 0 {
ok = true
@@ -686,11 +732,10 @@ clear_dynamic_array :: proc "contextless" (array: ^$T/[dynamic]$E) {
// `reserve_dynamic_array` will try to reserve memory of a passed dynamic array or map to the requested element count (setting the `cap`).
//
// Note: Prefer the procedure group `reserve`.
_reserve_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, capacity: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
if array == nil {
_reserve_dynamic_array :: #force_inline proc(a: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, capacity: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
if a == nil {
return nil
}
a := (^Raw_Dynamic_Array)(array)
if capacity <= a.cap {
return nil
@@ -701,15 +746,15 @@ _reserve_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, capacity: i
}
assert(a.allocator.procedure != nil)
old_size := a.cap * size_of(E)
new_size := capacity * size_of(E)
old_size := a.cap * size_of_elem
new_size := capacity * size_of_elem
allocator := a.allocator
new_data: []byte
if should_zero {
new_data = mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
new_data = mem_resize(a.data, old_size, new_size, align_of_elem, allocator, loc) or_return
} else {
new_data = non_zero_mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
new_data = non_zero_mem_resize(a.data, old_size, new_size, align_of_elem, allocator, loc) or_return
}
if new_data == nil && new_size > 0 {
return .Out_Of_Memory
@@ -721,27 +766,24 @@ _reserve_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, capacity: i
}
@builtin
reserve_dynamic_array :: proc(array: ^$T/[dynamic]$E, capacity: int, loc := #caller_location) -> Allocator_Error {
return _reserve_dynamic_array(array, capacity, true, loc)
reserve_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int capacity: int, loc := #caller_location) -> Allocator_Error {
return _reserve_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), capacity, true, loc)
}
@builtin
non_zero_reserve_dynamic_array :: proc(array: ^$T/[dynamic]$E, capacity: int, loc := #caller_location) -> Allocator_Error {
return _reserve_dynamic_array(array, capacity, false, loc)
non_zero_reserve_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int capacity: int, loc := #caller_location) -> Allocator_Error {
return _reserve_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), capacity, false, loc)
}
// `resize_dynamic_array` will try to resize memory of a passed dynamic array or map to the requested element count (setting the `len`, and possibly `cap`).
//
// Note: Prefer the procedure group `resize`
_resize_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, length: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
if array == nil {
_resize_dynamic_array :: #force_inline proc(a: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, length: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
if a == nil {
return nil
}
a := (^Raw_Dynamic_Array)(array)
if length <= a.cap {
if should_zero && a.len < length {
intrinsics.mem_zero(([^]E)(a.data)[a.len:], (length-a.len)*size_of(E))
intrinsics.mem_zero(([^]byte)(a.data)[a.len*size_of_elem:], (length-a.len)*size_of_elem)
}
a.len = max(length, 0)
return nil
@@ -752,15 +794,15 @@ _resize_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, length: int,
}
assert(a.allocator.procedure != nil)
old_size := a.cap * size_of(E)
new_size := length * size_of(E)
old_size := a.cap * size_of_elem
new_size := length * size_of_elem
allocator := a.allocator
new_data : []byte
if should_zero {
new_data = mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
new_data = mem_resize(a.data, old_size, new_size, align_of_elem, allocator, loc) or_return
} else {
new_data = non_zero_mem_resize(a.data, old_size, new_size, align_of(E), allocator, loc) or_return
new_data = non_zero_mem_resize(a.data, old_size, new_size, align_of_elem, allocator, loc) or_return
}
if new_data == nil && new_size > 0 {
return .Out_Of_Memory
@@ -772,14 +814,17 @@ _resize_dynamic_array :: #force_inline proc(array: ^$T/[dynamic]$E, length: int,
return nil
}
// `resize_dynamic_array` will try to resize memory of a passed dynamic array or map to the requested element count (setting the `len`, and possibly `cap`).
//
// Note: Prefer the procedure group `resize`
@builtin
resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, length: int, loc := #caller_location) -> Allocator_Error {
return _resize_dynamic_array(array, length, true, loc=loc)
resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int length: int, loc := #caller_location) -> Allocator_Error {
return _resize_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), length, true, loc=loc)
}
@builtin
non_zero_resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, length: int, loc := #caller_location) -> Allocator_Error {
return _resize_dynamic_array(array, length, false, loc=loc)
non_zero_resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int length: int, loc := #caller_location) -> Allocator_Error {
return _resize_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), length, false, loc=loc)
}
/*
@@ -794,10 +839,13 @@ non_zero_resize_dynamic_array :: proc(array: ^$T/[dynamic]$E, length: int, loc :
Note: Prefer the procedure group `shrink`
*/
shrink_dynamic_array :: proc(array: ^$T/[dynamic]$E, new_cap := -1, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
if array == nil {
return _shrink_dynamic_array((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), new_cap, loc)
}
_shrink_dynamic_array :: proc(a: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, new_cap := -1, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
if a == nil {
return
}
a := (^Raw_Dynamic_Array)(array)
new_cap := new_cap if new_cap >= 0 else a.len
@@ -810,10 +858,10 @@ shrink_dynamic_array :: proc(array: ^$T/[dynamic]$E, new_cap := -1, loc := #call
}
assert(a.allocator.procedure != nil)
old_size := a.cap * size_of(E)
new_size := new_cap * size_of(E)
old_size := a.cap * size_of_elem
new_size := new_cap * size_of_elem
new_data := mem_resize(a.data, old_size, new_size, align_of(E), a.allocator, loc) or_return
new_data := mem_resize(a.data, old_size, new_size, align_of_elem, a.allocator, loc) or_return
a.data = raw_data(new_data)
a.len = min(new_cap, a.len)


@@ -55,7 +55,7 @@ raw_soa_footer_slice :: proc(array: ^$T/#soa[]$E) -> (footer: ^Raw_SOA_Footer_Sl
if array == nil {
return nil
}
field_count := uintptr(intrinsics.type_struct_field_count(E))
field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
footer = (^Raw_SOA_Footer_Slice)(uintptr(array) + field_count*size_of(rawptr))
return
}
@@ -64,12 +64,7 @@ raw_soa_footer_dynamic_array :: proc(array: ^$T/#soa[dynamic]$E) -> (footer: ^Ra
if array == nil {
return nil
}
field_count: uintptr
when intrinsics.type_is_array(E) {
field_count = len(E)
} else {
field_count = uintptr(intrinsics.type_struct_field_count(E))
}
field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
footer = (^Raw_SOA_Footer_Dynamic_Array)(uintptr(array) + field_count*size_of(rawptr))
return
}
@@ -98,11 +93,11 @@ make_soa_aligned :: proc($T: typeid/#soa[]$E, length: int, alignment: int, alloc
ti = type_info_base(ti)
si := &ti.variant.(Type_Info_Struct)
field_count := uintptr(intrinsics.type_struct_field_count(E))
field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
total_size := 0
for i in 0..<field_count {
type := si.types[i].variant.(Type_Info_Pointer).elem
type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
total_size += type.size * length
total_size = align_forward_int(total_size, max_align)
}
@@ -126,7 +121,7 @@ make_soa_aligned :: proc($T: typeid/#soa[]$E, length: int, alignment: int, alloc
data := uintptr(&array)
offset := 0
for i in 0..<field_count {
type := si.types[i].variant.(Type_Info_Pointer).elem
type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
offset = align_forward_int(offset, max_align)
@@ -147,7 +142,7 @@ make_soa_slice :: proc($T: typeid/#soa[]$E, length: int, allocator := context.al
@(builtin, require_results)
make_soa_dynamic_array :: proc($T: typeid/#soa[dynamic]$E, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
context.allocator = allocator
reserve_soa(&array, DEFAULT_RESERVE_CAPACITY, loc) or_return
reserve_soa(&array, 0, loc) or_return
return array, nil
}
@@ -187,8 +182,28 @@ resize_soa :: proc(array: ^$T/#soa[dynamic]$E, length: int, loc := #caller_locat
return nil
}
@builtin
non_zero_resize_soa :: proc(array: ^$T/#soa[dynamic]$E, length: int, loc := #caller_location) -> Allocator_Error {
if array == nil {
return nil
}
non_zero_reserve_soa(array, length, loc) or_return
footer := raw_soa_footer(array)
footer.len = length
return nil
}
@builtin
reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_location) -> Allocator_Error {
return _reserve_soa(array, capacity, true, loc)
}
@builtin
non_zero_reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_location) -> Allocator_Error {
return _reserve_soa(array, capacity, false, loc)
}
_reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, zero_memory: bool, loc := #caller_location) -> Allocator_Error {
if array == nil {
return nil
}
@@ -213,12 +228,7 @@ reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_lo
ti = type_info_base(ti)
si := &ti.variant.(Type_Info_Struct)
field_count: uintptr
when intrinsics.type_is_array(E) {
field_count = len(E)
} else {
field_count = uintptr(intrinsics.type_struct_field_count(E))
}
field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
assert(footer.cap == old_cap)
old_size := 0
@@ -226,7 +236,7 @@ reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_lo
max_align :: align_of(E)
for i in 0..<field_count {
type := si.types[i].variant.(Type_Info_Pointer).elem
type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
old_size += type.size * old_cap
new_size += type.size * capacity
@@ -238,7 +248,7 @@ reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_lo
old_data := (^rawptr)(array)^
new_bytes := array.allocator.procedure(
array.allocator.data, .Alloc, new_size, max_align,
array.allocator.data, .Alloc if zero_memory else .Alloc_Non_Zeroed, new_size, max_align,
nil, old_size, loc,
) or_return
new_data := raw_data(new_bytes)
@@ -249,7 +259,7 @@ reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_lo
old_offset := 0
new_offset := 0
for i in 0..<field_count {
type := si.types[i].variant.(Type_Info_Pointer).elem
type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
old_offset = align_forward_int(old_offset, max_align)
new_offset = align_forward_int(new_offset, max_align)
@@ -273,15 +283,26 @@ reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, loc := #caller_lo
return nil
}
@builtin
append_soa_elem :: proc(array: ^$T/#soa[dynamic]$E, arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
append_soa_elem :: proc(array: ^$T/#soa[dynamic]$E, #no_broadcast arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
return _append_soa_elem(array, true, arg, loc)
}
@builtin
non_zero_append_soa_elem :: proc(array: ^$T/#soa[dynamic]$E, #no_broadcast arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
return _append_soa_elem(array, false, arg, loc)
}
_append_soa_elem :: proc(array: ^$T/#soa[dynamic]$E, zero_memory: bool, #no_broadcast arg: E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
if array == nil {
return 0, nil
}
if cap(array) <= len(array) + 1 {
cap := 2 * cap(array) + 8
err = reserve_soa(array, cap, loc) // do not 'or_return' here as it could be a partial success
// Same behavior as append_soa_elems but there's only one arg, so we always just add DEFAULT_DYNAMIC_ARRAY_CAPACITY.
cap := 2 * cap(array) + DEFAULT_DYNAMIC_ARRAY_CAPACITY
err = _reserve_soa(array, cap, zero_memory, loc) // do not 'or_return' here as it could be a partial success
}
footer := raw_soa_footer(array)
@@ -290,12 +311,7 @@ append_soa_elem :: proc(array: ^$T/#soa[dynamic]$E, arg: E, loc := #caller_locat
ti := type_info_of(T)
ti = type_info_base(ti)
si := &ti.variant.(Type_Info_Struct)
field_count: uintptr
when intrinsics.type_is_array(E) {
field_count = len(E)
} else {
field_count = uintptr(intrinsics.type_struct_field_count(E))
}
field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
data := (^rawptr)(array)^
@@ -307,7 +323,7 @@ append_soa_elem :: proc(array: ^$T/#soa[dynamic]$E, arg: E, loc := #caller_locat
max_align :: align_of(E)
for i in 0..<field_count {
type := si.types[i].variant.(Type_Info_Pointer).elem
type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
soa_offset = align_forward_int(soa_offset, max_align)
item_offset = align_forward_int(item_offset, type.align)
@@ -326,7 +342,17 @@ append_soa_elem :: proc(array: ^$T/#soa[dynamic]$E, arg: E, loc := #caller_locat
}
@builtin
append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, #no_broadcast args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
return _append_soa_elems(array, true, args=args, loc=loc)
}
@builtin
non_zero_append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, #no_broadcast args: ..E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
return _append_soa_elems(array, false, args=args, loc=loc)
}
_append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, zero_memory: bool, #no_broadcast args: []E, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
if array == nil {
return
}
@@ -337,8 +363,8 @@ append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, args: ..E, loc := #caller_l
}
if cap(array) <= len(array)+arg_len {
cap := 2 * cap(array) + max(8, arg_len)
err = reserve_soa(array, cap, loc) // do not 'or_return' here as it could be a partial success
cap := 2 * cap(array) + max(DEFAULT_DYNAMIC_ARRAY_CAPACITY, arg_len)
err = _reserve_soa(array, cap, zero_memory, loc) // do not 'or_return' here as it could be a partial success
}
arg_len = min(cap(array)-len(array), arg_len)
@@ -347,7 +373,7 @@ append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, args: ..E, loc := #caller_l
ti := type_info_of(typeid_of(T))
ti = type_info_base(ti)
si := &ti.variant.(Type_Info_Struct)
field_count := uintptr(intrinsics.type_struct_field_count(E))
field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
data := (^rawptr)(array)^
@@ -358,7 +384,7 @@ append_soa_elems :: proc(array: ^$T/#soa[dynamic]$E, args: ..E, loc := #caller_l
max_align :: align_of(E)
for i in 0..<field_count {
type := si.types[i].variant.(Type_Info_Pointer).elem
type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
soa_offset = align_forward_int(soa_offset, max_align)
item_offset = align_forward_int(item_offset, type.align)
@@ -389,7 +415,8 @@ append_soa :: proc{
delete_soa_slice :: proc(array: $T/#soa[]$E, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
when intrinsics.type_struct_field_count(E) != 0 {
field_count :: len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E)
when field_count != 0 {
array := array
ptr := (^rawptr)(&array)^
free(ptr, allocator, loc) or_return
@@ -398,7 +425,8 @@ delete_soa_slice :: proc(array: $T/#soa[]$E, allocator := context.allocator, loc
}
delete_soa_dynamic_array :: proc(array: $T/#soa[dynamic]$E, loc := #caller_location) -> Allocator_Error {
when intrinsics.type_struct_field_count(E) != 0 {
field_count :: len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E)
when field_count != 0 {
array := array
ptr := (^rawptr)(&array)^
footer := raw_soa_footer(&array)
@@ -416,7 +444,8 @@ delete_soa :: proc{
clear_soa_dynamic_array :: proc(array: ^$T/#soa[dynamic]$E) {
when intrinsics.type_struct_field_count(E) != 0 {
field_count :: len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E)
when field_count != 0 {
footer := raw_soa_footer(array)
footer.len = 0
}
@@ -438,12 +467,7 @@ into_dynamic_soa :: proc(array: $T/#soa[]$E) -> #soa[dynamic]E {
allocator = nil_allocator(),
}
field_count: uintptr
when intrinsics.type_is_array(E) {
field_count = len(E)
} else {
field_count = uintptr(intrinsics.type_struct_field_count(E))
}
field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
array := array
dynamic_data := ([^]rawptr)(&d)[:field_count]
@@ -467,16 +491,11 @@ unordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, index: int, loc := #cal
ti = type_info_base(ti)
si := &ti.variant.(Type_Info_Struct)
field_count: uintptr
when intrinsics.type_is_array(E) {
field_count = len(E)
} else {
field_count = uintptr(intrinsics.type_struct_field_count(E))
}
field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
data := uintptr(array)
for i in 0..<field_count {
type := si.types[i].variant.(Type_Info_Pointer).elem
type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
offset := rawptr((^uintptr)(data)^ + uintptr(index*type.size))
final := rawptr((^uintptr)(data)^ + uintptr((len(array)-1)*type.size))
@@ -500,16 +519,11 @@ ordered_remove_soa :: proc(array: ^$T/#soa[dynamic]$E, index: int, loc := #calle
ti = type_info_base(ti)
si := &ti.variant.(Type_Info_Struct)
field_count: uintptr
when intrinsics.type_is_array(E) {
field_count = len(E)
} else {
field_count = uintptr(intrinsics.type_struct_field_count(E))
}
field_count := uintptr(len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E))
data := uintptr(array)
for i in 0..<field_count {
type := si.types[i].variant.(Type_Info_Pointer).elem
type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
offset := (^uintptr)(data)^ + uintptr(index*type.size)
length := type.size*(len(array) - index - 1)
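An illustrative usage sketch, not part of the diff: it exercises the #no_broadcast append variants changed above from user code (the Particle type and the procedure name are made up for demonstration).

Particle :: struct { pos: [3]f32, vel: [3]f32 }

example_soa_append :: proc() {
	particles: #soa[dynamic]Particle
	defer delete_soa(particles)
	append_soa(&particles, Particle{pos = {1, 2, 3}})              // capacity reserved during growth is zeroed
	non_zero_append_soa_elems(&particles, Particle{}, Particle{})  // skips the zeroing on growth
}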

View File

@@ -6,6 +6,9 @@ when ODIN_DEFAULT_TO_NIL_ALLOCATOR {
} else when ODIN_DEFAULT_TO_PANIC_ALLOCATOR {
default_allocator_proc :: panic_allocator_proc
default_allocator :: panic_allocator
} else when ODIN_OS != .Orca && (ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32) {
default_allocator :: default_wasm_allocator
default_allocator_proc :: wasm_allocator_proc
} else {
default_allocator :: heap_allocator
default_allocator_proc :: heap_allocator_proc

View File

@@ -12,6 +12,8 @@ Memory_Block :: struct {
capacity: uint,
}
// NOTE: This is a growing arena that is only used for the default temp allocator.
// For your own growing arena needs, prefer `Arena` from `core:mem/virtual`.
Arena :: struct {
backing_allocator: Allocator,
curr_block: ^Memory_Block,
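A hedged sketch of the alternative that the note above recommends, assuming the core:mem/virtual API names (Arena, arena_init_growing, arena_allocator, arena_destroy); the package and procedure names here are illustrative only.

package sketch

import "core:mem/virtual"

use_growing_arena :: proc() {
	arena: virtual.Arena
	_ = virtual.arena_init_growing(&arena)              // sets up a growing backing block
	defer virtual.arena_destroy(&arena)
	context.allocator = virtual.arena_allocator(&arena)
	xs := make([dynamic]int)                            // now allocates from the arena
	append(&xs, 1, 2, 3)
}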

View File

@@ -1,7 +1,7 @@
package runtime
DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE: int : #config(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE, 4 * Megabyte)
NO_DEFAULT_TEMP_ALLOCATOR: bool : ODIN_OS == .Freestanding || ODIN_OS == .JS || ODIN_DEFAULT_TO_NIL_ALLOCATOR
NO_DEFAULT_TEMP_ALLOCATOR: bool : ODIN_OS == .Freestanding || ODIN_DEFAULT_TO_NIL_ALLOCATOR
when NO_DEFAULT_TEMP_ALLOCATOR {
Default_Temp_Allocator :: struct {}

View File

@@ -157,7 +157,7 @@ __dynamic_map_get // dynamic map calls
__dynamic_map_set // dynamic map calls
## Dynamic literals ([dymamic]T and map[K]V) (can be disabled with -no-dynamic-literals)
## Dynamic literals ([dynamic]T and map[K]V) (can be disabled with -no-dynamic-literals)
__dynamic_array_reserve
__dynamic_array_append

View File

@@ -577,7 +577,7 @@ map_grow_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Inf
@(require_results)
map_reserve_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, new_capacity: uintptr, loc := #caller_location) -> Allocator_Error {
map_reserve_dynamic :: #force_no_inline proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, new_capacity: uintptr, loc := #caller_location) -> Allocator_Error {
@(require_results)
ceil_log2 :: #force_inline proc "contextless" (x: uintptr) -> uintptr {
z := intrinsics.count_leading_zeros(x)
@@ -641,7 +641,7 @@ map_reserve_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_
@(require_results)
map_shrink_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
map_shrink_dynamic :: #force_no_inline proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, loc := #caller_location) -> (did_shrink: bool, err: Allocator_Error) {
if m.allocator.procedure == nil {
m.allocator = context.allocator
}
@@ -688,7 +688,7 @@ map_shrink_dynamic :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_I
}
@(require_results)
map_free_dynamic :: proc "odin" (m: Raw_Map, info: ^Map_Info, loc := #caller_location) -> Allocator_Error {
map_free_dynamic :: #force_no_inline proc "odin" (m: Raw_Map, info: ^Map_Info, loc := #caller_location) -> Allocator_Error {
ptr := rawptr(map_data(m))
size := int(map_total_allocation_size(uintptr(map_cap(m)), info))
err := mem_free_with_size(ptr, size, m.allocator, loc)
@@ -700,7 +700,7 @@ map_free_dynamic :: proc "odin" (m: Raw_Map, info: ^Map_Info, loc := #caller_loc
}
@(require_results)
map_lookup_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (index: uintptr, ok: bool) {
map_lookup_dynamic :: #force_no_inline proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (index: uintptr, ok: bool) {
if map_len(m) == 0 {
return 0, false
}
@@ -723,7 +723,7 @@ map_lookup_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info,
}
}
@(require_results)
map_exists_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (ok: bool) {
map_exists_dynamic :: #force_no_inline proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (ok: bool) {
if map_len(m) == 0 {
return false
}
@@ -749,7 +749,7 @@ map_exists_dynamic :: proc "contextless" (m: Raw_Map, #no_alias info: ^Map_Info,
@(require_results)
map_erase_dynamic :: #force_inline proc "contextless" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (old_k, old_v: uintptr, ok: bool) {
map_erase_dynamic :: #force_no_inline proc "contextless" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, k: uintptr) -> (old_k, old_v: uintptr, ok: bool) {
index := map_lookup_dynamic(m^, info, k) or_return
ks, vs, hs, _, _ := map_kvh_data_dynamic(m^, info)
hs[index] |= TOMBSTONE_MASK

View File

@@ -1,5 +1,5 @@
//+private
//+build linux, darwin, freebsd, openbsd, haiku
//+build linux, darwin, freebsd, openbsd, netbsd, haiku
//+no-instrumentation
package runtime

View File

@@ -6,15 +6,34 @@ package runtime
import "base:intrinsics"
when !ODIN_TEST && !ODIN_NO_ENTRY_POINT {
@(link_name="_start", linkage="strong", require, export)
_start :: proc "c" () {
context = default_context()
#force_no_inline _startup_runtime()
intrinsics.__entry_point()
when ODIN_OS == .Orca {
@(linkage="strong", require, export)
oc_on_init :: proc "c" () {
context = default_context()
#force_no_inline _startup_runtime()
intrinsics.__entry_point()
}
@(linkage="strong", require, export)
oc_on_terminate :: proc "c" () {
context = default_context()
#force_no_inline _cleanup_runtime()
}
} else {
@(link_name="_start", linkage="strong", require, export)
_start :: proc "c" () {
context = default_context()
when ODIN_OS == .WASI {
_wasi_setup_args()
}
#force_no_inline _startup_runtime()
intrinsics.__entry_point()
}
@(link_name="_end", linkage="strong", require, export)
_end :: proc "c" () {
context = default_context()
#force_no_inline _cleanup_runtime()
}
}
@(link_name="_end", linkage="strong", require, export)
_end :: proc "c" () {
context = default_context()
#force_no_inline _cleanup_runtime()
}
}
}

View File

@@ -10,8 +10,9 @@ when ODIN_BUILD_MODE == .Dynamic {
DllMain :: proc "system" (hinstDLL: rawptr, fdwReason: u32, lpReserved: rawptr) -> b32 {
context = default_context()
// Populate Windows DLL-specific global
// Populate Windows DLL-specific globals
dll_forward_reason = DLL_Forward_Reason(fdwReason)
dll_instance = hinstDLL
switch dll_forward_reason {
case .Process_Attach:

View File

@@ -4,6 +4,8 @@ package runtime
bounds_trap :: proc "contextless" () -> ! {
when ODIN_OS == .Windows {
windows_trap_array_bounds()
} else when ODIN_OS == .Orca {
abort_ext("", "", 0, "bounds trap")
} else {
trap()
}
@@ -13,6 +15,8 @@ bounds_trap :: proc "contextless" () -> ! {
type_assertion_trap :: proc "contextless" () -> ! {
when ODIN_OS == .Windows {
windows_trap_type_assertion()
} else when ODIN_OS == .Orca {
abort_ext("", "", 0, "type assertion trap")
} else {
trap()
}

View File

@@ -97,14 +97,14 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
}
heap_alloc :: proc(size: int, zero_memory := true) -> rawptr {
heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
return _heap_alloc(size, zero_memory)
}
heap_resize :: proc(ptr: rawptr, new_size: int) -> rawptr {
heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
return _heap_resize(ptr, new_size)
}
heap_free :: proc(ptr: rawptr) {
heap_free :: proc "contextless" (ptr: rawptr) {
_heap_free(ptr)
}

View File

@@ -0,0 +1,29 @@
//+build orca
//+private
package runtime
foreign {
@(link_name="malloc") _orca_malloc :: proc "c" (size: int) -> rawptr ---
@(link_name="calloc") _orca_calloc :: proc "c" (num, size: int) -> rawptr ---
@(link_name="free") _orca_free :: proc "c" (ptr: rawptr) ---
@(link_name="realloc") _orca_realloc :: proc "c" (ptr: rawptr, size: int) -> rawptr ---
}
_heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
if size <= 0 {
return nil
}
if zero_memory {
return _orca_calloc(1, size)
} else {
return _orca_malloc(size)
}
}
_heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
return _orca_realloc(ptr, new_size)
}
_heap_free :: proc "contextless" (ptr: rawptr) {
_orca_free(ptr)
}

View File

@@ -2,14 +2,17 @@
//+private
package runtime
_heap_alloc :: proc(size: int, zero_memory := true) -> rawptr {
_heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
context = default_context()
unimplemented("base:runtime 'heap_alloc' procedure is not supported on this platform")
}
_heap_resize :: proc(ptr: rawptr, new_size: int) -> rawptr {
_heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
context = default_context()
unimplemented("base:runtime 'heap_resize' procedure is not supported on this platform")
}
_heap_free :: proc(ptr: rawptr) {
_heap_free :: proc "contextless" (ptr: rawptr) {
context = default_context()
unimplemented("base:runtime 'heap_free' procedure is not supported on this platform")
}
}

View File

@@ -1,4 +1,4 @@
//+build linux, darwin, freebsd, openbsd, haiku
//+build linux, darwin, freebsd, openbsd, netbsd, haiku
//+private
package runtime
@@ -16,7 +16,7 @@ foreign libc {
@(link_name="realloc") _unix_realloc :: proc(ptr: rawptr, size: int) -> rawptr ---
}
_heap_alloc :: proc(size: int, zero_memory := true) -> rawptr {
_heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
if size <= 0 {
return nil
}
@@ -27,12 +27,12 @@ _heap_alloc :: proc(size: int, zero_memory := true) -> rawptr {
}
}
_heap_resize :: proc(ptr: rawptr, new_size: int) -> rawptr {
_heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
// NOTE: _unix_realloc doesn't guarantee new memory will be zeroed on
// POSIX platforms. Ensure your caller takes this into account.
return _unix_realloc(ptr, new_size)
}
_heap_free :: proc(ptr: rawptr) {
_heap_free :: proc "contextless" (ptr: rawptr) {
_unix_free(ptr)
}

View File

@@ -14,11 +14,11 @@ foreign kernel32 {
HeapFree :: proc(hHeap: rawptr, dwFlags: u32, lpMem: rawptr) -> b32 ---
}
_heap_alloc :: proc(size: int, zero_memory := true) -> rawptr {
_heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
HEAP_ZERO_MEMORY :: 0x00000008
return HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY if zero_memory else 0, uint(size))
}
_heap_resize :: proc(ptr: rawptr, new_size: int) -> rawptr {
_heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
if new_size == 0 {
_heap_free(ptr)
return nil
@@ -30,7 +30,7 @@ _heap_resize :: proc(ptr: rawptr, new_size: int) -> rawptr {
HEAP_ZERO_MEMORY :: 0x00000008
return HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, ptr, uint(new_size))
}
_heap_free :: proc(ptr: rawptr) {
_heap_free :: proc "contextless" (ptr: rawptr) {
if ptr == nil {
return
}

View File

@@ -1,3 +1,4 @@
//+vet !cast
package runtime
import "base:intrinsics"
@@ -29,7 +30,7 @@ is_power_of_two_int :: #force_inline proc "contextless" (x: int) -> bool {
return (x & (x-1)) == 0
}
align_forward_int :: #force_inline proc(ptr, align: int) -> int {
align_forward_int :: #force_inline proc "odin" (ptr, align: int) -> int {
assert(is_power_of_two_int(align))
p := ptr
@@ -40,6 +41,24 @@ align_forward_int :: #force_inline proc(ptr, align: int) -> int {
return p
}
is_power_of_two_uint :: #force_inline proc "contextless" (x: uint) -> bool {
if x <= 0 {
return false
}
return (x & (x-1)) == 0
}
align_forward_uint :: #force_inline proc "odin" (ptr, align: uint) -> uint {
assert(is_power_of_two_uint(align))
p := ptr
modulo := p & (align-1)
if modulo != 0 {
p += align - modulo
}
return p
}
is_power_of_two_uintptr :: #force_inline proc "contextless" (x: uintptr) -> bool {
if x <= 0 {
return false
@@ -47,7 +66,7 @@ is_power_of_two_uintptr :: #force_inline proc "contextless" (x: uintptr) -> bool
return (x & (x-1)) == 0
}
align_forward_uintptr :: #force_inline proc(ptr, align: uintptr) -> uintptr {
align_forward_uintptr :: #force_inline proc "odin" (ptr, align: uintptr) -> uintptr {
assert(is_power_of_two_uintptr(align))
p := ptr
@@ -58,6 +77,18 @@ align_forward_uintptr :: #force_inline proc(ptr, align: uintptr) -> uintptr {
return p
}
is_power_of_two :: proc {
is_power_of_two_int,
is_power_of_two_uint,
is_power_of_two_uintptr,
}
align_forward :: proc {
align_forward_int,
align_forward_uint,
align_forward_uintptr,
}
mem_zero :: proc "contextless" (data: rawptr, len: int) -> rawptr {
if data == nil {
return nil
@@ -453,7 +484,7 @@ quaternion256_ne :: #force_inline proc "contextless" (a, b: quaternion256) -> bo
string_decode_rune :: #force_inline proc "contextless" (s: string) -> (rune, int) {
// NOTE(bill): Duplicated here to remove dependency on package unicode/utf8
@static accept_sizes := [256]u8{
@(static, rodata) accept_sizes := [256]u8{
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x00-0x0f
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x10-0x1f
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x20-0x2f
@@ -474,7 +505,7 @@ string_decode_rune :: #force_inline proc "contextless" (s: string) -> (rune, int
}
Accept_Range :: struct {lo, hi: u8}
@static accept_ranges := [5]Accept_Range{
@(static, rodata) accept_ranges := [5]Accept_Range{
{0x80, 0xbf},
{0xa0, 0xbf},
{0x80, 0x9f},
@@ -612,21 +643,24 @@ abs_quaternion256 :: #force_inline proc "contextless" (x: quaternion256) -> f64
quo_complex32 :: proc "contextless" (n, m: complex32) -> complex32 {
e, f: f16
nr, ni := f32(real(n)), f32(imag(n))
mr, mi := f32(real(m)), f32(imag(m))
if abs(real(m)) >= abs(imag(m)) {
ratio := imag(m) / real(m)
denom := real(m) + ratio*imag(m)
e = (real(n) + imag(n)*ratio) / denom
f = (imag(n) - real(n)*ratio) / denom
e, f: f32
if abs(mr) >= abs(mi) {
ratio := mi / mr
denom := mr + ratio*mi
e = (nr + ni*ratio) / denom
f = (ni - nr*ratio) / denom
} else {
ratio := real(m) / imag(m)
denom := imag(m) + ratio*real(m)
e = (real(n)*ratio + imag(n)) / denom
f = (imag(n)*ratio - real(n)) / denom
ratio := mr / mi
denom := mi + ratio*mr
e = (nr*ratio + ni) / denom
f = (ni*ratio - nr) / denom
}
return complex(e, f)
return complex(f16(e), f16(f))
}
@@ -667,15 +701,15 @@ quo_complex128 :: proc "contextless" (n, m: complex128) -> complex128 {
}
mul_quaternion64 :: proc "contextless" (q, r: quaternion64) -> quaternion64 {
q0, q1, q2, q3 := real(q), imag(q), jmag(q), kmag(q)
r0, r1, r2, r3 := real(r), imag(r), jmag(r), kmag(r)
q0, q1, q2, q3 := f32(real(q)), f32(imag(q)), f32(jmag(q)), f32(kmag(q))
r0, r1, r2, r3 := f32(real(r)), f32(imag(r)), f32(jmag(r)), f32(kmag(r))
t0 := r0*q0 - r1*q1 - r2*q2 - r3*q3
t1 := r0*q1 + r1*q0 - r2*q3 + r3*q2
t2 := r0*q2 + r1*q3 + r2*q0 - r3*q1
t3 := r0*q3 - r1*q2 + r2*q1 + r3*q0
return quaternion(w=t0, x=t1, y=t2, z=t3)
return quaternion(w=f16(t0), x=f16(t1), y=f16(t2), z=f16(t3))
}
mul_quaternion128 :: proc "contextless" (q, r: quaternion128) -> quaternion128 {
@@ -703,8 +737,8 @@ mul_quaternion256 :: proc "contextless" (q, r: quaternion256) -> quaternion256 {
}
quo_quaternion64 :: proc "contextless" (q, r: quaternion64) -> quaternion64 {
q0, q1, q2, q3 := real(q), imag(q), jmag(q), kmag(q)
r0, r1, r2, r3 := real(r), imag(r), jmag(r), kmag(r)
q0, q1, q2, q3 := f32(real(q)), f32(imag(q)), f32(jmag(q)), f32(kmag(q))
r0, r1, r2, r3 := f32(real(r)), f32(imag(r)), f32(jmag(r)), f32(kmag(r))
invmag2 := 1.0 / (r0*r0 + r1*r1 + r2*r2 + r3*r3)
@@ -713,7 +747,7 @@ quo_quaternion64 :: proc "contextless" (q, r: quaternion64) -> quaternion64 {
t2 := (r0*q2 - r1*q3 - r2*q0 + r3*q1) * invmag2
t3 := (r0*q3 + r1*q2 + r2*q1 - r3*q0) * invmag2
return quaternion(w=t0, x=t1, y=t2, z=t3)
return quaternion(w=f16(t0), x=f16(t1), y=f16(t2), z=f16(t3))
}
quo_quaternion128 :: proc "contextless" (q, r: quaternion128) -> quaternion128 {
@@ -801,6 +835,10 @@ truncsfhf2 :: proc "c" (value: f32) -> __float16 {
}
}
@(link_name="__aeabi_d2h", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
aeabi_d2h :: proc "c" (value: f64) -> __float16 {
return truncsfhf2(f32(value))
}
@(link_name="__truncdfhf2", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
truncdfhf2 :: proc "c" (value: f64) -> __float16 {
@@ -978,26 +1016,26 @@ modti3 :: proc "c" (a, b: i128) -> i128 {
bn := (b ~ s_b) - s_b
r: u128 = ---
_ = udivmod128(transmute(u128)an, transmute(u128)bn, &r)
return (transmute(i128)r ~ s_a) - s_a
_ = udivmod128(u128(an), u128(bn), &r)
return (i128(r) ~ s_a) - s_a
}
@(link_name="__divmodti4", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
divmodti4 :: proc "c" (a, b: i128, rem: ^i128) -> i128 {
u := udivmod128(transmute(u128)a, transmute(u128)b, cast(^u128)rem)
return transmute(i128)u
u := udivmod128(u128(a), u128(b), (^u128)(rem))
return i128(u)
}
@(link_name="__divti3", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
divti3 :: proc "c" (a, b: i128) -> i128 {
u := udivmodti4(transmute(u128)a, transmute(u128)b, nil)
return transmute(i128)u
u := udivmodti4(u128(a), u128(b), nil)
return i128(u)
}
@(link_name="__fixdfti", linkage=RUNTIME_LINKAGE, require=RUNTIME_REQUIRE)
fixdfti :: proc(a: u64) -> i128 {
fixdfti :: proc "c" (a: u64) -> i128 {
significandBits :: 52
typeWidth :: (size_of(u64)*8)
exponentBits :: (typeWidth - significandBits - 1)
@@ -1042,19 +1080,17 @@ fixdfti :: proc(a: u64) -> i128 {
__write_bits :: proc "contextless" (dst, src: [^]byte, offset: uintptr, size: uintptr) {
for i in 0..<size {
j := offset+i
the_bit := byte((src[i/8]) & (1<<(i&7)) != 0)
b := the_bit<<(j&7)
dst[j/8] &~= b
dst[j/8] |= b
the_bit := byte((src[i>>3]) & (1<<(i&7)) != 0)
dst[j>>3] &~= 1<<(j&7)
dst[j>>3] |= the_bit<<(j&7)
}
}
__read_bits :: proc "contextless" (dst, src: [^]byte, offset: uintptr, size: uintptr) {
for j in 0..<size {
i := offset+j
the_bit := byte((src[i/8]) & (1<<(i&7)) != 0)
b := the_bit<<(j&7)
dst[j/8] &~= b
dst[j/8] |= b
the_bit := byte((src[i>>3]) & (1<<(i&7)) != 0)
dst[j>>3] &~= 1<<(j&7)
dst[j>>3] |= the_bit<<(j&7)
}
}
}
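For illustration only (not part of the diff), a quick check of the rounding performed by the align_forward overloads added earlier in this file; the values and the procedure name are arbitrary.

example_align_forward :: proc() {
	assert(align_forward(int(13), int(8)) == 16)    // 13 & 7 == 5, so bump by 8 - 5 = 3
	assert(align_forward(uint(32), uint(8)) == 32)  // already aligned: modulo == 0
	assert(is_power_of_two(uintptr(16)))
}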

View File

@@ -1,4 +1,4 @@
//+build freebsd, openbsd
//+build freebsd, openbsd, netbsd
//+private
package runtime
@@ -9,7 +9,11 @@ foreign libc {
@(link_name="write")
_unix_write :: proc(fd: i32, buf: rawptr, size: int) -> int ---
__error :: proc() -> ^i32 ---
when ODIN_OS == .NetBSD {
@(link_name="__errno") __error :: proc() -> ^i32 ---
} else {
__error :: proc() -> ^i32 ---
}
}
_stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {

View File

@@ -5,11 +5,24 @@ package runtime
import "base:intrinsics"
_stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
WRITE :: 0x2000004
STDERR :: 2
ret := intrinsics.syscall(WRITE, STDERR, uintptr(raw_data(data)), uintptr(len(data)))
if ret < 0 {
return 0, _OS_Errno(-ret)
when ODIN_NO_CRT {
WRITE :: 0x2000004
ret := intrinsics.syscall(WRITE, STDERR, uintptr(raw_data(data)), uintptr(len(data)))
if ret < 0 {
return 0, _OS_Errno(-ret)
}
return int(ret), 0
} else {
foreign {
write :: proc(handle: i32, buffer: [^]byte, count: uint) -> int ---
__error :: proc() -> ^i32 ---
}
if ret := write(STDERR, raw_data(data), len(data)); ret >= 0 {
return int(ret), 0
}
return 0, _OS_Errno(__error()^)
}
return int(ret), 0
}

View File

@@ -0,0 +1,43 @@
//+build orca
//+private
package runtime
import "base:intrinsics"
// Constants allowing to specify the level of logging verbosity.
log_level :: enum u32 {
// Only errors are logged.
ERROR = 0,
// Only warnings and errors are logged.
WARNING = 1,
// All messages are logged.
INFO = 2,
COUNT = 3,
}
@(default_calling_convention="c", link_prefix="oc_")
foreign {
abort_ext :: proc(file: cstring, function: cstring, line: i32, fmt: cstring, #c_vararg args: ..any) -> ! ---
assert_fail :: proc(file: cstring, function: cstring, line: i32, src: cstring, fmt: cstring, #c_vararg args: ..any) -> ! ---
log_ext :: proc(level: log_level, function: cstring, file: cstring, line: i32, fmt: cstring, #c_vararg args: ..any) ---
}
// NOTE: This is all pretty gross, don't look.
// WASM is single threaded so this should be fine.
orca_stderr_buffer: [4096]byte
orca_stderr_buffer_idx: int
_stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
for b in data {
orca_stderr_buffer[orca_stderr_buffer_idx] = b
orca_stderr_buffer_idx += 1
if b == '\n' || orca_stderr_buffer_idx == len(orca_stderr_buffer)-1 {
log_ext(.ERROR, "", "", 0, cstring(raw_data(orca_stderr_buffer[:orca_stderr_buffer_idx])))
orca_stderr_buffer_idx = 0
}
}
return len(data), 0
}

View File

@@ -2,10 +2,54 @@
//+private
package runtime
import "core:sys/wasm/wasi"
foreign import wasi "wasi_snapshot_preview1"
@(default_calling_convention="contextless")
foreign wasi {
fd_write :: proc(
fd: i32,
iovs: [][]byte,
n: ^uint,
) -> u16 ---
@(private="file")
args_sizes_get :: proc(
num_of_args: ^uint,
size_of_args: ^uint,
) -> u16 ---
@(private="file")
args_get :: proc(
argv: [^]cstring,
argv_buf: [^]byte,
) -> u16 ---
}
_stderr_write :: proc "contextless" (data: []byte) -> (int, _OS_Errno) {
data := (wasi.ciovec_t)(data)
n, err := wasi.fd_write(1, {data})
n: uint
err := fd_write(1, {data}, &n)
return int(n), _OS_Errno(err)
}
_wasi_setup_args :: proc() {
num_of_args, size_of_args: uint
if errno := args_sizes_get(&num_of_args, &size_of_args); errno != 0 {
return
}
err: Allocator_Error
if args__, err = make([]cstring, num_of_args); err != nil {
return
}
args_buf: []byte
if args_buf, err = make([]byte, size_of_args); err != nil {
delete(args__)
return
}
if errno := args_get(raw_data(args__), raw_data(args_buf)); errno != 0 {
delete(args__)
delete(args_buf)
}
}
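A small hedged sketch, not part of the diff, of what _wasi_setup_args makes available: once it has run, the raw arguments can be walked through the runtime's args__ global (the procedure name is made up; printing uses the runtime's own helpers).

print_wasi_args :: proc() {
	for arg in args__ {
		print_string(string(arg))
		print_byte('\n')
	}
}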

View File

@@ -262,7 +262,7 @@ print_typeid :: #force_no_inline proc "contextless" (id: typeid) {
}
}
@(optimization_mode="size")
@(optimization_mode="favor_size")
print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) {
if ti == nil {
print_string("nil")
@@ -401,15 +401,16 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) {
}
print_string("struct ")
if info.is_packed { print_string("#packed ") }
if info.is_raw_union { print_string("#raw_union ") }
if info.custom_align {
if .packed in info.flags { print_string("#packed ") }
if .raw_union in info.flags { print_string("#raw_union ") }
if .no_copy in info.flags { print_string("#no_copy ") }
if .align in info.flags {
print_string("#align(")
print_u64(u64(ti.align))
print_string(") ")
}
print_byte('{')
for name, i in info.names {
for name, i in info.names[:info.field_count] {
if i > 0 { print_string(", ") }
print_string(name)
print_string(": ")
@@ -469,7 +470,7 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) {
print_string("bit_field ")
print_type(info.backing_type)
print_string(" {")
for name, i in info.names {
for name, i in info.names[:info.field_count] {
if i > 0 { print_string(", ") }
print_string(name)
print_string(": ")

View File

@@ -25,13 +25,19 @@ when ODIN_NO_CRT && ODIN_OS == .Windows {
RtlMoveMemory(dst, src, len)
return dst
}
} else when ODIN_NO_CRT || (ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32) {
} else when ODIN_NO_CRT || (ODIN_OS != .Orca && (ODIN_ARCH == .wasm32 || ODIN_ARCH == .wasm64p32)) {
// NOTE: on wasm, calls to these procs are generated (by LLVM) with type `i32` instead of `int`.
//
// NOTE: `#any_int` is also needed, because calls that we generate (and package code)
// will be using `int` and need to be converted.
int_t :: i32 when ODIN_ARCH == .wasm64p32 else int
@(link_name="memset", linkage="strong", require)
memset :: proc "c" (ptr: rawptr, val: i32, len: int) -> rawptr {
memset :: proc "c" (ptr: rawptr, val: i32, #any_int len: int_t) -> rawptr {
if ptr != nil && len != 0 {
b := byte(val)
p := ([^]byte)(ptr)
for i := 0; i < len; i += 1 {
for i := int_t(0); i < len; i += 1 {
p[i] = b
}
}
@@ -39,10 +45,10 @@ when ODIN_NO_CRT && ODIN_OS == .Windows {
}
@(link_name="bzero", linkage="strong", require)
bzero :: proc "c" (ptr: rawptr, len: int) -> rawptr {
bzero :: proc "c" (ptr: rawptr, #any_int len: int_t) -> rawptr {
if ptr != nil && len != 0 {
p := ([^]byte)(ptr)
for i := 0; i < len; i += 1 {
for i := int_t(0); i < len; i += 1 {
p[i] = 0
}
}
@@ -50,7 +56,7 @@ when ODIN_NO_CRT && ODIN_OS == .Windows {
}
@(link_name="memmove", linkage="strong", require)
memmove :: proc "c" (dst, src: rawptr, len: int) -> rawptr {
memmove :: proc "c" (dst, src: rawptr, #any_int len: int_t) -> rawptr {
d, s := ([^]byte)(dst), ([^]byte)(src)
if d == s || len == 0 {
return dst
@@ -63,7 +69,7 @@ when ODIN_NO_CRT && ODIN_OS == .Windows {
}
if s > d && uintptr(s)-uintptr(d) < uintptr(len) {
for i := 0; i < len; i += 1 {
for i := int_t(0); i < len; i += 1 {
d[i] = s[i]
}
return dst
@@ -71,10 +77,10 @@ when ODIN_NO_CRT && ODIN_OS == .Windows {
return memcpy(dst, src, len)
}
@(link_name="memcpy", linkage="strong", require)
memcpy :: proc "c" (dst, src: rawptr, len: int) -> rawptr {
memcpy :: proc "c" (dst, src: rawptr, #any_int len: int_t) -> rawptr {
d, s := ([^]byte)(dst), ([^]byte)(src)
if d != s {
for i := 0; i < len; i += 1 {
for i := int_t(0); i < len; i += 1 {
d[i] = s[i]
}
}
@@ -92,4 +98,4 @@ when ODIN_NO_CRT && ODIN_OS == .Windows {
}
return ptr
}
}
}

View File

@@ -0,0 +1,127 @@
package runtime
import "base:intrinsics"
@(require_results)
random_generator_read_bytes :: proc(rg: Random_Generator, p: []byte) -> bool {
if rg.procedure != nil {
rg.procedure(rg.data, .Read, p)
return true
}
return false
}
@(require_results)
random_generator_read_ptr :: proc(rg: Random_Generator, p: rawptr, len: uint) -> bool {
if rg.procedure != nil {
rg.procedure(rg.data, .Read, ([^]byte)(p)[:len])
return true
}
return false
}
@(require_results)
random_generator_query_info :: proc(rg: Random_Generator) -> (info: Random_Generator_Query_Info) {
if rg.procedure != nil {
rg.procedure(rg.data, .Query_Info, ([^]byte)(&info)[:size_of(info)])
}
return
}
random_generator_reset_bytes :: proc(rg: Random_Generator, p: []byte) {
if rg.procedure != nil {
rg.procedure(rg.data, .Reset, p)
}
}
random_generator_reset_u64 :: proc(rg: Random_Generator, p: u64) {
if rg.procedure != nil {
p := p
rg.procedure(rg.data, .Reset, ([^]byte)(&p)[:size_of(p)])
}
}
Default_Random_State :: struct {
state: u64,
inc: u64,
}
default_random_generator_proc :: proc(data: rawptr, mode: Random_Generator_Mode, p: []byte) {
@(require_results)
read_u64 :: proc "contextless" (r: ^Default_Random_State) -> u64 {
old_state := r.state
r.state = old_state * 6364136223846793005 + (r.inc|1)
xor_shifted := (((old_state >> 59) + 5) ~ old_state) * 12605985483714917081
rot := (old_state >> 59)
return (xor_shifted >> rot) | (xor_shifted << ((-rot) & 63))
}
@(thread_local)
global_rand_seed: Default_Random_State
init :: proc "contextless" (r: ^Default_Random_State, seed: u64) {
seed := seed
if seed == 0 {
seed = u64(intrinsics.read_cycle_counter())
}
r.state = 0
r.inc = (seed << 1) | 1
_ = read_u64(r)
r.state += seed
_ = read_u64(r)
}
r: ^Default_Random_State = ---
if data == nil {
r = &global_rand_seed
} else {
r = cast(^Default_Random_State)data
}
switch mode {
case .Read:
if r.state == 0 && r.inc == 0 {
init(r, 0)
}
switch len(p) {
case size_of(u64):
// Fast path for a 64-bit destination.
intrinsics.unaligned_store((^u64)(raw_data(p)), read_u64(r))
case:
// All other cases.
pos := i8(0)
val := u64(0)
for &v in p {
if pos == 0 {
val = read_u64(r)
pos = 7
}
v = byte(val)
val >>= 8
pos -= 1
}
}
case .Reset:
seed: u64
mem_copy_non_overlapping(&seed, raw_data(p), min(size_of(seed), len(p)))
init(r, seed)
case .Query_Info:
if len(p) != size_of(Random_Generator_Query_Info) {
return
}
info := (^Random_Generator_Query_Info)(raw_data(p))
info^ += {.Uniform, .Resettable}
}
}
default_random_generator :: proc "contextless" (state: ^Default_Random_State = nil) -> Random_Generator {
return {
procedure = default_random_generator_proc,
data = state,
}
}
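A minimal usage sketch (not part of the diff) of the generator defined above, using only procedures from this file; the procedure name and the seed are arbitrary.

example_default_random :: proc() {
	state: Default_Random_State
	gen := default_random_generator(&state)
	random_generator_reset_u64(gen, 42)           // deterministic seed
	buf: [16]byte
	_ = random_generator_read_bytes(gen, buf[:])  // fills buf with pseudo-random bytes
}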

View File

@@ -58,7 +58,7 @@ udivmod128 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
return u128(n[high] >> _ctz(d[high]))
}
sr = transmute(u32)(i32(_clz(d[high])) - i32(_clz(n[high])))
sr = u32(i32(_clz(d[high])) - i32(_clz(n[high])))
if sr > U64_BITS - 2 {
if rem != nil {
rem^ = a
@@ -107,7 +107,7 @@ udivmod128 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
r[low] = n[high] >> (sr - U64_BITS)
}
} else {
sr = transmute(u32)(i32(_clz(d[high])) - i32(_clz(n[high])))
sr = u32(i32(_clz(d[high])) - i32(_clz(n[high])))
if sr > U64_BITS - 1 {
if rem != nil {
@@ -143,7 +143,7 @@ udivmod128 :: proc "c" (a, b: u128, rem: ^u128) -> u128 {
r_all = transmute(u128)r
s := i128(b - r_all - 1) >> (U128_BITS - 1)
carry = u32(s & 1)
r_all -= b & transmute(u128)s
r_all -= b & u128(s)
r = transmute([2]u64)r_all
}

View File

@@ -0,0 +1,871 @@
//+build wasm32, wasm64p32
package runtime
import "base:intrinsics"
/*
Port of emmalloc, modified for use in Odin.
Invariants:
- Per-allocation header overhead is 8 bytes, smallest allocated payload
amount is 8 bytes, and a multiple of 4 bytes.
- Acquired memory blocks are subdivided into disjoint regions that lie
next to each other.
- A region is either in used or free.
Used regions may be adjacent, and a used and unused region
may be adjacent, but not two unused ones - they would be
merged.
- Memory allocation takes constant time, unless the allocation needs to call wasm_memory_grow()
or memory is very close to being exhausted.
- Free and used regions are managed inside "root regions", which are slabs
of memory acquired via wasm_memory_grow().
- Memory retrieved using wasm_memory_grow() can not be given back to the OS.
Therefore, frees are internal to the allocator.
Copyright (c) 2010-2014 Emscripten authors, see AUTHORS file.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
WASM_Allocator :: struct #no_copy {
// The minimum alignment of allocations.
alignment: uint,
// A region that contains as payload a single forward linked list of pointers to
// root regions of each disjoint region block.
list_of_all_regions: ^Root_Region,
// For each of the buckets, maintain a linked list head node. The head node for each
// free region is a sentinel node that does not actually represent any free space, but
// the sentinel is used to avoid awkward testing against (if node == freeRegionHeadNode)
// when adding and removing elements from the linked list, i.e. we are guaranteed that
// the sentinel node is always fixed and there, and the actual free region list elements
// start at free_region_buckets[i].next each.
free_region_buckets: [NUM_FREE_BUCKETS]Region,
// A bitmask that tracks the population status for each of the 64 distinct memory regions:
// a zero at bit position i means that the free list bucket i is empty. This bitmask is
// used to avoid redundant scanning of the 64 different free region buckets: instead by
// looking at the bitmask we can find in constant time an index to a free region bucket
// that contains free memory of desired size.
free_region_buckets_used: BUCKET_BITMASK_T,
// Because wasm memory can only be allocated in pages of 64k at a time, we keep any
// spilled/unused bytes that are left from the allocated pages here, first using this
// when bytes are needed.
spill: []byte,
// Mutex for thread safety, only used if the target feature "atomics" is enabled.
mu: Mutex_State,
}
// Not required to be called; it will be called on the first allocation otherwise.
wasm_allocator_init :: proc(a: ^WASM_Allocator, alignment: uint = 8) {
assert(is_power_of_two(alignment), "alignment must be a power of two")
assert(alignment > 4, "alignment must be more than 4")
a.alignment = alignment
for i in 0..<NUM_FREE_BUCKETS {
a.free_region_buckets[i].next = &a.free_region_buckets[i]
a.free_region_buckets[i].prev = a.free_region_buckets[i].next
}
if !claim_more_memory(a, 3*size_of(Region)) {
panic("wasm_allocator: initial memory could not be allocated")
}
}
global_default_wasm_allocator_data: WASM_Allocator
default_wasm_allocator :: proc() -> Allocator {
return wasm_allocator(&global_default_wasm_allocator_data)
}
wasm_allocator :: proc(a: ^WASM_Allocator) -> Allocator {
return {
data = a,
procedure = wasm_allocator_proc,
}
}
wasm_allocator_proc :: proc(a: rawptr, mode: Allocator_Mode, size, alignment: int, old_memory: rawptr, old_size: int, loc := #caller_location) -> ([]byte, Allocator_Error) {
a := (^WASM_Allocator)(a)
if a == nil {
a = &global_default_wasm_allocator_data
}
if a.alignment == 0 {
wasm_allocator_init(a)
}
switch mode {
case .Alloc:
ptr := aligned_alloc(a, uint(alignment), uint(size), loc)
if ptr == nil {
return nil, .Out_Of_Memory
}
intrinsics.mem_zero(ptr, size)
return ([^]byte)(ptr)[:size], nil
case .Alloc_Non_Zeroed:
ptr := aligned_alloc(a, uint(alignment), uint(size), loc)
if ptr == nil {
return nil, .Out_Of_Memory
}
return ([^]byte)(ptr)[:size], nil
case .Resize:
ptr := aligned_realloc(a, old_memory, uint(alignment), uint(size), loc)
if ptr == nil {
return nil, .Out_Of_Memory
}
bytes := ([^]byte)(ptr)[:size]
if size > old_size {
new_region := raw_data(bytes[old_size:])
intrinsics.mem_zero(new_region, size - old_size)
}
return bytes, nil
case .Resize_Non_Zeroed:
ptr := aligned_realloc(a, old_memory, uint(alignment), uint(size), loc)
if ptr == nil {
return nil, .Out_Of_Memory
}
return ([^]byte)(ptr)[:size], nil
case .Free:
free(a, old_memory, loc)
return nil, nil
case .Free_All, .Query_Info:
return nil, .Mode_Not_Implemented
case .Query_Features:
set := (^Allocator_Mode_Set)(old_memory)
if set != nil {
set^ = {.Alloc, .Alloc_Non_Zeroed, .Free, .Resize, .Resize_Non_Zeroed, .Query_Features }
}
return nil, nil
}
unreachable()
}
// Returns the allocated size of the allocator (both free and used).
// If `nil` is given, the global allocator is used.
wasm_allocator_size :: proc(a: ^WASM_Allocator = nil) -> (size: uint) {
a := a
if a == nil {
a = &global_default_wasm_allocator_data
}
lock(a)
defer unlock(a)
root := a.list_of_all_regions
for root != nil {
size += uint(uintptr(root.end_ptr) - uintptr(root))
root = root.next
}
size += len(a.spill)
return
}
// Returns the amount of free memory on the allocator.
// If `nil` is given, the global allocator is used.
wasm_allocator_free_space :: proc(a: ^WASM_Allocator = nil) -> (free: uint) {
a := a
if a == nil {
a = &global_default_wasm_allocator_data
}
lock(a)
defer unlock(a)
bucket_index: u64 = 0
bucket_mask := a.free_region_buckets_used
for bucket_mask != 0 {
index_add := intrinsics.count_trailing_zeros(bucket_mask)
bucket_index += index_add
bucket_mask >>= index_add
for free_region := a.free_region_buckets[bucket_index].next; free_region != &a.free_region_buckets[bucket_index]; free_region = free_region.next {
free += free_region.size - REGION_HEADER_SIZE
}
bucket_index += 1
bucket_mask >>= 1
}
free += len(a.spill)
return
}
@(private="file")
NUM_FREE_BUCKETS :: 64
@(private="file")
BUCKET_BITMASK_T :: u64
// Dynamic memory is subdivided into regions, in the format
// <size:u32> ..... <size:u32> | <size:u32> ..... <size:u32> | <size:u32> ..... <size:u32> | .....
// That is, at the bottom and top end of each memory region, the size of that region is stored. That allows traversing the
// memory regions backwards and forwards. Because each allocation must be at least a multiple of 4 bytes, the lowest two bits of
// each size field is unused. Free regions are distinguished from used regions by having the FREE_REGION_FLAG bit present
// in the size field. I.e. for free regions, the size field is odd, and for used regions, the size field reads even.
@(private="file")
FREE_REGION_FLAG :: 0x1
// Attempts to alloc more than this many bytes would cause an overflow when calculating the size of a region,
// therefore allocations larger than this are short-circuited immediately on entry.
@(private="file")
MAX_ALLOC_SIZE :: 0xFFFFFFC7
// A free region has the following structure:
// <size:uint> <prevptr> <nextptr> ... <size:uint>
@(private="file")
Region :: struct {
size: uint,
prev, next: ^Region,
_at_the_end_of_this_struct_size: uint,
}
// Each memory block starts with a Root_Region at the beginning.
// The Root_Region specifies the size of the region block, and forms a linked
// list of all Root_Regions in the program, starting with `list_of_all_regions`
// below.
@(private="file")
Root_Region :: struct {
size: u32,
next: ^Root_Region,
end_ptr: ^byte,
}
@(private="file")
Mutex_State :: enum u32 {
Unlocked = 0,
Locked = 1,
Waiting = 2,
}
@(private="file")
lock :: proc(a: ^WASM_Allocator) {
when intrinsics.has_target_feature("atomics") {
@(cold)
lock_slow :: proc(a: ^WASM_Allocator, curr_state: Mutex_State) {
new_state := curr_state // Make a copy of it
spin_lock: for spin in 0..<i32(100) {
state, ok := intrinsics.atomic_compare_exchange_weak_explicit(&a.mu, .Unlocked, new_state, .Acquire, .Consume)
if ok {
return
}
if state == .Waiting {
break spin_lock
}
for i := min(spin+1, 32); i > 0; i -= 1 {
intrinsics.cpu_relax()
}
}
// Set just in case 100 iterations did not do it
new_state = .Waiting
for {
if intrinsics.atomic_exchange_explicit(&a.mu, .Waiting, .Acquire) == .Unlocked {
return
}
ret := intrinsics.wasm_memory_atomic_wait32((^u32)(&a.mu), u32(new_state), -1)
assert(ret != 0)
intrinsics.cpu_relax()
}
}
if v := intrinsics.atomic_exchange_explicit(&a.mu, .Locked, .Acquire); v != .Unlocked {
lock_slow(a, v)
}
}
}
@(private="file")
unlock :: proc(a: ^WASM_Allocator) {
when intrinsics.has_target_feature("atomics") {
@(cold)
unlock_slow :: proc(a: ^WASM_Allocator) {
for {
s := intrinsics.wasm_memory_atomic_notify32((^u32)(&a.mu), 1)
if s >= 1 {
return
}
}
}
switch intrinsics.atomic_exchange_explicit(&a.mu, .Unlocked, .Release) {
case .Unlocked:
unreachable()
case .Locked:
// Okay
case .Waiting:
unlock_slow(a)
}
}
}
@(private="file")
assert_locked :: proc(a: ^WASM_Allocator) {
when intrinsics.has_target_feature("atomics") {
assert(intrinsics.atomic_load(&a.mu) != .Unlocked)
}
}
@(private="file")
has_alignment_uintptr :: proc(ptr: uintptr, #any_int alignment: uintptr) -> bool {
return ptr & (alignment-1) == 0
}
@(private="file")
has_alignment_uint :: proc(ptr: uint, alignment: uint) -> bool {
return ptr & (alignment-1) == 0
}
@(private="file")
has_alignment :: proc {
has_alignment_uintptr,
has_alignment_uint,
}
@(private="file")
REGION_HEADER_SIZE :: 2*size_of(uint)
@(private="file")
SMALLEST_ALLOCATION_SIZE :: 2*size_of(rawptr)
// Subdivide regions of free space into distinct circular doubly linked lists, where each linked list
// represents a range of free space blocks. The following function compute_free_list_bucket() converts
// an allocation size to the bucket index that should be looked at.
#assert(NUM_FREE_BUCKETS == 64, "Following function is tailored specifically for the NUM_FREE_BUCKETS == 64 case")
@(private="file")
compute_free_list_bucket :: proc(size: uint) -> uint {
if size < 128 { return (size >> 3) - 1 }
clz := intrinsics.count_leading_zeros(i32(size))
bucket_index: i32 = ((clz > 19) \
? 110 - (clz<<2) + ((i32)(size >> (u32)(29-clz)) ~ 4) \
: min( 71 - (clz<<1) + ((i32)(size >> (u32)(30-clz)) ~ 2), NUM_FREE_BUCKETS-1))
assert(bucket_index >= 0)
assert(bucket_index < NUM_FREE_BUCKETS)
return uint(bucket_index)
}
@(private="file")
prev_region :: proc(region: ^Region) -> ^Region {
prev_region_size := ([^]uint)(region)[-1]
prev_region_size = prev_region_size & ~uint(FREE_REGION_FLAG)
return (^Region)(uintptr(region)-uintptr(prev_region_size))
}
@(private="file")
next_region :: proc(region: ^Region) -> ^Region {
return (^Region)(uintptr(region)+uintptr(region.size))
}
@(private="file")
region_ceiling_size :: proc(region: ^Region) -> uint {
return ([^]uint)(uintptr(region)+uintptr(region.size))[-1]
}
@(private="file")
region_is_free :: proc(r: ^Region) -> bool {
return region_ceiling_size(r) & FREE_REGION_FLAG >= 1
}
@(private="file")
region_is_in_use :: proc(r: ^Region) -> bool {
return r.size == region_ceiling_size(r)
}
@(private="file")
region_payload_start_ptr :: proc(r: ^Region) -> [^]byte {
return ([^]byte)(r)[size_of(uint):]
}
@(private="file")
region_payload_end_ptr :: proc(r: ^Region) -> [^]byte {
return ([^]byte)(r)[r.size-size_of(uint):]
}
@(private="file")
create_used_region :: proc(ptr: rawptr, size: uint) {
assert(has_alignment(uintptr(ptr), size_of(uint)))
assert(has_alignment(size, size_of(uint)))
assert(size >= size_of(Region))
uptr := ([^]uint)(ptr)
uptr[0] = size
uptr[size/size_of(uint)-1] = size
}
@(private="file")
create_free_region :: proc(ptr: rawptr, size: uint) {
assert(has_alignment(uintptr(ptr), size_of(uint)))
assert(has_alignment(size, size_of(uint)))
assert(size >= size_of(Region))
free_region := (^Region)(ptr)
free_region.size = size
([^]uint)(ptr)[size/size_of(uint)-1] = size | FREE_REGION_FLAG
}
@(private="file")
prepend_to_free_list :: proc(region: ^Region, prepend_to: ^Region) {
assert(region_is_free(region))
region.next = prepend_to
region.prev = prepend_to.prev
prepend_to.prev = region
region.prev.next = region
}
@(private="file")
unlink_from_free_list :: proc(region: ^Region) {
assert(region_is_free(region))
region.prev.next = region.next
region.next.prev = region.prev
}
@(private="file")
link_to_free_list :: proc(a: ^WASM_Allocator, free_region: ^Region) {
assert(free_region.size >= size_of(Region))
bucket_index := compute_free_list_bucket(free_region.size-REGION_HEADER_SIZE)
free_list_head := &a.free_region_buckets[bucket_index]
free_region.prev = free_list_head
free_region.next = free_list_head.next
free_list_head.next = free_region
free_region.next.prev = free_region
a.free_region_buckets_used |= BUCKET_BITMASK_T(1) << bucket_index
}
@(private="file")
claim_more_memory :: proc(a: ^WASM_Allocator, num_bytes: uint) -> bool {
PAGE_SIZE :: 64 * 1024
page_alloc :: proc(page_count: int) -> []byte {
prev_page_count := intrinsics.wasm_memory_grow(0, uintptr(page_count))
if prev_page_count < 0 { return nil }
ptr := ([^]byte)(uintptr(prev_page_count) * PAGE_SIZE)
return ptr[:page_count * PAGE_SIZE]
}
alloc :: proc(a: ^WASM_Allocator, num_bytes: uint) -> (bytes: [^]byte) #no_bounds_check {
if uint(len(a.spill)) >= num_bytes {
bytes = raw_data(a.spill[:num_bytes])
a.spill = a.spill[num_bytes:]
return
}
pages := int((num_bytes / PAGE_SIZE) + 1)
allocated := page_alloc(pages)
if allocated == nil { return nil }
// If the allocated memory is a direct continuation of the spill from before,
// we can just extend the spill.
spill_end := uintptr(raw_data(a.spill)) + uintptr(len(a.spill))
if spill_end == uintptr(raw_data(allocated)) {
raw_spill := (^Raw_Slice)(&a.spill)
raw_spill.len += len(allocated)
} else {
// Otherwise, we have to "waste" the previous spill.
// Now this is probably uncommon, and will only happen if another code path
// is also requesting pages.
a.spill = allocated
}
bytes = raw_data(a.spill)
a.spill = a.spill[num_bytes:]
return
}
num_bytes := num_bytes
num_bytes = align_forward(num_bytes, a.alignment)
start_ptr := alloc(a, uint(num_bytes))
if start_ptr == nil { return false }
assert(has_alignment(uintptr(start_ptr), align_of(uint)))
end_ptr := start_ptr[num_bytes:]
end_sentinel_region := (^Region)(end_ptr[-size_of(Region):])
create_used_region(end_sentinel_region, size_of(Region))
// If we are the sole user of wasm_memory_grow(), it will feed us continuous/consecutive memory addresses - take advantage
// of that if so: instead of creating two disjoint memory region blocks, expand the previous one to a larger size.
prev_alloc_end_address := a.list_of_all_regions != nil ? a.list_of_all_regions.end_ptr : nil
if start_ptr == prev_alloc_end_address {
prev_end_sentinel := prev_region((^Region)(start_ptr))
assert(region_is_in_use(prev_end_sentinel))
prev_region := prev_region(prev_end_sentinel)
a.list_of_all_regions.end_ptr = end_ptr
// Two scenarios, either the last region of the previous block was in use, in which case we need to create
// a new free region in the newly allocated space; or it was free, in which case we can extend that region
// to cover a larger size.
if region_is_free(prev_region) {
new_free_region_size := uint(uintptr(end_sentinel_region) - uintptr(prev_region))
unlink_from_free_list(prev_region)
create_free_region(prev_region, new_free_region_size)
link_to_free_list(a, prev_region)
return true
}
start_ptr = start_ptr[-size_of(Region):]
} else {
create_used_region(start_ptr, size_of(Region))
new_region_block := (^Root_Region)(start_ptr)
new_region_block.next = a.list_of_all_regions
new_region_block.end_ptr = end_ptr
a.list_of_all_regions = new_region_block
start_ptr = start_ptr[size_of(Region):]
}
create_free_region(start_ptr, uint(uintptr(end_sentinel_region)-uintptr(start_ptr)))
link_to_free_list(a, (^Region)(start_ptr))
return true
}
@(private="file")
validate_alloc_size :: proc(size: uint) -> uint {
#assert(size_of(uint) >= size_of(uintptr))
#assert(size_of(uint) % size_of(uintptr) == 0)
// NOTE: emmalloc aligns this forward on pointer size, but I think that is a mistake and will
// misbehave on wasm64p32.
validated_size := size > SMALLEST_ALLOCATION_SIZE ? align_forward(size, size_of(uint)) : SMALLEST_ALLOCATION_SIZE
assert(validated_size >= size) // Assert we haven't wrapped.
return validated_size
}
@(private="file")
allocate_memory :: proc(a: ^WASM_Allocator, alignment: uint, size: uint, loc := #caller_location) -> rawptr {
attempt_allocate :: proc(a: ^WASM_Allocator, free_region: ^Region, alignment, size: uint) -> rawptr {
assert_locked(a)
free_region := free_region
payload_start_ptr := uintptr(region_payload_start_ptr(free_region))
payload_start_ptr_aligned := align_forward(payload_start_ptr, uintptr(alignment))
payload_end_ptr := uintptr(region_payload_end_ptr(free_region))
if payload_start_ptr_aligned + uintptr(size) > payload_end_ptr {
return nil
}
// We have enough free space, so the memory allocation will be made into this region. Remove this free region
// from the list of free regions: whatever slop remains will be later added back to the free region pool.
unlink_from_free_list(free_region)
// Before we proceed further, fix up the boundary between this and the preceding region,
// so that the boundary between the two regions happens at a right spot for the payload to be aligned.
if payload_start_ptr != payload_start_ptr_aligned {
prev := prev_region(free_region)
assert(region_is_in_use(prev))
region_boundary_bump_amount := payload_start_ptr_aligned - payload_start_ptr
new_this_region_size := free_region.size - uint(region_boundary_bump_amount)
create_used_region(prev, prev.size + uint(region_boundary_bump_amount))
free_region = (^Region)(uintptr(free_region) + region_boundary_bump_amount)
free_region.size = new_this_region_size
}
// Next, we need to decide whether this region is so large that it should be split into two regions,
// one representing the newly used memory area, and at the high end a remaining leftover free area.
// This splitting to two is done always if there is enough space for the high end to fit a region.
// Carve 'size' bytes of payload off this region. So,
// [sz prev next sz]
// becomes
// [sz payload sz] [sz prev next sz]
if size_of(Region) + REGION_HEADER_SIZE + size <= free_region.size {
new_free_region := (^Region)(uintptr(free_region) + REGION_HEADER_SIZE + uintptr(size))
create_free_region(new_free_region, free_region.size - size - REGION_HEADER_SIZE)
link_to_free_list(a, new_free_region)
create_used_region(free_region, size + REGION_HEADER_SIZE)
} else {
// There is not enough space to split the free memory region into used+free parts, so consume the whole
// region as used memory, not leaving a free memory region behind.
// Initialize the free region as used by resetting the ceiling size to the same value as the size at bottom.
([^]uint)(uintptr(free_region) + uintptr(free_region.size))[-1] = free_region.size
}
return rawptr(uintptr(free_region) + size_of(uint))
}
assert_locked(a)
assert(is_power_of_two(alignment))
assert(size <= MAX_ALLOC_SIZE, "allocation too big", loc=loc)
alignment := alignment
alignment = max(alignment, a.alignment)
size := size
size = validate_alloc_size(size)
// Attempt to allocate memory starting from smallest bucket that can contain the required amount of memory.
// Under normal alignment conditions this should always be the first or second bucket we look at, but if
// performing an allocation with complex alignment, we may need to look at multiple buckets.
bucket_index := compute_free_list_bucket(size)
bucket_mask := a.free_region_buckets_used >> bucket_index
// Loop through each bucket that has free regions in it, based on bits set in free_region_buckets_used bitmap.
for bucket_mask != 0 {
index_add := intrinsics.count_trailing_zeros(bucket_mask)
bucket_index += uint(index_add)
bucket_mask >>= index_add
assert(bucket_index <= NUM_FREE_BUCKETS-1)
assert(a.free_region_buckets_used & (BUCKET_BITMASK_T(1) << bucket_index) > 0)
free_region := a.free_region_buckets[bucket_index].next
assert(free_region != nil)
if free_region != &a.free_region_buckets[bucket_index] {
ptr := attempt_allocate(a, free_region, alignment, size)
if ptr != nil {
return ptr
}
// We were not able to allocate from the first region found in this bucket, so penalize
// the region by cycling it to the end of the doubly circular linked list. (constant time)
// This provides a randomized guarantee that when performing allocations of size k to a
// bucket of [k-something, k+something] range, we will not always attempt to satisfy the
// allocation from the same available region at the front of the list, but we try each
// region in turn.
unlink_from_free_list(free_region)
prepend_to_free_list(free_region, &a.free_region_buckets[bucket_index])
// But do not stick around to attempt to look at other regions in this bucket - move
// to search the next populated bucket index if this did not fit. This gives a practical
// "allocation in constant time" guarantee, since the next higher bucket will only have
// regions that are all of strictly larger size than the requested allocation. Only if
// there is a difficult alignment requirement we may fail to perform the allocation from
// a region in the next bucket, and if so, we keep trying higher buckets until one of them
// works.
bucket_index += 1
bucket_mask >>= 1
} else {
// This bucket was not populated after all with any regions,
// but we just had a stale bit set to mark a populated bucket.
// Reset the bit to update latest status so that we do not
// redundantly look at this bucket again.
a.free_region_buckets_used &~= BUCKET_BITMASK_T(1) << bucket_index
bucket_mask ~= 1
}
assert((bucket_index == NUM_FREE_BUCKETS && bucket_mask == 0) || (bucket_mask == a.free_region_buckets_used >> bucket_index))
}
// None of the buckets were able to accommodate an allocation. If this happens we are almost out of memory.
// The largest bucket might contain some suitable regions, but we only looked at one region in that bucket, so
// as a last resort, loop through more free regions in the bucket that represents the largest allocations available.
// But only if the bucket representing the largest available allocations is not any of the first thirty buckets;
// these represent allocatable areas of less than 1024 bytes, which could be a lot of scrap.
// In such a case, prefer to claim more memory right away.
largest_bucket_index := NUM_FREE_BUCKETS - 1 - intrinsics.count_leading_zeros(a.free_region_buckets_used)
// free_region will be null if there is absolutely no memory left. (all buckets are 100% used)
free_region := a.free_region_buckets_used > 0 ? a.free_region_buckets[largest_bucket_index].next : nil
// The 30 first free region buckets cover memory blocks < 2048 bytes, so skip looking at those here (too small)
if a.free_region_buckets_used >> 30 > 0 {
// Look only at a constant number of regions in this bucket max, to avoid bad worst case behavior.
// If this many regions cannot find free space, we give up and prefer to claim more memory instead.
max_regions_to_try_before_giving_up :: 99
num_tries_left := max_regions_to_try_before_giving_up
for ; free_region != &a.free_region_buckets[largest_bucket_index] && num_tries_left > 0; num_tries_left -= 1 {
ptr := attempt_allocate(a, free_region, alignment, size)
if ptr != nil {
return ptr
}
free_region = free_region.next
}
}
// We were unable to find a free memory region. Must claim more memory!
num_bytes_to_claim := size+size_of(Region)*3
if alignment > a.alignment {
num_bytes_to_claim += alignment
}
success := claim_more_memory(a, num_bytes_to_claim)
if (success) {
// Try allocate again with the newly available memory.
return allocate_memory(a, alignment, size)
}
// also claim_more_memory failed, we are really really constrained :( As a last resort, go back to looking at the
// bucket we already looked at above, continuing where the above search left off - perhaps there are
// regions we overlooked the first time that might be able to satisfy the allocation.
if free_region != nil {
for free_region != &a.free_region_buckets[largest_bucket_index] {
ptr := attempt_allocate(a, free_region, alignment, size)
if ptr != nil {
return ptr
}
free_region = free_region.next
}
}
// Fully out of memory.
return nil
}
@(private="file")
aligned_alloc :: proc(a: ^WASM_Allocator, alignment, size: uint, loc := #caller_location) -> rawptr {
lock(a)
defer unlock(a)
return allocate_memory(a, alignment, size, loc)
}
@(private="file")
free :: proc(a: ^WASM_Allocator, ptr: rawptr, loc := #caller_location) {
if ptr == nil {
return
}
region_start_ptr := uintptr(ptr) - size_of(uint)
region := (^Region)(region_start_ptr)
assert(has_alignment(region_start_ptr, size_of(uint)))
lock(a)
defer unlock(a)
size := region.size
assert(region_is_in_use(region), "double free or corrupt region", loc=loc)
prev_region_size_field := ([^]uint)(region)[-1]
prev_region_size := prev_region_size_field & ~uint(FREE_REGION_FLAG)
if prev_region_size_field != prev_region_size {
prev_region := (^Region)(uintptr(region) - uintptr(prev_region_size))
unlink_from_free_list(prev_region)
region_start_ptr = uintptr(prev_region)
size += prev_region_size
}
next_reg := next_region(region)
size_at_end := (^uint)(region_payload_end_ptr(next_reg))^
if next_reg.size != size_at_end {
unlink_from_free_list(next_reg)
size += next_reg.size
}
create_free_region(rawptr(region_start_ptr), size)
link_to_free_list(a, (^Region)(region_start_ptr))
}
@(private="file")
aligned_realloc :: proc(a: ^WASM_Allocator, ptr: rawptr, alignment, size: uint, loc := #caller_location) -> rawptr {
attempt_region_resize :: proc(a: ^WASM_Allocator, region: ^Region, size: uint) -> bool {
lock(a)
defer unlock(a)
// First, attempt to resize this region in place, which is possible if the region
// that immediately follows it is free.
next_reg := next_region(region)
next_region_end_ptr := uintptr(next_reg) + uintptr(next_reg.size)
size_at_ceiling := ([^]uint)(next_region_end_ptr)[-1]
if next_reg.size != size_at_ceiling { // Next region is free?
assert(region_is_free(next_reg))
new_next_region_start_ptr := uintptr(region) + uintptr(size)
assert(has_alignment(new_next_region_start_ptr, size_of(uint)))
// Does the next region remain large enough to hold a Region after shrinking?
if new_next_region_start_ptr + size_of(Region) <= next_region_end_ptr {
unlink_from_free_list(next_reg)
create_free_region(rawptr(new_next_region_start_ptr), uint(next_region_end_ptr - new_next_region_start_ptr))
link_to_free_list(a, (^Region)(new_next_region_start_ptr))
create_used_region(region, uint(new_next_region_start_ptr - uintptr(region)))
return true
}
// Otherwise, can the allocation be satisfied by absorbing the next region altogether?
if new_next_region_start_ptr <= next_region_end_ptr {
unlink_from_free_list(next_reg)
create_used_region(region, region.size + next_reg.size)
return true
}
} else {
// Next region is a used region - we cannot change its starting address. However, if we are shrinking the
// size of this region, we can create a new free region between this region and the next used one.
if size + size_of(Region) <= region.size {
free_region_size := region.size - size
create_used_region(region, size)
free_region := (^Region)(uintptr(region) + uintptr(size))
create_free_region(free_region, free_region_size)
link_to_free_list(a, free_region)
return true
} else if size <= region.size {
// Caller asked to shrink the size, but because a full Region cannot fit in the freed-up
// area, we cannot actually do anything. This occurs when the shrink amount is very small. In that case,
// just report success without doing any work.
return true
}
}
return false
}
if ptr == nil {
return aligned_alloc(a, alignment, size, loc)
}
if size == 0 {
free(a, ptr, loc)
return nil
}
if size > MAX_ALLOC_SIZE {
return nil
}
assert(is_power_of_two(alignment))
assert(has_alignment(uintptr(ptr), alignment), "realloc on different alignment than original allocation", loc=loc)
size := size
size = validate_alloc_size(size)
region := (^Region)(uintptr(ptr) - size_of(uint))
// Attempt an in-place resize.
if attempt_region_resize(a, region, size + REGION_HEADER_SIZE) {
return ptr
}
// Can't do it in-place, allocate new region and copy over.
newptr := aligned_alloc(a, alignment, size, loc)
if newptr != nil {
intrinsics.mem_copy(newptr, ptr, min(size, region.size - REGION_HEADER_SIZE))
free(a, ptr, loc=loc)
}
return newptr
}
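The free and realloc paths above rely on a boundary-tag layout: a region's size is stored both in its header and in the word just before the next region, and for a free region the trailing copy carries FREE_REGION_FLAG, so a neighbour's state can be inspected and coalesced in O(1). A minimal illustrative sketch in Odin (the actual value of FREE_REGION_FLAG is not shown in this hunk; the sketch merely assumes it is a low tag bit):

package boundary_tag_sketch

import "core:fmt"

FREE_REGION_FLAG :: uint(1) // assumption: a low tag bit, masked off by the allocator above

main :: proc() {
	size := uint(4 * size_of(uint))

	used_footer := size                    // a used neighbour repeats its size verbatim
	free_footer := size | FREE_REGION_FLAG // a free neighbour tags its trailing size copy

	fmt.println(size != used_footer) // false: neighbour is in use, do not merge
	fmt.println(size != free_footer) // true:  neighbour is free, safe to coalesce
}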

Binary file not shown.

View File

@@ -48,6 +48,9 @@ if "%2" == "1" (
set odin_version_raw="dev-%curr_year%-%curr_month%"
set compiler_flags= -nologo -Oi -TP -fp:precise -Gm- -MP -FC -EHsc- -GR- -GF
rem Parse source code as utf-8 even on shift-jis and other codepages
rem See https://learn.microsoft.com/en-us/cpp/build/reference/utf-8-set-source-and-executable-character-sets-to-utf-8?view=msvc-170
set compiler_flags= %compiler_flags% /utf-8
set compiler_defines= -DODIN_VERSION_RAW=\"%odin_version_raw%\"
if not exist .git\ goto skip_git_hash
@@ -111,7 +114,7 @@ call build_vendor.bat
if %errorlevel% neq 0 goto end_of_build
rem If the demo doesn't run for you and your CPU is more than a decade old, try -microarch:native
if %release_mode% EQU 0 odin run examples/demo -- Hellope World
if %release_mode% EQU 0 odin run examples/demo -vet -strict-style -- Hellope World
del *.obj > NUL 2> NUL

View File

@@ -2,7 +2,6 @@
set -eu
: ${CPPFLAGS=}
: ${CXX=clang++}
: ${CXXFLAGS=}
: ${LDFLAGS=}
: ${LLVM_CONFIG=}
@@ -26,17 +25,19 @@ error() {
if [ -z "$LLVM_CONFIG" ]; then
# darwin, linux, openbsd
if [ -n "$(command -v llvm-config-17)" ]; then LLVM_CONFIG="llvm-config-17"
if [ -n "$(command -v llvm-config-18)" ]; then LLVM_CONFIG="llvm-config-18"
elif [ -n "$(command -v llvm-config-17)" ]; then LLVM_CONFIG="llvm-config-17"
elif [ -n "$(command -v llvm-config-14)" ]; then LLVM_CONFIG="llvm-config-14"
elif [ -n "$(command -v llvm-config-13)" ]; then LLVM_CONFIG="llvm-config-13"
elif [ -n "$(command -v llvm-config-12)" ]; then LLVM_CONFIG="llvm-config-12"
elif [ -n "$(command -v llvm-config-11)" ]; then LLVM_CONFIG="llvm-config-11"
# freebsd
elif [ -n "$(command -v llvm-config17)" ]; then LLVM_CONFIG="llvm-config-17"
elif [ -n "$(command -v llvm-config14)" ]; then LLVM_CONFIG="llvm-config-14"
elif [ -n "$(command -v llvm-config13)" ]; then LLVM_CONFIG="llvm-config-13"
elif [ -n "$(command -v llvm-config12)" ]; then LLVM_CONFIG="llvm-config-12"
elif [ -n "$(command -v llvm-config11)" ]; then LLVM_CONFIG="llvm-config-11"
elif [ -n "$(command -v llvm-config18)" ]; then LLVM_CONFIG="llvm-config18"
elif [ -n "$(command -v llvm-config17)" ]; then LLVM_CONFIG="llvm-config17"
elif [ -n "$(command -v llvm-config14)" ]; then LLVM_CONFIG="llvm-config14"
elif [ -n "$(command -v llvm-config13)" ]; then LLVM_CONFIG="llvm-config13"
elif [ -n "$(command -v llvm-config12)" ]; then LLVM_CONFIG="llvm-config12"
elif [ -n "$(command -v llvm-config11)" ]; then LLVM_CONFIG="llvm-config11"
# fallback
elif [ -n "$(command -v llvm-config)" ]; then LLVM_CONFIG="llvm-config"
else
@@ -44,31 +45,51 @@ if [ -z "$LLVM_CONFIG" ]; then
fi
fi
if [ -x "$(which clang++)" ]; then
: ${CXX="clang++"}
elif [ -x "$($LLVM_CONFIG --bindir)/clang++" ]; then
: ${CXX=$($LLVM_CONFIG --bindir)/clang++}
else
error "No clang++ command found. Set CXX to proceed."
fi
LLVM_VERSION="$($LLVM_CONFIG --version)"
LLVM_VERSION_MAJOR="$(echo $LLVM_VERSION | awk -F. '{print $1}')"
LLVM_VERSION_MINOR="$(echo $LLVM_VERSION | awk -F. '{print $2}')"
LLVM_VERSION_PATCH="$(echo $LLVM_VERSION | awk -F. '{print $3}')"
if [ $LLVM_VERSION_MAJOR -lt 11 ] ||
([ $LLVM_VERSION_MAJOR -gt 14 ] && [ $LLVM_VERSION_MAJOR -lt 17 ]); then
error "Invalid LLVM version $LLVM_VERSION: must be 11, 12, 13, 14 or 17"
if [ $LLVM_VERSION_MAJOR -lt 11 ] || ([ $LLVM_VERSION_MAJOR -gt 14 ] && [ $LLVM_VERSION_MAJOR -lt 17 ]) || [ $LLVM_VERSION_MAJOR -gt 18 ]; then
error "Invalid LLVM version $LLVM_VERSION: must be 11, 12, 13, 14, 17 or 18"
fi
case "$OS_NAME" in
Darwin)
if [ "$OS_ARCH" = "arm64" ]; then
if [ $LLVM_VERSION_MAJOR -lt 13 ] || [ $LLVM_VERSION_MAJOR -gt 17 ]; then
error "Darwin Arm64 requires LLVM 13, 14 or 17"
if [ $LLVM_VERSION_MAJOR -lt 13 ]; then
error "Invalid LLVM version $LLVM_VERSION: Darwin Arm64 requires LLVM 13, 14, 17 or 18"
fi
fi
CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags)"
darwin_sysroot=
if [ $(which xcrun) ]; then
darwin_sysroot="--sysroot $(xcrun --sdk macosx --show-sdk-path)"
elif [[ -e "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk" ]]; then
darwin_sysroot="--sysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk"
else
echo "Warning: MacOSX.sdk not found."
fi
CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags) ${darwin_sysroot}"
LDFLAGS="$LDFLAGS -liconv -ldl -framework System -lLLVM"
;;
FreeBSD)
CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags)"
LDFLAGS="$LDFLAGS $($LLVM_CONFIG --libs core native --system-libs)"
;;
NetBSD)
CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags)"
LDFLAGS="$LDFLAGS $($LLVM_CONFIG --libs core native --system-libs)"
;;
Linux)
CXXFLAGS="$CXXFLAGS $($LLVM_CONFIG --cxxflags --ldflags)"
LDFLAGS="$LDFLAGS -ldl $($LLVM_CONFIG --libs core native --system-libs --libfiles)"
@@ -123,7 +144,7 @@ build_odin() {
}
run_demo() {
./odin run examples/demo/demo.odin -file -- Hellope World
./odin run examples/demo -vet -strict-style -- Hellope World
}
if [ $# -eq 0 ]; then

View File

@@ -1,51 +0,0 @@
import subprocess
import sys
import json
import datetime
import urllib.parse
import sys
def main():
files_by_date = {}
bucket = sys.argv[1]
files_lines = execute_cli(f"b2 ls --long {bucket} nightly").split("\n")
for x in files_lines:
parts = x.split(" ", 1)
if parts[0]:
json_str = execute_cli(f"b2 get-file-info {parts[0]}")
data = json.loads(json_str)
name = remove_prefix(data['fileName'], "nightly/")
url = f"https://f001.backblazeb2.com/file/{bucket}/nightly/{urllib.parse.quote_plus(name)}"
sha1 = data['contentSha1']
size = int(data['size'])
ts = int(data['fileInfo']['src_last_modified_millis'])
date = datetime.datetime.fromtimestamp(ts/1000).strftime('%Y-%m-%d')
if date not in files_by_date.keys():
files_by_date[date] = []
files_by_date[date].append({
'name': name,
'url': url,
'sha1': sha1,
'sizeInBytes': size,
})
now = datetime.datetime.utcnow().isoformat()
print(json.dumps({
'last_updated' : now,
'files': files_by_date
}, sort_keys=True, indent=4))
def remove_prefix(text, prefix):
return text[text.startswith(prefix) and len(prefix):]
def execute_cli(command):
sb = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
return sb.stdout.read().decode("utf-8");
if __name__ == '__main__':
sys.exit(main())

View File

@@ -1,34 +0,0 @@
import subprocess
import sys
import json
import datetime
import urllib.parse
import sys
def main():
files_by_date = {}
bucket = sys.argv[1]
days_to_keep = int(sys.argv[2])
print(f"Looking for binaries to delete older than {days_to_keep} days")
files_lines = execute_cli(f"b2 ls --long --versions {bucket} nightly").split("\n")
for x in files_lines:
parts = [y for y in x.split(' ') if y]
if parts and parts[0]:
date = datetime.datetime.strptime(parts[2], '%Y-%m-%d').replace(hour=0, minute=0, second=0, microsecond=0)
now = datetime.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
delta = now - date
if delta.days > days_to_keep:
print(f'Deleting {parts[5]}')
execute_cli(f'b2 delete-file-version {parts[0]}')
def execute_cli(command):
sb = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
return sb.stdout.read().decode("utf-8");
if __name__ == '__main__':
sys.exit(main())

140
ci/nightly.py Normal file
View File

@@ -0,0 +1,140 @@
import os
import sys
from zipfile import ZipFile, ZIP_DEFLATED
from b2sdk.v2 import InMemoryAccountInfo, B2Api
from datetime import datetime
import json
UPLOAD_FOLDER = "nightly/"
info = InMemoryAccountInfo()
b2_api = B2Api(info)
application_key_id = os.environ['APPID']
application_key = os.environ['APPKEY']
bucket_name = os.environ['BUCKET']
days_to_keep = os.environ['DAYS_TO_KEEP']
def auth() -> bool:
try:
realm = b2_api.account_info.get_realm()
return True # Already authenticated
except:
pass # Not yet authenticated
err = b2_api.authorize_account("production", application_key_id, application_key)
return err == None
def get_bucket():
if not auth(): sys.exit(1)
return b2_api.get_bucket_by_name(bucket_name)
def remove_prefix(text: str, prefix: str) -> str:
return text[text.startswith(prefix) and len(prefix):]
def create_and_upload_artifact_zip(platform: str, artifact: str) -> int:
now = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
destination_zip_name = "odin-{}-nightly+{}.zip".format(platform, now.strftime("%Y-%m-%d"))
source_zip_name = artifact
if not artifact.endswith(".zip"):
print(f"Creating archive {destination_zip_name} from {artifact} and uploading to {bucket_name}")
source_zip_name = destination_zip_name
with ZipFile(source_zip_name, mode='w', compression=ZIP_DEFLATED, compresslevel=9) as z:
for root, directory, filenames in os.walk(artifact):
for file in filenames:
file_path = os.path.join(root, file)
zip_path = os.path.join("dist", os.path.relpath(file_path, artifact))
z.write(file_path, zip_path)
if not os.path.exists(source_zip_name):
print(f"Error: Newly created ZIP archive {source_zip_name} not found.")
return 1
print("Uploading {} to {}".format(source_zip_name, UPLOAD_FOLDER + destination_zip_name))
bucket = get_bucket()
res = bucket.upload_local_file(
source_zip_name, # Local file to upload
"nightly/" + destination_zip_name, # B2 destination path
)
return 0
def prune_artifacts():
print(f"Looking for binaries to delete older than {days_to_keep} days")
bucket = get_bucket()
for file, _ in bucket.ls(UPLOAD_FOLDER, latest_only=False):
# Timestamp is in milliseconds
date = datetime.fromtimestamp(file.upload_timestamp / 1_000.0).replace(hour=0, minute=0, second=0, microsecond=0)
now = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
delta = now - date
if delta.days > int(days_to_keep):
print("Deleting {}".format(file.file_name))
file.delete()
return 0
def update_nightly_json():
print(f"Updating nightly.json with files {days_to_keep} days or newer")
files_by_date = {}
bucket = get_bucket()
for file, _ in bucket.ls(UPLOAD_FOLDER, latest_only=True):
# Timestamp is in milliseconds
date = datetime.fromtimestamp(file.upload_timestamp / 1_000.0).replace(hour=0, minute=0, second=0, microsecond=0).strftime('%Y-%m-%d')
name = remove_prefix(file.file_name, UPLOAD_FOLDER)
sha1 = file.content_sha1
size = file.size
url = bucket.get_download_url(file.file_name)
if date not in files_by_date.keys():
files_by_date[date] = []
files_by_date[date].append({
'name': name,
'url': url,
'sha1': sha1,
'sizeInBytes': size,
})
now = datetime.utcnow().isoformat()
nightly = json.dumps({
'last_updated' : now,
'files': files_by_date
}, sort_keys=True, indent=4, ensure_ascii=False).encode('utf-8')
res = bucket.upload_bytes(
nightly, # JSON bytes
"nightly.json", # B2 destination path
)
return 0
if __name__ == "__main__":
if len(sys.argv) == 1:
print("Usage: {} <verb> [arguments]".format(sys.argv[0]))
print("\tartifact <platform prefix> <artifact path>\n\t\tCreates and uploads a platform artifact zip.")
print("\tprune\n\t\tDeletes old artifacts from bucket")
print("\tjson\n\t\tUpdate and upload nightly.json")
sys.exit(1)
else:
command = sys.argv[1].lower()
if command == "artifact":
if len(sys.argv) != 4:
print("Usage: {} artifact <platform prefix> <artifact path>".format(sys.argv[0]))
print("Error: Expected artifact command to be given platform prefix and artifact path.\n")
sys.exit(1)
res = create_and_upload_artifact_zip(sys.argv[2], sys.argv[3])
sys.exit(res)
elif command == "prune":
res = prune_artifacts()
sys.exit(res)
elif command == "json":
res = update_nightly_json()
sys.exit(res)

View File

@@ -1,25 +0,0 @@
#!/bin/bash
set -e
bucket=$1
platform=$2
artifact=$3
now=$(date +'%Y-%m-%d')
filename="odin-$platform-nightly+$now.zip"
echo "Creating archive $filename from $artifact and uploading to $bucket"
# If this is already zipped up (done before artifact upload to keep permissions intact), just move it.
if [ "${artifact: -4}" == ".zip" ]
then
echo "Artifact already a zip"
mkdir -p "output"
mv "$artifact" "output/$filename"
else
echo "Artifact needs to be zipped"
7z a -bd "output/$filename" -r "$artifact"
fi
b2 upload-file --noProgress "$bucket" "output/$filename" "nightly/$filename"

View File

@@ -29,12 +29,12 @@ MIN_READ_BUFFER_SIZE :: 16
@(private)
DEFAULT_MAX_CONSECUTIVE_EMPTY_READS :: 128
reader_init :: proc(b: ^Reader, rd: io.Reader, size: int = DEFAULT_BUF_SIZE, allocator := context.allocator) {
reader_init :: proc(b: ^Reader, rd: io.Reader, size: int = DEFAULT_BUF_SIZE, allocator := context.allocator, loc := #caller_location) {
size := size
size = max(size, MIN_READ_BUFFER_SIZE)
reader_reset(b, rd)
b.buf_allocator = allocator
b.buf = make([]byte, size, allocator)
b.buf = make([]byte, size, allocator, loc)
}
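The `loc := #caller_location` parameter threaded through `reader_init` (and through `core:bytes` below) makes allocation failures and tracking-allocator reports point at the user's call site instead of a line inside the library. A minimal sketch of the same pattern, with a hypothetical helper name:

package loc_threading_sketch

// Hypothetical helper: take the caller's source location and forward it to the
// allocating call, so any allocator diagnostic names grow_by's caller rather
// than the resize line below.
grow_by :: proc(buf: ^[dynamic]byte, n: int, loc := #caller_location) {
	resize(buf, len(buf^) + n, loc = loc)
}

main :: proc() {
	b: [dynamic]byte
	defer delete(b)
	grow_by(&b, 16) // allocator reports for this growth point here
}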
reader_init_with_buf :: proc(b: ^Reader, rd: io.Reader, buf: []byte) {
@@ -81,7 +81,7 @@ _reader_read_new_chunk :: proc(b: ^Reader) -> io.Error {
for i := b.max_consecutive_empty_reads; i > 0; i -= 1 {
n, err := io.read(b.rd, b.buf[b.w:])
if n < 0 {
return .Negative_Read
return err if err != nil else .Negative_Read
}
b.w += n
if err != nil {
@@ -189,7 +189,7 @@ reader_read :: proc(b: ^Reader, p: []byte) -> (n: int, err: io.Error) {
if len(p) >= len(b.buf) {
n, b.err = io.read(b.rd, p)
if n < 0 {
return 0, .Negative_Read
return 0, b.err if b.err != nil else .Negative_Read
}
if n > 0 {
@@ -202,7 +202,7 @@ reader_read :: proc(b: ^Reader, p: []byte) -> (n: int, err: io.Error) {
b.r, b.w = 0, 0
n, b.err = io.read(b.rd, b.buf)
if n < 0 {
return 0, .Negative_Read
return 0, b.err if b.err != nil else .Negative_Read
}
if n == 0 {
return 0, _reader_consume_err(b)
@@ -290,7 +290,7 @@ reader_write_to :: proc(b: ^Reader, w: io.Writer) -> (n: i64, err: io.Error) {
write_buf :: proc(b: ^Reader, w: io.Writer) -> (i64, io.Error) {
n, err := io.write(w, b.buf[b.r:b.w])
if n < 0 {
return 0, .Negative_Write
return 0, err if err != nil else .Negative_Write
}
b.r += n
return i64(n), err

View File

@@ -95,6 +95,10 @@ writer_write :: proc(b: ^Writer, p: []byte) -> (n: int, err: io.Error) {
m: int
if writer_buffered(b) == 0 {
m, b.err = io.write(b.wr, p)
if m < 0 && b.err == nil {
b.err = .Negative_Write
break
}
} else {
m = copy(b.buf[b.n:], p)
b.n += m

View File

@@ -27,19 +27,19 @@ Read_Op :: enum i8 {
}
buffer_init :: proc(b: ^Buffer, buf: []byte) {
resize(&b.buf, len(buf))
buffer_init :: proc(b: ^Buffer, buf: []byte, loc := #caller_location) {
resize(&b.buf, len(buf), loc=loc)
copy(b.buf[:], buf)
}
buffer_init_string :: proc(b: ^Buffer, s: string) {
resize(&b.buf, len(s))
buffer_init_string :: proc(b: ^Buffer, s: string, loc := #caller_location) {
resize(&b.buf, len(s), loc=loc)
copy(b.buf[:], s)
}
buffer_init_allocator :: proc(b: ^Buffer, len, cap: int, allocator := context.allocator) {
buffer_init_allocator :: proc(b: ^Buffer, len, cap: int, allocator := context.allocator, loc := #caller_location) {
if b.buf == nil {
b.buf = make([dynamic]byte, len, cap, allocator)
b.buf = make([dynamic]byte, len, cap, allocator, loc)
return
}
@@ -96,28 +96,28 @@ buffer_truncate :: proc(b: ^Buffer, n: int) {
}
@(private)
_buffer_try_grow :: proc(b: ^Buffer, n: int) -> (int, bool) {
_buffer_try_grow :: proc(b: ^Buffer, n: int, loc := #caller_location) -> (int, bool) {
if l := len(b.buf); n <= cap(b.buf)-l {
resize(&b.buf, l+n)
resize(&b.buf, l+n, loc=loc)
return l, true
}
return 0, false
}
@(private)
_buffer_grow :: proc(b: ^Buffer, n: int) -> int {
_buffer_grow :: proc(b: ^Buffer, n: int, loc := #caller_location) -> int {
m := buffer_length(b)
if m == 0 && b.off != 0 {
buffer_reset(b)
}
if i, ok := _buffer_try_grow(b, n); ok {
if i, ok := _buffer_try_grow(b, n, loc=loc); ok {
return i
}
if b.buf == nil && n <= SMALL_BUFFER_SIZE {
// Fixes #2756 by preserving allocator if already set on Buffer via init_buffer_allocator
reserve(&b.buf, SMALL_BUFFER_SIZE)
resize(&b.buf, n)
reserve(&b.buf, SMALL_BUFFER_SIZE, loc=loc)
resize(&b.buf, n, loc=loc)
return 0
}
@@ -127,31 +127,31 @@ _buffer_grow :: proc(b: ^Buffer, n: int) -> int {
} else if c > max(int) - c - n {
panic("bytes.Buffer: too large")
} else {
resize(&b.buf, 2*c + n)
resize(&b.buf, 2*c + n, loc=loc)
copy(b.buf[:], b.buf[b.off:])
}
b.off = 0
resize(&b.buf, m+n)
resize(&b.buf, m+n, loc=loc)
return m
}
buffer_grow :: proc(b: ^Buffer, n: int) {
buffer_grow :: proc(b: ^Buffer, n: int, loc := #caller_location) {
if n < 0 {
panic("bytes.buffer_grow: negative count")
}
m := _buffer_grow(b, n)
resize(&b.buf, m)
m := _buffer_grow(b, n, loc=loc)
resize(&b.buf, m, loc=loc)
}
buffer_write_at :: proc(b: ^Buffer, p: []byte, offset: int) -> (n: int, err: io.Error) {
buffer_write_at :: proc(b: ^Buffer, p: []byte, offset: int, loc := #caller_location) -> (n: int, err: io.Error) {
b.last_read = .Invalid
if offset < 0 {
err = .Invalid_Offset
return
}
_, ok := _buffer_try_grow(b, offset+len(p))
_, ok := _buffer_try_grow(b, offset+len(p), loc=loc)
if !ok {
_ = _buffer_grow(b, offset+len(p))
_ = _buffer_grow(b, offset+len(p), loc=loc)
}
if len(b.buf) <= offset {
return 0, .Short_Write
@@ -160,47 +160,47 @@ buffer_write_at :: proc(b: ^Buffer, p: []byte, offset: int) -> (n: int, err: io.
}
buffer_write :: proc(b: ^Buffer, p: []byte) -> (n: int, err: io.Error) {
buffer_write :: proc(b: ^Buffer, p: []byte, loc := #caller_location) -> (n: int, err: io.Error) {
b.last_read = .Invalid
m, ok := _buffer_try_grow(b, len(p))
m, ok := _buffer_try_grow(b, len(p), loc=loc)
if !ok {
m = _buffer_grow(b, len(p))
m = _buffer_grow(b, len(p), loc=loc)
}
return copy(b.buf[m:], p), nil
}
buffer_write_ptr :: proc(b: ^Buffer, ptr: rawptr, size: int) -> (n: int, err: io.Error) {
return buffer_write(b, ([^]byte)(ptr)[:size])
buffer_write_ptr :: proc(b: ^Buffer, ptr: rawptr, size: int, loc := #caller_location) -> (n: int, err: io.Error) {
return buffer_write(b, ([^]byte)(ptr)[:size], loc=loc)
}
buffer_write_string :: proc(b: ^Buffer, s: string) -> (n: int, err: io.Error) {
buffer_write_string :: proc(b: ^Buffer, s: string, loc := #caller_location) -> (n: int, err: io.Error) {
b.last_read = .Invalid
m, ok := _buffer_try_grow(b, len(s))
m, ok := _buffer_try_grow(b, len(s), loc=loc)
if !ok {
m = _buffer_grow(b, len(s))
m = _buffer_grow(b, len(s), loc=loc)
}
return copy(b.buf[m:], s), nil
}
buffer_write_byte :: proc(b: ^Buffer, c: byte) -> io.Error {
buffer_write_byte :: proc(b: ^Buffer, c: byte, loc := #caller_location) -> io.Error {
b.last_read = .Invalid
m, ok := _buffer_try_grow(b, 1)
m, ok := _buffer_try_grow(b, 1, loc=loc)
if !ok {
m = _buffer_grow(b, 1)
m = _buffer_grow(b, 1, loc=loc)
}
b.buf[m] = c
return nil
}
buffer_write_rune :: proc(b: ^Buffer, r: rune) -> (n: int, err: io.Error) {
buffer_write_rune :: proc(b: ^Buffer, r: rune, loc := #caller_location) -> (n: int, err: io.Error) {
if r < utf8.RUNE_SELF {
buffer_write_byte(b, byte(r))
buffer_write_byte(b, byte(r), loc=loc)
return 1, nil
}
b.last_read = .Invalid
m, ok := _buffer_try_grow(b, utf8.UTF_MAX)
m, ok := _buffer_try_grow(b, utf8.UTF_MAX, loc=loc)
if !ok {
m = _buffer_grow(b, utf8.UTF_MAX)
m = _buffer_grow(b, utf8.UTF_MAX, loc=loc)
}
res: [4]byte
res, n = utf8.encode_rune(r)
@@ -359,7 +359,7 @@ buffer_read_from :: proc(b: ^Buffer, r: io.Reader) -> (n: i64, err: io.Error) #n
resize(&b.buf, i)
m, e := io.read(r, b.buf[i:cap(b.buf)])
if m < 0 {
err = .Negative_Read
err = e if e != nil else .Negative_Read
return
}

View File

@@ -1167,3 +1167,28 @@ fields_proc :: proc(s: []byte, f: proc(rune) -> bool, allocator := context.alloc
return subslices[:]
}
// alias returns true iff a and b have a non-zero length, and any part of
// a overlaps with b.
alias :: proc "contextless" (a, b: []byte) -> bool {
a_len, b_len := len(a), len(b)
if a_len == 0 || b_len == 0 {
return false
}
a_start, b_start := uintptr(raw_data(a)), uintptr(raw_data(b))
a_end, b_end := a_start + uintptr(a_len-1), b_start + uintptr(b_len-1)
return a_start <= b_end && b_start <= a_end
}
// alias_inexactly returns true iff a and b have a non-zero length,
// the base pointers of a and b are NOT equal, and any part of a overlaps
// with b (i.e. `alias(a, b)` with an exception that returns false for
// `a == b`, `b = a[:len(a)-69]` and similar conditions).
alias_inexactly :: proc "contextless" (a, b: []byte) -> bool {
if raw_data(a) == raw_data(b) {
return false
}
return alias(a, b)
}
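A quick usage sketch for the new aliasing helpers, assuming they are exported from `core:bytes` exactly as declared above:

package alias_example

import "core:bytes"
import "core:fmt"

main :: proc() {
	buf: [8]byte
	a := buf[0:6]
	b := buf[4:8] // overlaps a in buf[4..5]
	c := buf[0:6] // same base pointer and length as a

	fmt.println(bytes.alias(a, b))           // true:  the slices overlap
	fmt.println(bytes.alias(a, c))           // true:  identical views trivially overlap
	fmt.println(bytes.alias_inexactly(a, c)) // false: equal base pointers are excluded
}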

View File

@@ -40,7 +40,7 @@ when ODIN_OS == .FreeBSD {
ERANGE :: 34
}
when ODIN_OS == .OpenBSD {
when ODIN_OS == .OpenBSD || ODIN_OS == .NetBSD {
@(private="file")
@(default_calling_convention="c")
foreign libc {

View File

@@ -34,20 +34,7 @@ when ODIN_OS == .Windows {
SIGTERM :: 15
}
when ODIN_OS == .Linux || ODIN_OS == .FreeBSD {
SIG_ERR :: rawptr(~uintptr(0))
SIG_DFL :: rawptr(uintptr(0))
SIG_IGN :: rawptr(uintptr(1))
SIGABRT :: 6
SIGFPE :: 8
SIGILL :: 4
SIGINT :: 2
SIGSEGV :: 11
SIGTERM :: 15
}
when ODIN_OS == .Darwin {
when ODIN_OS == .Linux || ODIN_OS == .FreeBSD || ODIN_OS == .Haiku || ODIN_OS == .OpenBSD || ODIN_OS == .NetBSD || ODIN_OS == .Darwin {
SIG_ERR :: rawptr(~uintptr(0))
SIG_DFL :: rawptr(uintptr(0))
SIG_IGN :: rawptr(uintptr(1))

View File

@@ -83,7 +83,7 @@ when ODIN_OS == .Linux {
}
}
when ODIN_OS == .OpenBSD {
when ODIN_OS == .OpenBSD || ODIN_OS == .NetBSD {
fpos_t :: distinct i64
_IOFBF :: 0
@@ -102,10 +102,12 @@ when ODIN_OS == .OpenBSD {
SEEK_END :: 2
foreign libc {
stderr: ^FILE
stdin: ^FILE
stdout: ^FILE
__sF: [3]FILE
}
stdin: ^FILE = &__sF[0]
stdout: ^FILE = &__sF[1]
stderr: ^FILE = &__sF[2]
}
when ODIN_OS == .FreeBSD {
@@ -127,9 +129,9 @@ when ODIN_OS == .FreeBSD {
SEEK_END :: 2
foreign libc {
stderr: ^FILE
stdin: ^FILE
stdout: ^FILE
@(link_name="__stderrp") stderr: ^FILE
@(link_name="__stdinp") stdin: ^FILE
@(link_name="__stdoutp") stdout: ^FILE
}
}
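On OpenBSD and NetBSD the standard C streams are elements of the `__sF` array rather than individual symbols, hence the indirection above; user code is unaffected and keeps referring to `libc.stdout` as before. A tiny sketch, assuming the usual `core:c/libc` `fputs` binding:

package libc_stdio_example

import "core:c/libc"

main :: proc() {
	libc.fputs("hello via libc.stdout\n", libc.stdout)
}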

View File

@@ -45,7 +45,7 @@ when ODIN_OS == .Windows {
}
}
when ODIN_OS == .Linux || ODIN_OS == .FreeBSD || ODIN_OS == .Darwin || ODIN_OS == .OpenBSD || ODIN_OS == .Haiku {
when ODIN_OS == .Linux || ODIN_OS == .FreeBSD || ODIN_OS == .Darwin || ODIN_OS == .OpenBSD || ODIN_OS == .NetBSD || ODIN_OS == .Haiku {
@(default_calling_convention="c")
foreign libc {
// 7.27.2 Time manipulation functions

View File

@@ -22,7 +22,7 @@ when ODIN_OS == .Windows {
wctrans_t :: distinct int
wctype_t :: distinct u32
} else when ODIN_OS == .OpenBSD {
} else when ODIN_OS == .OpenBSD || ODIN_OS == .NetBSD {
wctrans_t :: distinct rawptr
wctype_t :: distinct rawptr

View File

@@ -34,13 +34,13 @@ COMPRESS_OUTPUT_ALLOCATE_MIN :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MIN, 1 << 2
*/
when size_of(uintptr) == 8 {
// For 64-bit platforms, we set the default max buffer size to 4 GiB,
// which is GZIP and PKZIP's max payload size.
// For 64-bit platforms, we set the default max buffer size to 4 GiB,
// which is GZIP and PKZIP's max payload size.
COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 32))
} else {
// For 32-bit platforms, we set the default max buffer size to 512 MiB.
COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 29))
COMPRESS_OUTPUT_ALLOCATE_MAX :: int(#config(COMPRESS_OUTPUT_ALLOCATE_MAX, 1 << 29))
}
@@ -186,7 +186,7 @@ input_size_from_stream :: proc(z: ^Context_Stream_Input) -> (res: i64, err: Erro
input_size :: proc{input_size_from_memory, input_size_from_stream}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
read_slice_from_memory :: #force_inline proc(z: ^Context_Memory_Input, size: int) -> (res: []u8, err: io.Error) {
#no_bounds_check {
if len(z.input_data) >= size {
@@ -203,7 +203,7 @@ read_slice_from_memory :: #force_inline proc(z: ^Context_Memory_Input, size: int
}
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
read_slice_from_stream :: #force_inline proc(z: ^Context_Stream_Input, size: int) -> (res: []u8, err: io.Error) {
// TODO: REMOVE ALL USE OF context.temp_allocator here
// there is literally no need for it
@@ -214,13 +214,13 @@ read_slice_from_stream :: #force_inline proc(z: ^Context_Stream_Input, size: int
read_slice :: proc{read_slice_from_memory, read_slice_from_stream}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
read_data :: #force_inline proc(z: ^$C, $T: typeid) -> (res: T, err: io.Error) {
b := read_slice(z, size_of(T)) or_return
return (^T)(&b[0])^, nil
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
read_u8_from_memory :: #force_inline proc(z: ^Context_Memory_Input) -> (res: u8, err: io.Error) {
#no_bounds_check {
if len(z.input_data) >= 1 {
@@ -232,7 +232,7 @@ read_u8_from_memory :: #force_inline proc(z: ^Context_Memory_Input) -> (res: u8,
return 0, .EOF
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
read_u8_from_stream :: #force_inline proc(z: ^Context_Stream_Input) -> (res: u8, err: io.Error) {
b := read_slice_from_stream(z, 1) or_return
return b[0], nil
@@ -242,7 +242,7 @@ read_u8 :: proc{read_u8_from_memory, read_u8_from_stream}
// You would typically only use this at the end of Inflate, to drain bits from the code buffer
// preferentially.
@(optimization_mode="speed")
@(optimization_mode="favor_size")
read_u8_prefer_code_buffer_lsb :: #force_inline proc(z: ^$C) -> (res: u8, err: io.Error) {
if z.num_bits >= 8 {
res = u8(read_bits_no_refill_lsb(z, 8))
@@ -257,7 +257,7 @@ read_u8_prefer_code_buffer_lsb :: #force_inline proc(z: ^$C) -> (res: u8, err: i
return
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
peek_data_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid) -> (res: T, err: io.Error) {
size :: size_of(T)
@@ -275,7 +275,7 @@ peek_data_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid
}
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
peek_data_at_offset_from_memory :: #force_inline proc(z: ^Context_Memory_Input, $T: typeid, #any_int offset: int) -> (res: T, err: io.Error) {
size :: size_of(T)
@@ -293,7 +293,7 @@ peek_data_at_offset_from_memory :: #force_inline proc(z: ^Context_Memory_Input,
}
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
peek_data_from_stream :: #force_inline proc(z: ^Context_Stream_Input, $T: typeid) -> (res: T, err: io.Error) {
size :: size_of(T)
@@ -317,7 +317,7 @@ peek_data_from_stream :: #force_inline proc(z: ^Context_Stream_Input, $T: typeid
return res, .None
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
peek_data_at_offset_from_stream :: #force_inline proc(z: ^Context_Stream_Input, $T: typeid, #any_int offset: int) -> (res: T, err: io.Error) {
size :: size_of(T)
@@ -352,14 +352,14 @@ peek_data :: proc{peek_data_from_memory, peek_data_from_stream, peek_data_at_off
// Sliding window read back
@(optimization_mode="speed")
@(optimization_mode="favor_size")
peek_back_byte :: #force_inline proc(z: ^$C, offset: i64) -> (res: u8, err: io.Error) {
// Look back into the sliding window.
return z.output.buf[z.bytes_written - offset], .None
}
// Generalized bit reader LSB
@(optimization_mode="speed")
@(optimization_mode="favor_size")
refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width := i8(48)) {
refill := u64(width)
b := u64(0)
@@ -385,7 +385,7 @@ refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width :=
}
// Generalized bit reader LSB
@(optimization_mode="speed")
@(optimization_mode="favor_size")
refill_lsb_from_stream :: proc(z: ^Context_Stream_Input, width := i8(24)) {
refill := u64(width)
@@ -414,13 +414,13 @@ refill_lsb_from_stream :: proc(z: ^Context_Stream_Input, width := i8(24)) {
refill_lsb :: proc{refill_lsb_from_memory, refill_lsb_from_stream}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
consume_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) {
z.code_buffer >>= width
z.num_bits -= u64(width)
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
consume_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) {
z.code_buffer >>= width
z.num_bits -= u64(width)
@@ -428,7 +428,7 @@ consume_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, wid
consume_bits_lsb :: proc{consume_bits_lsb_from_memory, consume_bits_lsb_from_stream}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
peek_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
if z.num_bits < u64(width) {
refill_lsb(z)
@@ -436,7 +436,7 @@ peek_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width:
return u32(z.code_buffer &~ (~u64(0) << width))
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
peek_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
if z.num_bits < u64(width) {
refill_lsb(z)
@@ -446,13 +446,13 @@ peek_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width:
peek_bits_lsb :: proc{peek_bits_lsb_from_memory, peek_bits_lsb_from_stream}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
peek_bits_no_refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
assert(z.num_bits >= u64(width))
return u32(z.code_buffer &~ (~u64(0) << width))
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
peek_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
assert(z.num_bits >= u64(width))
return u32(z.code_buffer &~ (~u64(0) << width))
@@ -460,14 +460,14 @@ peek_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Inp
peek_bits_no_refill_lsb :: proc{peek_bits_no_refill_lsb_from_memory, peek_bits_no_refill_lsb_from_stream}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
read_bits_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
k := #force_inline peek_bits_lsb(z, width)
#force_inline consume_bits_lsb(z, width)
return k
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
read_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
k := peek_bits_lsb(z, width)
consume_bits_lsb(z, width)
@@ -476,14 +476,14 @@ read_bits_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width:
read_bits_lsb :: proc{read_bits_lsb_from_memory, read_bits_lsb_from_stream}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
read_bits_no_refill_lsb_from_memory :: #force_inline proc(z: ^Context_Memory_Input, width: u8) -> u32 {
k := #force_inline peek_bits_no_refill_lsb(z, width)
#force_inline consume_bits_lsb(z, width)
return k
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
read_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Input, width: u8) -> u32 {
k := peek_bits_no_refill_lsb(z, width)
consume_bits_lsb(z, width)
@@ -493,14 +493,14 @@ read_bits_no_refill_lsb_from_stream :: #force_inline proc(z: ^Context_Stream_Inp
read_bits_no_refill_lsb :: proc{read_bits_no_refill_lsb_from_memory, read_bits_no_refill_lsb_from_stream}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
discard_to_next_byte_lsb_from_memory :: proc(z: ^Context_Memory_Input) {
discard := u8(z.num_bits & 7)
#force_inline consume_bits_lsb(z, discard)
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
discard_to_next_byte_lsb_from_stream :: proc(z: ^Context_Stream_Input) {
discard := u8(z.num_bits & 7)
consume_bits_lsb(z, discard)

View File

@@ -98,7 +98,7 @@ decompress_slice_to_output_buffer :: proc(input: []u8, output: []u8, model := DE
validate_model(model) or_return
for inp < inp_end {
val := transmute(i8)input[inp]
val := i8(input[inp])
mark := int(-1)
for val < 0 {
@@ -274,12 +274,9 @@ compress_string_to_buffer :: proc(input: string, output: []u8, model := DEFAULT_
out_ptr := raw_data(output[out:])
switch pack.bytes_packed {
case 4:
intrinsics.unaligned_store(transmute(^u32)out_ptr, code)
case 2:
intrinsics.unaligned_store(transmute(^u16)out_ptr, u16(code))
case 1:
intrinsics.unaligned_store(transmute(^u8)out_ptr, u8(code))
case 4: intrinsics.unaligned_store((^u32)(out_ptr), code)
case 2: intrinsics.unaligned_store((^u16)(out_ptr), u16(code))
case 1: intrinsics.unaligned_store( (^u8)(out_ptr), u8(code))
case:
return out, .Unknown_Compression_Method
}

View File

@@ -120,7 +120,7 @@ Huffman_Table :: struct {
}
// Implementation starts here
@(optimization_mode="speed")
@(optimization_mode="favor_size")
z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
assert(bits <= 16)
// NOTE: Can optimize with llvm.bitreverse.i64 or some bit twiddling
@@ -136,7 +136,7 @@ z_bit_reverse :: #force_inline proc(n: u16, bits: u8) -> (r: u16) {
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
grow_buffer :: proc(buf: ^[dynamic]u8) -> (err: compress.Error) {
/*
That we get here at all means that we didn't pass an expected output size,
@@ -154,7 +154,7 @@ grow_buffer :: proc(buf: ^[dynamic]u8) -> (err: compress.Error) {
TODO: Make these return compress.Error.
*/
@(optimization_mode="speed")
@(optimization_mode="favor_size")
write_byte :: #force_inline proc(z: ^$C, c: u8) -> (err: io.Error) #no_bounds_check {
/*
Resize if needed.
@@ -173,7 +173,7 @@ write_byte :: #force_inline proc(z: ^$C, c: u8) -> (err: io.Error) #no_bounds_ch
return .None
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
repl_byte :: proc(z: ^$C, count: u16, c: u8) -> (err: io.Error) #no_bounds_check {
/*
TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
@@ -201,7 +201,7 @@ repl_byte :: proc(z: ^$C, count: u16, c: u8) -> (err: io.Error) #no_bounds_check
return .None
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
repl_bytes :: proc(z: ^$C, count: u16, distance: u16) -> (err: io.Error) {
/*
TODO(Jeroen): Once we have a magic ring buffer, we can just peek/write into it
@@ -234,8 +234,8 @@ allocate_huffman_table :: proc(allocator := context.allocator) -> (z: ^Huffman_T
return new(Huffman_Table, allocator), nil
}
@(optimization_mode="speed")
build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
@(optimization_mode="favor_size")
build_huffman :: #force_no_inline proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
sizes: [HUFFMAN_MAX_BITS+1]int
next_code: [HUFFMAN_MAX_BITS+1]int
@@ -293,7 +293,7 @@ build_huffman :: proc(z: ^Huffman_Table, code_lengths: []u8) -> (err: Error) {
return nil
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
decode_huffman_slowpath :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
code := u16(compress.peek_bits_lsb(z,16))
@@ -324,7 +324,7 @@ decode_huffman_slowpath :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Erro
return r, nil
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
decode_huffman :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bounds_check {
if z.num_bits < 16 {
if z.num_bits > 63 {
@@ -344,7 +344,7 @@ decode_huffman :: proc(z: ^$C, t: ^Huffman_Table) -> (r: u16, err: Error) #no_bo
return decode_huffman_slowpath(z, t)
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
parse_huffman_block :: proc(z: ^$C, z_repeat, z_offset: ^Huffman_Table) -> (err: Error) #no_bounds_check {
#no_bounds_check for {
value, e := decode_huffman(z, z_repeat)
@@ -413,7 +413,7 @@ parse_huffman_block :: proc(z: ^$C, z_repeat, z_offset: ^Huffman_Table) -> (err:
}
}
@(optimization_mode="speed")
@(optimization_mode="favor_size")
inflate_from_context :: proc(using ctx: ^compress.Context_Memory_Input, raw := false, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
/*
ctx.output must be a bytes.Buffer for now. We'll add a separate implementation that writes to a stream.
@@ -486,7 +486,7 @@ inflate_from_context :: proc(using ctx: ^compress.Context_Memory_Input, raw := f
// TODO: Check alignment of reserve/resize.
@(optimization_mode="speed")
@(optimization_mode="favor_size")
inflate_raw :: proc(z: ^$C, expected_output_size := -1, allocator := context.allocator) -> (err: Error) #no_bounds_check {
context.allocator = allocator
expected_output_size := expected_output_size
@@ -670,4 +670,4 @@ inflate_from_byte_array_raw :: proc(input: []u8, buf: ^bytes.Buffer, raw := fals
return inflate_raw(&ctx, expected_output_size=expected_output_size)
}
inflate :: proc{inflate_from_context, inflate_from_byte_array}
inflate :: proc{inflate_from_context, inflate_from_byte_array}

View File

@@ -5,13 +5,10 @@ The implementation is non-intrusive, and non-recursive.
*/
package container_avl
import "base:intrinsics"
import "base:runtime"
@(require) import "base:intrinsics"
@(require) import "base:runtime"
import "core:slice"
_ :: intrinsics
_ :: runtime
// Originally based on the CC0 implementation by Eric Biggers
// See: https://github.com/ebiggers/avl_tree/
@@ -90,7 +87,7 @@ init_cmp :: proc(
init_ordered :: proc(
t: ^$T/Tree($Value),
node_allocator := context.allocator,
) where intrinsics.type_is_ordered_numeric(Value) {
) where intrinsics.type_is_ordered(Value) {
init_cmp(t, slice.cmp_proc(Value), node_allocator)
}
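With the constraint relaxed from `type_is_ordered_numeric` to `type_is_ordered`, `init_ordered` now accepts any ordered key type, strings included. A small sketch; it assumes the package's `find_or_insert` and `first` follow the same shape as the red-black tree API introduced later in this change:

package avl_ordered_example

import "core:container/avl"
import "core:fmt"

main :: proc() {
	t: avl.Tree(string) // string is ordered, though not numeric
	avl.init(&t)
	defer avl.destroy(&t)

	avl.find_or_insert(&t, "banana")
	avl.find_or_insert(&t, "apple")

	fmt.println(avl.first(&t).value) // "apple"
}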
@@ -675,4 +672,4 @@ iterator_first :: proc "contextless" (it: ^Iterator($Value)) {
if it._cur != nil {
it._next = node_next_or_prev_in_order(it._cur, it._direction)
}
}
}

View File

@@ -210,8 +210,11 @@ set :: proc(ba: ^Bit_Array, #any_int index: uint, set_to: bool = true, allocator
ba.max_index = max(idx, ba.max_index)
if set_to{ ba.bits[leg_index] |= 1 << uint(bit_index) }
else { ba.bits[leg_index] &= ~(1 << uint(bit_index)) }
if set_to {
ba.bits[leg_index] |= 1 << uint(bit_index)
} else {
ba.bits[leg_index] &~= 1 << uint(bit_index)
}
return true
}
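For reference, `x &~= mask` is Odin's and-not assignment, equivalent to `x &= ~mask`; it clears exactly the bits set in `mask`:

package andnot_example

import "core:fmt"

main :: proc() {
	bits: u8 = 0b1111
	bits &~= 1 << 1 // clear bit 1
	fmt.printf("%b\n", bits) // 1101
}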
@@ -253,7 +256,7 @@ Inputs:
- index: Which bit in the array
*/
unsafe_unset :: proc(b: ^Bit_Array, bit: int) #no_bounds_check {
b.bits[bit >> INDEX_SHIFT] &= ~(1 << uint(bit & INDEX_MASK))
b.bits[bit >> INDEX_SHIFT] &~= 1 << uint(bit & INDEX_MASK)
}
/*
A helper function to create a Bit Array with optional bias, in case your smallest index is non-zero (including negative).

View File

@@ -0,0 +1,46 @@
/*
Package list implements an intrusive doubly-linked list.
An intrusive container requires a `Node` to be embedded in your own structure, like this:
My_String :: struct {
node: list.Node,
value: string,
}
Embedding the members of a `list.Node` in your structure with the `using` keyword is also allowed:
My_String :: struct {
using node: list.Node,
value: string,
}
Here is a full example:
package test
import "core:fmt"
import "core:container/intrusive/list"
main :: proc() {
l: list.List
one := My_String{value="Hello"}
two := My_String{value="World"}
list.push_back(&l, &one.node)
list.push_back(&l, &two.node)
iter := list.iterator_head(l, My_String, "node")
for s in list.iterate_next(&iter) {
fmt.println(s.value)
}
}
My_String :: struct {
node: list.Node,
value: string,
}
*/
package container_intrusive_list

View File

@@ -18,11 +18,18 @@ List :: struct {
tail: ^Node,
}
// The list link you must include in your own structure.
Node :: struct {
prev, next: ^Node,
}
/*
Inserts a new element at the front of the list with O(1) time complexity.
**Inputs**
- list: The container list
- node: The node member of the user-defined element structure
*/
push_front :: proc "contextless" (list: ^List, node: ^Node) {
if list.head != nil {
list.head.prev = node
@@ -33,7 +40,13 @@ push_front :: proc "contextless" (list: ^List, node: ^Node) {
node.prev, node.next = nil, nil
}
}
/*
Inserts a new element at the back of the list with O(1) time complexity.
**Inputs**
- list: The container list
- node: The node member of the user-defined element structure
*/
push_back :: proc "contextless" (list: ^List, node: ^Node) {
if list.tail != nil {
list.tail.next = node
@@ -45,6 +58,13 @@ push_back :: proc "contextless" (list: ^List, node: ^Node) {
}
}
/*
Removes an element from a list with O(1) time complexity.
**Inputs**
- list: The container list
- node: The node member of the user-defined element structure to be removed
*/
remove :: proc "contextless" (list: ^List, node: ^Node) {
if node != nil {
if node.next != nil {
@@ -61,7 +81,13 @@ remove :: proc "contextless" (list: ^List, node: ^Node) {
}
}
}
/*
Removes from the given list all elements that satisfy a condition with O(N) time complexity.
**Inputs**
- list: The container list
- to_erase: The condition procedure. It should return `true` if a node should be removed, `false` otherwise
*/
remove_by_proc :: proc(list: ^List, to_erase: proc(^Node) -> bool) {
for node := list.head; node != nil; {
next := node.next
@@ -82,7 +108,13 @@ remove_by_proc :: proc(list: ^List, to_erase: proc(^Node) -> bool) {
node = next
}
}
/*
Removes from the given list all elements that satisfy a condition with O(N) time complexity.
**Inputs**
- list: The container list
- to_erase: The _contextless_ condition procedure. It should return `true` if a node should be removed, `false` otherwise
*/
remove_by_proc_contextless :: proc(list: ^List, to_erase: proc "contextless" (^Node) -> bool) {
for node := list.head; node != nil; {
next := node.next
@@ -104,12 +136,26 @@ remove_by_proc_contextless :: proc(list: ^List, to_erase: proc "contextless" (^N
}
}
/*
Checks whether the given list does not contain any element.
**Inputs**
- list: The container list
**Returns** `true` if `list` is empty, `false` otherwise
*/
is_empty :: proc "contextless" (list: ^List) -> bool {
return list.head == nil
}
/*
Removes and returns the element at the front of the list with O(1) time complexity.
**Inputs**
- list: The container list
**Returns** The node member of the user-defined element structure, or `nil` if the list is empty
*/
pop_front :: proc "contextless" (list: ^List) -> ^Node {
link := list.head
if link == nil {
@@ -130,6 +176,14 @@ pop_front :: proc "contextless" (list: ^List) -> ^Node {
return link
}
/*
Removes and returns the element at the back of the list with O(1) time complexity.
**Inputs**
- list: The container list
**Returns** The node member of the user-defined element structure, or `nil` if the list is empty
*/
pop_back :: proc "contextless" (list: ^List) -> ^Node {
link := list.tail
if link == nil {
@@ -151,29 +205,102 @@ pop_back :: proc "contextless" (list: ^List) -> ^Node {
}
Iterator :: struct($T: typeid) {
curr: ^Node,
offset: uintptr,
}
/*
Creates an iterator pointing at the head of the given list. For an example, see `iterate_next`.
**Inputs**
- list: The container list
- T: The type of the list's elements
- field_name: The name of the node field in the `T` structure
**Returns** An iterator pointing at the head of `list`
*/
iterator_head :: proc "contextless" (list: List, $T: typeid, $field_name: string) -> Iterator(T)
where intrinsics.type_has_field(T, field_name),
intrinsics.type_field_type(T, field_name) == Node {
return {list.head, offset_of_by_string(T, field_name)}
}
/*
Creates an iterator pointing at the tail of the given list. For an example, see `iterate_prev`.
**Inputs**
- list: The container list
- T: The type of the list's elements
- field_name: The name of the node field in the `T` structure
**Returns** An iterator pointing at the tail of `list`
*/
iterator_tail :: proc "contextless" (list: List, $T: typeid, $field_name: string) -> Iterator(T)
where intrinsics.type_has_field(T, field_name),
intrinsics.type_field_type(T, field_name) == Node {
return {list.tail, offset_of_by_string(T, field_name)}
}
/*
Creates an iterator pointing at the specified node of a list.
**Inputs**
- node: a list node
- T: The type of the list's elements
- field_name: The name of the node field in the `T` structure
**Returns** An iterator pointing at `node`
*/
iterator_from_node :: proc "contextless" (node: ^Node, $T: typeid, $field_name: string) -> Iterator(T)
where intrinsics.type_has_field(T, field_name),
intrinsics.type_field_type(T, field_name) == Node {
return {node, offset_of_by_string(T, field_name)}
}
/*
Retrieves the next element in a list and advances the iterator.
**Inputs**
- it: The iterator
**Returns**
- ptr: The next list element
- ok: `true` if the element is valid (the iterator could advance), `false` otherwise
Example:
import "core:fmt"
import "core:container/intrusive/list"
iterate_next_example :: proc() {
l: list.List
one := My_Struct{value=1}
two := My_Struct{value=2}
list.push_back(&l, &one.node)
list.push_back(&l, &two.node)
it := list.iterator_head(l, My_Struct, "node")
for num in list.iterate_next(&it) {
fmt.println(num.value)
}
}
My_Struct :: struct {
node : list.Node,
value: int,
}
Output:
1
2
*/
iterate_next :: proc "contextless" (it: ^Iterator($T)) -> (ptr: ^T, ok: bool) {
node := it.curr
if node == nil {
@@ -183,7 +310,47 @@ iterate_next :: proc "contextless" (it: ^Iterator($T)) -> (ptr: ^T, ok: bool) {
return (^T)(uintptr(node) - it.offset), true
}
/*
Retrieves the previous element in a list and recede the iterator.
**Inputs**
- it: The iterator
**Returns**
- ptr: The previous list element
- ok: `true` if the element is valid (the iterator could recede), `false` otherwise
Example:
import "core:fmt"
import "core:container/intrusive/list"
iterate_next_example :: proc() {
l: list.List
one := My_Struct{value=1}
two := My_Struct{value=2}
list.push_back(&l, &one.node)
list.push_back(&l, &two.node)
it := list.iterator_tail(l, My_Struct, "node")
for num in list.iterate_prev(&it) {
fmt.println(num.value)
}
}
My_Struct :: struct {
node : list.Node,
value: int,
}
Output:
2
1
*/
iterate_prev :: proc "contextless" (it: ^Iterator($T)) -> (ptr: ^T, ok: bool) {
node := it.curr
if node == nil {
@@ -192,4 +359,4 @@ iterate_prev :: proc "contextless" (it: ^Iterator($T)) -> (ptr: ^T, ok: bool) {
it.curr = node.prev
return (^T)(uintptr(node) - it.offset), true
}
}

View File

@@ -70,8 +70,7 @@ set :: proc(c: ^$C/Cache($Key, $Value), key: Key, value: Value) -> runtime.Alloc
if c.count == c.capacity {
e = c.tail
_remove_node(c, e)
}
else {
} else {
c.count += 1
e = new(Node(Key, Value), c.node_allocator) or_return
}

View File

@@ -95,11 +95,11 @@ front_ptr :: proc(q: ^$Q/Queue($T)) -> ^T {
}
back :: proc(q: ^$Q/Queue($T)) -> T {
idx := (q.offset+uint(q.len))%builtin.len(q.data)
idx := (q.offset+uint(q.len - 1))%builtin.len(q.data)
return q.data[idx]
}
back_ptr :: proc(q: ^$Q/Queue($T)) -> ^T {
idx := (q.offset+uint(q.len))%builtin.len(q.data)
idx := (q.offset+uint(q.len - 1))%builtin.len(q.data)
return &q.data[idx]
}
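The fix matters because the newest element lives at `(offset + len - 1) % cap`; the old index read one slot past it. A tiny check, assuming the usual `core:container/queue` init/push API:

package queue_back_example

import "core:container/queue"
import "core:fmt"

main :: proc() {
	q: queue.Queue(int)
	queue.init(&q)
	defer queue.destroy(&q)

	queue.push_back(&q, 1)
	queue.push_back(&q, 2)
	queue.push_back(&q, 3)

	fmt.println(queue.back(&q)) // 3; the old index could have read a stale or empty slot
}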
@@ -189,7 +189,7 @@ pop_front_safe :: proc(q: ^$Q/Queue($T)) -> (elem: T, ok: bool) {
return
}
// Push multiple elements to the front of the queue
// Push multiple elements to the back of the queue
push_back_elems :: proc(q: ^$Q/Queue($T), elems: ..T) -> (ok: bool, err: runtime.Allocator_Error) {
n := uint(builtin.len(elems))
if space(q^) < int(n) {
@@ -241,7 +241,7 @@ clear :: proc(q: ^$Q/Queue($T)) {
}
// Internal growinh procedure
// Internal growing procedure
_grow :: proc(q: ^$Q/Queue($T), min_capacity: uint = 0) -> runtime.Allocator_Error {
new_capacity := max(min_capacity, uint(8), uint(builtin.len(q.data))*2)
n := uint(builtin.len(q.data))

View File

@@ -0,0 +1,568 @@
// This package implements a red-black tree
package container_rbtree
@(require) import "base:intrinsics"
@(require) import "base:runtime"
import "core:slice"
// Originally based on the CC0 implementation from literateprograms.org
// But with API design mimicking `core:container/avl` for ease of use.
// Direction specifies the traversal direction for a tree iterator.
Direction :: enum i8 {
// Backward is the in-order backwards direction.
Backward = -1,
// Forward is the in-order forwards direction.
Forward = 1,
}
Ordering :: slice.Ordering
// Tree is a red-black tree
Tree :: struct($Key: typeid, $Value: typeid) {
// user_data is a parameter that will be passed to the on_remove
// callback.
user_data: rawptr,
// on_remove is an optional callback that can be called immediately
// after a node is removed from the tree.
on_remove: proc(key: Key, value: Value, user_data: rawptr),
_root: ^Node(Key, Value),
_node_allocator: runtime.Allocator,
_cmp_fn: proc(Key, Key) -> Ordering,
_size: int,
}
// Node is a red-black tree node.
//
// WARNING: It is unsafe to mutate value if the node is part of a tree
// if doing so will alter the Node's sort position relative to other
// elements in the tree.
Node :: struct($Key: typeid, $Value: typeid) {
key: Key,
value: Value,
_parent: ^Node(Key, Value),
_left: ^Node(Key, Value),
_right: ^Node(Key, Value),
_color: Color,
}
// Might store this in the node pointer in the future, but that'll require a decent amount of rework to pass ^^N instead of ^N
Color :: enum uintptr {Black = 0, Red = 1}
// Iterator is a tree iterator.
//
// WARNING: It is unsafe to modify the tree while iterating, except via
// the iterator_remove method.
Iterator :: struct($Key: typeid, $Value: typeid) {
_tree: ^Tree(Key, Value),
_cur: ^Node(Key, Value),
_next: ^Node(Key, Value),
_direction: Direction,
_called_next: bool,
}
// init initializes a tree.
init :: proc {
init_ordered,
init_cmp,
}
// init_cmp initializes a tree.
init_cmp :: proc(t: ^$T/Tree($Key, $Value), cmp_fn: proc(a, b: Key) -> Ordering, node_allocator := context.allocator) {
t._root = nil
t._node_allocator = node_allocator
t._cmp_fn = cmp_fn
t._size = 0
}
// init_ordered initializes a tree containing ordered keys, with
// a comparison function that results in an ascending order sort.
init_ordered :: proc(t: ^$T/Tree($Key, $Value), node_allocator := context.allocator) where intrinsics.type_is_ordered(Key) {
init_cmp(t, slice.cmp_proc(Key), node_allocator)
}
// destroy de-initializes a tree.
destroy :: proc(t: ^$T/Tree($Key, $Value), call_on_remove: bool = true) {
iter := iterator(t, .Forward)
for _ in iterator_next(&iter) {
iterator_remove(&iter, call_on_remove)
}
}
len :: proc "contextless" (t: ^$T/Tree($Key, $Value)) -> (node_count: int) {
return t._size
}
// first returns the first node in the tree (in-order) or nil iff
// the tree is empty.
first :: proc "contextless" (t: ^$T/Tree($Key, $Value)) -> ^Node(Key, Value) {
return tree_first_or_last_in_order(t, Direction.Backward)
}
// last returns the last element in the tree (in-order) or nil iff
// the tree is empty.
last :: proc "contextless" (t: ^$T/Tree($Key, $Value)) -> ^Node(Key, Value) {
return tree_first_or_last_in_order(t, Direction.Forward)
}
// find finds the key in the tree, and returns the corresponding node, or nil iff the value is not present.
find :: proc(t: ^$T/Tree($Key, $Value), key: Key) -> (node: ^Node(Key, Value)) {
node = t._root
for node != nil {
switch t._cmp_fn(key, node.key) {
case .Equal: return node
case .Less: node = node._left
case .Greater: node = node._right
}
}
return node
}
// find_value finds the key in the tree, and returns the corresponding value and true, or a zero value and false iff the key is not present.
find_value :: proc(t: ^$T/Tree($Key, $Value), key: Key) -> (value: Value, ok: bool) #optional_ok {
if n := find(t, key); n != nil {
return n.value, true
}
return
}
// find_or_insert attempts to insert the value into the tree, and returns
// the node, a boolean indicating if the value was inserted, and the
// node allocator error if relevant. If the key is already present, the existing node is returned unchanged.
find_or_insert :: proc(t: ^$T/Tree($Key, $Value), key: Key, value: Value) -> (n: ^Node(Key, Value), inserted: bool, err: runtime.Allocator_Error) {
n_ptr := &t._root
for n_ptr^ != nil {
n = n_ptr^
switch t._cmp_fn(key, n.key) {
case .Less:
n_ptr = &n._left
case .Greater:
n_ptr = &n._right
case .Equal:
return
}
}
_parent := n
n = new_clone(Node(Key, Value){key=key, value=value, _parent=_parent, _color=.Red}, t._node_allocator) or_return
n_ptr^ = n
insert_case1(t, n)
t._size += 1
return n, true, nil
}
// remove removes a node or value from the tree, and returns true iff the
// removal was successful. While the node's value will be left intact,
// the node itself will be freed via the tree's node allocator.
remove :: proc {
remove_key,
remove_node,
}
// remove_key removes a key from the tree, and returns true iff the
// removal was successful. While the node's key + value will be left intact,
// the node itself will be freed via the tree's node allocator.
remove_key :: proc(t: ^$T/Tree($Key, $Value), key: Key, call_on_remove := true) -> bool {
n := find(t, key)
if n == nil {
return false // Key not found, nothing to do
}
return remove_node(t, n, call_on_remove)
}
// remove_node removes a node from the tree, and returns true iff the
// removal was successful. While the node's key + value will be left intact,
// the node itself will be freed via the tree's node allocator.
remove_node :: proc(t: ^$T/Tree($Key, $Value), node: ^$N/Node(Key, Value), call_on_remove := true) -> (found: bool) {
if node._parent == node || (node._parent == nil && t._root != node) {
return false // Don't touch self-parented or dangling nodes.
}
node := node
if node._left != nil && node._right != nil {
// Copy key + value from predecessor and delete it instead
predecessor := maximum_node(node._left)
node.key = predecessor.key
node.value = predecessor.value
node = predecessor
}
child := node._right == nil ? node._left : node._right
if node_color(node) == .Black {
node._color = node_color(child)
remove_case1(t, node)
}
replace_node(t, node, child)
if node._parent == nil && child != nil {
child._color = .Black // root should be black
}
if call_on_remove && t.on_remove != nil {
t.on_remove(node.key, node.value, t.user_data)
}
free(node, t._node_allocator)
t._size -= 1
return true
}
// iterator returns a tree iterator in the specified direction.
iterator :: proc "contextless" (t: ^$T/Tree($Key, $Value), direction: Direction) -> Iterator(Key, Value) {
it: Iterator(Key, Value)
it._tree = cast(^Tree(Key, Value))t
it._direction = direction
iterator_first(&it)
return it
}
// iterator_from_pos returns a tree iterator in the specified direction,
// spanning the range [pos, last] (inclusive).
iterator_from_pos :: proc "contextless" (t: ^$T/Tree($Key, $Value), pos: ^Node(Key, Value), direction: Direction) -> Iterator(Key, Value) {
it: Iterator(Key, Value)
it._tree = transmute(^Tree(Key, Value))t
it._direction = direction
it._next = nil
it._called_next = false
if it._cur = pos; pos != nil {
it._next = node_next_or_prev_in_order(it._cur, it._direction)
}
return it
}
// iterator_get returns the node currently pointed to by the iterator,
// or nil iff the node has been removed, the tree is empty, or the end
// of the tree has been reached.
iterator_get :: proc "contextless" (it: ^$I/Iterator($Key, $Value)) -> ^Node(Key, Value) {
return it._cur
}
// iterator_remove removes the node currently pointed to by the iterator,
// and returns true iff the removal was successful. Semantics are the
// same as the Tree remove.
iterator_remove :: proc(it: ^$I/Iterator($Key, $Value), call_on_remove: bool = true) -> bool {
if it._cur == nil {
return false
}
ok := remove_node(it._tree, it._cur , call_on_remove)
if ok {
it._cur = nil
}
return ok
}
// iterator_next advances the iterator and returns (node, true), or
// (nil, false) iff the end of the tree has been reached.
//
// Note: The first call to iterator_next will return the first node instead
// of advancing the iterator.
iterator_next :: proc "contextless" (it: ^$I/Iterator($Key, $Value)) -> (^Node(Key, Value), bool) {
// This check is needed so that the first element gets returned from
// a brand-new iterator, and so that the somewhat contrived case where
// iterator_remove is called before the first call to iterator_next
// returns the correct value.
if !it._called_next {
it._called_next = true
// There can be the contrived case where iterator_remove is
// called before ever calling iterator_next, which needs to be
// handled as an actual call to next.
//
// If this happens it._cur will be nil, so only return the
// first value, if it._cur is valid.
if it._cur != nil {
return it._cur, true
}
}
if it._next == nil {
return nil, false
}
it._cur = it._next
it._next = node_next_or_prev_in_order(it._cur, it._direction)
return it._cur, true
}
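// Example iteration (sketch): because the first call to iterator_next returns
// the first node rather than advancing, a plain loop visits every node once:
//
//	it := iterator(&t, .Forward)
//	for node in iterator_next(&it) {
//		// use node.key / node.value
//	}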
@(private)
tree_first_or_last_in_order :: proc "contextless" (t: ^$T/Tree($Key, $Value), direction: Direction) -> ^Node(Key, Value) {
first, sign := t._root, i8(direction)
if first != nil {
for {
tmp := node_get_child(first, sign)
if tmp == nil {
break
}
first = tmp
}
}
return first
}
@(private)
node_get_child :: #force_inline proc "contextless" (n: ^Node($Key, $Value), sign: i8) -> ^Node(Key, Value) {
if sign < 0 {
return n._left
}
return n._right
}
@(private)
node_next_or_prev_in_order :: proc "contextless" (n: ^Node($Key, $Value), direction: Direction) -> ^Node(Key, Value) {
next, tmp: ^Node(Key, Value)
sign := i8(direction)
if next = node_get_child(n, +sign); next != nil {
for {
tmp = node_get_child(next, -sign)
if tmp == nil {
break
}
next = tmp
}
} else {
tmp, next = n, n._parent
for next != nil && tmp == node_get_child(next, +sign) {
tmp, next = next, next._parent
}
}
return next
}
@(private)
iterator_first :: proc "contextless" (it: ^Iterator($Key, $Value)) {
// This is private because behavior when the user manually calls
// iterator_first followed by iterator_next is unintuitive, since
// the first call to iterator_next MUST return the first node
// instead of advancing so that `for node in iterator_next(&next)`
// works as expected.
switch it._direction {
case .Forward:
it._cur = tree_first_or_last_in_order(it._tree, .Backward)
case .Backward:
it._cur = tree_first_or_last_in_order(it._tree, .Forward)
}
it._next = nil
it._called_next = false
if it._cur != nil {
it._next = node_next_or_prev_in_order(it._cur, it._direction)
}
}
@(private)
grand_parent :: proc(n: ^$N/Node($Key, $Value)) -> (g: ^N) {
return n._parent._parent
}
@(private)
sibling :: proc(n: ^$N/Node($Key, $Value)) -> (s: ^N) {
if n == n._parent._left {
return n._parent._right
} else {
return n._parent._left
}
}
@(private)
uncle :: proc(n: ^$N/Node($Key, $Value)) -> (u: ^N) {
return sibling(n._parent)
}
@(private)
rotate__left :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
r := n._right
replace_node(t, n, r)
n._right = r._left
if r._left != nil {
r._left._parent = n
}
r._left = n
n._parent = r
}
@(private)
rotate__right :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
l := n._left
replace_node(t, n, l)
n._left = l._right
if l._right != nil {
l._right._parent = n
}
l._right = n
n._parent = l
}
@(private)
replace_node :: proc(t: ^$T/Tree($Key, $Value), old_n: ^$N/Node(Key, Value), new_n: ^N) {
if old_n._parent == nil {
t._root = new_n
} else {
if (old_n == old_n._parent._left) {
old_n._parent._left = new_n
} else {
old_n._parent._right = new_n
}
}
if new_n != nil {
new_n._parent = old_n._parent
}
}
@(private)
insert_case1 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
if n._parent == nil {
n._color = .Black
} else {
insert_case2(t, n)
}
}
@(private)
insert_case2 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
if node_color(n._parent) == .Black {
return // Tree is still valid
} else {
insert_case3(t, n)
}
}
@(private)
insert_case3 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
if node_color(uncle(n)) == .Red {
n._parent._color = .Black
uncle(n)._color = .Black
grand_parent(n)._color = .Red
insert_case1(t, grand_parent(n))
} else {
insert_case4(t, n)
}
}
@(private)
insert_case4 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
n := n
if n == n._parent._right && n._parent == grand_parent(n)._left {
rotate__left(t, n._parent)
n = n._left
} else if n == n._parent._left && n._parent == grand_parent(n)._right {
rotate__right(t, n._parent)
n = n._right
}
insert_case5(t, n)
}
@(private)
insert_case5 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
n._parent._color = .Black
grand_parent(n)._color = .Red
if n == n._parent._left && n._parent == grand_parent(n)._left {
rotate__right(t, grand_parent(n))
} else {
rotate__left(t, grand_parent(n))
}
}
// The maximum_node() helper function just walks _right until it reaches the last non-leaf:
@(private)
maximum_node :: proc(n: ^$N/Node($Key, $Value)) -> (max_node: ^N) {
n := n
for n._right != nil {
n = n._right
}
return n
}
@(private)
remove_case1 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
if n._parent == nil {
return
} else {
remove_case2(t, n)
}
}
@(private)
remove_case2 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
if node_color(sibling(n)) == .Red {
n._parent._color = .Red
sibling(n)._color = .Black
if n == n._parent._left {
rotate__left(t, n._parent)
} else {
rotate__right(t, n._parent)
}
}
remove_case3(t, n)
}
@(private)
remove_case3 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
if node_color(n._parent) == .Black &&
node_color(sibling(n)) == .Black &&
node_color(sibling(n)._left) == .Black &&
node_color(sibling(n)._right) == .Black {
sibling(n)._color = .Red
remove_case1(t, n._parent)
} else {
remove_case4(t, n)
}
}
@(private)
remove_case4 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
if node_color(n._parent) == .Red &&
node_color(sibling(n)) == .Black &&
node_color(sibling(n)._left) == .Black &&
node_color(sibling(n)._right) == .Black {
sibling(n)._color = .Red
n._parent._color = .Black
} else {
remove_case5(t, n)
}
}
@(private)
remove_case5 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
if n == n._parent._left &&
node_color(sibling(n)) == .Black &&
node_color(sibling(n)._left) == .Red &&
node_color(sibling(n)._right) == .Black {
sibling(n)._color = .Red
sibling(n)._left._color = .Black
rotate__right(t, sibling(n))
} else if n == n._parent._right &&
node_color(sibling(n)) == .Black &&
node_color(sibling(n)._right) == .Red &&
node_color(sibling(n)._left) == .Black {
sibling(n)._color = .Red
sibling(n)._right._color = .Black
rotate__left(t, sibling(n))
}
remove_case6(t, n)
}
@(private)
remove_case6 :: proc(t: ^$T/Tree($Key, $Value), n: ^$N/Node(Key, Value)) {
sibling(n)._color = node_color(n._parent)
n._parent._color = .Black
if n == n._parent._left {
sibling(n)._right._color = .Black
rotate__left(t, n._parent)
} else {
sibling(n)._left._color = .Black
rotate__right(t, n._parent)
}
}
node_color :: proc(n: ^$N/Node($Key, $Value)) -> (c: Color) {
return n == nil ? .Black : n._color
}

View File

@@ -119,20 +119,20 @@ consume :: proc "odin" (a: ^$A/Small_Array($N, $T), count: int, loc := #caller_l
}
ordered_remove :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int, loc := #caller_location) #no_bounds_check {
runtime.bounds_check_error_loc(loc, index, a.len)
if index+1 < a.len {
copy(a.data[index:], a.data[index+1:])
}
a.len -= 1
}
unordered_remove :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int, loc := #caller_location) #no_bounds_check {
runtime.bounds_check_error_loc(loc, index, a.len)
n := a.len-1
if index != n {
a.data[index] = a.data[n]
}
a.len -= 1
}
clear :: proc "contextless" (a: ^$A/Small_Array($N, $T)) {

View File

@@ -61,7 +61,7 @@ add_dependency :: proc(sorter: ^$S/Sorter($K), key, dependency: K) -> bool {
}
find.dependents[key] = true
find = &sorter.relations[key]
if find == nil {
find = map_insert(&sorter.relations, key, make_relations(sorter))
}

28
core/crypto/_aes/aes.odin Normal file
View File

@@ -0,0 +1,28 @@
package _aes
// KEY_SIZE_128 is the AES-128 key size in bytes.
KEY_SIZE_128 :: 16
// KEY_SIZE_192 is the AES-192 key size in bytes.
KEY_SIZE_192 :: 24
// KEY_SIZE_256 is the AES-256 key size in bytes.
KEY_SIZE_256 :: 32
// BLOCK_SIZE is the AES block size in bytes.
BLOCK_SIZE :: 16
// ROUNDS_128 is the number of rounds for AES-128.
ROUNDS_128 :: 10
// ROUNDS_192 is the number of rounds for AES-192.
ROUNDS_192 :: 12
// ROUNDS_256 is the number of rounds for AES-256.
ROUNDS_256 :: 14
// GHASH_KEY_SIZE is the GHASH key size in bytes.
GHASH_KEY_SIZE :: 16
// GHASH_BLOCK_SIZE is the GHASH block size in bytes.
GHASH_BLOCK_SIZE :: 16
// GHASH_TAG_SIZE is the GHASH tag size in bytes.
GHASH_TAG_SIZE :: 16
// RCON holds the AES key schedule round constants.
RCON := [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36}

View File

@@ -0,0 +1,86 @@
package aes_ct64
import "base:intrinsics"
import "core:mem"
STRIDE :: 4
// Context is a keyed AES (ECB) instance.
Context :: struct {
_sk_exp: [120]u64,
_num_rounds: int,
}
// init initializes a context for AES with the provided key.
init :: proc(ctx: ^Context, key: []byte) {
skey: [30]u64 = ---
ctx._num_rounds = keysched(skey[:], key)
skey_expand(ctx._sk_exp[:], skey[:], ctx._num_rounds)
}
// encrypt_block sets `dst` to `AES-ECB-Encrypt(src)`.
encrypt_block :: proc(ctx: ^Context, dst, src: []byte) {
q: [8]u64
load_blockx1(&q, src)
_encrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
store_blockx1(dst, &q)
}
// decrypt_block sets `dst` to `AES-ECB-Decrypt(src)`.
decrypt_block :: proc(ctx: ^Context, dst, src: []byte) {
q: [8]u64
load_blockx1(&q, src)
_decrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
store_blockx1(dst, &q)
}
// encrypt_blocks sets `dst` to `AES-ECB-Encrypt(src[0], .. src[n])`.
encrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) {
q: [8]u64 = ---
src, dst := src, dst
n := len(src)
for n > 4 {
load_blocks(&q, src[0:4])
_encrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
store_blocks(dst[0:4], &q)
src = src[4:]
dst = dst[4:]
n -= 4
}
if n > 0 {
load_blocks(&q, src)
_encrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
store_blocks(dst, &q)
}
}
// decrypt_blocks sets dst to `AES-ECB-Decrypt(src[0], .. src[n])`.
decrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) {
q: [8]u64 = ---
src, dst := src, dst
n := len(src)
for n > 4 {
load_blocks(&q, src[0:4])
_decrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
store_blocks(dst[0:4], &q)
src = src[4:]
dst = dst[4:]
n -= 4
}
if n > 0 {
load_blocks(&q, src)
_decrypt(&q, ctx._sk_exp[:], ctx._num_rounds)
store_blocks(dst, &q)
}
}
// reset sanitizes the Context. The Context must be re-initialized to
// be used again.
reset :: proc(ctx: ^Context) {
mem.zero_explicit(ctx, size_of(Context)) // zero the whole struct, not just the pointer
}
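// Minimal usage sketch for this package (illustrative only; buffer names and
// sizes are assumptions): the key must be 16, 24, or 32 bytes, and src/dst
// must each be exactly one 16-byte block for the single-block procs:
//
//	ctx: Context
//	init(&ctx, key[:])
//	encrypt_block(&ctx, ct[:], pt[:])
//	decrypt_block(&ctx, pt2[:], ct[:])
//	reset(&ctx) // sanitize when done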

View File

@@ -0,0 +1,265 @@
// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package aes_ct64
import "base:intrinsics"
// Bitsliced AES for 64-bit general purpose (integer) registers. Each
// invocation will process up to 4 blocks at a time. This implementation
// is derived from the BearSSL ct64 code, and distributed under a 1-clause
// BSD license with permission from the original author.
//
// WARNING: "hic sunt dracones"
//
// This package also deliberately exposes enough internals to be able to
// function as a replacement for `AESENC` and `AESDEC` from AES-NI, to
// allow the implementation of non-AES primitives that use the AES round
// function such as AEGIS and Deoxys-II. This should ONLY be done when
// implementing something other than AES itself.
sub_bytes :: proc "contextless" (q: ^[8]u64) {
// This S-box implementation is a straightforward translation of
// the circuit described by Boyar and Peralta in "A new
// combinational logic minimization technique with applications
// to cryptology" (https://eprint.iacr.org/2009/191.pdf).
//
// Note that variables x* (input) and s* (output) are numbered
// in "reverse" order (x0 is the high bit, x7 is the low bit).
x0 := q[7]
x1 := q[6]
x2 := q[5]
x3 := q[4]
x4 := q[3]
x5 := q[2]
x6 := q[1]
x7 := q[0]
// Top linear transformation.
y14 := x3 ~ x5
y13 := x0 ~ x6
y9 := x0 ~ x3
y8 := x0 ~ x5
t0 := x1 ~ x2
y1 := t0 ~ x7
y4 := y1 ~ x3
y12 := y13 ~ y14
y2 := y1 ~ x0
y5 := y1 ~ x6
y3 := y5 ~ y8
t1 := x4 ~ y12
y15 := t1 ~ x5
y20 := t1 ~ x1
y6 := y15 ~ x7
y10 := y15 ~ t0
y11 := y20 ~ y9
y7 := x7 ~ y11
y17 := y10 ~ y11
y19 := y10 ~ y8
y16 := t0 ~ y11
y21 := y13 ~ y16
y18 := x0 ~ y16
// Non-linear section.
t2 := y12 & y15
t3 := y3 & y6
t4 := t3 ~ t2
t5 := y4 & x7
t6 := t5 ~ t2
t7 := y13 & y16
t8 := y5 & y1
t9 := t8 ~ t7
t10 := y2 & y7
t11 := t10 ~ t7
t12 := y9 & y11
t13 := y14 & y17
t14 := t13 ~ t12
t15 := y8 & y10
t16 := t15 ~ t12
t17 := t4 ~ t14
t18 := t6 ~ t16
t19 := t9 ~ t14
t20 := t11 ~ t16
t21 := t17 ~ y20
t22 := t18 ~ y19
t23 := t19 ~ y21
t24 := t20 ~ y18
t25 := t21 ~ t22
t26 := t21 & t23
t27 := t24 ~ t26
t28 := t25 & t27
t29 := t28 ~ t22
t30 := t23 ~ t24
t31 := t22 ~ t26
t32 := t31 & t30
t33 := t32 ~ t24
t34 := t23 ~ t33
t35 := t27 ~ t33
t36 := t24 & t35
t37 := t36 ~ t34
t38 := t27 ~ t36
t39 := t29 & t38
t40 := t25 ~ t39
t41 := t40 ~ t37
t42 := t29 ~ t33
t43 := t29 ~ t40
t44 := t33 ~ t37
t45 := t42 ~ t41
z0 := t44 & y15
z1 := t37 & y6
z2 := t33 & x7
z3 := t43 & y16
z4 := t40 & y1
z5 := t29 & y7
z6 := t42 & y11
z7 := t45 & y17
z8 := t41 & y10
z9 := t44 & y12
z10 := t37 & y3
z11 := t33 & y4
z12 := t43 & y13
z13 := t40 & y5
z14 := t29 & y2
z15 := t42 & y9
z16 := t45 & y14
z17 := t41 & y8
// Bottom linear transformation.
t46 := z15 ~ z16
t47 := z10 ~ z11
t48 := z5 ~ z13
t49 := z9 ~ z10
t50 := z2 ~ z12
t51 := z2 ~ z5
t52 := z7 ~ z8
t53 := z0 ~ z3
t54 := z6 ~ z7
t55 := z16 ~ z17
t56 := z12 ~ t48
t57 := t50 ~ t53
t58 := z4 ~ t46
t59 := z3 ~ t54
t60 := t46 ~ t57
t61 := z14 ~ t57
t62 := t52 ~ t58
t63 := t49 ~ t58
t64 := z4 ~ t59
t65 := t61 ~ t62
t66 := z1 ~ t63
s0 := t59 ~ t63
s6 := t56 ~ ~t62
s7 := t48 ~ ~t60
t67 := t64 ~ t65
s3 := t53 ~ t66
s4 := t51 ~ t66
s5 := t47 ~ t65
s1 := t64 ~ ~s3
s2 := t55 ~ ~t67
q[7] = s0
q[6] = s1
q[5] = s2
q[4] = s3
q[3] = s4
q[2] = s5
q[1] = s6
q[0] = s7
}
orthogonalize :: proc "contextless" (q: ^[8]u64) {
CL2 :: 0x5555555555555555
CH2 :: 0xAAAAAAAAAAAAAAAA
q[0], q[1] = (q[0] & CL2) | ((q[1] & CL2) << 1), ((q[0] & CH2) >> 1) | (q[1] & CH2)
q[2], q[3] = (q[2] & CL2) | ((q[3] & CL2) << 1), ((q[2] & CH2) >> 1) | (q[3] & CH2)
q[4], q[5] = (q[4] & CL2) | ((q[5] & CL2) << 1), ((q[4] & CH2) >> 1) | (q[5] & CH2)
q[6], q[7] = (q[6] & CL2) | ((q[7] & CL2) << 1), ((q[6] & CH2) >> 1) | (q[7] & CH2)
CL4 :: 0x3333333333333333
CH4 :: 0xCCCCCCCCCCCCCCCC
q[0], q[2] = (q[0] & CL4) | ((q[2] & CL4) << 2), ((q[0] & CH4) >> 2) | (q[2] & CH4)
q[1], q[3] = (q[1] & CL4) | ((q[3] & CL4) << 2), ((q[1] & CH4) >> 2) | (q[3] & CH4)
q[4], q[6] = (q[4] & CL4) | ((q[6] & CL4) << 2), ((q[4] & CH4) >> 2) | (q[6] & CH4)
q[5], q[7] = (q[5] & CL4) | ((q[7] & CL4) << 2), ((q[5] & CH4) >> 2) | (q[7] & CH4)
CL8 :: 0x0F0F0F0F0F0F0F0F
CH8 :: 0xF0F0F0F0F0F0F0F0
q[0], q[4] = (q[0] & CL8) | ((q[4] & CL8) << 4), ((q[0] & CH8) >> 4) | (q[4] & CH8)
q[1], q[5] = (q[1] & CL8) | ((q[5] & CL8) << 4), ((q[1] & CH8) >> 4) | (q[5] & CH8)
q[2], q[6] = (q[2] & CL8) | ((q[6] & CL8) << 4), ((q[2] & CH8) >> 4) | (q[6] & CH8)
q[3], q[7] = (q[3] & CL8) | ((q[7] & CL8) << 4), ((q[3] & CH8) >> 4) | (q[7] & CH8)
}
@(require_results)
interleave_in :: proc "contextless" (w: []u32) -> (q0, q1: u64) #no_bounds_check {
if len(w) < 4 {
intrinsics.trap()
}
x0, x1, x2, x3 := u64(w[0]), u64(w[1]), u64(w[2]), u64(w[3])
x0 |= (x0 << 16)
x1 |= (x1 << 16)
x2 |= (x2 << 16)
x3 |= (x3 << 16)
x0 &= 0x0000FFFF0000FFFF
x1 &= 0x0000FFFF0000FFFF
x2 &= 0x0000FFFF0000FFFF
x3 &= 0x0000FFFF0000FFFF
x0 |= (x0 << 8)
x1 |= (x1 << 8)
x2 |= (x2 << 8)
x3 |= (x3 << 8)
x0 &= 0x00FF00FF00FF00FF
x1 &= 0x00FF00FF00FF00FF
x2 &= 0x00FF00FF00FF00FF
x3 &= 0x00FF00FF00FF00FF
q0 = x0 | (x2 << 8)
q1 = x1 | (x3 << 8)
return
}
@(require_results)
interleave_out :: proc "contextless" (q0, q1: u64) -> (w0, w1, w2, w3: u32) {
x0 := q0 & 0x00FF00FF00FF00FF
x1 := q1 & 0x00FF00FF00FF00FF
x2 := (q0 >> 8) & 0x00FF00FF00FF00FF
x3 := (q1 >> 8) & 0x00FF00FF00FF00FF
x0 |= (x0 >> 8)
x1 |= (x1 >> 8)
x2 |= (x2 >> 8)
x3 |= (x3 >> 8)
x0 &= 0x0000FFFF0000FFFF
x1 &= 0x0000FFFF0000FFFF
x2 &= 0x0000FFFF0000FFFF
x3 &= 0x0000FFFF0000FFFF
w0 = u32(x0) | u32(x0 >> 16)
w1 = u32(x1) | u32(x1 >> 16)
w2 = u32(x2) | u32(x2 >> 16)
w3 = u32(x3) | u32(x3 >> 16)
return
}
@(private)
rotr32 :: #force_inline proc "contextless" (x: u64) -> u64 {
return (x << 32) | (x >> 32)
}

View File

@@ -0,0 +1,135 @@
// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package aes_ct64
import "base:intrinsics"
inv_sub_bytes :: proc "contextless" (q: ^[8]u64) {
// AES S-box is:
// S(x) = A(I(x)) ^ 0x63
// where I() is inversion in GF(256), and A() is a linear
// transform (0 is formally defined to be its own inverse).
// Since inversion is an involution, the inverse S-box can be
// computed from the S-box as:
// iS(x) = B(S(B(x ^ 0x63)) ^ 0x63)
// where B() is the inverse of A(). Indeed, for any y in GF(256):
// iS(S(y)) = B(A(I(B(A(I(y)) ^ 0x63 ^ 0x63))) ^ 0x63 ^ 0x63) = y
//
// Note: we reuse the implementation of the forward S-box,
// instead of duplicating it here, so that total code size is
// lower. By merging the B() transforms into the S-box circuit
// we could make faster CBC decryption, but CBC decryption is
// already considerably faster than CBC encryption because we can
// process four blocks in parallel.
q0 := ~q[0]
q1 := ~q[1]
q2 := q[2]
q3 := q[3]
q4 := q[4]
q5 := ~q[5]
q6 := ~q[6]
q7 := q[7]
q[7] = q1 ~ q4 ~ q6
q[6] = q0 ~ q3 ~ q5
q[5] = q7 ~ q2 ~ q4
q[4] = q6 ~ q1 ~ q3
q[3] = q5 ~ q0 ~ q2
q[2] = q4 ~ q7 ~ q1
q[1] = q3 ~ q6 ~ q0
q[0] = q2 ~ q5 ~ q7
sub_bytes(q)
q0 = ~q[0]
q1 = ~q[1]
q2 = q[2]
q3 = q[3]
q4 = q[4]
q5 = ~q[5]
q6 = ~q[6]
q7 = q[7]
q[7] = q1 ~ q4 ~ q6
q[6] = q0 ~ q3 ~ q5
q[5] = q7 ~ q2 ~ q4
q[4] = q6 ~ q1 ~ q3
q[3] = q5 ~ q0 ~ q2
q[2] = q4 ~ q7 ~ q1
q[1] = q3 ~ q6 ~ q0
q[0] = q2 ~ q5 ~ q7
}
inv_shift_rows :: proc "contextless" (q: ^[8]u64) {
for x, i in q {
q[i] =
(x & 0x000000000000FFFF) |
((x & 0x000000000FFF0000) << 4) |
((x & 0x00000000F0000000) >> 12) |
((x & 0x000000FF00000000) << 8) |
((x & 0x0000FF0000000000) >> 8) |
((x & 0x000F000000000000) << 12) |
((x & 0xFFF0000000000000) >> 4)
}
}
inv_mix_columns :: proc "contextless" (q: ^[8]u64) {
q0 := q[0]
q1 := q[1]
q2 := q[2]
q3 := q[3]
q4 := q[4]
q5 := q[5]
q6 := q[6]
q7 := q[7]
r0 := (q0 >> 16) | (q0 << 48)
r1 := (q1 >> 16) | (q1 << 48)
r2 := (q2 >> 16) | (q2 << 48)
r3 := (q3 >> 16) | (q3 << 48)
r4 := (q4 >> 16) | (q4 << 48)
r5 := (q5 >> 16) | (q5 << 48)
r6 := (q6 >> 16) | (q6 << 48)
r7 := (q7 >> 16) | (q7 << 48)
q[0] = q5 ~ q6 ~ q7 ~ r0 ~ r5 ~ r7 ~ rotr32(q0 ~ q5 ~ q6 ~ r0 ~ r5)
q[1] = q0 ~ q5 ~ r0 ~ r1 ~ r5 ~ r6 ~ r7 ~ rotr32(q1 ~ q5 ~ q7 ~ r1 ~ r5 ~ r6)
q[2] = q0 ~ q1 ~ q6 ~ r1 ~ r2 ~ r6 ~ r7 ~ rotr32(q0 ~ q2 ~ q6 ~ r2 ~ r6 ~ r7)
q[3] = q0 ~ q1 ~ q2 ~ q5 ~ q6 ~ r0 ~ r2 ~ r3 ~ r5 ~ rotr32(q0 ~ q1 ~ q3 ~ q5 ~ q6 ~ q7 ~ r0 ~ r3 ~ r5 ~ r7)
q[4] = q1 ~ q2 ~ q3 ~ q5 ~ r1 ~ r3 ~ r4 ~ r5 ~ r6 ~ r7 ~ rotr32(q1 ~ q2 ~ q4 ~ q5 ~ q7 ~ r1 ~ r4 ~ r5 ~ r6)
q[5] = q2 ~ q3 ~ q4 ~ q6 ~ r2 ~ r4 ~ r5 ~ r6 ~ r7 ~ rotr32(q2 ~ q3 ~ q5 ~ q6 ~ r2 ~ r5 ~ r6 ~ r7)
q[6] = q3 ~ q4 ~ q5 ~ q7 ~ r3 ~ r5 ~ r6 ~ r7 ~ rotr32(q3 ~ q4 ~ q6 ~ q7 ~ r3 ~ r6 ~ r7)
q[7] = q4 ~ q5 ~ q6 ~ r4 ~ r6 ~ r7 ~ rotr32(q4 ~ q5 ~ q7 ~ r4 ~ r7)
}
@(private)
_decrypt :: proc "contextless" (q: ^[8]u64, skey: []u64, num_rounds: int) {
add_round_key(q, skey[num_rounds << 3:])
for u := num_rounds - 1; u > 0; u -= 1 {
inv_shift_rows(q)
inv_sub_bytes(q)
add_round_key(q, skey[u << 3:])
inv_mix_columns(q)
}
inv_shift_rows(q)
inv_sub_bytes(q)
add_round_key(q, skey)
}

View File

@@ -0,0 +1,95 @@
// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package aes_ct64
import "base:intrinsics"
add_round_key :: proc "contextless" (q: ^[8]u64, sk: []u64) #no_bounds_check {
if len(sk) < 8 {
intrinsics.trap()
}
q[0] ~= sk[0]
q[1] ~= sk[1]
q[2] ~= sk[2]
q[3] ~= sk[3]
q[4] ~= sk[4]
q[5] ~= sk[5]
q[6] ~= sk[6]
q[7] ~= sk[7]
}
shift_rows :: proc "contextless" (q: ^[8]u64) {
for x, i in q {
q[i] =
(x & 0x000000000000FFFF) |
((x & 0x00000000FFF00000) >> 4) |
((x & 0x00000000000F0000) << 12) |
((x & 0x0000FF0000000000) >> 8) |
((x & 0x000000FF00000000) << 8) |
((x & 0xF000000000000000) >> 12) |
((x & 0x0FFF000000000000) << 4)
}
}
mix_columns :: proc "contextless" (q: ^[8]u64) {
q0 := q[0]
q1 := q[1]
q2 := q[2]
q3 := q[3]
q4 := q[4]
q5 := q[5]
q6 := q[6]
q7 := q[7]
r0 := (q0 >> 16) | (q0 << 48)
r1 := (q1 >> 16) | (q1 << 48)
r2 := (q2 >> 16) | (q2 << 48)
r3 := (q3 >> 16) | (q3 << 48)
r4 := (q4 >> 16) | (q4 << 48)
r5 := (q5 >> 16) | (q5 << 48)
r6 := (q6 >> 16) | (q6 << 48)
r7 := (q7 >> 16) | (q7 << 48)
q[0] = q7 ~ r7 ~ r0 ~ rotr32(q0 ~ r0)
q[1] = q0 ~ r0 ~ q7 ~ r7 ~ r1 ~ rotr32(q1 ~ r1)
q[2] = q1 ~ r1 ~ r2 ~ rotr32(q2 ~ r2)
q[3] = q2 ~ r2 ~ q7 ~ r7 ~ r3 ~ rotr32(q3 ~ r3)
q[4] = q3 ~ r3 ~ q7 ~ r7 ~ r4 ~ rotr32(q4 ~ r4)
q[5] = q4 ~ r4 ~ r5 ~ rotr32(q5 ~ r5)
q[6] = q5 ~ r5 ~ r6 ~ rotr32(q6 ~ r6)
q[7] = q6 ~ r6 ~ r7 ~ rotr32(q7 ~ r7)
}
@(private)
_encrypt :: proc "contextless" (q: ^[8]u64, skey: []u64, num_rounds: int) {
add_round_key(q, skey)
for u in 1 ..< num_rounds {
sub_bytes(q)
shift_rows(q)
mix_columns(q)
add_round_key(q, skey[u << 3:])
}
sub_bytes(q)
shift_rows(q)
add_round_key(q, skey[num_rounds << 3:])
}

View File

@@ -0,0 +1,179 @@
// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package aes_ct64
import "base:intrinsics"
import "core:crypto/_aes"
import "core:encoding/endian"
import "core:mem"
@(private, require_results)
sub_word :: proc "contextless" (x: u32) -> u32 {
q := [8]u64{u64(x), 0, 0, 0, 0, 0, 0, 0}
orthogonalize(&q)
sub_bytes(&q)
orthogonalize(&q)
ret := u32(q[0])
mem.zero_explicit(&q[0], size_of(u64))
return ret
}
@(private, require_results)
keysched :: proc(comp_skey: []u64, key: []byte) -> int {
num_rounds, key_len := 0, len(key)
switch key_len {
case _aes.KEY_SIZE_128:
num_rounds = _aes.ROUNDS_128
case _aes.KEY_SIZE_192:
num_rounds = _aes.ROUNDS_192
case _aes.KEY_SIZE_256:
num_rounds = _aes.ROUNDS_256
case:
panic("crypto/aes: invalid AES key size")
}
skey: [60]u32 = ---
nk, nkf := key_len >> 2, (num_rounds + 1) << 2
for i in 0 ..< nk {
skey[i] = endian.unchecked_get_u32le(key[i << 2:])
}
tmp := skey[(key_len >> 2) - 1]
for i, j, k := nk, 0, 0; i < nkf; i += 1 {
if j == 0 {
tmp = (tmp << 24) | (tmp >> 8)
tmp = sub_word(tmp) ~ u32(_aes.RCON[k])
} else if nk > 6 && j == 4 {
tmp = sub_word(tmp)
}
tmp ~= skey[i - nk]
skey[i] = tmp
if j += 1; j == nk {
j = 0
k += 1
}
}
q: [8]u64 = ---
for i, j := 0, 0; i < nkf; i, j = i + 4, j + 2 {
q[0], q[4] = interleave_in(skey[i:])
q[1] = q[0]
q[2] = q[0]
q[3] = q[0]
q[5] = q[4]
q[6] = q[4]
q[7] = q[4]
orthogonalize(&q)
comp_skey[j + 0] =
(q[0] & 0x1111111111111111) |
(q[1] & 0x2222222222222222) |
(q[2] & 0x4444444444444444) |
(q[3] & 0x8888888888888888)
comp_skey[j + 1] =
(q[4] & 0x1111111111111111) |
(q[5] & 0x2222222222222222) |
(q[6] & 0x4444444444444444) |
(q[7] & 0x8888888888888888)
}
mem.zero_explicit(&skey, size_of(skey))
mem.zero_explicit(&q, size_of(q))
return num_rounds
}
@(private)
skey_expand :: proc "contextless" (skey, comp_skey: []u64, num_rounds: int) {
n := (num_rounds + 1) << 1
for u, v := 0, 0; u < n; u, v = u + 1, v + 4 {
x0 := comp_skey[u]
x1, x2, x3 := x0, x0, x0
x0 &= 0x1111111111111111
x1 &= 0x2222222222222222
x2 &= 0x4444444444444444
x3 &= 0x8888888888888888
x1 >>= 1
x2 >>= 2
x3 >>= 3
skey[v + 0] = (x0 << 4) - x0
skey[v + 1] = (x1 << 4) - x1
skey[v + 2] = (x2 << 4) - x2
skey[v + 3] = (x3 << 4) - x3
}
}
orthogonalize_roundkey :: proc "contextless" (qq: []u64, key: []byte) {
if len(qq) < 8 || len(key) != 16 {
intrinsics.trap()
}
skey: [4]u32 = ---
skey[0] = endian.unchecked_get_u32le(key[0:])
skey[1] = endian.unchecked_get_u32le(key[4:])
skey[2] = endian.unchecked_get_u32le(key[8:])
skey[3] = endian.unchecked_get_u32le(key[12:])
q: [8]u64 = ---
q[0], q[4] = interleave_in(skey[:])
q[1] = q[0]
q[2] = q[0]
q[3] = q[0]
q[5] = q[4]
q[6] = q[4]
q[7] = q[4]
orthogonalize(&q)
comp_skey: [2]u64 = ---
comp_skey[0] =
(q[0] & 0x1111111111111111) |
(q[1] & 0x2222222222222222) |
(q[2] & 0x4444444444444444) |
(q[3] & 0x8888888888888888)
comp_skey[1] =
(q[4] & 0x1111111111111111) |
(q[5] & 0x2222222222222222) |
(q[6] & 0x4444444444444444) |
(q[7] & 0x8888888888888888)
for x, u in comp_skey {
x0 := x
x1, x2, x3 := x0, x0, x0
x0 &= 0x1111111111111111
x1 &= 0x2222222222222222
x2 &= 0x4444444444444444
x3 &= 0x8888888888888888
x1 >>= 1
x2 >>= 2
x3 >>= 3
qq[u * 4 + 0] = (x0 << 4) - x0
qq[u * 4 + 1] = (x1 << 4) - x1
qq[u * 4 + 2] = (x2 << 4) - x2
qq[u * 4 + 3] = (x3 << 4) - x3
}
mem.zero_explicit(&skey, size_of(skey))
mem.zero_explicit(&q, size_of(q))
mem.zero_explicit(&comp_skey, size_of(comp_skey))
}

View File

@@ -0,0 +1,136 @@
// Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package aes_ct64
import "base:intrinsics"
import "core:crypto/_aes"
import "core:encoding/endian"
@(private = "file")
bmul64 :: proc "contextless" (x, y: u64) -> u64 {
x0 := x & 0x1111111111111111
x1 := x & 0x2222222222222222
x2 := x & 0x4444444444444444
x3 := x & 0x8888888888888888
y0 := y & 0x1111111111111111
y1 := y & 0x2222222222222222
y2 := y & 0x4444444444444444
y3 := y & 0x8888888888888888
z0 := (x0 * y0) ~ (x1 * y3) ~ (x2 * y2) ~ (x3 * y1)
z1 := (x0 * y1) ~ (x1 * y0) ~ (x2 * y3) ~ (x3 * y2)
z2 := (x0 * y2) ~ (x1 * y1) ~ (x2 * y0) ~ (x3 * y3)
z3 := (x0 * y3) ~ (x1 * y2) ~ (x2 * y1) ~ (x3 * y0)
z0 &= 0x1111111111111111
z1 &= 0x2222222222222222
z2 &= 0x4444444444444444
z3 &= 0x8888888888888888
return z0 | z1 | z2 | z3
}
@(private = "file")
rev64 :: proc "contextless" (x: u64) -> u64 {
x := x
x = ((x & 0x5555555555555555) << 1) | ((x >> 1) & 0x5555555555555555)
x = ((x & 0x3333333333333333) << 2) | ((x >> 2) & 0x3333333333333333)
x = ((x & 0x0F0F0F0F0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F0F0F0F0F)
x = ((x & 0x00FF00FF00FF00FF) << 8) | ((x >> 8) & 0x00FF00FF00FF00FF)
x = ((x & 0x0000FFFF0000FFFF) << 16) | ((x >> 16) & 0x0000FFFF0000FFFF)
return (x << 32) | (x >> 32)
}
// ghash calculates the GHASH of `data` under the key `key`, and stores
// the resulting digest in `dst`.
//
// Note: `dst` is both an input and an output, to support easy implementation
// of GCM.
ghash :: proc "contextless" (dst, key, data: []byte) {
if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE {
intrinsics.trap()
}
buf := data
l := len(buf)
y1 := endian.unchecked_get_u64be(dst[0:])
y0 := endian.unchecked_get_u64be(dst[8:])
h1 := endian.unchecked_get_u64be(key[0:])
h0 := endian.unchecked_get_u64be(key[8:])
h0r := rev64(h0)
h1r := rev64(h1)
h2 := h0 ~ h1
h2r := h0r ~ h1r
src: []byte
for l > 0 {
if l >= _aes.GHASH_BLOCK_SIZE {
src = buf
buf = buf[_aes.GHASH_BLOCK_SIZE:]
l -= _aes.GHASH_BLOCK_SIZE
} else {
tmp: [_aes.GHASH_BLOCK_SIZE]byte
copy(tmp[:], buf)
src = tmp[:]
l = 0
}
y1 ~= endian.unchecked_get_u64be(src)
y0 ~= endian.unchecked_get_u64be(src[8:])
y0r := rev64(y0)
y1r := rev64(y1)
y2 := y0 ~ y1
y2r := y0r ~ y1r
z0 := bmul64(y0, h0)
z1 := bmul64(y1, h1)
z2 := bmul64(y2, h2)
z0h := bmul64(y0r, h0r)
z1h := bmul64(y1r, h1r)
z2h := bmul64(y2r, h2r)
z2 ~= z0 ~ z1
z2h ~= z0h ~ z1h
z0h = rev64(z0h) >> 1
z1h = rev64(z1h) >> 1
z2h = rev64(z2h) >> 1
v0 := z0
v1 := z0h ~ z2
v2 := z1 ~ z2h
v3 := z1h
v3 = (v3 << 1) | (v2 >> 63)
v2 = (v2 << 1) | (v1 >> 63)
v1 = (v1 << 1) | (v0 >> 63)
v0 = (v0 << 1)
v2 ~= v0 ~ (v0 >> 1) ~ (v0 >> 2) ~ (v0 >> 7)
v1 ~= (v0 << 63) ~ (v0 << 62) ~ (v0 << 57)
v3 ~= v1 ~ (v1 >> 1) ~ (v1 >> 2) ~ (v1 >> 7)
v2 ~= (v1 << 63) ~ (v1 << 62) ~ (v1 << 57)
y0 = v2
y1 = v3
}
endian.unchecked_put_u64be(dst[0:], y1)
endian.unchecked_put_u64be(dst[8:], y0)
}
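// Usage sketch (illustrative only; buffer names are assumptions): `dst` is
// the running digest, so it starts zeroed for a fresh computation and carries
// over between calls, which is what makes chaining for GCM straightforward:
//
//	y: [_aes.GHASH_BLOCK_SIZE]byte // running digest, zero-initialized
//	ghash(y[:], h[:], aad)         // absorb additional authenticated data
//	ghash(y[:], h[:], ciphertext)  // absorb ciphertext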

View File

@@ -0,0 +1,75 @@
package aes_ct64
import "base:intrinsics"
import "core:crypto/_aes"
import "core:encoding/endian"
load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) {
if len(src) != _aes.BLOCK_SIZE {
intrinsics.trap()
}
w: [4]u32 = ---
w[0] = endian.unchecked_get_u32le(src[0:])
w[1] = endian.unchecked_get_u32le(src[4:])
w[2] = endian.unchecked_get_u32le(src[8:])
w[3] = endian.unchecked_get_u32le(src[12:])
q[0], q[4] = interleave_in(w[:])
orthogonalize(q)
}
store_blockx1 :: proc "contextless" (dst: []byte, q: ^[8]u64) {
if len(dst) != _aes.BLOCK_SIZE {
intrinsics.trap()
}
orthogonalize(q)
w0, w1, w2, w3 := interleave_out(q[0], q[4])
endian.unchecked_put_u32le(dst[0:], w0)
endian.unchecked_put_u32le(dst[4:], w1)
endian.unchecked_put_u32le(dst[8:], w2)
endian.unchecked_put_u32le(dst[12:], w3)
}
load_blocks :: proc "contextless" (q: ^[8]u64, src: [][]byte) {
if n := len(src); n > STRIDE || n == 0 {
intrinsics.trap()
}
w: [4]u32 = ---
for s, i in src {
if len(s) != _aes.BLOCK_SIZE {
intrinsics.trap()
}
w[0] = endian.unchecked_get_u32le(s[0:])
w[1] = endian.unchecked_get_u32le(s[4:])
w[2] = endian.unchecked_get_u32le(s[8:])
w[3] = endian.unchecked_get_u32le(s[12:])
q[i], q[i + 4] = interleave_in(w[:])
}
orthogonalize(q)
}
store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) {
if n := len(dst); n > STRIDE || n == 0 {
intrinsics.trap()
}
orthogonalize(q)
for d, i in dst {
// Allow storing [0,4] blocks.
if d == nil {
break
}
if len(d) != _aes.BLOCK_SIZE {
intrinsics.trap()
}
w0, w1, w2, w3 := interleave_out(q[i], q[i + 4])
endian.unchecked_put_u32le(d[0:], w0)
endian.unchecked_put_u32le(d[4:], w1)
endian.unchecked_put_u32le(d[8:], w2)
endian.unchecked_put_u32le(d[12:], w3)
}
}

View File

@@ -0,0 +1,43 @@
//+build amd64
package aes_hw_intel
import "core:sys/info"
// is_supported returns true iff hardware-accelerated AES
// is supported.
is_supported :: proc "contextless" () -> bool {
features, ok := info.cpu_features.?
if !ok {
return false
}
// Note: Everything with AES-NI and PCLMULQDQ has support for
// the required SSE extensions.
req_features :: info.CPU_Features{
.sse2,
.ssse3,
.sse41,
.aes,
.pclmulqdq,
}
return features >= req_features
}
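// Dispatch sketch (illustrative only; the selection logic used by the
// higher-level package may differ):
//
//	if is_supported() {
//		// use the AES-NI / PCLMULQDQ paths in this package
//	} else {
//		// fall back to the portable bitsliced (ct64) implementation
//	}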
// Context is a keyed AES (ECB) instance.
Context :: struct {
// Note: The ideal thing to do is for the expanded round keys to be
// arrays of `__m128i`, however that implies alignment (or using AVX).
//
// All the people using e-waste processors that don't support an
// instruction set that has been around for over 10 years are why
// we can't have nice things.
_sk_exp_enc: [15][16]byte,
_sk_exp_dec: [15][16]byte,
_num_rounds: int,
}
// init initializes a context for AES with the provided key.
init :: proc(ctx: ^Context, key: []byte) {
keysched(ctx, key)
}

View File

@@ -0,0 +1,281 @@
// Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//+build amd64
package aes_hw_intel
import "base:intrinsics"
import "core:crypto/_aes"
import "core:simd"
import "core:simd/x86"
@(private = "file")
GHASH_STRIDE_HW :: 4
@(private = "file")
GHASH_STRIDE_BYTES_HW :: GHASH_STRIDE_HW * _aes.GHASH_BLOCK_SIZE
// GHASH is defined over elements of GF(2^128) with "full little-endian"
// representation: leftmost byte is least significant, and, within each
// byte, leftmost _bit_ is least significant. The natural ordering in
// x86 is "mixed little-endian": bytes are ordered from least to most
// significant, but bits within a byte are in most-to-least significant
// order. Going to full little-endian representation would require
// reversing bits within each byte, which is doable but expensive.
//
// Instead, we go to full big-endian representation, by swapping bytes
// around, which is done with a single _mm_shuffle_epi8() opcode (it
// comes with SSSE3; all CPU that offer pclmulqdq also have SSSE3). We
// can use a full big-endian representation because in a carryless
// multiplication, we have a nice bit reversal property:
//
// rev_128(x) * rev_128(y) = rev_255(x * y)
//
// So by using full big-endian, we still get the right result, except
// that it is right-shifted by 1 bit. The left-shift is relatively
// inexpensive, and it can be mutualised.
//
// Since SSE2 opcodes do not have facilities for shifting full 128-bit
// values with bit precision, we have to break down values into 64-bit
// chunks. We number chunks from 0 to 3 in left to right order.
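// A small worked instance of the reversal property (illustrative, using
// 4-bit inputs and a 7-bit product): for x = 0b0011 and y = 0b0101 the
// carryless product x*y is 0b0001111; reversing the 4-bit inputs gives
// 0b1100 and 0b1010, whose carryless product 0b1111000 is exactly the
// 7-bit reversal of 0b0001111.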
@(private = "file")
byteswap_index := transmute(x86.__m128i)simd.i8x16{
// Note: simd.i8x16 is in reverse order from x86._mm_set_epi8.
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
}
@(private = "file", require_results, enable_target_feature = "sse2,ssse3")
byteswap :: #force_inline proc "contextless" (x: x86.__m128i) -> x86.__m128i {
return x86._mm_shuffle_epi8(x, byteswap_index)
}
// From a 128-bit value kw, compute kx as the XOR of the two 64-bit
// halves of kw (into the right half of kx; left half is unspecified),
// and return kx.
@(private = "file", require_results, enable_target_feature = "sse2")
bk :: #force_inline proc "contextless" (kw: x86.__m128i) -> x86.__m128i {
return x86._mm_xor_si128(kw, x86._mm_shuffle_epi32(kw, 0x0e))
}
// Combine two 64-bit values (k0:k1) into a 128-bit (kw) value and
// the XOR of the two values (kx), and return (kw, kx).
@(private = "file", enable_target_feature = "sse2")
pbk :: #force_inline proc "contextless" (k0, k1: x86.__m128i) -> (x86.__m128i, x86.__m128i) {
kw := x86._mm_unpacklo_epi64(k1, k0)
kx := x86._mm_xor_si128(k0, k1)
return kw, kx
}
// Left-shift by 1 bit a 256-bit value (in four 64-bit words).
@(private = "file", require_results, enable_target_feature = "sse2")
sl_256 :: #force_inline proc "contextless" (x0, x1, x2, x3: x86.__m128i) -> (x86.__m128i, x86.__m128i, x86.__m128i, x86.__m128i) {
x0, x1, x2, x3 := x0, x1, x2, x3
x0 = x86._mm_or_si128(x86._mm_slli_epi64(x0, 1), x86._mm_srli_epi64(x1, 63))
x1 = x86._mm_or_si128(x86._mm_slli_epi64(x1, 1), x86._mm_srli_epi64(x2, 63))
x2 = x86._mm_or_si128(x86._mm_slli_epi64(x2, 1), x86._mm_srli_epi64(x3, 63))
x3 = x86._mm_slli_epi64(x3, 1)
return x0, x1, x2, x3
}
// Perform reduction in GF(2^128).
@(private = "file", require_results, enable_target_feature = "sse2")
reduce_f128 :: #force_inline proc "contextless" (x0, x1, x2, x3: x86.__m128i) -> (x86.__m128i, x86.__m128i) {
x0, x1, x2 := x0, x1, x2
x1 = x86._mm_xor_si128(
x1,
x86._mm_xor_si128(
x86._mm_xor_si128(
x3,
x86._mm_srli_epi64(x3, 1)),
x86._mm_xor_si128(
x86._mm_srli_epi64(x3, 2),
x86._mm_srli_epi64(x3, 7))))
x2 = x86._mm_xor_si128(
x86._mm_xor_si128(
x2,
x86._mm_slli_epi64(x3, 63)),
x86._mm_xor_si128(
x86._mm_slli_epi64(x3, 62),
x86._mm_slli_epi64(x3, 57)))
x0 = x86._mm_xor_si128(
x0,
x86._mm_xor_si128(
x86._mm_xor_si128(
x2,
x86._mm_srli_epi64(x2, 1)),
x86._mm_xor_si128(
x86._mm_srli_epi64(x2, 2),
x86._mm_srli_epi64(x2, 7))))
x1 = x86._mm_xor_si128(
x86._mm_xor_si128(
x1,
x86._mm_slli_epi64(x2, 63)),
x86._mm_xor_si128(
x86._mm_slli_epi64(x2, 62),
x86._mm_slli_epi64(x2, 57)))
return x0, x1
}
// Square value kw in GF(2^128) into (dw,dx).
@(private = "file", require_results, enable_target_feature = "sse2,pclmul")
square_f128 :: #force_inline proc "contextless" (kw: x86.__m128i) -> (x86.__m128i, x86.__m128i) {
z1 := x86._mm_clmulepi64_si128(kw, kw, 0x11)
z3 := x86._mm_clmulepi64_si128(kw, kw, 0x00)
z0 := x86._mm_shuffle_epi32(z1, 0x0E)
z2 := x86._mm_shuffle_epi32(z3, 0x0E)
z0, z1, z2, z3 = sl_256(z0, z1, z2, z3)
z0, z1 = reduce_f128(z0, z1, z2, z3)
return pbk(z0, z1)
}
// ghash calculates the GHASH of `data` under the key `key`, and stores
// the resulting digest in `dst`.
//
// Note: `dst` is both an input and an output, to support easy implementation
// of GCM.
@(enable_target_feature = "sse2,ssse3,pclmul")
ghash :: proc "contextless" (dst, key, data: []byte) #no_bounds_check {
if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE {
intrinsics.trap()
}
// Note: BearSSL opts to copy the remainder into a zero-filled
// 64-byte buffer. We do something slightly simpler.
// Load key and dst (h and y).
yw := intrinsics.unaligned_load((^x86.__m128i)(raw_data(dst)))
h1w := intrinsics.unaligned_load((^x86.__m128i)(raw_data(key)))
yw = byteswap(yw)
h1w = byteswap(h1w)
h1x := bk(h1w)
// Process 4 blocks at a time
buf := data
l := len(buf)
if l >= GHASH_STRIDE_BYTES_HW {
// Compute h2 = h^2
h2w, h2x := square_f128(h1w)
// Compute h3 = h^3 = h*(h^2)
t1 := x86._mm_clmulepi64_si128(h1w, h2w, 0x11)
t3 := x86._mm_clmulepi64_si128(h1w, h2w, 0x00)
t2 := x86._mm_xor_si128(
x86._mm_clmulepi64_si128(h1x, h2x, 0x00),
x86._mm_xor_si128(t1, t3))
t0 := x86._mm_shuffle_epi32(t1, 0x0E)
t1 = x86._mm_xor_si128(t1, x86._mm_shuffle_epi32(t2, 0x0E))
t2 = x86._mm_xor_si128(t2, x86._mm_shuffle_epi32(t3, 0x0E))
t0, t1, t2, t3 = sl_256(t0, t1, t2, t3)
t0, t1 = reduce_f128(t0, t1, t2, t3)
h3w, h3x := pbk(t0, t1)
// Compute h4 = h^4 = (h^2)^2
h4w, h4x := square_f128(h2w)
for l >= GHASH_STRIDE_BYTES_HW {
aw0 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(buf)))
aw1 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(buf[16:])))
aw2 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(buf[32:])))
aw3 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(buf[48:])))
aw0 = byteswap(aw0)
aw1 = byteswap(aw1)
aw2 = byteswap(aw2)
aw3 = byteswap(aw3)
buf, l = buf[GHASH_STRIDE_BYTES_HW:], l - GHASH_STRIDE_BYTES_HW
aw0 = x86._mm_xor_si128(aw0, yw)
ax1 := bk(aw1)
ax2 := bk(aw2)
ax3 := bk(aw3)
ax0 := bk(aw0)
t1 = x86._mm_xor_si128(
x86._mm_xor_si128(
x86._mm_clmulepi64_si128(aw0, h4w, 0x11),
x86._mm_clmulepi64_si128(aw1, h3w, 0x11)),
x86._mm_xor_si128(
x86._mm_clmulepi64_si128(aw2, h2w, 0x11),
x86._mm_clmulepi64_si128(aw3, h1w, 0x11)))
t3 = x86._mm_xor_si128(
x86._mm_xor_si128(
x86._mm_clmulepi64_si128(aw0, h4w, 0x00),
x86._mm_clmulepi64_si128(aw1, h3w, 0x00)),
x86._mm_xor_si128(
x86._mm_clmulepi64_si128(aw2, h2w, 0x00),
x86._mm_clmulepi64_si128(aw3, h1w, 0x00)))
t2 = x86._mm_xor_si128(
x86._mm_xor_si128(
x86._mm_clmulepi64_si128(ax0, h4x, 0x00),
x86._mm_clmulepi64_si128(ax1, h3x, 0x00)),
x86._mm_xor_si128(
x86._mm_clmulepi64_si128(ax2, h2x, 0x00),
x86._mm_clmulepi64_si128(ax3, h1x, 0x00)))
t2 = x86._mm_xor_si128(t2, x86._mm_xor_si128(t1, t3))
t0 = x86._mm_shuffle_epi32(t1, 0x0E)
t1 = x86._mm_xor_si128(t1, x86._mm_shuffle_epi32(t2, 0x0E))
t2 = x86._mm_xor_si128(t2, x86._mm_shuffle_epi32(t3, 0x0E))
t0, t1, t2, t3 = sl_256(t0, t1, t2, t3)
t0, t1 = reduce_f128(t0, t1, t2, t3)
yw = x86._mm_unpacklo_epi64(t1, t0)
}
}
// Process 1 block at a time
src: []byte
for l > 0 {
if l >= _aes.GHASH_BLOCK_SIZE {
src = buf
buf = buf[_aes.GHASH_BLOCK_SIZE:]
l -= _aes.GHASH_BLOCK_SIZE
} else {
tmp: [_aes.GHASH_BLOCK_SIZE]byte
copy(tmp[:], buf)
src = tmp[:]
l = 0
}
aw := intrinsics.unaligned_load((^x86.__m128i)(raw_data(src)))
aw = byteswap(aw)
aw = x86._mm_xor_si128(aw, yw)
ax := bk(aw)
t1 := x86._mm_clmulepi64_si128(aw, h1w, 0x11)
t3 := x86._mm_clmulepi64_si128(aw, h1w, 0x00)
t2 := x86._mm_clmulepi64_si128(ax, h1x, 0x00)
t2 = x86._mm_xor_si128(t2, x86._mm_xor_si128(t1, t3))
t0 := x86._mm_shuffle_epi32(t1, 0x0E)
t1 = x86._mm_xor_si128(t1, x86._mm_shuffle_epi32(t2, 0x0E))
t2 = x86._mm_xor_si128(t2, x86._mm_shuffle_epi32(t3, 0x0E))
t0, t1, t2, t3 = sl_256(t0, t1, t2, t3)
t0, t1 = reduce_f128(t0, t1, t2, t3)
yw = x86._mm_unpacklo_epi64(t1, t0)
}
// Write back the hash (dst, aka y)
yw = byteswap(yw)
intrinsics.unaligned_store((^x86.__m128i)(raw_data(dst)), yw)
}

View File

@@ -0,0 +1,178 @@
// Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//+build amd64
package aes_hw_intel
import "base:intrinsics"
import "core:crypto/_aes"
import "core:mem"
import "core:simd/x86"
// Intel AES-NI based implementation. Inspiration taken from BearSSL.
//
// Note: This assumes that the SROA optimization pass is enabled to be
// anything resembling performant; otherwise, LLVM will not elide the massive
// number of redundant loads/stores it generates for every intrinsic call.
@(private = "file", require_results, enable_target_feature = "sse2")
expand_step128 :: #force_inline proc(k1, k2: x86.__m128i) -> x86.__m128i {
k1, k2 := k1, k2
k2 = x86._mm_shuffle_epi32(k2, 0xff)
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
return x86._mm_xor_si128(k1, k2)
}
@(private = "file", require_results, enable_target_feature = "sse,sse2")
expand_step192a :: #force_inline proc (k1_, k2_: ^x86.__m128i, k3: x86.__m128i) -> (x86.__m128i, x86.__m128i) {
k1, k2, k3 := k1_^, k2_^, k3
k3 = x86._mm_shuffle_epi32(k3, 0x55)
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
k1 = x86._mm_xor_si128(k1, k3)
tmp := k2
k2 = x86._mm_xor_si128(k2, x86._mm_slli_si128(k2, 0x04))
k2 = x86._mm_xor_si128(k2, x86._mm_shuffle_epi32(k1, 0xff))
k1_, k2_ := k1_, k2_
k1_^, k2_^ = k1, k2
r1 := transmute(x86.__m128i)(x86._mm_shuffle_ps(transmute(x86.__m128)(tmp), transmute(x86.__m128)(k1), 0x44))
r2 := transmute(x86.__m128i)(x86._mm_shuffle_ps(transmute(x86.__m128)(k1), transmute(x86.__m128)(k2), 0x4e))
return r1, r2
}
@(private = "file", require_results, enable_target_feature = "sse2")
expand_step192b :: #force_inline proc (k1_, k2_: ^x86.__m128i, k3: x86.__m128i) -> x86.__m128i {
k1, k2, k3 := k1_^, k2_^, k3
k3 = x86._mm_shuffle_epi32(k3, 0x55)
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
k1 = x86._mm_xor_si128(k1, k3)
k2 = x86._mm_xor_si128(k2, x86._mm_slli_si128(k2, 0x04))
k2 = x86._mm_xor_si128(k2, x86._mm_shuffle_epi32(k1, 0xff))
k1_, k2_ := k1_, k2_
k1_^, k2_^ = k1, k2
return k1
}
@(private = "file", require_results, enable_target_feature = "sse2")
expand_step256b :: #force_inline proc(k1, k2: x86.__m128i) -> x86.__m128i {
k1, k2 := k1, k2
k2 = x86._mm_shuffle_epi32(k2, 0xaa)
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
k1 = x86._mm_xor_si128(k1, x86._mm_slli_si128(k1, 0x04))
return x86._mm_xor_si128(k1, k2)
}
@(private = "file", enable_target_feature = "aes")
derive_dec_keys :: proc(ctx: ^Context, sks: ^[15]x86.__m128i, num_rounds: int) {
intrinsics.unaligned_store((^x86.__m128i)(&ctx._sk_exp_dec[0]), sks[num_rounds])
for i in 1 ..< num_rounds {
tmp := x86._mm_aesimc_si128(sks[i])
intrinsics.unaligned_store((^x86.__m128i)(&ctx._sk_exp_dec[num_rounds - i]), tmp)
}
intrinsics.unaligned_store((^x86.__m128i)(&ctx._sk_exp_dec[num_rounds]), sks[0])
}
@(private, enable_target_feature = "sse,sse2,aes")
keysched :: proc(ctx: ^Context, key: []byte) {
sks: [15]x86.__m128i = ---
// Compute the encryption keys.
num_rounds, key_len := 0, len(key)
switch key_len {
case _aes.KEY_SIZE_128:
sks[0] = intrinsics.unaligned_load((^x86.__m128i)(raw_data(key)))
sks[1] = expand_step128(sks[0], x86._mm_aeskeygenassist_si128(sks[0], 0x01))
sks[2] = expand_step128(sks[1], x86._mm_aeskeygenassist_si128(sks[1], 0x02))
sks[3] = expand_step128(sks[2], x86._mm_aeskeygenassist_si128(sks[2], 0x04))
sks[4] = expand_step128(sks[3], x86._mm_aeskeygenassist_si128(sks[3], 0x08))
sks[5] = expand_step128(sks[4], x86._mm_aeskeygenassist_si128(sks[4], 0x10))
sks[6] = expand_step128(sks[5], x86._mm_aeskeygenassist_si128(sks[5], 0x20))
sks[7] = expand_step128(sks[6], x86._mm_aeskeygenassist_si128(sks[6], 0x40))
sks[8] = expand_step128(sks[7], x86._mm_aeskeygenassist_si128(sks[7], 0x80))
sks[9] = expand_step128(sks[8], x86._mm_aeskeygenassist_si128(sks[8], 0x1b))
sks[10] = expand_step128(sks[9], x86._mm_aeskeygenassist_si128(sks[9], 0x36))
num_rounds = _aes.ROUNDS_128
case _aes.KEY_SIZE_192:
k0 := intrinsics.unaligned_load((^x86.__m128i)(raw_data(key)))
k1 := x86.__m128i{
intrinsics.unaligned_load((^i64)(raw_data(key[16:]))),
0,
}
sks[0] = k0
sks[1], sks[2] = expand_step192a(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x01))
sks[3] = expand_step192b(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x02))
sks[4], sks[5] = expand_step192a(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x04))
sks[6] = expand_step192b(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x08))
sks[7], sks[8] = expand_step192a(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x10))
sks[9] = expand_step192b(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x20))
sks[10], sks[11] = expand_step192a(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x40))
sks[12] = expand_step192b(&k0, &k1, x86._mm_aeskeygenassist_si128(k1, 0x80))
num_rounds = _aes.ROUNDS_192
case _aes.KEY_SIZE_256:
sks[0] = intrinsics.unaligned_load((^x86.__m128i)(raw_data(key)))
sks[1] = intrinsics.unaligned_load((^x86.__m128i)(raw_data(key[16:])))
sks[2] = expand_step128(sks[0], x86._mm_aeskeygenassist_si128(sks[1], 0x01))
sks[3] = expand_step256b(sks[1], x86._mm_aeskeygenassist_si128(sks[2], 0x01))
sks[4] = expand_step128(sks[2], x86._mm_aeskeygenassist_si128(sks[3], 0x02))
sks[5] = expand_step256b(sks[3], x86._mm_aeskeygenassist_si128(sks[4], 0x02))
sks[6] = expand_step128(sks[4], x86._mm_aeskeygenassist_si128(sks[5], 0x04))
sks[7] = expand_step256b(sks[5], x86._mm_aeskeygenassist_si128(sks[6], 0x04))
sks[8] = expand_step128(sks[6], x86._mm_aeskeygenassist_si128(sks[7], 0x08))
sks[9] = expand_step256b(sks[7], x86._mm_aeskeygenassist_si128(sks[8], 0x08))
sks[10] = expand_step128(sks[8], x86._mm_aeskeygenassist_si128(sks[9], 0x10))
sks[11] = expand_step256b(sks[9], x86._mm_aeskeygenassist_si128(sks[10], 0x10))
sks[12] = expand_step128(sks[10], x86._mm_aeskeygenassist_si128(sks[11], 0x20))
sks[13] = expand_step256b(sks[11], x86._mm_aeskeygenassist_si128(sks[12], 0x20))
sks[14] = expand_step128(sks[12], x86._mm_aeskeygenassist_si128(sks[13], 0x40))
num_rounds = _aes.ROUNDS_256
case:
panic("crypto/aes: invalid AES key size")
}
for i in 0 ..= num_rounds {
intrinsics.unaligned_store((^x86.__m128i)(&ctx._sk_exp_enc[i]), sks[i])
}
// Compute the decryption keys. GCM and CTR do not need these; however,
// ECB, CBC, OCB3, etc. do.
derive_dec_keys(ctx, &sks, num_rounds)
ctx._num_rounds = num_rounds
mem.zero_explicit(&sks, size_of(sks))
}

View File

@@ -110,7 +110,7 @@ ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
if len(b) != 32 {
intrinsics.trap()
}
b_ := transmute(^[32]byte)(raw_data(b))
b_ := (^[32]byte)(raw_data(b))
// Do the work in a scratch element, so that ge is unchanged on
// failure.
@@ -169,7 +169,7 @@ ge_bytes :: proc "contextless" (ge: ^Group_Element, dst: []byte) {
if len(dst) != 32 {
intrinsics.trap()
}
dst_ := transmute(^[32]byte)(raw_data(dst))
dst_ := (^[32]byte)(raw_data(dst))
// Convert the element to affine (x, y) representation.
x, y, z_inv: field.Tight_Field_Element = ---, ---, ---

View File

@@ -28,7 +28,7 @@ sc_set_bytes :: proc "contextless" (sc: ^Scalar, b: []byte) -> bool {
if len(b) != 32 {
intrinsics.trap()
}
b_ := transmute(^[32]byte)(raw_data(b))
b_ := (^[32]byte)(raw_data(b))
return field.fe_from_bytes(sc, b_)
}
@@ -36,7 +36,7 @@ sc_set_bytes_rfc8032 :: proc "contextless" (sc: ^Scalar, b: []byte) {
if len(b) != 32 {
intrinsics.trap()
}
b_ := transmute(^[32]byte)(raw_data(b))
b_ := (^[32]byte)(raw_data(b))
field.fe_from_bytes_rfc8032(sc, b_)
}

View File

@@ -6,13 +6,13 @@ import "core:mem"
fe_relax_cast :: #force_inline proc "contextless" (
arg1: ^Tight_Field_Element,
) -> ^Loose_Field_Element {
return transmute(^Loose_Field_Element)(arg1)
return (^Loose_Field_Element)(arg1)
}
fe_tighten_cast :: #force_inline proc "contextless" (
arg1: ^Loose_Field_Element,
) -> ^Tight_Field_Element {
return transmute(^Tight_Field_Element)(arg1)
return (^Tight_Field_Element)(arg1)
}
fe_clear :: proc "contextless" (

View File

@@ -7,13 +7,13 @@ import "core:mem"
fe_relax_cast :: #force_inline proc "contextless" (
arg1: ^Tight_Field_Element,
) -> ^Loose_Field_Element {
return transmute(^Loose_Field_Element)(arg1)
return (^Loose_Field_Element)(arg1)
}
fe_tighten_cast :: #force_inline proc "contextless" (
arg1: ^Loose_Field_Element,
) -> ^Tight_Field_Element {
return transmute(^Tight_Field_Element)(arg1)
return (^Tight_Field_Element)(arg1)
}
fe_from_bytes :: #force_inline proc "contextless" (

21
core/crypto/aes/aes.odin Normal file
View File

@@ -0,0 +1,21 @@
/*
package aes implements the AES block cipher and some common modes.
See:
- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.197-upd1.pdf
- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf
- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf
*/
package aes
import "core:crypto/_aes"
// KEY_SIZE_128 is the AES-128 key size in bytes.
KEY_SIZE_128 :: _aes.KEY_SIZE_128
// KEY_SIZE_192 is the AES-192 key size in bytes.
KEY_SIZE_192 :: _aes.KEY_SIZE_192
// KEY_SIZE_256 is the AES-256 key size in bytes.
KEY_SIZE_256 :: _aes.KEY_SIZE_256
// BLOCK_SIZE is the AES block size in bytes.
BLOCK_SIZE :: _aes.BLOCK_SIZE

View File

@@ -0,0 +1,201 @@
package aes
import "core:bytes"
import "core:crypto/_aes/ct64"
import "core:encoding/endian"
import "core:math/bits"
import "core:mem"
// CTR_IV_SIZE is the size of the CTR mode IV in bytes.
CTR_IV_SIZE :: 16
// Context_CTR is a keyed AES-CTR instance.
Context_CTR :: struct {
_impl: Context_Impl,
_buffer: [BLOCK_SIZE]byte,
_off: int,
_ctr_hi: u64,
_ctr_lo: u64,
_is_initialized: bool,
}
// init_ctr initializes a Context_CTR with the provided key and IV.
init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := Implementation.Hardware) {
if len(iv) != CTR_IV_SIZE {
panic("crypto/aes: invalid CTR IV size")
}
init_impl(&ctx._impl, key, impl)
ctx._off = BLOCK_SIZE
ctx._ctr_hi = endian.unchecked_get_u64be(iv[0:])
ctx._ctr_lo = endian.unchecked_get_u64be(iv[8:])
ctx._is_initialized = true
}
// xor_bytes_ctr XORs each byte in src with bytes taken from the AES-CTR
// keystream, and writes the resulting output to dst. dst and src MUST
// alias exactly or not at all.
xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) {
assert(ctx._is_initialized)
src, dst := src, dst
if dst_len := len(dst); dst_len < len(src) {
src = src[:dst_len]
}
if bytes.alias_inexactly(dst, src) {
panic("crypto/aes: dst and src alias inexactly")
}
for remaining := len(src); remaining > 0; {
// Process multiple blocks at once
if ctx._off == BLOCK_SIZE {
if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 {
direct_bytes := nr_blocks * BLOCK_SIZE
ctr_blocks(ctx, dst, src, nr_blocks)
remaining -= direct_bytes
if remaining == 0 {
return
}
dst = dst[direct_bytes:]
src = src[direct_bytes:]
}
// If there is a partial block, generate and buffer 1 block
// worth of keystream.
ctr_blocks(ctx, ctx._buffer[:], nil, 1)
ctx._off = 0
}
// Process partial blocks from the buffered keystream.
to_xor := min(BLOCK_SIZE - ctx._off, remaining)
buffered_keystream := ctx._buffer[ctx._off:]
for i := 0; i < to_xor; i = i + 1 {
dst[i] = buffered_keystream[i] ~ src[i]
}
ctx._off += to_xor
dst = dst[to_xor:]
src = src[to_xor:]
remaining -= to_xor
}
}
// keystream_bytes_ctr fills dst with the raw AES-CTR keystream output.
keystream_bytes_ctr :: proc(ctx: ^Context_CTR, dst: []byte) {
assert(ctx._is_initialized)
dst := dst
for remaining := len(dst); remaining > 0; {
// Process multiple blocks at once
if ctx._off == BLOCK_SIZE {
if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 {
direct_bytes := nr_blocks * BLOCK_SIZE
ctr_blocks(ctx, dst, nil, nr_blocks)
remaining -= direct_bytes
if remaining == 0 {
return
}
dst = dst[direct_bytes:]
}
// If there is a partial block, generate and buffer 1 block
// worth of keystream.
ctr_blocks(ctx, ctx._buffer[:], nil, 1)
ctx._off = 0
}
// Process partial blocks from the buffered keystream.
to_copy := min(BLOCK_SIZE - ctx._off, remaining)
buffered_keystream := ctx._buffer[ctx._off:]
copy(dst[:to_copy], buffered_keystream[:to_copy])
ctx._off += to_copy
dst = dst[to_copy:]
remaining -= to_copy
}
}
// reset_ctr sanitizes the Context_CTR. The Context_CTR must be
// re-initialized to be used again.
reset_ctr :: proc "contextless" (ctx: ^Context_CTR) {
reset_impl(&ctx._impl)
ctx._off = 0
ctx._ctr_hi = 0
ctx._ctr_lo = 0
mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
ctx._is_initialized = false
}
@(private = "file")
ctr_blocks :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) #no_bounds_check {
// Use the optimized hardware implementation if available.
if _, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
ctr_blocks_hw(ctx, dst, src, nr_blocks)
return
}
// Portable implementation.
ct64_inc_ctr := #force_inline proc "contextless" (dst: []byte, hi, lo: u64) -> (u64, u64) {
endian.unchecked_put_u64be(dst[0:], hi)
endian.unchecked_put_u64be(dst[8:], lo)
hi, lo := hi, lo
carry: u64
lo, carry = bits.add_u64(lo, 1, 0)
hi, _ = bits.add_u64(hi, 0, carry)
return hi, lo
}
impl := &ctx._impl.(ct64.Context)
src, dst := src, dst
nr_blocks := nr_blocks
ctr_hi, ctr_lo := ctx._ctr_hi, ctx._ctr_lo
tmp: [ct64.STRIDE][BLOCK_SIZE]byte = ---
ctrs: [ct64.STRIDE][]byte = ---
for i in 0 ..< ct64.STRIDE {
ctrs[i] = tmp[i][:]
}
for nr_blocks > 0 {
n := min(ct64.STRIDE, nr_blocks)
blocks := ctrs[:n]
for i in 0 ..< n {
ctr_hi, ctr_lo = ct64_inc_ctr(blocks[i], ctr_hi, ctr_lo)
}
ct64.encrypt_blocks(impl, blocks, blocks)
xor_blocks(dst, src, blocks)
if src != nil {
src = src[n * BLOCK_SIZE:]
}
dst = dst[n * BLOCK_SIZE:]
nr_blocks -= n
}
// Write back the counter.
ctx._ctr_hi, ctx._ctr_lo = ctr_hi, ctr_lo
mem.zero_explicit(&tmp, size_of(tmp))
}
@(private)
xor_blocks :: #force_inline proc "contextless" (dst, src: []byte, blocks: [][]byte) {
// Note: This would be faster if `core:simd` was used, however if
// the performance of this implementation matters to the point where
// that optimization would be worth it, use chacha20poly1305, or a
// CPU that isn't e-waste.
#no_bounds_check {
if src != nil {
for i in 0 ..< len(blocks) {
off := i * BLOCK_SIZE
for j in 0 ..< BLOCK_SIZE {
blocks[i][j] ~= src[off + j]
}
}
}
for i in 0 ..< len(blocks) {
copy(dst[i * BLOCK_SIZE:], blocks[i])
}
}
}
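A minimal usage sketch of the CTR API above. It assumes the package is imported as core:crypto/aes (matching the core/crypto/aes path in this diff); the package name ctr_example and the zero key/IV are illustrative placeholders only, not real secrets.

package ctr_example

import "core:crypto/aes"
import "core:fmt"

main :: proc() {
	// Illustrative key/IV; real keys must come from a KDF or CSPRNG.
	key: [aes.KEY_SIZE_256]byte
	iv:  [aes.CTR_IV_SIZE]byte

	ctx: aes.Context_CTR
	aes.init_ctr(&ctx, key[:], iv[:])
	defer aes.reset_ctr(&ctx)

	msg := "hello, odin"
	plaintext := transmute([]byte)msg
	ciphertext := make([]byte, len(plaintext))
	defer delete(ciphertext)

	// dst and src must alias exactly or not at all.
	aes.xor_bytes_ctr(&ctx, ciphertext, plaintext)
	fmt.printf("%x\n", ciphertext)
}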

View File

@@ -0,0 +1,151 @@
//+build amd64
package aes
import "base:intrinsics"
import "core:crypto/_aes"
import "core:math/bits"
import "core:mem"
import "core:simd/x86"
@(private)
CTR_STRIDE_HW :: 4
@(private)
CTR_STRIDE_BYTES_HW :: CTR_STRIDE_HW * BLOCK_SIZE
@(private, enable_target_feature = "sse2,aes")
ctr_blocks_hw :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) #no_bounds_check {
hw_ctx := ctx._impl.(Context_Impl_Hardware)
sks: [15]x86.__m128i = ---
for i in 0 ..= hw_ctx._num_rounds {
sks[i] = intrinsics.unaligned_load((^x86.__m128i)(&hw_ctx._sk_exp_enc[i]))
}
hw_inc_ctr := #force_inline proc "contextless" (hi, lo: u64) -> (x86.__m128i, u64, u64) {
ret := x86.__m128i{
i64(intrinsics.byte_swap(hi)),
i64(intrinsics.byte_swap(lo)),
}
hi, lo := hi, lo
carry: u64
lo, carry = bits.add_u64(lo, 1, 0)
hi, _ = bits.add_u64(hi, 0, carry)
return ret, hi, lo
}
// The latency of AESENC depends on mfg and microarchitecture:
// - 7 -> up to Broadwell
// - 4 -> AMD and Skylake - Cascade Lake
// - 3 -> Ice Lake and newer
//
// This implementation does 4 blocks at once, since performance
// should be "adequate" across most CPUs.
src, dst := src, dst
nr_blocks := nr_blocks
ctr_hi, ctr_lo := ctx._ctr_hi, ctx._ctr_lo
blks: [CTR_STRIDE_HW]x86.__m128i = ---
for nr_blocks >= CTR_STRIDE_HW {
#unroll for i in 0..< CTR_STRIDE_HW {
blks[i], ctr_hi, ctr_lo = hw_inc_ctr(ctr_hi, ctr_lo)
}
#unroll for i in 0 ..< CTR_STRIDE_HW {
blks[i] = x86._mm_xor_si128(blks[i], sks[0])
}
#unroll for i in 1 ..= 9 {
#unroll for j in 0 ..< CTR_STRIDE_HW {
blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
}
}
switch hw_ctx._num_rounds {
case _aes.ROUNDS_128:
#unroll for i in 0 ..< CTR_STRIDE_HW {
blks[i] = x86._mm_aesenclast_si128(blks[i], sks[10])
}
case _aes.ROUNDS_192:
#unroll for i in 10 ..= 11 {
#unroll for j in 0 ..< CTR_STRIDE_HW {
blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
}
}
#unroll for i in 0 ..< CTR_STRIDE_HW {
blks[i] = x86._mm_aesenclast_si128(blks[i], sks[12])
}
case _aes.ROUNDS_256:
#unroll for i in 10 ..= 13 {
#unroll for j in 0 ..< CTR_STRIDE_HW {
blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
}
}
#unroll for i in 0 ..< CTR_STRIDE_HW {
blks[i] = x86._mm_aesenclast_si128(blks[i], sks[14])
}
}
xor_blocks_hw(dst, src, blks[:])
if src != nil {
src = src[CTR_STRIDE_BYTES_HW:]
}
dst = dst[CTR_STRIDE_BYTES_HW:]
nr_blocks -= CTR_STRIDE_HW
}
// Handle the remainder.
for nr_blocks > 0 {
blks[0], ctr_hi, ctr_lo = hw_inc_ctr(ctr_hi, ctr_lo)
blks[0] = x86._mm_xor_si128(blks[0], sks[0])
#unroll for i in 1 ..= 9 {
blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
}
switch hw_ctx._num_rounds {
case _aes.ROUNDS_128:
blks[0] = x86._mm_aesenclast_si128(blks[0], sks[10])
case _aes.ROUNDS_192:
#unroll for i in 10 ..= 11 {
blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
}
blks[0] = x86._mm_aesenclast_si128(blks[0], sks[12])
case _aes.ROUNDS_256:
#unroll for i in 10 ..= 13 {
blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
}
blks[0] = x86._mm_aesenclast_si128(blks[0], sks[14])
}
xor_blocks_hw(dst, src, blks[:1])
if src != nil {
src = src[BLOCK_SIZE:]
}
dst = dst[BLOCK_SIZE:]
nr_blocks -= 1
}
// Write back the counter.
ctx._ctr_hi, ctx._ctr_lo = ctr_hi, ctr_lo
mem.zero_explicit(&blks, size_of(blks))
mem.zero_explicit(&sks, size_of(sks))
}
@(private, enable_target_feature = "sse2")
xor_blocks_hw :: proc(dst, src: []byte, blocks: []x86.__m128i) {
#no_bounds_check {
if src != nil {
for i in 0 ..< len(blocks) {
off := i * BLOCK_SIZE
tmp := intrinsics.unaligned_load((^x86.__m128i)(raw_data(src[off:])))
blocks[i] = x86._mm_xor_si128(blocks[i], tmp)
}
}
for i in 0 ..< len(blocks) {
intrinsics.unaligned_store((^x86.__m128i)(raw_data(dst[i * BLOCK_SIZE:])), blocks[i])
}
}
}

View File

@@ -0,0 +1,57 @@
package aes
import "core:crypto/_aes/ct64"
// Context_ECB is a keyed AES-ECB instance.
//
// WARNING: Using ECB mode is strongly discouraged unless it is being
// used to implement higher level constructs.
Context_ECB :: struct {
_impl: Context_Impl,
_is_initialized: bool,
}
// init_ecb initializes a Context_ECB with the provided key.
init_ecb :: proc(ctx: ^Context_ECB, key: []byte, impl := Implementation.Hardware) {
init_impl(&ctx._impl, key, impl)
ctx._is_initialized = true
}
// encrypt_ecb encrypts the BLOCK_SIZE buffer src, and writes the result to dst.
encrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
assert(ctx._is_initialized)
if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE {
panic("crypto/aes: invalid buffer size(s)")
}
switch &impl in ctx._impl {
case ct64.Context:
ct64.encrypt_block(&impl, dst, src)
case Context_Impl_Hardware:
encrypt_block_hw(&impl, dst, src)
}
}
// decrypt_ecb decrypts the BLOCK_SIZE buffer src, and writes the result to dst.
decrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
assert(ctx._is_initialized)
if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE {
panic("crypto/aes: invalid buffer size(s)")
}
switch &impl in ctx._impl {
case ct64.Context:
ct64.decrypt_block(&impl, dst, src)
case Context_Impl_Hardware:
decrypt_block_hw(&impl, dst, src)
}
}
// reset_ecb sanitizes the Context_ECB. The Context_ECB must be
// re-initialized to be used again.
reset_ecb :: proc "contextless" (ctx: ^Context_ECB) {
reset_impl(&ctx._impl)
ctx._is_initialized = false
}
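A minimal sketch of the ECB API, again assuming a core:crypto/aes import; it round-trips a single block, which is all encrypt_ecb/decrypt_ecb operate on, and should be read with the warning above that ECB is discouraged outside of building higher-level constructs.

package ecb_example

import "core:crypto/aes"

main :: proc() {
	// Zero key and block purely for illustration.
	key: [aes.KEY_SIZE_128]byte
	ctx: aes.Context_ECB
	aes.init_ecb(&ctx, key[:])
	defer aes.reset_ecb(&ctx)

	src, dst, round_trip: [aes.BLOCK_SIZE]byte
	aes.encrypt_ecb(&ctx, dst[:], src[:])
	aes.decrypt_ecb(&ctx, round_trip[:], dst[:])
	assert(round_trip == src)
}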

View File

@@ -0,0 +1,58 @@
//+build amd64
package aes
import "base:intrinsics"
import "core:crypto/_aes"
import "core:simd/x86"
@(private, enable_target_feature = "sse2,aes")
encrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
blk := intrinsics.unaligned_load((^x86.__m128i)(raw_data(src)))
blk = x86._mm_xor_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[0])))
#unroll for i in 1 ..= 9 {
blk = x86._mm_aesenc_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[i])))
}
switch ctx._num_rounds {
case _aes.ROUNDS_128:
blk = x86._mm_aesenclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[10])))
case _aes.ROUNDS_192:
#unroll for i in 10 ..= 11 {
blk = x86._mm_aesenc_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[i])))
}
blk = x86._mm_aesenclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[12])))
case _aes.ROUNDS_256:
#unroll for i in 10 ..= 13 {
blk = x86._mm_aesenc_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[i])))
}
blk = x86._mm_aesenclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[14])))
}
intrinsics.unaligned_store((^x86.__m128i)(raw_data(dst)), blk)
}
@(private, enable_target_feature = "sse2,aes")
decrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
blk := intrinsics.unaligned_load((^x86.__m128i)(raw_data(src)))
blk = x86._mm_xor_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[0])))
#unroll for i in 1 ..= 9 {
blk = x86._mm_aesdec_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[i])))
}
switch ctx._num_rounds {
case _aes.ROUNDS_128:
blk = x86._mm_aesdeclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[10])))
case _aes.ROUNDS_192:
#unroll for i in 10 ..= 11 {
blk = x86._mm_aesdec_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[i])))
}
blk = x86._mm_aesdeclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[12])))
case _aes.ROUNDS_256:
#unroll for i in 10 ..= 13 {
blk = x86._mm_aesdec_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[i])))
}
blk = x86._mm_aesdeclast_si128(blk, intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_dec[14])))
}
intrinsics.unaligned_store((^x86.__m128i)(raw_data(dst)), blk)
}

View File

@@ -0,0 +1,269 @@
package aes
import "core:bytes"
import "core:crypto"
import "core:crypto/_aes"
import "core:crypto/_aes/ct64"
import "core:encoding/endian"
import "core:mem"
// GCM_NONCE_SIZE is the default size of the GCM nonce in bytes.
GCM_NONCE_SIZE :: 12
// GCM_NONCE_SIZE_MAX is the maximum size of the GCM nonce in bytes.
GCM_NONCE_SIZE_MAX :: 0x2000000000000000 // floor((2^64 - 1) / 8) bits
// GCM_TAG_SIZE is the size of a GCM tag in bytes.
GCM_TAG_SIZE :: _aes.GHASH_TAG_SIZE
@(private)
GCM_A_MAX :: max(u64) / 8 // 2^64 - 1 bits -> bytes
@(private)
GCM_P_MAX :: 0xfffffffe0 // 2^39 - 256 bits -> bytes
// Context_GCM is a keyed AES-GCM instance.
Context_GCM :: struct {
_impl: Context_Impl,
_is_initialized: bool,
}
// init_gcm initializes a Context_GCM with the provided key.
init_gcm :: proc(ctx: ^Context_GCM, key: []byte, impl := Implementation.Hardware) {
init_impl(&ctx._impl, key, impl)
ctx._is_initialized = true
}
// seal_gcm encrypts the plaintext and authenticates the aad and ciphertext,
// with the provided Context_GCM and nonce, and stores the output in dst and tag.
//
// dst and plaintext MUST alias exactly or not at all.
seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, nonce, aad, plaintext: []byte) {
assert(ctx._is_initialized)
gcm_validate_common_slice_sizes(tag, nonce, aad, plaintext)
if len(dst) != len(plaintext) {
panic("crypto/aes: invalid destination ciphertext size")
}
if bytes.alias_inexactly(dst, plaintext) {
panic("crypto/aes: dst and plaintext alias inexactly")
}
if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
gcm_seal_hw(&impl, dst, tag, nonce, aad, plaintext)
return
}
h: [_aes.GHASH_KEY_SIZE]byte
j0: [_aes.GHASH_BLOCK_SIZE]byte
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
s: [_aes.GHASH_TAG_SIZE]byte
init_ghash_ct64(ctx, &h, &j0, &j0_enc, nonce)
// Note: Our GHASH implementation handles appending padding.
ct64.ghash(s[:], h[:], aad)
gctr_ct64(ctx, dst, &s, plaintext, &h, &j0, true)
final_ghash_ct64(&s, &h, &j0_enc, len(aad), len(plaintext))
copy(tag, s[:])
mem.zero_explicit(&h, len(h))
mem.zero_explicit(&j0, len(j0))
mem.zero_explicit(&j0_enc, len(j0_enc))
}
// open_gcm authenticates the aad and ciphertext, and decrypts the ciphertext,
// with the provided Context_GCM, nonce, and tag, and stores the output in dst,
// returning true iff the authentication was successful. If authentication
// fails, the destination buffer will be zeroed.
//
// dst and ciphertext MUST alias exactly or not at all.
open_gcm :: proc(ctx: ^Context_GCM, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
assert(ctx._is_initialized)
gcm_validate_common_slice_sizes(tag, nonce, aad, ciphertext)
if len(dst) != len(ciphertext) {
panic("crypto/aes: invalid destination plaintext size")
}
if bytes.alias_inexactly(dst, ciphertext) {
panic("crypto/aes: dst and ciphertext alias inexactly")
}
if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
return gcm_open_hw(&impl, dst, nonce, aad, ciphertext, tag)
}
h: [_aes.GHASH_KEY_SIZE]byte
j0: [_aes.GHASH_BLOCK_SIZE]byte
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
s: [_aes.GHASH_TAG_SIZE]byte
init_ghash_ct64(ctx, &h, &j0, &j0_enc, nonce)
ct64.ghash(s[:], h[:], aad)
gctr_ct64(ctx, dst, &s, ciphertext, &h, &j0, false)
final_ghash_ct64(&s, &h, &j0_enc, len(aad), len(ciphertext))
ok := crypto.compare_constant_time(s[:], tag) == 1
if !ok {
mem.zero_explicit(raw_data(dst), len(dst))
}
mem.zero_explicit(&h, len(h))
mem.zero_explicit(&j0, len(j0))
mem.zero_explicit(&j0_enc, len(j0_enc))
mem.zero_explicit(&s, len(s))
return ok
}
// reset_gcm sanitizes the Context_GCM. The Context_GCM must be
// re-initialized to be used again.
reset_gcm :: proc "contextless" (ctx: ^Context_GCM) {
reset_impl(&ctx._impl)
ctx._is_initialized = false
}
@(private = "file")
gcm_validate_common_slice_sizes :: proc(tag, nonce, aad, text: []byte) {
if len(tag) != GCM_TAG_SIZE {
panic("crypto/aes: invalid GCM tag size")
}
// The specification supports nonces in the range [1, 2^64) bits.
if l := len(nonce); l == 0 || u64(l) >= GCM_NONCE_SIZE_MAX {
panic("crypto/aes: invalid GCM nonce size")
}
if aad_len := u64(len(aad)); aad_len > GCM_A_MAX {
panic("crypto/aes: oversized GCM aad")
}
if text_len := u64(len(text)); text_len > GCM_P_MAX {
panic("crypto/aes: oversized GCM src data")
}
}
@(private = "file")
init_ghash_ct64 :: proc(
ctx: ^Context_GCM,
h: ^[_aes.GHASH_KEY_SIZE]byte,
j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
j0_enc: ^[_aes.GHASH_BLOCK_SIZE]byte,
nonce: []byte,
) {
impl := &ctx._impl.(ct64.Context)
// 1. Let H = CIPH(k, 0^128)
ct64.encrypt_block(impl, h[:], h[:])
// Define a block, J0, as follows:
if l := len(nonce); l == GCM_NONCE_SIZE {
// if len(IV) = 96, then let J0 = IV || 0^31 || 1
copy(j0[:], nonce)
j0[_aes.GHASH_BLOCK_SIZE - 1] = 1
} else {
// If len(IV) != 96, then let s = 128 * ceil(len(IV)/128) - len(IV),
// and let J0 = GHASH_H(IV || 0^(s+64) || [len(IV)]_64).
ct64.ghash(j0[:], h[:], nonce)
tmp: [_aes.GHASH_BLOCK_SIZE]byte
endian.unchecked_put_u64be(tmp[8:], u64(l) * 8)
ct64.ghash(j0[:], h[:], tmp[:])
}
// ECB encrypt j0, so that we can just XOR with the tag. In theory
// this could be processed along with the final GCTR block, to
// potentially save a call to AES-ECB, but... just use AES-NI.
ct64.encrypt_block(impl, j0_enc[:], j0[:])
}
@(private = "file")
final_ghash_ct64 :: proc(
s: ^[_aes.GHASH_BLOCK_SIZE]byte,
h: ^[_aes.GHASH_KEY_SIZE]byte,
j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
a_len: int,
t_len: int,
) {
blk: [_aes.GHASH_BLOCK_SIZE]byte
endian.unchecked_put_u64be(blk[0:], u64(a_len) * 8)
endian.unchecked_put_u64be(blk[8:], u64(t_len) * 8)
ct64.ghash(s[:], h[:], blk[:])
for i in 0 ..< len(s) {
s[i] ~= j0[i]
}
}
@(private = "file")
gctr_ct64 :: proc(
ctx: ^Context_GCM,
dst: []byte,
s: ^[_aes.GHASH_BLOCK_SIZE]byte,
src: []byte,
h: ^[_aes.GHASH_KEY_SIZE]byte,
nonce: ^[_aes.GHASH_BLOCK_SIZE]byte,
is_seal: bool,
) #no_bounds_check {
ct64_inc_ctr32 := #force_inline proc "contextless" (dst: []byte, ctr: u32) -> u32 {
endian.unchecked_put_u32be(dst[12:], ctr)
return ctr + 1
}
// Setup the counter blocks.
tmp, tmp2: [ct64.STRIDE][BLOCK_SIZE]byte = ---, ---
ctrs, blks: [ct64.STRIDE][]byte = ---, ---
ctr := endian.unchecked_get_u32be(nonce[GCM_NONCE_SIZE:]) + 1
for i in 0 ..< ct64.STRIDE {
// Setup scratch space for the keystream.
blks[i] = tmp2[i][:]
// Pre-copy the IV to all the counter blocks.
ctrs[i] = tmp[i][:]
copy(ctrs[i], nonce[:GCM_NONCE_SIZE])
}
impl := &ctx._impl.(ct64.Context)
src, dst := src, dst
nr_blocks := len(src) / BLOCK_SIZE
for nr_blocks > 0 {
n := min(ct64.STRIDE, nr_blocks)
l := n * BLOCK_SIZE
if !is_seal {
ct64.ghash(s[:], h[:], src[:l])
}
// The keystream is written to a separate buffer, as we will
// reuse the first 96 bits of each counter.
for i in 0 ..< n {
ctr = ct64_inc_ctr32(ctrs[i], ctr)
}
ct64.encrypt_blocks(impl, blks[:n], ctrs[:n])
xor_blocks(dst, src, blks[:n])
if is_seal {
ct64.ghash(s[:], h[:], dst[:l])
}
src = src[l:]
dst = dst[l:]
nr_blocks -= n
}
if l := len(src); l > 0 {
if !is_seal {
ct64.ghash(s[:], h[:], src[:l])
}
ct64_inc_ctr32(ctrs[0], ctr)
ct64.encrypt_block(impl, ctrs[0], ctrs[0])
for i in 0 ..< l {
dst[i] = src[i] ~ ctrs[0][i]
}
if is_seal {
ct64.ghash(s[:], h[:], dst[:l])
}
}
mem.zero_explicit(&tmp, size_of(tmp))
mem.zero_explicit(&tmp2, size_of(tmp2))
}
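A minimal seal/open sketch of the GCM API above, assuming core:crypto and core:crypto/aes import paths; the package name gcm_example is hypothetical, crypto.rand_bytes is the entropy routine documented later in this diff, and nonce reuse under the same key must never happen with GCM.

package gcm_example

import "core:crypto"
import "core:crypto/aes"

main :: proc() {
	key:   [aes.KEY_SIZE_256]byte
	nonce: [aes.GCM_NONCE_SIZE]byte
	crypto.rand_bytes(key[:])   // panics if the target lacks an entropy source
	crypto.rand_bytes(nonce[:])

	ctx: aes.Context_GCM
	aes.init_gcm(&ctx, key[:])
	defer aes.reset_gcm(&ctx)

	msg := "attack at dawn"
	plaintext := transmute([]byte)msg
	aad: []byte // no additional authenticated data

	ciphertext := make([]byte, len(plaintext))
	defer delete(ciphertext)
	tag: [aes.GCM_TAG_SIZE]byte

	aes.seal_gcm(&ctx, ciphertext, tag[:], nonce[:], aad, plaintext)

	recovered := make([]byte, len(ciphertext))
	defer delete(recovered)
	ok := aes.open_gcm(&ctx, recovered, nonce[:], aad, ciphertext, tag[:])
	assert(ok) // authentication succeeded; recovered now holds the plaintext
}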

View File

@@ -0,0 +1,243 @@
//+build amd64
package aes
import "base:intrinsics"
import "core:crypto"
import "core:crypto/_aes"
import "core:crypto/_aes/hw_intel"
import "core:encoding/endian"
import "core:mem"
import "core:simd/x86"
@(private)
gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, nonce, aad, plaintext: []byte) {
h: [_aes.GHASH_KEY_SIZE]byte
j0: [_aes.GHASH_BLOCK_SIZE]byte
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
s: [_aes.GHASH_TAG_SIZE]byte
init_ghash_hw(ctx, &h, &j0, &j0_enc, nonce)
// Note: Our GHASH implementation handles appending padding.
hw_intel.ghash(s[:], h[:], aad)
gctr_hw(ctx, dst, &s, plaintext, &h, &j0, true)
final_ghash_hw(&s, &h, &j0_enc, len(aad), len(plaintext))
copy(tag, s[:])
mem.zero_explicit(&h, len(h))
mem.zero_explicit(&j0, len(j0))
mem.zero_explicit(&j0_enc, len(j0_enc))
}
@(private)
gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
h: [_aes.GHASH_KEY_SIZE]byte
j0: [_aes.GHASH_BLOCK_SIZE]byte
j0_enc: [_aes.GHASH_BLOCK_SIZE]byte
s: [_aes.GHASH_TAG_SIZE]byte
init_ghash_hw(ctx, &h, &j0, &j0_enc, nonce)
hw_intel.ghash(s[:], h[:], aad)
gctr_hw(ctx, dst, &s, ciphertext, &h, &j0, false)
final_ghash_hw(&s, &h, &j0_enc, len(aad), len(ciphertext))
ok := crypto.compare_constant_time(s[:], tag) == 1
if !ok {
mem.zero_explicit(raw_data(dst), len(dst))
}
mem.zero_explicit(&h, len(h))
mem.zero_explicit(&j0, len(j0))
mem.zero_explicit(&j0_enc, len(j0_enc))
mem.zero_explicit(&s, len(s))
return ok
}
@(private = "file")
init_ghash_hw :: proc(
ctx: ^Context_Impl_Hardware,
h: ^[_aes.GHASH_KEY_SIZE]byte,
j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
j0_enc: ^[_aes.GHASH_BLOCK_SIZE]byte,
nonce: []byte,
) {
// 1. Let H = CIPH(k, 0^128)
encrypt_block_hw(ctx, h[:], h[:])
// Define a block, J0, as follows:
if l := len(nonce); l == GCM_NONCE_SIZE {
// if len(IV) = 96, then let J0 = IV || 0^31 || 1
copy(j0[:], nonce)
j0[_aes.GHASH_BLOCK_SIZE - 1] = 1
} else {
// If len(IV) != 96, then let s = 128 * ceil(len(IV)/128) - len(IV),
// and let J0 = GHASH_H(IV || 0^(s+64) || [len(IV)]_64).
hw_intel.ghash(j0[:], h[:], nonce)
tmp: [_aes.GHASH_BLOCK_SIZE]byte
endian.unchecked_put_u64be(tmp[8:], u64(l) * 8)
hw_intel.ghash(j0[:], h[:], tmp[:])
}
// ECB encrypt j0, so that we can just XOR with the tag.
encrypt_block_hw(ctx, j0_enc[:], j0[:])
}
@(private = "file", enable_target_feature = "sse2")
final_ghash_hw :: proc(
s: ^[_aes.GHASH_BLOCK_SIZE]byte,
h: ^[_aes.GHASH_KEY_SIZE]byte,
j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
a_len: int,
t_len: int,
) {
blk: [_aes.GHASH_BLOCK_SIZE]byte
endian.unchecked_put_u64be(blk[0:], u64(a_len) * 8)
endian.unchecked_put_u64be(blk[8:], u64(t_len) * 8)
hw_intel.ghash(s[:], h[:], blk[:])
j0_vec := intrinsics.unaligned_load((^x86.__m128i)(j0))
s_vec := intrinsics.unaligned_load((^x86.__m128i)(s))
s_vec = x86._mm_xor_si128(s_vec, j0_vec)
intrinsics.unaligned_store((^x86.__m128i)(s), s_vec)
}
@(private = "file", enable_target_feature = "sse2,sse4.1,aes")
gctr_hw :: proc(
ctx: ^Context_Impl_Hardware,
dst: []byte,
s: ^[_aes.GHASH_BLOCK_SIZE]byte,
src: []byte,
h: ^[_aes.GHASH_KEY_SIZE]byte,
nonce: ^[_aes.GHASH_BLOCK_SIZE]byte,
is_seal: bool,
) #no_bounds_check {
sks: [15]x86.__m128i = ---
for i in 0 ..= ctx._num_rounds {
sks[i] = intrinsics.unaligned_load((^x86.__m128i)(&ctx._sk_exp_enc[i]))
}
// Setup the counter block
ctr_blk := intrinsics.unaligned_load((^x86.__m128i)(nonce))
ctr := endian.unchecked_get_u32be(nonce[GCM_NONCE_SIZE:]) + 1
src, dst := src, dst
// Note: Instead of doing GHASH and CTR separately, it is more
// performant to interleave (stitch) the two operations together.
// This results in an unreadable mess, so we opt for simplicity
// as performance is adequate.
blks: [CTR_STRIDE_HW]x86.__m128i = ---
nr_blocks := len(src) / BLOCK_SIZE
for nr_blocks >= CTR_STRIDE_HW {
if !is_seal {
hw_intel.ghash(s[:], h[:], src[:CTR_STRIDE_BYTES_HW])
}
#unroll for i in 0 ..< CTR_STRIDE_HW {
blks[i], ctr = hw_inc_ctr32(&ctr_blk, ctr)
}
#unroll for i in 0 ..< CTR_STRIDE_HW {
blks[i] = x86._mm_xor_si128(blks[i], sks[0])
}
#unroll for i in 1 ..= 9 {
#unroll for j in 0 ..< CTR_STRIDE_HW {
blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
}
}
switch ctx._num_rounds {
case _aes.ROUNDS_128:
#unroll for i in 0 ..< CTR_STRIDE_HW {
blks[i] = x86._mm_aesenclast_si128(blks[i], sks[10])
}
case _aes.ROUNDS_192:
#unroll for i in 10 ..= 11 {
#unroll for j in 0 ..< CTR_STRIDE_HW {
blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
}
}
#unroll for i in 0 ..< CTR_STRIDE_HW {
blks[i] = x86._mm_aesenclast_si128(blks[i], sks[12])
}
case _aes.ROUNDS_256:
#unroll for i in 10 ..= 13 {
#unroll for j in 0 ..< CTR_STRIDE_HW {
blks[j] = x86._mm_aesenc_si128(blks[j], sks[i])
}
}
#unroll for i in 0 ..< CTR_STRIDE_HW {
blks[i] = x86._mm_aesenclast_si128(blks[i], sks[14])
}
}
xor_blocks_hw(dst, src, blks[:])
if is_seal {
hw_intel.ghash(s[:], h[:], dst[:CTR_STRIDE_BYTES_HW])
}
src = src[CTR_STRIDE_BYTES_HW:]
dst = dst[CTR_STRIDE_BYTES_HW:]
nr_blocks -= CTR_STRIDE_HW
}
// Handle the remainder.
for n := len(src); n > 0; {
l := min(n, BLOCK_SIZE)
if !is_seal {
hw_intel.ghash(s[:], h[:], src[:l])
}
blks[0], ctr = hw_inc_ctr32(&ctr_blk, ctr)
blks[0] = x86._mm_xor_si128(blks[0], sks[0])
#unroll for i in 1 ..= 9 {
blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
}
switch ctx._num_rounds {
case _aes.ROUNDS_128:
blks[0] = x86._mm_aesenclast_si128(blks[0], sks[10])
case _aes.ROUNDS_192:
#unroll for i in 10 ..= 11 {
blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
}
blks[0] = x86._mm_aesenclast_si128(blks[0], sks[12])
case _aes.ROUNDS_256:
#unroll for i in 10 ..= 13 {
blks[0] = x86._mm_aesenc_si128(blks[0], sks[i])
}
blks[0] = x86._mm_aesenclast_si128(blks[0], sks[14])
}
if l == BLOCK_SIZE {
xor_blocks_hw(dst, src, blks[:1])
} else {
blk: [BLOCK_SIZE]byte
copy(blk[:], src)
xor_blocks_hw(blk[:], blk[:], blks[:1])
copy(dst, blk[:l])
}
if is_seal {
hw_intel.ghash(s[:], h[:], dst[:l])
}
dst = dst[l:]
src = src[l:]
n -= l
}
mem.zero_explicit(&blks, size_of(blks))
mem.zero_explicit(&sks, size_of(sks))
}
// BUG: Sticking this in gctr_hw (like the other implementations) crashes
// the compiler.
//
// src/check_expr.cpp(7892): Assertion Failure: `c->curr_proc_decl->entity`
@(private = "file", enable_target_feature = "sse4.1")
hw_inc_ctr32 :: #force_inline proc "contextless" (src: ^x86.__m128i, ctr: u32) -> (x86.__m128i, u32) {
ret := x86._mm_insert_epi32(src^, i32(intrinsics.byte_swap(ctr)), 3)
return ret, ctr + 1
}

View File

@@ -0,0 +1,41 @@
package aes
import "core:crypto/_aes/ct64"
import "core:mem"
import "core:reflect"
@(private)
Context_Impl :: union {
ct64.Context,
Context_Impl_Hardware,
}
// Implementation is an AES implementation. Most callers will not need
// to use this as the package will automatically select the most performant
// implementation available (See `is_hardware_accelerated()`).
Implementation :: enum {
Portable,
Hardware,
}
@(private)
init_impl :: proc(ctx: ^Context_Impl, key: []byte, impl: Implementation) {
impl := impl
if !is_hardware_accelerated() {
impl = .Portable
}
switch impl {
case .Portable:
reflect.set_union_variant_typeid(ctx^, typeid_of(ct64.Context))
ct64.init(&ctx.(ct64.Context), key)
case .Hardware:
reflect.set_union_variant_typeid(ctx^, typeid_of(Context_Impl_Hardware))
init_impl_hw(&ctx.(Context_Impl_Hardware), key)
}
}
@(private)
reset_impl :: proc "contextless" (ctx: ^Context_Impl) {
mem.zero_explicit(ctx, size_of(Context_Impl))
}
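A small sketch of forcing a specific implementation. Per init_impl above, the init_* procedures default to .Hardware and silently fall back to the portable ct64 code when hardware support is absent; the package name impl_example is hypothetical.

package impl_example

import "core:crypto/aes"
import "core:fmt"

main :: proc() {
	fmt.println("AES-NI available:", aes.is_hardware_accelerated())

	key: [aes.KEY_SIZE_128]byte
	ctx: aes.Context_ECB
	// Explicitly request the portable (ct64) implementation.
	aes.init_ecb(&ctx, key[:], .Portable)
	defer aes.reset_ecb(&ctx)
}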

View File

@@ -0,0 +1,44 @@
//+build !amd64
package aes
@(private = "file")
ERR_HW_NOT_SUPPORTED :: "crypto/aes: hardware implementation unsupported"
// is_hardware_accelerated returns true iff hardware accelerated AES
// is supported.
is_hardware_accelerated :: proc "contextless" () -> bool {
return false
}
@(private)
Context_Impl_Hardware :: struct {}
@(private)
init_impl_hw :: proc(ctx: ^Context_Impl_Hardware, key: []byte) {
panic(ERR_HW_NOT_SUPPORTED)
}
@(private)
encrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
panic(ERR_HW_NOT_SUPPORTED)
}
@(private)
decrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
panic(ERR_HW_NOT_SUPPORTED)
}
@(private)
ctr_blocks_hw :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) {
panic(ERR_HW_NOT_SUPPORTED)
}
@(private)
gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, nonce, aad, plaintext: []byte) {
panic(ERR_HW_NOT_SUPPORTED)
}
@(private)
gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
panic(ERR_HW_NOT_SUPPORTED)
}

View File

@@ -0,0 +1,18 @@
//+build amd64
package aes
import "core:crypto/_aes/hw_intel"
// is_hardware_accelerated returns true iff hardware accelerated AES
// is supported.
is_hardware_accelerated :: proc "contextless" () -> bool {
return hw_intel.is_supported()
}
@(private)
Context_Impl_Hardware :: hw_intel.Context
@(private, enable_target_feature = "sse2,aes")
init_impl_hw :: proc(ctx: ^Context_Impl_Hardware, key: []byte) {
hw_intel.init(ctx, key)
}

View File

@@ -7,6 +7,7 @@ See:
*/
package chacha20
import "core:bytes"
import "core:encoding/endian"
import "core:math/bits"
import "core:mem"
@@ -121,14 +122,15 @@ seek :: proc(ctx: ^Context, block_nr: u64) {
xor_bytes :: proc(ctx: ^Context, dst, src: []byte) {
assert(ctx._is_initialized)
// TODO: Enforcing that dst and src alias exactly or not at all
// is a good idea, though odd aliasing should be extremely uncommon.
src, dst := src, dst
if dst_len := len(dst); dst_len < len(src) {
src = src[:dst_len]
}
if bytes.alias_inexactly(dst, src) {
panic("crypto/chacha20: dst and src alias inexactly")
}
for remaining := len(src); remaining > 0; {
// Process multiple blocks at once
if ctx._off == _BLOCK_SIZE {

View File

@@ -4,6 +4,7 @@ helper routines.
*/
package crypto
import "base:runtime"
import "core:mem"
// compare_constant_time returns 1 iff a and b are equal, 0 otherwise.
@@ -49,6 +50,9 @@ compare_byte_ptrs_constant_time :: proc "contextless" (a, b: ^byte, n: int) -> i
// the system entropy source. This routine will block if the system entropy
// source is not ready yet. All system entropy source failures are treated
// as catastrophic, resulting in a panic.
//
// Support for the system entropy source can be checked with the
// `HAS_RAND_BYTES` boolean constant.
rand_bytes :: proc (dst: []byte) {
// zero-fill the buffer first
mem.zero_explicit(raw_data(dst), len(dst))
@@ -56,8 +60,27 @@ rand_bytes :: proc (dst: []byte) {
_rand_bytes(dst)
}
// has_rand_bytes returns true iff the target has support for accessing the
// system entropy source.
has_rand_bytes :: proc () -> bool {
return _has_rand_bytes()
// random_generator returns a `runtime.Random_Generator` backed by the
// system entropy source.
//
// Support for the system entropy source can be checked with the
// `HAS_RAND_BYTES` boolean constant.
random_generator :: proc() -> runtime.Random_Generator {
return {
procedure = proc(data: rawptr, mode: runtime.Random_Generator_Mode, p: []byte) {
switch mode {
case .Read:
rand_bytes(p)
case .Reset:
// do nothing
case .Query_Info:
if len(p) != size_of(runtime.Random_Generator_Query_Info) {
return
}
info := (^runtime.Random_Generator_Query_Info)(raw_data(p))
info^ += {.Uniform, .Cryptographic, .External_Entropy}
}
},
data = nil,
}
}
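A short sketch of consuming the entropy APIs above. crypto.rand_bytes is shown as documented; the context.random_generator assignment is an assumption about how a runtime.Random_Generator is typically installed on Odin's context, not something this diff demonstrates.

package rand_example

import "core:crypto"
import "core:fmt"

main :: proc() {
	// Fill a buffer directly from the system entropy source.
	// This panics if the target has no supported source (see HAS_RAND_BYTES).
	seed: [32]byte
	crypto.rand_bytes(seed[:])
	fmt.printf("%x\n", seed)

	// Assumption: assigning the generator to the context makes context-aware
	// randomness draw from the system entropy source.
	context.random_generator = crypto.random_generator()
}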

View File

@@ -61,7 +61,7 @@ init_256 :: proc(ctx: ^Context, key, domain_sep: []byte) {
update :: proc(ctx: ^Context, data: []byte) {
assert(ctx.is_initialized)
shake.write(transmute(^shake.Context)(ctx), data)
shake.write((^shake.Context)(ctx), data)
}
// final finalizes the Context, writes the tag to dst, and calls reset
@@ -75,7 +75,7 @@ final :: proc(ctx: ^Context, dst: []byte) {
panic("crypto/kmac: invalid KMAC tag_size, too short")
}
_sha3.final_cshake(transmute(^_sha3.Context)(ctx), dst)
_sha3.final_cshake((^_sha3.Context)(ctx), dst)
}
// clone clones the Context other into ctx.
@@ -84,7 +84,7 @@ clone :: proc(ctx, other: ^Context) {
return
}
shake.clone(transmute(^shake.Context)(ctx), transmute(^shake.Context)(other))
shake.clone((^shake.Context)(ctx), (^shake.Context)(other))
}
// reset sanitizes the Context. The Context must be re-initialized to
@@ -94,7 +94,7 @@ reset :: proc(ctx: ^Context) {
return
}
shake.reset(transmute(^shake.Context)(ctx))
shake.reset((^shake.Context)(ctx))
}
@(private)
@@ -107,7 +107,7 @@ _init_kmac :: proc(ctx: ^Context, key, s: []byte, sec_strength: int) {
panic("crypto/kmac: invalid KMAC key, too short")
}
ctx_ := transmute(^_sha3.Context)(ctx)
ctx_ := (^_sha3.Context)(ctx)
_sha3.init_cshake(ctx_, N_KMAC, s, sec_strength)
_sha3.bytepad(ctx_, [][]byte{key}, _sha3.rate_cshake(sec_strength))
}

View File

@@ -66,12 +66,12 @@ init_512 :: proc(ctx: ^Context) {
@(private)
_init :: proc(ctx: ^Context) {
ctx.dsbyte = _sha3.DS_KECCAK
_sha3.init(transmute(^_sha3.Context)(ctx))
_sha3.init((^_sha3.Context)(ctx))
}
// update adds more data to the Context.
update :: proc(ctx: ^Context, data: []byte) {
_sha3.update(transmute(^_sha3.Context)(ctx), data)
_sha3.update((^_sha3.Context)(ctx), data)
}
// final finalizes the Context, writes the digest to hash, and calls
@@ -80,16 +80,16 @@ update :: proc(ctx: ^Context, data: []byte) {
// Iff finalize_clone is set, final will work on a copy of the Context,
// which is useful for calculating rolling digests.
final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
_sha3.final(transmute(^_sha3.Context)(ctx), hash, finalize_clone)
_sha3.final((^_sha3.Context)(ctx), hash, finalize_clone)
}
// clone clones the Context other into ctx.
clone :: proc(ctx, other: ^Context) {
_sha3.clone(transmute(^_sha3.Context)(ctx), transmute(^_sha3.Context)(other))
_sha3.clone((^_sha3.Context)(ctx), (^_sha3.Context)(other))
}
// reset sanitizes the Context. The Context must be re-initialized to
// be used again.
reset :: proc(ctx: ^Context) {
_sha3.reset(transmute(^_sha3.Context)(ctx))
_sha3.reset((^_sha3.Context)(ctx))
}

View File

@@ -1,16 +1,15 @@
//+build freebsd, openbsd
//+build freebsd, openbsd, netbsd
package crypto
foreign import libc "system:c"
HAS_RAND_BYTES :: true
foreign libc {
arc4random_buf :: proc(buf: [^]byte, nbytes: uint) ---
}
@(private)
_rand_bytes :: proc(dst: []byte) {
arc4random_buf(raw_data(dst), len(dst))
}
_has_rand_bytes :: proc() -> bool {
return true
}

View File

@@ -5,14 +5,13 @@ import "core:fmt"
import CF "core:sys/darwin/CoreFoundation"
import Sec "core:sys/darwin/Security"
HAS_RAND_BYTES :: true
@(private)
_rand_bytes :: proc(dst: []byte) {
err := Sec.RandomCopyBytes(count=len(dst), bytes=raw_data(dst))
if err != .Success {
msg := CF.StringCopyToOdinString(Sec.CopyErrorMessageString(err))
panic(fmt.tprintf("crypto/rand_bytes: SecRandomCopyBytes returned non-zero result: %v %s", err, msg))
msg := CF.StringCopyToOdinString(Sec.CopyErrorMessageString(err))
fmt.panicf("crypto/rand_bytes: SecRandomCopyBytes returned non-zero result: %v %s", err, msg)
}
}
_has_rand_bytes :: proc() -> bool {
return true
}

View File

@@ -2,14 +2,14 @@
//+build !windows
//+build !openbsd
//+build !freebsd
//+build !netbsd
//+build !darwin
//+build !js
package crypto
HAS_RAND_BYTES :: false
@(private)
_rand_bytes :: proc(dst: []byte) {
unimplemented("crypto: rand_bytes not supported on this OS")
}
_has_rand_bytes :: proc() -> bool {
return false
}

View File

@@ -6,8 +6,12 @@ foreign odin_env {
env_rand_bytes :: proc "contextless" (buf: []byte) ---
}
HAS_RAND_BYTES :: true
@(private)
_MAX_PER_CALL_BYTES :: 65536 // 64kiB
@(private)
_rand_bytes :: proc(dst: []byte) {
dst := dst
@@ -18,7 +22,3 @@ _rand_bytes :: proc(dst: []byte) {
dst = dst[to_read:]
}
}
_has_rand_bytes :: proc() -> bool {
return true
}

View File

@@ -4,8 +4,12 @@ import "core:fmt"
import "core:sys/linux"
HAS_RAND_BYTES :: true
@(private)
_MAX_PER_CALL_BYTES :: 33554431 // 2^25 - 1
@(private)
_rand_bytes :: proc (dst: []byte) {
dst := dst
l := len(dst)
@@ -28,13 +32,9 @@ _rand_bytes :: proc (dst: []byte) {
// All other failures are things that should NEVER happen
// unless the kernel interface changes (ie: the Linux
// developers break userland).
panic(fmt.tprintf("crypto: getrandom failed: %v", errno))
fmt.panicf("crypto: getrandom failed: %v", errno)
}
l -= n_read
dst = dst[n_read:]
}
}
_has_rand_bytes :: proc() -> bool {
return true
}

View File

@@ -4,24 +4,23 @@ import win32 "core:sys/windows"
import "core:os"
import "core:fmt"
HAS_RAND_BYTES :: true
@(private)
_rand_bytes :: proc(dst: []byte) {
ret := (os.Errno)(win32.BCryptGenRandom(nil, raw_data(dst), u32(len(dst)), win32.BCRYPT_USE_SYSTEM_PREFERRED_RNG))
if ret != os.ERROR_NONE {
switch ret {
case os.ERROR_INVALID_HANDLE:
// The handle to the first parameter is invalid.
// This should not happen here, since we explicitly pass nil to it
panic("crypto: BCryptGenRandom Invalid handle for hAlgorithm")
case os.ERROR_INVALID_PARAMETER:
// One of the parameters was invalid
panic("crypto: BCryptGenRandom Invalid parameter")
case:
// Unknown error
panic(fmt.tprintf("crypto: BCryptGenRandom failed: %d\n", ret))
case os.ERROR_INVALID_HANDLE:
// The handle to the first parameter is invalid.
// This should not happen here, since we explicitly pass nil to it
panic("crypto: BCryptGenRandom Invalid handle for hAlgorithm")
case os.ERROR_INVALID_PARAMETER:
// One of the parameters was invalid
panic("crypto: BCryptGenRandom Invalid parameter")
case:
// Unknown error
fmt.panicf("crypto: BCryptGenRandom failed: %d\n", ret)
}
}
}
_has_rand_bytes :: proc() -> bool {
return true
}

View File

@@ -112,7 +112,7 @@ ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
return false
}
b_ := transmute(^[32]byte)(raw_data(b))
b_ := (^[32]byte)(raw_data(b))
s: field.Tight_Field_Element = ---
defer field.fe_clear(&s)
@@ -297,7 +297,7 @@ ge_bytes :: proc(ge: ^Group_Element, dst: []byte) {
// 2. Return the 32-byte little-endian encoding of s. More
// specifically, this is the encoding of the canonical
// representation of s as an integer between 0 and p-1, inclusive.
dst_ := transmute(^[32]byte)(raw_data(dst))
dst_ := (^[32]byte)(raw_data(dst))
field.fe_to_bytes(dst_, &tmp)
field.fe_clear_vec([]^field.Tight_Field_Element{&u1, &u2, &tmp, &z_inv, &ix0, &iy0, &x, &y})
@@ -417,7 +417,7 @@ ge_is_identity :: proc(ge: ^Group_Element) -> int {
@(private)
ge_map :: proc "contextless" (ge: ^Group_Element, b: []byte) {
b_ := transmute(^[32]byte)(raw_data(b))
b_ := (^[32]byte)(raw_data(b))
// The MAP function is defined on 32-byte strings as:
//

View File

@@ -46,7 +46,7 @@ sc_set_bytes_wide :: proc(sc: ^Scalar, b: []byte) {
panic("crypto/ristretto255: invalid wide input size")
}
b_ := transmute(^[WIDE_SCALAR_SIZE]byte)(raw_data(b))
b_ := (^[WIDE_SCALAR_SIZE]byte)(raw_data(b))
grp.sc_set_bytes_wide(sc, b_)
}

View File

@@ -68,12 +68,12 @@ init_512 :: proc(ctx: ^Context) {
@(private)
_init :: proc(ctx: ^Context) {
ctx.dsbyte = _sha3.DS_SHA3
_sha3.init(transmute(^_sha3.Context)(ctx))
_sha3.init((^_sha3.Context)(ctx))
}
// update adds more data to the Context.
update :: proc(ctx: ^Context, data: []byte) {
_sha3.update(transmute(^_sha3.Context)(ctx), data)
_sha3.update((^_sha3.Context)(ctx), data)
}
// final finalizes the Context, writes the digest to hash, and calls
@@ -82,16 +82,16 @@ update :: proc(ctx: ^Context, data: []byte) {
// Iff finalize_clone is set, final will work on a copy of the Context,
// which is useful for calculating rolling digests.
final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
_sha3.final(transmute(^_sha3.Context)(ctx), hash, finalize_clone)
_sha3.final((^_sha3.Context)(ctx), hash, finalize_clone)
}
// clone clones the Context other into ctx.
clone :: proc(ctx, other: ^Context) {
_sha3.clone(transmute(^_sha3.Context)(ctx), transmute(^_sha3.Context)(other))
_sha3.clone((^_sha3.Context)(ctx), (^_sha3.Context)(other))
}
// reset sanitizes the Context. The Context must be re-initialized to
// be used again.
reset :: proc(ctx: ^Context) {
_sha3.reset(transmute(^_sha3.Context)(ctx))
_sha3.reset((^_sha3.Context)(ctx))
}

Some files were not shown because too many files have changed in this diff.