Merge branch 'odin-lang:master' into master

2026-02-12 22:33:36 +00:00 · 2025-10-06 02:41:44 +01:00
parent dbbe96ae5c 0f97382fa3
commit 6de2d6e8ca
798 changed files with 86630 additions and 16140 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -6,7 +6,7 @@ jobs:
    name: NetBSD Build, Check, and Test
    runs-on: ubuntu-latest
    env:
-      PKGSRC_BRANCH: 2024Q3
+      PKGSRC_BRANCH: 2025Q2
    steps:
    - uses: actions/checkout@v4
    - name: Build, Check, and Test
@@ -30,13 +30,13 @@ jobs:
          gmake -C vendor/stb/src
          gmake -C vendor/cgltf/src
          gmake -C vendor/miniaudio/src
-          ./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_amd64
-          ./odin check examples/all -vet -strict-style -disallow-do -target:netbsd_arm64
-          ./odin check vendor/sdl3  -vet -strict-style -disallow-do -target:netbsd_amd64 -no-entry-point
-          ./odin check vendor/sdl3  -vet -strict-style -disallow-do -target:netbsd_arm64 -no-entry-point
-          ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
-          ./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
-          ./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+          ./odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:netbsd_amd64
+          ./odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:netbsd_arm64
+          ./odin check examples/all/sdl3 -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:netbsd_amd64 -no-entry-point
+          ./odin check examples/all/sdl3 -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:netbsd_arm64 -no-entry-point
+          ./odin test tests/core/normal.odin -file -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+          ./odin test tests/core/speed.odin -file -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+          ./odin test tests/vendor -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
          (cd tests/issues; ./run.sh)
          ./odin check tests/benchmark -vet -strict-style -no-entry-point

@@ -52,7 +52,7 @@ jobs:
        usesh: true
        copyback: false
        prepare: |
-          pkg install -y gmake git bash python3 libxml2 llvm17
+          pkg install -y gmake git bash python3 libxml2 llvm18
        run: |
          # `set -e` is needed for test failures to register. https://github.com/vmactions/freebsd-vm/issues/72
          set -e -x
@@ -63,11 +63,11 @@ jobs:
          gmake -C vendor/stb/src
          gmake -C vendor/cgltf/src
          gmake -C vendor/miniaudio/src
-          ./odin check examples/all -vet -strict-style -disallow-do -target:freebsd_amd64
-          ./odin check vendor/sdl3  -vet -strict-style -disallow-do -target:freebsd_amd64 -no-entry-point
-          ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
-          ./odin test tests/core/speed.odin -file -all-packages -vet -strict-style -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
-          ./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+          ./odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freebsd_amd64
+          ./odin check examples/all/sdl3 -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freebsd_amd64 -no-entry-point
+          ./odin test tests/core/normal.odin -file -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+          ./odin test tests/core/speed.odin -file -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -o:speed -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+          ./odin test tests/vendor -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
          (cd tests/issues; ./run.sh)
          ./odin check tests/benchmark -vet -strict-style -no-entry-point
  ci:
@@ -75,32 +75,35 @@ jobs:
      fail-fast: false
      matrix:
        # MacOS 13 runs on Intel, 14 runs on ARM
-        os: [ubuntu-latest, macos-13, macos-14]
+        os: [macos-14, ubuntu-latest]
    runs-on: ${{ matrix.os }}
-    name: ${{ matrix.os == 'macos-14' && 'MacOS ARM' || (matrix.os == 'macos-13' && 'MacOS Intel' || 'Ubuntu') }} Build, Check, and Test
+    name: ${{ matrix.os == 'macos-14' && 'MacOS ARM' || (matrix.os == 'macos-13' && 'MacOS Intel') || (matrix.os == 'ubuntu-latest' && 'Ubuntu') }} Build, Check, and Test
    timeout-minutes: 15
    steps:
-      - uses: actions/checkout@v4

-      - name: Download LLVM (Linux)
-        if: matrix.os == 'ubuntu-latest'
-        run: |
-          wget https://apt.llvm.org/llvm.sh
-          chmod +x llvm.sh
-          sudo ./llvm.sh 17
-          echo "/usr/lib/llvm-17/bin" >> $GITHUB_PATH
+      - uses: actions/checkout@v4

      - name: Download LLVM (MacOS Intel)
        if: matrix.os == 'macos-13'
        run: |
-          brew install llvm@17 lua@5.4
-          echo "/usr/local/opt/llvm@17/bin" >> $GITHUB_PATH
+          brew update
+          brew install llvm@20 lua@5.4 lld
+          echo "$(brew --prefix llvm@20)/bin" >> $GITHUB_PATH

      - name: Download LLVM (MacOS ARM)
        if: matrix.os == 'macos-14'
        run: |
-          brew install llvm@17 wasmtime lua@5.4
-          echo "/opt/homebrew/opt/llvm@17/bin" >> $GITHUB_PATH
+          brew update
+          brew install llvm@20 wasmtime lua@5.4 lld
+          echo "$(brew --prefix llvm@20)/bin" >> $GITHUB_PATH
+
+      - name: Download LLVM (Ubuntu)
+        if: matrix.os == 'ubuntu-latest'
+        run: |
+          wget https://apt.llvm.org/llvm.sh
+          chmod +x llvm.sh
+          sudo ./llvm.sh 20
+          echo "/usr/lib/llvm-20/bin" >> $GITHUB_PATH

      - name: Build Odin
        run: ./build_odin.sh release
@@ -120,57 +123,62 @@ jobs:
      - name: Odin run -debug
        run: ./odin run examples/demo -debug
      - name: Odin check examples/all
-        run: ./odin check examples/all -strict-style -vet -disallow-do
-      - name: Odin check vendor/sdl3
-        run: ./odin check vendor/sdl3  -strict-style -vet -disallow-do -no-entry-point
+        run: ./odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do
+      - name: Odin check examples/all/sdl3
+        run: ./odin check examples/all/sdl3 -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -no-entry-point
      - name: Normal Core library tests
-        run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+        run: ./odin test tests/core/normal.odin -file -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
      - name: Optimized Core library tests
-        run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+        run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
      - name: Vendor library tests
-        run: ./odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+        run: ./odin test tests/vendor -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
      - name: Internals tests
-        run: ./odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+        run: ./odin test tests/internal -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
      - name: GitHub Issue tests
        run: |
          cd tests/issues
          ./run.sh

-      - name: Check benchmarks
-        run: ./odin check tests/benchmark -vet -strict-style -no-entry-point
-      - name: Odin check examples/all for Linux i386
-        run: ./odin check examples/all -vet -strict-style -disallow-do -target:linux_i386
-        if: matrix.os == 'ubuntu-latest'
-      - name: Odin check examples/all for Linux arm64
-        run: ./odin check examples/all -vet -strict-style -disallow-do -target:linux_arm64
-        if: matrix.os == 'ubuntu-latest'
-      - name: Odin check examples/all for FreeBSD amd64
-        run: ./odin check examples/all -vet -strict-style -disallow-do -target:freebsd_amd64
-        if: matrix.os == 'ubuntu-latest'
-      - name: Odin check examples/all for OpenBSD amd64
-        run: ./odin check examples/all -vet -strict-style -disallow-do -target:openbsd_amd64
-        if: matrix.os == 'ubuntu-latest'
-
-      - name: Odin check vendor/sdl3 for Linux i386
-        run: ./odin check vendor/sdl3 -vet -strict-style -disallow-do -no-entry-point -target:linux_i386
-        if: matrix.os == 'ubuntu-latest'
-      - name: Odin check vendor/sdl3 for Linux arm64
-        run: ./odin check vendor/sdl3 -vet -strict-style -disallow-do -no-entry-point -target:linux_arm64
-        if: matrix.os == 'ubuntu-latest'
-      - name: Odin check vendor/sdl3 for FreeBSD amd64
-        run: ./odin check vendor/sdl3 -vet -strict-style -disallow-do -no-entry-point -target:freebsd_amd64
-        if: matrix.os == 'ubuntu-latest'
-      - name: Odin check vendor/sdl3 for OpenBSD amd64
-        run: ./odin check vendor/sdl3 -vet -strict-style -disallow-do -no-entry-point -target:openbsd_amd64
-        if: matrix.os == 'ubuntu-latest'
-
-
      - name: Run demo on WASI WASM32
        run: |
-          ./odin build examples/demo -target:wasi_wasm32 -vet -strict-style -disallow-do -out:demo.wasm
+          ./odin build examples/demo -target:wasi_wasm32 -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -out:demo
          wasmtime ./demo.wasm
        if: matrix.os == 'macos-14'

+      - name: Check benchmarks
+        run: ./odin check tests/benchmark -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -no-entry-point
+      - name: Odin check examples/all for Linux i386
+        if: matrix.os == 'ubuntu-latest'
+        run: ./odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_i386
+      - name: Odin check examples/all for Linux arm64
+        if: matrix.os == 'ubuntu-latest'
+        run: ./odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_arm64
+      - name: Odin check examples/all for FreeBSD amd64
+        if: matrix.os == 'ubuntu-latest'
+        run: ./odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freebsd_amd64
+      - name: Odin check examples/all for OpenBSD amd64
+        if: matrix.os == 'ubuntu-latest'
+        run: ./odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:openbsd_amd64
+      - name: Odin check examples/all for js_wasm32
+        if: matrix.os == 'ubuntu-latest'
+        run: ./odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -no-entry-point -target:js_wasm32
+      - name: Odin check examples/all for js_wasm64p32
+        if: matrix.os == 'ubuntu-latest'
+        run: ./odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -no-entry-point -target:js_wasm64p32
+
+      - name: Odin check examples/all/sdl3 for Linux i386
+        if: matrix.os == 'ubuntu-latest'
+        run: ./odin check examples/all/sdl3 -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -no-entry-point -target:linux_i386
+      - name: Odin check examples/all/sdl3 for Linux arm64
+        if: matrix.os == 'ubuntu-latest'
+        run: ./odin check examples/all/sdl3 -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -no-entry-point -target:linux_arm64
+      - name: Odin check examples/all/sdl3 for FreeBSD amd64
+        if: matrix.os == 'ubuntu-latest'
+        run: ./odin check examples/all/sdl3 -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -no-entry-point -target:freebsd_amd64
+      - name: Odin check examples/all/sdl3 for OpenBSD amd64
+        if: matrix.os == 'ubuntu-latest'
+        run: ./odin check examples/all/sdl3 -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -no-entry-point -target:openbsd_amd64
+
  build_windows:
    name: Windows Build, Check, and Test
    runs-on: windows-2022
@@ -200,38 +208,38 @@ jobs:
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          odin run examples/demo -debug -vet -strict-style -disallow-do
+          odin run examples/demo -debug -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do
      - name: Odin check examples/all
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          odin check examples/all -vet -strict-style -disallow-do
-      - name: Odin check vendor/sdl3
+          odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do
+      - name: Odin check examples/all/sdl3
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          odin check vendor/sdl3 -vet -strict-style -disallow-do -no-entry-point
+          odin check examples/all/sdl3 -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -no-entry-point
      - name: Core library tests
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+          odin test tests/core/normal.odin -file -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
      - name: Optimized core library tests
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+          odin test tests/core/speed.odin -o:speed -file -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
      - name: Vendor library tests
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
          copy vendor\lua\5.4\windows\*.dll .
-          odin test tests/vendor -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+          odin test tests/vendor -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
      - name: Odin internals tests
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true
+          odin test tests/internal -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -sanitize:address
      - name: Check issues
        shell: cmd
        run: |
@@ -249,12 +257,6 @@ jobs:
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
          cd tests\documentation
          call build.bat
-      - name: core:math/big tests
-        shell: cmd
-        run: |
-          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
-          cd tests\core\math\big
-          call build.bat
      - name: Odin check examples/all for Windows 32bits
        shell: cmd
        run: |
@@ -291,25 +293,25 @@ jobs:
          make -C vendor/miniaudio/src

      - name: Odin check examples/all
-        run: ./odin check examples/all -target:linux_riscv64 -vet -strict-style -disallow-do
+        run: ./odin check examples/all -target:linux_riscv64 -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do

-      - name: Odin check vendor/sdl3
-        run: ./odin check vendor/sdl3 -target:linux_riscv64 -vet -strict-style -disallow-do -no-entry-point
+      - name: Odin check examples/all/sdl3
+        run: ./odin check examples/all/sdl3 -target:linux_riscv64 -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -no-entry-point

      - name: Install riscv64 toolchain and qemu
        run: sudo apt-get install -y qemu-user qemu-user-static gcc-12-riscv64-linux-gnu libc6-riscv64-cross

      - name: Odin run
-        run: ./odin run examples/demo -vet -strict-style -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath
+        run: ./odin run examples/demo -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath

      - name: Odin run -debug
-        run: ./odin run examples/demo -debug -vet -strict-style -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath
+        run: ./odin run examples/demo -debug -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath

      - name: Normal Core library tests
-        run: ./odin test tests/core/normal.odin -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath
+        run: ./odin test tests/core/normal.odin -file -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath

      - name: Optimized Core library tests
-        run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath
+        run: ./odin test tests/core/speed.odin -o:speed -file -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath

      - name: Internals tests
-        run: ./odin test tests/internal -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath
+        run: ./odin test tests/internal -all-packages -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_riscv64 -extra-linker-flags:"-fuse-ld=/usr/bin/riscv64-linux-gnu-gcc-12 -static -Wl,-static" -no-rpath
--- a/.github/workflows/cover.yml
+++ b/.github/workflows/cover.yml
@@ -0,0 +1,60 @@
+name: Test Coverage
+on: [push, pull_request, workflow_dispatch]
+
+jobs:
+  build_linux_amd64:
+    runs-on: ubuntu-latest
+    name: Linux AMD64 Test Coverage
+    timeout-minutes: 60
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Download LLVM (Ubuntu)
+        if: runner.os == 'Linux' # this job defines no matrix, so a `matrix.os` check would always be false and skip the install
+        run: |
+          wget https://apt.llvm.org/llvm.sh
+          chmod +x llvm.sh
+          sudo ./llvm.sh 20
+          echo "/usr/lib/llvm-20/bin" >> $GITHUB_PATH
+
+      - name: Install kcov
+        run: |
+          sudo apt-get update
+          sudo apt-get install binutils-dev build-essential cmake libssl-dev libcurl4-openssl-dev libelf-dev libstdc++-12-dev zlib1g-dev libdw-dev libiberty-dev
+          git clone https://github.com/SimonKagstrom/kcov.git
+          mkdir kcov/build
+          cd kcov/build
+          cmake ..
+          sudo make
+          sudo make install
+          cd ../..
+          kcov --version
+
+      - name: Build Odin
+        run: ./build_odin.sh release
+
+      - name: Odin report
+        run: ./odin report
+
+      - name: Normal Core library tests
+        run: |
+          ./odin build tests/core/normal.odin -build-mode:test -debug -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_amd64
+          mkdir kcov-out
+          kcov --exclude-path=tests,/usr kcov-out ./normal.bin .
+
+      - name: Optimized Core library tests
+        run: |
+          ./odin build tests/core/speed.odin -build-mode:test -debug -file -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_amd64
+          kcov --exclude-path=tests,/usr kcov-out ./speed.bin .
+
+      - name: Internals tests
+        run: |
+          ./odin build tests/internal -build-mode:test -debug -all-packages -vet -strict-style -disallow-do -define:ODIN_TEST_FANCY=false -define:ODIN_TEST_FAIL_ON_BAD_MEMORY=true -target:linux_amd64
+          kcov --exclude-path=tests,/usr kcov-out ./internal .
+
+      - uses: codecov/codecov-action@v5
+        with:
+          name: Ubuntu Coverage # optional
+          token: ${{ secrets.CODECOV_TOKEN }}
+          verbose: true # optional (default = false)
+          directory: kcov-out/kcov-merged
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -47,20 +47,15 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
-      - uses: jirutka/setup-alpine@v1
-        with:
-          branch: v3.20
-      - name: (Linux) Download LLVM
+      - name: (Linux) Download LLVM and Build Odin
        run: |
-          apk add --no-cache \
-          musl-dev llvm18-dev clang18 git mold lz4 \
-          libxml2-static llvm18-static zlib-static zstd-static \
-          make
-        shell: alpine.sh --root {0}
-      - name: build odin
-        # NOTE: this build does slow compile times because of musl
-        run: ci/build_linux_static.sh
-        shell: alpine.sh {0}
+          docker run --rm -v "$PWD:/src" -w /src alpine sh -c '
+            apk add --no-cache \
+              musl-dev llvm20-dev clang20 git mold lz4 \
+              libxml2-static llvm20-static zlib-static zstd-static \
+              make &&
+            ./ci/build_linux_static.sh
+          '
      - name: Odin run
        run: ./odin run examples/demo
      - name: Copy artifacts
@@ -74,6 +69,7 @@ jobs:
          cp -r core $FILE
          cp -r vendor $FILE
          cp -r examples $FILE
+          ./ci/remove_windows_binaries.sh $FILE
          # Creating a tarball so executable permissions are retained, see https://github.com/actions/upload-artifact/issues/38
          tar -czvf dist.tar.gz $FILE
      - name: Odin run
@@ -85,16 +81,57 @@ jobs:
        with:
          name: linux_artifacts
          path: dist.tar.gz
+  build_linux_arm:
+    name: Linux ARM Build
+    if: github.repository == 'odin-lang/Odin'
+    runs-on: ubuntu-24.04-arm
+    steps:
+      - uses: actions/checkout@v4
+      - name: (Linux ARM) Download LLVM and Build Odin
+        run: |
+          docker run --rm -v "$PWD:/src" -w /src arm64v8/alpine sh -c '
+            apk add --no-cache \
+              musl-dev llvm20-dev clang20 git mold lz4 \
+              libxml2-static llvm20-static zlib-static zstd-static \
+              make &&
+            ./ci/build_linux_static.sh
+          '
+      - name: Odin run
+        run: ./odin run examples/demo
+      - name: Copy artifacts
+        run: |
+          FILE="odin-linux-arm64-nightly+$(date -I)"
+          mkdir $FILE
+          cp odin $FILE
+          cp LICENSE $FILE
+          cp -r shared $FILE
+          cp -r base $FILE
+          cp -r core $FILE
+          cp -r vendor $FILE
+          cp -r examples $FILE
+          ./ci/remove_windows_binaries.sh $FILE
+          # Creating a tarball so executable permissions are retained, see https://github.com/actions/upload-artifact/issues/38
+          tar -czvf dist.tar.gz $FILE
+      - name: Odin run
+        run: |
+          FILE="odin-linux-arm64-nightly+$(date -I)"
+          $FILE/odin run examples/demo
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: linux_arm_artifacts
+          path: dist.tar.gz
  build_macos:
    name: MacOS Build
    if: github.repository == 'odin-lang/Odin'
-    runs-on: macos-13
+    runs-on: macos-15-intel # Intel machine (plain `macos-14` is an Apple Silicon runner)
    steps:
      - uses: actions/checkout@v4
      - name: Download LLVM and setup PATH
        run: |
-          brew install llvm@18 dylibbundler
-          echo "/usr/local/opt/llvm@18/bin" >> $GITHUB_PATH
+          brew update
+          brew install llvm@20 dylibbundler lld@20
+
      - name: build odin
        # These -L makes the linker prioritize system libraries over LLVM libraries, this is mainly to
        # not link with libunwind bundled with LLVM but link with libunwind on the system.
@@ -110,6 +147,7 @@ jobs:
          cp -r core $FILE
          cp -r vendor $FILE
          cp -r examples $FILE
+          ./ci/remove_windows_binaries.sh $FILE
          dylibbundler -b -x $FILE/odin -d $FILE/libs -od -p @executable_path/libs
          # Creating a tarball so executable permissions are retained, see https://github.com/actions/upload-artifact/issues/38
          tar -czvf dist.tar.gz $FILE
@@ -130,8 +168,9 @@ jobs:
      - uses: actions/checkout@v4
      - name: Download LLVM and setup PATH
        run: |
-          brew install llvm@18 dylibbundler
-          echo "/opt/homebrew/opt/llvm@18/bin" >> $GITHUB_PATH
+          brew update
+          brew install llvm@20 dylibbundler lld@20
+
      - name: build odin
        # These -L makes the linker prioritize system libraries over LLVM libraries, this is mainly to
        # not link with libunwind bundled with LLVM but link with libunwind on the system.
@@ -147,6 +186,7 @@ jobs:
          cp -r core $FILE
          cp -r vendor $FILE
          cp -r examples $FILE
+          ./ci/remove_windows_binaries.sh $FILE
          dylibbundler -b -x $FILE/odin -d $FILE/libs -od -p @executable_path/libs
          # Creating a tarball so executable permissions are retained, see https://github.com/actions/upload-artifact/issues/38
          tar -czvf dist.tar.gz $FILE
@@ -161,7 +201,7 @@ jobs:
          path: dist.tar.gz
  upload_b2:
    runs-on: [ubuntu-latest]
-    needs: [build_windows, build_macos, build_macos_arm, build_linux]
+    needs: [build_windows, build_macos, build_macos_arm, build_linux, build_linux_arm]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
@@ -190,6 +230,12 @@ jobs:
          name: linux_artifacts
          path: linux_artifacts

+      - name: Download Ubuntu ARM artifacts
+        uses: actions/download-artifact@v4.1.7
+        with:
+          name: linux_arm_artifacts
+          path: linux_arm_artifacts
+
      - name: Download macOS artifacts
        uses: actions/download-artifact@v4.1.7
        with:
@@ -217,6 +263,7 @@ jobs:
          file linux_artifacts/dist.tar.gz
          python3 ci/nightly.py artifact windows-amd64 windows_artifacts/
          python3 ci/nightly.py artifact linux-amd64 linux_artifacts/dist.tar.gz
+          python3 ci/nightly.py artifact linux-arm64 linux_arm_artifacts/dist.tar.gz
          python3 ci/nightly.py artifact macos-amd64 macos_artifacts/dist.tar.gz
          python3 ci/nightly.py artifact macos-arm64 macos_arm_artifacts/dist.tar.gz
          python3 ci/nightly.py prune
--- a/.gitignore
+++ b/.gitignore
@@ -277,6 +277,7 @@ odin
 *.bin
 demo.bin
 libLLVM*.so*
+*.a

 # shared collection
 shared/
@@ -293,5 +294,17 @@ build.sh

 # RAD debugger project file
 *.raddbg
-
+*.rdi
+tests/issues/build/*
 misc/featuregen/featuregen
+
+# Clangd stuff
+.cache/
+.clangd
+compile_commands.json
+
+# Dev cmake helpers
+build/
+cmake-build*/
+CMakeLists.txt
+sandbox/
--- a/LLVM-C.dll
+++ b/LLVM-C.dll
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@
        <img src="https://img.shields.io/discord/568138951836172421?logo=discord">
    </a>
    <a href="https://github.com/odin-lang/odin/actions">
-        <img src="https://github.com/odin-lang/odin/workflows/CI/badge.svg?branch=master&event=push">
+        <img src="https://github.com/odin-lang/odin/actions/workflows/ci.yml/badge.svg?branch=master&event=push">
    </a>
 </p>

--- a/base/builtin/builtin.odin
+++ b/base/builtin/builtin.odin
@@ -7,13 +7,232 @@ nil   :: nil
 false :: 0!=0
 true  :: 0==0

-ODIN_OS      :: ODIN_OS
-ODIN_ARCH    :: ODIN_ARCH
-ODIN_ENDIAN  :: ODIN_ENDIAN
-ODIN_VENDOR  :: ODIN_VENDOR
-ODIN_VERSION :: ODIN_VERSION
-ODIN_ROOT    :: ODIN_ROOT
-ODIN_DEBUG   :: ODIN_DEBUG
+// The following constants are added in `checker.cpp`'s `init_universal` procedure.
+
+/*
+	An `enum` value indicating the target's CPU architecture.
+	Possible values are: `.amd64`, `.i386`, `.arm32`, `.arm64`, `.wasm32`, `.wasm64p32`, and `.riscv64`.
+*/
+ODIN_ARCH                       :: ODIN_ARCH
+
+/*
+	A `string` indicating the target's CPU architecture.
+	Possible values are: "amd64", "i386", "arm32", "arm64", "wasm32", "wasm64p32", "riscv64".
+*/
+ODIN_ARCH_STRING                :: ODIN_ARCH_STRING
+
+/*
+	An `enum` value indicating the type of compiled output, chosen using `-build-mode`.
+	Possible values are: `.Executable`, `.Dynamic`, `.Static`, `.Object`, `.Assembly`, and `.LLVM_IR`.
+*/
+ODIN_BUILD_MODE                 :: ODIN_BUILD_MODE
+
+/*
+	A `string` containing the name of the folder that contains the entry point,
+	e.g. for `%ODIN_ROOT%/examples/demo`, this would contain `demo`.
+*/
+ODIN_BUILD_PROJECT_NAME         :: ODIN_BUILD_PROJECT_NAME
+
+/*
+	An `i64` containing the time at which the executable was compiled, in nanoseconds.
+	This is compatible with the `time.Time` type, i.e. `time.Time{_nsec=ODIN_COMPILE_TIMESTAMP}`
+*/
+ODIN_COMPILE_TIMESTAMP          :: ODIN_COMPILE_TIMESTAMP
+
+/*
+	`true` if the `-debug` command line switch is passed, which enables debug info generation.
+*/
+ODIN_DEBUG                      :: ODIN_DEBUG
+
+/*
+	`true` if the `-default-to-nil-allocator` command line switch is passed,
+	which sets the initial `context.allocator` to an allocator that does nothing.
+*/
+ODIN_DEFAULT_TO_NIL_ALLOCATOR   :: ODIN_DEFAULT_TO_NIL_ALLOCATOR
+
+/*
+	`true` if the `-default-to-panic-allocator` command line switch is passed,
+	which sets the initial `context.allocator` to an allocator that panics if allocated from.
+*/
+ODIN_DEFAULT_TO_PANIC_ALLOCATOR :: ODIN_DEFAULT_TO_PANIC_ALLOCATOR
+
+/*
+	`true` if the `-disable-assert` command line switch is passed,
+	which removes all calls to `assert` from the program.
+*/
+ODIN_DISABLE_ASSERT             :: ODIN_DISABLE_ASSERT
+
+/*
+	An `enum` value indicating the endianness of the target.
+	Possible values are: `.Little` and `.Big`.
+*/
+ODIN_ENDIAN                     :: ODIN_ENDIAN
+
+/*
+	A `string` indicating the endianness of the target.
+	Possible values are: "little" and "big".
+*/
+ODIN_ENDIAN_STRING              :: ODIN_ENDIAN_STRING
+
+/*
+	An `enum` value set using the `-error-pos-style` switch, indicating the source location style used for compile errors and warnings.
+	Possible values are: `.Default` (Odin-style) and `.Unix`.
+*/
+ODIN_ERROR_POS_STYLE            :: ODIN_ERROR_POS_STYLE
+
+/*
+	`true` if the `-foreign-error-procedures` command line switch is passed,
+	which inhibits generation of runtime error procedures, so that they can be in a separate compilation unit.
+*/
+ODIN_FOREIGN_ERROR_PROCEDURES   :: ODIN_FOREIGN_ERROR_PROCEDURES
+
+/*
+	A `string` describing the microarchitecture used for code generation.
+	If not set using the `-microarch` command line switch, the compiler will pick a default.
+	Possible values include, but are not limited to: "sandybridge", "x86-64-v2".
+*/
+ODIN_MICROARCH_STRING           :: ODIN_MICROARCH_STRING
+
+/*
+	An `int` value representing the minimum OS version given to the linker, calculated as `major * 10_000 + minor * 100 + revision`.
+	If not set using the `-minimum-os-version` command line switch, it defaults to `0`, except on Darwin, where it's `11_00_00`.
+*/
+ODIN_MINIMUM_OS_VERSION         :: ODIN_MINIMUM_OS_VERSION
+
+/*
+	`true` if the `-no-bounds-check` command line switch is passed, which disables bounds checking at runtime.
+*/
+ODIN_NO_BOUNDS_CHECK            :: ODIN_NO_BOUNDS_CHECK
+
+/*
+	`true` if the `-no-crt` command line switch is passed, which inhibits linking with the C Runtime Library, a.k.a. LibC.
+*/
+ODIN_NO_CRT                     :: ODIN_NO_CRT
+
+/*
+	`true` if the `-no-entry-point` command line switch is passed, which makes the declaration of a `main` procedure optional.
+*/
+ODIN_NO_ENTRY_POINT             :: ODIN_NO_ENTRY_POINT
+
+/*
+	`true` if the `-no-rtti` command line switch is passed, which inhibits generation of full Runtime Type Information.
+*/
+ODIN_NO_RTTI                    :: ODIN_NO_RTTI
+
+/*
+	`true` if the `-no-type-assert` command line switch is passed, which disables type assertion checking program wide.
+*/
+ODIN_NO_TYPE_ASSERT             :: ODIN_NO_TYPE_ASSERT
+
+/*
+	An `enum` value indicating the optimization level selected using the `-o` command line switch.
+	Possible values are: `.None`, `.Minimal`, `.Size`, `.Speed`, and `.Aggressive`.
+
+	If `ODIN_OPTIMIZATION_MODE` is anything other than `.None` or `.Minimal`, the compiler will also perform a unity build,
+	and `ODIN_USE_SEPARATE_MODULES` will be set to `false` as a result.
+*/
+ODIN_OPTIMIZATION_MODE          :: ODIN_OPTIMIZATION_MODE
+
+/*
+	An `enum` value indicating what the target operating system is.
+*/
+ODIN_OS                         :: ODIN_OS
+
+/*
+	A `string` indicating what the target operating system is.
+*/
+ODIN_OS_STRING                  :: ODIN_OS_STRING
+
+/*
+	An `enum` value indicating the platform subtarget, chosen using the `-subtarget` switch.
+	Possible values are: `.Default`, `.iPhone`, `.iPhoneSimulator`, and `.Android`.
+*/
+ODIN_PLATFORM_SUBTARGET         :: ODIN_PLATFORM_SUBTARGET
+
+/*
+	A `string` representing the path of the folder containing the Odin compiler,
+	relative to which we expect to find the `base` and `core` package collections.
+*/
+ODIN_ROOT                       :: ODIN_ROOT
+
+/*
+	A `bit_set` indicating the sanitizer flags set using the `-sanitize` command line switch.
+	Supported flags are `.Address`, `.Memory`, and `.Thread`.
+*/
+ODIN_SANITIZER_FLAGS            :: ODIN_SANITIZER_FLAGS
+
+/*
+	`true` if the code is being compiled via an invocation of `odin test`.
+*/
+ODIN_TEST                       :: ODIN_TEST
+
+/*
+	`true` if built using the experimental Tilde backend.
+*/
+ODIN_TILDE                      :: ODIN_TILDE
+
+/*
+	`true` by default, meaning each package is built into its own object file, and then linked together.
+	`false` if the `-use-single-module` command line switch to force a unity build is provided.
+
+	If `ODIN_OPTIMIZATION_MODE` is anything other than `.None` or `.Minimal`, the compiler will also perform a unity build,
+	and this constant will also be set to `false`.
+*/
+ODIN_USE_SEPARATE_MODULES       :: ODIN_USE_SEPARATE_MODULES
+
+/*
+	`true` if Valgrind integration is supported on the target.
+*/
+ODIN_VALGRIND_SUPPORT           :: ODIN_VALGRIND_SUPPORT
+
+/*
+	A `string` which identifies the compiler being used. The official compiler sets this to `"odin"`.
+*/
+ODIN_VENDOR                     :: ODIN_VENDOR
+
+/*
+	A `string` containing the version of the Odin compiler, typically in the format `dev-YYYY-MM`.
+*/
+ODIN_VERSION                    :: ODIN_VERSION
+
+/*
+	A `string` containing the Git hash part of the Odin version.
+	Empty if `.git` could not be detected at the time the compiler was built.
+*/
+ODIN_VERSION_HASH               :: ODIN_VERSION_HASH
+
+/*
+	An `enum` set by the `-subsystem` flag, specifying which Windows subsystem the PE file was created for.
+	Possible values are:
+		`.Unknown` - Default and only value on non-Windows platforms
+		`.Console` - Default on Windows
+		`.Windows` - Can be used by graphical applications so Windows doesn't open an empty console
+
+	There are some other possible values for e.g. EFI applications, but only Console and Windows are supported.
+
+	See also: https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_optional_header64
+*/
+ODIN_WINDOWS_SUBSYSTEM          :: ODIN_WINDOWS_SUBSYSTEM
+
+/*
+	A `string` set by the `-subsystem` flag, specifying which Windows subsystem the PE file was created for.
+	Possible values are:
+		"UNKNOWN" - Default and only value on non-Windows platforms
+		"CONSOLE" - Default on Windows
+		"WINDOWS" - Can be used by graphical applications so Windows doesn't open an empty console
+
+	There are some other possible values for e.g. EFI applications, but only Console and Windows are supported.
+
+	See also: https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_optional_header64
+*/
+ODIN_WINDOWS_SUBSYSTEM_STRING   :: ODIN_WINDOWS_SUBSYSTEM_STRING
+
+/*
+	`true` if LLVM supports the f16 type.
+*/
+__ODIN_LLVM_F16_SUPPORTED       :: __ODIN_LLVM_F16_SUPPORTED
+
+

 byte :: u8 // alias

@@ -119,7 +338,8 @@ jmag       :: proc(value: Quaternion) -> Float ---
 kmag       :: proc(value: Quaternion) -> Float ---
 conj       :: proc(value: Complex_Or_Quaternion) -> Complex_Or_Quaternion ---

-expand_values :: proc(value: Struct_Or_Array) -> (A, B, C, ...) ---
+expand_values   :: proc(value: Struct_Or_Array) -> (A, B, C, ...) ---
+compress_values :: proc(values: ...) -> Struct_Or_Array_Like_Type ---

 min   :: proc(values: ..T) -> T ---
 max   :: proc(values: ..T) -> T ---
@@ -130,3 +350,6 @@ soa_zip :: proc(slices: ...) -> #soa[]Struct ---
 soa_unzip :: proc(value: $S/#soa[]$E) -> (slices: ...) ---

 unreachable :: proc() -> ! ---
+
+// Where T is a string, slice, dynamic array, or pointer to an array type
+raw_data :: proc(t: $T) -> rawptr ---
--- a/base/intrinsics/intrinsics.odin
+++ b/base/intrinsics/intrinsics.odin
@@ -32,6 +32,7 @@ trap       :: proc() -> ! ---
 alloca             :: proc(size, align: int) -> [^]u8 ---
 cpu_relax          :: proc() ---
 read_cycle_counter :: proc() -> i64 ---
+read_cycle_counter_frequency :: proc() -> i64 ---

 count_ones           :: proc(x: $T) -> T where type_is_integer(T) || type_is_simd_vector(T) ---
 count_zeros          :: proc(x: $T) -> T where type_is_integer(T) || type_is_simd_vector(T) ---
@@ -140,6 +141,7 @@ type_is_quaternion :: proc($T: typeid) -> bool ---
 type_is_string     :: proc($T: typeid) -> bool ---
 type_is_typeid     :: proc($T: typeid) -> bool ---
 type_is_any        :: proc($T: typeid) -> bool ---
+type_is_string16   :: proc($T: typeid) -> bool ---

 type_is_endian_platform       :: proc($T: typeid) -> bool ---
 type_is_endian_little         :: proc($T: typeid) -> bool ---
@@ -169,6 +171,7 @@ type_is_union            :: proc($T: typeid) -> bool ---
 type_is_enum             :: proc($T: typeid) -> bool ---
 type_is_proc             :: proc($T: typeid) -> bool ---
 type_is_bit_set          :: proc($T: typeid) -> bool ---
+type_is_bit_field        :: proc($T: typeid) -> bool ---
 type_is_simd_vector      :: proc($T: typeid) -> bool ---
 type_is_matrix           :: proc($T: typeid) -> bool ---

@@ -212,6 +215,10 @@ type_is_subtype_of :: proc($T, $U: typeid) -> bool ---

 type_field_index_of :: proc($T: typeid, $name: string) -> uintptr ---

+// "Contiguous" means that the set of enum constants, when sorted, have a difference of either 0 or 1 between consecutive values.
+// This is the exact opposite of "sparse".
+type_enum_is_contiguous :: proc($T: typeid) -> bool where type_is_enum(T) ---
+
 type_equal_proc  :: proc($T: typeid) -> (equal:  proc "contextless" (rawptr, rawptr) -> bool)                 where type_is_comparable(T) ---
 type_hasher_proc :: proc($T: typeid) -> (hasher: proc "contextless" (data: rawptr, seed: uintptr) -> uintptr) where type_is_comparable(T) ---

@@ -221,8 +228,14 @@ type_map_cell_info :: proc($T: typeid)           -> ^runtime.Map_Cell_Info ---
 type_convert_variants_to_pointers :: proc($T: typeid) -> typeid where type_is_union(T) ---
 type_merge :: proc($U, $V: typeid) -> typeid where type_is_union(U), type_is_union(V) ---

+type_integer_to_unsigned :: proc($T: typeid) -> type where type_is_integer(T), !type_is_unsigned(T) ---
+type_integer_to_signed   :: proc($T: typeid) -> type where type_is_integer(T), type_is_unsigned(T) ---
+
 type_has_shared_fields :: proc($U, $V: typeid) -> bool where type_is_struct(U), type_is_struct(V) ---

+// Returns the canonicalized name of the type, which is used to produce the pseudo-unique 'typeid'
+type_canonical_name :: proc($T: typeid) -> string ---
+
 constant_utf16_cstring :: proc($literal: string) -> [^]u16 ---

 constant_log2 :: proc($v: $T) -> T where type_is_integer(T) ---
@@ -274,8 +287,12 @@ simd_lanes_ge :: proc(a, b: #simd[N]T) -> #simd[N]Integer ---
 simd_extract :: proc(a: #simd[N]T, idx: uint) -> T ---
 simd_replace :: proc(a: #simd[N]T, idx: uint, elem: T) -> #simd[N]T ---

+simd_reduce_add_bisect  :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
+simd_reduce_mul_bisect  :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
 simd_reduce_add_ordered :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
 simd_reduce_mul_ordered :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
+simd_reduce_add_pairs   :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
+simd_reduce_mul_pairs   :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
 simd_reduce_min         :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
 simd_reduce_max         :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
 simd_reduce_and         :: proc(a: #simd[N]T) -> T where type_is_integer(T) || type_is_float(T)---
@@ -298,10 +315,11 @@ simd_masked_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U)
 simd_masked_expand_load    :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) ---
 simd_masked_compress_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U)              where type_is_integer(U) || type_is_boolean(U) ---

-
+simd_indices :: proc($T: typeid/#simd[$N]$E) -> T where type_is_numeric(T) ---

 simd_shuffle :: proc(a, b: #simd[N]T, indices: ..int) -> #simd[len(indices)]T ---
 simd_select  :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T ---
+simd_runtime_swizzle :: proc(table: #simd[N]T, indices: #simd[N]T) -> #simd[N]T where type_is_integer(T) ---

 // Lane-wise operations
 simd_ceil    :: proc(a: #simd[N]any_float) -> #simd[N]any_float ---
@@ -349,19 +367,26 @@ x86_cpuid  :: proc(ax, cx: u32) -> (eax, ebx, ecx, edx: u32) ---
 x86_xgetbv :: proc(cx: u32) -> (eax, edx: u32) ---


+
 // Darwin targets only
 objc_object   :: struct{}
 objc_selector :: struct{}
 objc_class    :: struct{}
-objc_id    :: ^objc_object
-objc_SEL   :: ^objc_selector
-objc_Class :: ^objc_class
+objc_ivar     :: struct{}
+
+objc_id           :: ^objc_object
+objc_SEL          :: ^objc_selector
+objc_Class        :: ^objc_class
+objc_Ivar         :: ^objc_ivar
+objc_instancetype :: distinct objc_id

 objc_find_selector     :: proc($name: string) -> objc_SEL   ---
 objc_register_selector :: proc($name: string) -> objc_SEL   ---
 objc_find_class        :: proc($name: string) -> objc_Class ---
 objc_register_class    :: proc($name: string) -> objc_Class ---
-
+objc_ivar_get          :: proc(self: ^$T) -> ^$U ---
+objc_block             :: proc(invoke: $T, ..any) -> ^Objc_Block(T) where type_is_proc(T) ---
+objc_super             :: proc(obj: ^$T) -> ^$U where type_is_subtype_of(T, objc_object) && type_is_subtype_of(U, objc_object) ---

 valgrind_client_request :: proc(default: uintptr, request: uintptr, a0, a1, a2, a3, a4: uintptr) -> uintptr ---

--- a/base/runtime/core.odin
+++ b/base/runtime/core.odin
@@ -61,6 +61,11 @@ Type_Info_Struct_Soa_Kind :: enum u8 {
 	Dynamic = 3,
 }

+Type_Info_String_Encoding_Kind :: enum u8 {
+	UTF_8  = 0,
+	UTF_16 = 1,
+}
+
 // Variant Types
 Type_Info_Named :: struct {
 	name: string,
@@ -73,7 +78,7 @@ Type_Info_Rune       :: struct {}
 Type_Info_Float      :: struct {endianness: Platform_Endianness}
 Type_Info_Complex    :: struct {}
 Type_Info_Quaternion :: struct {}
-Type_Info_String     :: struct {is_cstring: bool}
+Type_Info_String     :: struct {is_cstring: bool, encoding: Type_Info_String_Encoding_Kind}
 Type_Info_Boolean    :: struct {}
 Type_Info_Any        :: struct {}
 Type_Info_Type_Id    :: struct {}
@@ -110,13 +115,12 @@ Type_Info_Parameters :: struct { // Only used for procedures parameters and resu
 	types:        []^Type_Info,
 	names:        []string,
 }
-Type_Info_Tuple :: Type_Info_Parameters // Will be removed eventually

 Type_Info_Struct_Flags :: distinct bit_set[Type_Info_Struct_Flag; u8]
 Type_Info_Struct_Flag :: enum u8 {
 	packed    = 0,
 	raw_union = 1,
-	no_copy   = 2,
+	_         = 2,
 	align     = 3,
 }

@@ -398,6 +402,11 @@ Raw_String :: struct {
 	len:  int,
 }

+Raw_String16 :: struct {
+	data: [^]u16,
+	len:  int,
+}
+
 Raw_Slice :: struct {
 	data: rawptr,
 	len:  int,
@@ -442,13 +451,21 @@ Raw_Any :: struct {
 	data: rawptr,
 	id:   typeid,
 }
-#assert(size_of(Raw_Any) == size_of(any))
+when !ODIN_NO_RTTI {
+	#assert(size_of(Raw_Any) == size_of(any))
+}

 Raw_Cstring :: struct {
 	data: [^]byte,
 }
 #assert(size_of(Raw_Cstring) == size_of(cstring))

+Raw_Cstring16 :: struct {
+	data: [^]u16,
+}
+#assert(size_of(Raw_Cstring16) == size_of(cstring16))
+
+
 Raw_Soa_Pointer :: struct {
 	data:  rawptr,
 	index: int,
@@ -556,11 +573,18 @@ ALL_ODIN_OS_TYPES :: Odin_OS_Types{
 	// Defined internally by the compiler
 	Odin_Platform_Subtarget_Type :: enum int {
 		Default,
-		iOS,
+		iPhone,
+		iPhoneSimulator,
+		Android,
 	}
 */
 Odin_Platform_Subtarget_Type :: type_of(ODIN_PLATFORM_SUBTARGET)

+Odin_Platform_Subtarget_Types :: bit_set[Odin_Platform_Subtarget_Type]
+
+@(builtin)
+ODIN_PLATFORM_SUBTARGET_IOS :: ODIN_PLATFORM_SUBTARGET == .iPhone || ODIN_PLATFORM_SUBTARGET == .iPhoneSimulator
+
 /*
 	// Defined internally by the compiler
 	Odin_Sanitizer_Flag :: enum u32 {
--- a/base/runtime/core_builtin.odin
+++ b/base/runtime/core_builtin.odin
@@ -5,6 +5,11 @@ import "base:intrinsics"
@builtin
 Maybe :: union($T: typeid) {T}

+/*
+Represents an Objective-C block with a given procedure signature T
+*/
+@builtin
+Objc_Block :: struct($T: typeid) where intrinsics.type_is_proc(T) { using _: intrinsics.objc_object }

 /*
 Recovers the containing/parent struct from a pointer to one of its fields.
@@ -49,7 +54,12 @@ container_of :: #force_inline proc "contextless" (ptr: $P/^$Field_Type, $T: type


 when !NO_DEFAULT_TEMP_ALLOCATOR {
-	@thread_local global_default_temp_allocator_data: Default_Temp_Allocator
+	when ODIN_ARCH == .i386 && ODIN_OS == .Windows {
+		// Thread-local storage is problematic on Windows i386
+		global_default_temp_allocator_data: Default_Temp_Allocator
+	} else {
+		@thread_local global_default_temp_allocator_data: Default_Temp_Allocator
+	}
 }

@(builtin, disabled=NO_DEFAULT_TEMP_ALLOCATOR)
@@ -60,37 +70,50 @@ init_global_temporary_allocator :: proc(size: int, backup_allocator := context.a
 }


+@(require_results)
+copy_slice_raw :: proc "contextless" (dst, src: rawptr, dst_len, src_len, elem_size: int) -> int {
+	n := min(dst_len, src_len)
+	if n > 0 {
+		intrinsics.mem_copy(dst, src, n*elem_size)
+	}
+	return n
+}
+
 // `copy_slice` is a built-in procedure that copies elements from a source slice `src` to a destination slice `dst`.
 // The source and destination may overlap. Copy returns the number of elements copied, which will be the minimum
 // of len(src) and len(dst).
 //
 // Prefer the procedure group `copy`.
@builtin
-copy_slice :: proc "contextless" (dst, src: $T/[]$E) -> int {
-	n := max(0, min(len(dst), len(src)))
-	if n > 0 {
-		intrinsics.mem_copy(raw_data(dst), raw_data(src), n*size_of(E))
-	}
-	return n
+copy_slice :: #force_inline proc "contextless" (dst, src: $T/[]$E) -> int {
+	return copy_slice_raw(raw_data(dst), raw_data(src), len(dst), len(src), size_of(E))
 }
+
 // `copy_from_string` is a built-in procedure that copies elements from a source string `src` to a destination slice `dst`.
 // The source and destination may overlap. Copy returns the number of elements copied, which will be the minimum
 // of len(src) and len(dst).
 //
 // Prefer the procedure group `copy`.
@builtin
-copy_from_string :: proc "contextless" (dst: $T/[]$E/u8, src: $S/string) -> int {
-	n := max(0, min(len(dst), len(src)))
-	if n > 0 {
-		intrinsics.mem_copy(raw_data(dst), raw_data(src), n)
-	}
-	return n
+copy_from_string :: #force_inline proc "contextless" (dst: $T/[]$E/u8, src: $S/string) -> int {
+	return copy_slice_raw(raw_data(dst), raw_data(src), len(dst), len(src), 1)
 }
+
+// `copy_from_string16` is a built-in procedure that copies elements from a source string `src` to a destination slice `dst`.
+// The source and destination may overlap. Copy returns the number of elements copied, which will be the minimum
+// of len(src) and len(dst).
+//
+// Prefer the procedure group `copy`.
+@builtin
+copy_from_string16 :: #force_inline proc "contextless" (dst: $T/[]$E/u16, src: $S/string16) -> int {
+	return copy_slice_raw(raw_data(dst), raw_data(src), len(dst), len(src), 2)
+}
+
 // `copy` is a built-in procedure that copies elements from a source slice/string `src` to a destination slice `dst`.
 // The source and destination may overlap. Copy returns the number of elements copied, which will be the minimum
 // of len(src) and len(dst).
@builtin
-copy :: proc{copy_slice, copy_from_string}
+copy :: proc{copy_slice, copy_from_string, copy_from_string16}



@@ -146,11 +169,17 @@ remove_range :: proc(array: ^$D/[dynamic]$T, #any_int lo, hi: int, loc := #calle
@builtin
 pop :: proc(array: ^$T/[dynamic]$E, loc := #caller_location) -> (res: E) #no_bounds_check {
 	assert(len(array) > 0, loc=loc)
-	res = array[len(array)-1]
-	(^Raw_Dynamic_Array)(array).len -= 1
+	_pop_type_erased(&res, (^Raw_Dynamic_Array)(array), size_of(E))
 	return res
 }

+_pop_type_erased :: proc(res: rawptr, array: ^Raw_Dynamic_Array, elem_size: int, loc := #caller_location) {
+	end := rawptr(uintptr(array.data) + uintptr(elem_size*(array.len-1)))
+	intrinsics.mem_copy_non_overlapping(res, end, elem_size)
+	array.len -= 1
+}
+
+

 // `pop_safe` trys to remove and return the end value of dynamic array `array` and reduces the length of `array` by 1.
 // If the operation is not possible, it will return false.
@@ -285,6 +314,15 @@ delete_map :: proc(m: $T/map[$K]$V, loc := #caller_location) -> Allocator_Error
 }


+@builtin
+delete_string16 :: proc(str: string16, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
+	return mem_free_with_size(raw_data(str), len(str)*size_of(u16), allocator, loc)
+}
+@builtin
+delete_cstring16 :: proc(str: cstring16, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
+	return mem_free((^u16)(str), allocator, loc)
+}
+
 // `delete` will try to free the underlying data of the passed built-in data structure (string, cstring, dynamic array, slice, or map), with the given `allocator` if the allocator supports this operation.
 //
 // Note: Prefer `delete` over the specific `delete_*` procedures where possible.
@@ -297,26 +335,27 @@ delete :: proc{
 	delete_map,
 	delete_soa_slice,
 	delete_soa_dynamic_array,
+	delete_string16,
+	delete_cstring16,
 }


 // The new built-in procedure allocates memory. The first argument is a type, not a value, and the value
 // return is a pointer to a newly allocated value of that type using the specified allocator, default is context.allocator
@(builtin, require_results)
-new :: proc($T: typeid, allocator := context.allocator, loc := #caller_location) -> (^T, Allocator_Error) #optional_allocator_error {
-	return new_aligned(T, align_of(T), allocator, loc)
+new :: proc($T: typeid, allocator := context.allocator, loc := #caller_location) -> (t: ^T, err: Allocator_Error) #optional_allocator_error {
+	t = (^T)(raw_data(mem_alloc_bytes(size_of(T), align_of(T), allocator, loc) or_return))
+	return
 }
@(require_results)
 new_aligned :: proc($T: typeid, alignment: int, allocator := context.allocator, loc := #caller_location) -> (t: ^T, err: Allocator_Error) {
-	data := mem_alloc_bytes(size_of(T), alignment, allocator, loc) or_return
-	t = (^T)(raw_data(data))
+	t = (^T)(raw_data(mem_alloc_bytes(size_of(T), alignment, allocator, loc) or_return))
 	return
 }

@(builtin, require_results)
 new_clone :: proc(data: $T, allocator := context.allocator, loc := #caller_location) -> (t: ^T, err: Allocator_Error) #optional_allocator_error {
-	t_data := mem_alloc_bytes(size_of(T), align_of(T), allocator, loc) or_return
-	t = (^T)(raw_data(t_data))
+	t = (^T)(raw_data(mem_alloc_bytes(size_of(T), align_of(T), allocator, loc) or_return))
 	if t != nil {
 		t^ = data
 	}
@@ -326,14 +365,21 @@ new_clone :: proc(data: $T, allocator := context.allocator, loc := #caller_locat
 DEFAULT_DYNAMIC_ARRAY_CAPACITY :: 8

@(require_results)
-make_aligned :: proc($T: typeid/[]$E, #any_int len: int, alignment: int, allocator := context.allocator, loc := #caller_location) -> (T, Allocator_Error) #optional_allocator_error {
+make_aligned :: proc($T: typeid/[]$E, #any_int len: int, alignment: int, allocator := context.allocator, loc := #caller_location) -> (res: T, err: Allocator_Error) #optional_allocator_error {
+	err = _make_aligned_type_erased(&res, size_of(E), len, alignment, allocator, loc)
+	return
+}
+
+@(require_results)
+_make_aligned_type_erased :: proc(slice: rawptr, elem_size: int, len: int, alignment: int, allocator: Allocator, loc := #caller_location) -> Allocator_Error {
 	make_slice_error_loc(loc, len)
-	data, err := mem_alloc_bytes(size_of(E)*len, alignment, allocator, loc)
-	if data == nil && size_of(E) != 0 {
-		return nil, err
+	data, err := mem_alloc_bytes(elem_size*len, alignment, allocator, loc)
+	if data == nil && elem_size != 0 {
+		return err
 	}
-	s := Raw_Slice{raw_data(data), len}
-	return transmute(T)s, err
+	(^Raw_Slice)(slice).data = raw_data(data)
+	(^Raw_Slice)(slice).len  = len
+	return err
 }

 // `make_slice` allocates and initializes a slice. Like `new`, the first argument is a type, not a value.
@@ -341,24 +387,27 @@ make_aligned :: proc($T: typeid/[]$E, #any_int len: int, alignment: int, allocat
 //
 // Note: Prefer using the procedure group `make`.
@(builtin, require_results)
-make_slice :: proc($T: typeid/[]$E, #any_int len: int, allocator := context.allocator, loc := #caller_location) -> (T, Allocator_Error) #optional_allocator_error {
-	return make_aligned(T, len, align_of(E), allocator, loc)
+make_slice :: proc($T: typeid/[]$E, #any_int len: int, allocator := context.allocator, loc := #caller_location) -> (res: T, err: Allocator_Error) #optional_allocator_error {
+	err = _make_aligned_type_erased(&res, size_of(E), len, align_of(E), allocator, loc)
+	return
 }
 // `make_dynamic_array` allocates and initializes a dynamic array. Like `new`, the first argument is a type, not a value.
 // Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
 //
 // Note: Prefer using the procedure group `make`.
@(builtin, require_results)
-make_dynamic_array :: proc($T: typeid/[dynamic]$E, allocator := context.allocator, loc := #caller_location) -> (T, Allocator_Error) #optional_allocator_error {
-	return make_dynamic_array_len_cap(T, 0, 0, allocator, loc)
+make_dynamic_array :: proc($T: typeid/[dynamic]$E, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
+	err = _make_dynamic_array_len_cap((^Raw_Dynamic_Array)(&array), size_of(E), align_of(E), 0, 0, allocator, loc)
+	return
 }
 // `make_dynamic_array_len` allocates and initializes a dynamic array. Like `new`, the first argument is a type, not a value.
 // Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
 //
 // Note: Prefer using the procedure group `make`.
@(builtin, require_results)
-make_dynamic_array_len :: proc($T: typeid/[dynamic]$E, #any_int len: int, allocator := context.allocator, loc := #caller_location) -> (T, Allocator_Error) #optional_allocator_error {
-	return make_dynamic_array_len_cap(T, len, len, allocator, loc)
+make_dynamic_array_len :: proc($T: typeid/[dynamic]$E, #any_int len: int, allocator := context.allocator, loc := #caller_location) -> (array: T, err: Allocator_Error) #optional_allocator_error {
+	err = _make_dynamic_array_len_cap((^Raw_Dynamic_Array)(&array), size_of(E), align_of(E), len, len, allocator, loc)
+	return
 }
 // `make_dynamic_array_len_cap` allocates and initializes a dynamic array. Like `new`, the first argument is a type, not a value.
 // Unlike `new`, `make`'s return value is the same as the type of its argument, not a pointer to it.
@@ -463,7 +512,7 @@ clear_map :: proc "contextless" (m: ^$T/map[$K]$V) {
 // Note: Prefer the procedure group `reserve`
@builtin
 reserve_map :: proc(m: ^$T/map[$K]$V, #any_int capacity: int, loc := #caller_location) -> Allocator_Error {
-	return __dynamic_map_reserve((^Raw_Map)(m), map_info(T), uint(capacity), loc) if m != nil else nil
+	return __dynamic_map_reserve((^Raw_Map)(m), map_info(T), uint(capacity), loc)
 }

 // Shrinks the capacity of a map down to the current length.
@@ -492,7 +541,7 @@ delete_key :: proc(m: ^$T/map[$K]$V, key: K) -> (deleted_key: K, deleted_value:
 	return
 }

-_append_elem :: #force_inline proc(array: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, arg_ptr: rawptr, should_zero: bool, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+_append_elem :: #force_no_inline proc(array: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, arg_ptr: rawptr, should_zero: bool, loc := #caller_location) -> (n: int, err: Allocator_Error) #optional_allocator_error {
 	if array == nil {
 		return
 	}
@@ -537,7 +586,7 @@ non_zero_append_elem :: proc(array: ^$T/[dynamic]$E, #no_broadcast arg: E, loc :
 	}
 }

-_append_elems :: #force_inline proc(array: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, should_zero: bool, loc := #caller_location, args: rawptr, arg_len: int) -> (n: int, err: Allocator_Error) #optional_allocator_error {
+_append_elems :: #force_no_inline proc(array: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, should_zero: bool, loc := #caller_location, args: rawptr, arg_len: int) -> (n: int, err: Allocator_Error) #optional_allocator_error {
 	if array == nil {
 		return 0, nil
 	}
@@ -648,6 +697,9 @@ append_nothing :: proc(array: ^$T/[dynamic]$E, loc := #caller_location) -> (n: i

@builtin
 inject_at_elem :: proc(array: ^$T/[dynamic]$E, #any_int index: int, #no_broadcast arg: E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(index >= 0, "Index must be positive.", loc)
+	}
 	if array == nil {
 		return
 	}
@@ -666,6 +718,9 @@ inject_at_elem :: proc(array: ^$T/[dynamic]$E, #any_int index: int, #no_broadcas

@builtin
 inject_at_elems :: proc(array: ^$T/[dynamic]$E, #any_int index: int, #no_broadcast args: ..E, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(index >= 0, "Index must be positive.", loc)
+	}
 	if array == nil {
 		return
 	}
@@ -689,6 +744,9 @@ inject_at_elems :: proc(array: ^$T/[dynamic]$E, #any_int index: int, #no_broadca

@builtin
 inject_at_elem_string :: proc(array: ^$T/[dynamic]$E/u8, #any_int index: int, arg: string, loc := #caller_location) -> (ok: bool, err: Allocator_Error) #no_bounds_check #optional_allocator_error {
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(index >= 0, "Index must be positive.", loc)
+	}
 	if array == nil {
 		return
 	}
@@ -777,7 +835,7 @@ clear_dynamic_array :: proc "contextless" (array: ^$T/[dynamic]$E) {
 // `reserve_dynamic_array` will try to reserve memory of a passed dynamic array or map to the requested element count (setting the `cap`).
 //
 // Note: Prefer the procedure group `reserve`.
-_reserve_dynamic_array :: #force_inline proc(a: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, capacity: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
+_reserve_dynamic_array :: #force_no_inline proc(a: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, capacity: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
 	if a == nil {
 		return nil
 	}
@@ -821,7 +879,7 @@ non_zero_reserve_dynamic_array :: proc(array: ^$T/[dynamic]$E, #any_int capacity
 }


-_resize_dynamic_array :: #force_inline proc(a: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, length: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
+_resize_dynamic_array :: #force_no_inline proc(a: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, length: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
 	if a == nil {
 		return nil
 	}
--- a/base/runtime/core_builtin_soa.odin
+++ b/base/runtime/core_builtin_soa.odin
@@ -178,8 +178,31 @@ resize_soa :: proc(array: ^$T/#soa[dynamic]$E, #any_int length: int, loc := #cal
 	if array == nil {
 		return nil
 	}
-	reserve_soa(array, length, loc) or_return
+
 	footer := raw_soa_footer(array)
+
+	if length > footer.cap {
+		reserve_soa(array, length, loc) or_return
+	} else if size_of(E) > 0 && length > footer.len {
+		ti := type_info_base(type_info_of(typeid_of(T)))
+		si := &ti.variant.(Type_Info_Struct)
+
+		field_count := len(E) when intrinsics.type_is_array(E) else intrinsics.type_struct_field_count(E)
+
+		data := (^rawptr)(array)^
+
+		soa_offset := 0
+		for i in 0..<field_count {
+			type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
+
+			soa_offset = align_forward_int(soa_offset, align_of(E))
+
+			mem_zero(rawptr(uintptr(data) + uintptr(soa_offset) + uintptr(type.size * footer.len)), type.size * (length - footer.len))
+
+			soa_offset += type.size * footer.cap
+		}
+	}
+
 	footer.len = length
 	return nil
 }
@@ -249,17 +272,77 @@ _reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, zero_memory: boo

 	old_data := (^rawptr)(array)^

+	resize: if old_data != nil {
+
+		new_bytes, resize_err := array.allocator.procedure(
+			array.allocator.data, .Resize_Non_Zeroed, new_size, max_align,
+			old_data, old_size, loc,
+		)
+		new_data := raw_data(new_bytes)
+
+		#partial switch resize_err {
+		case .Mode_Not_Implemented: break resize
+		case .None: // continue resizing
+		case: return resize_err
+		}
+
+		footer.cap = capacity
+
+		old_offset := 0
+		new_offset := 0
+
+		// Correct data memory
+		// from: |x x y y z z _ _ _|
+		// to:   |x x _ y y _ z z _|
+
+		// move old data to the end of the new allocation to avoid overlap
+		old_data = rawptr(uintptr(new_data) + uintptr(new_size - old_size))
+		mem_copy(old_data, new_data, old_size)
+
+		// now:  |_ _ _ x x y y z z|
+
+		for i in 0..<field_count {
+			type := si.types[i].variant.(Type_Info_Multi_Pointer).elem
+
+			old_offset = align_forward_int(old_offset, max_align)
+			new_offset = align_forward_int(new_offset, max_align)
+
+			new_data_elem := rawptr(uintptr(new_data) + uintptr(new_offset))
+			old_data_elem := rawptr(uintptr(old_data) + uintptr(old_offset))
+
+			old_size_elem := type.size * old_cap
+			new_size_elem := type.size * capacity
+
+			mem_copy(new_data_elem, old_data_elem, old_size_elem)
+
+			(^rawptr)(uintptr(array) + i*size_of(rawptr))^ = new_data_elem
+
+			if zero_memory {
+				mem_zero(rawptr(uintptr(new_data_elem) + uintptr(old_size_elem)), new_size_elem - old_size_elem)
+			}
+
+			old_offset += old_size_elem
+			new_offset += new_size_elem
+		}
+
+		return nil
+	}
+
 	new_bytes := array.allocator.procedure(
 		array.allocator.data, .Alloc if zero_memory else .Alloc_Non_Zeroed, new_size, max_align,
 		nil, old_size, loc,
 	) or_return
 	new_data := raw_data(new_bytes)

-
 	footer.cap = capacity

 	old_offset := 0
 	new_offset := 0
+
+	// Correct data memory
+	// from: |x x y y z z| ... |_ _ _ _ _ _ _ _ _|
+	// to:                     |x x _ y y _ z z _|
+
 	for i in 0..<field_count {
 		type := si.types[i].variant.(Type_Info_Multi_Pointer).elem

@@ -277,10 +360,12 @@ _reserve_soa :: proc(array: ^$T/#soa[dynamic]$E, capacity: int, zero_memory: boo
 		new_offset += type.size * capacity
 	}

-	array.allocator.procedure(
-		array.allocator.data, .Free, 0, max_align,
-		old_data, old_size, loc,
-	) or_return
+	if old_data != nil {
+		array.allocator.procedure(
+			array.allocator.data, .Free, 0, max_align,
+			old_data, old_size, loc,
+		) or_return
+	}

 	return nil
 }
--- a/base/runtime/default_allocators_nil.odin
+++ b/base/runtime/default_allocators_nil.odin
@@ -23,7 +23,8 @@ nil_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
 	return nil, .None
 }

-nil_allocator :: proc() -> Allocator {
+@(require_results)
+nil_allocator :: proc "contextless" () -> Allocator {
 	return Allocator{
 		procedure = nil_allocator_proc,
 		data = nil,
@@ -72,6 +73,7 @@ panic_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
 	return nil, nil
 }

+@(require_results)
 panic_allocator :: proc() -> Allocator {
 	return Allocator{
 		procedure = panic_allocator_proc,
--- a/base/runtime/default_temp_allocator_arena.odin
+++ b/base/runtime/default_temp_allocator_arena.odin
@@ -1,6 +1,7 @@
 package runtime

 import "base:intrinsics"
+// import "base:sanitizer"

 DEFAULT_ARENA_GROWING_MINIMUM_BLOCK_SIZE :: uint(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE)

@@ -43,15 +44,21 @@ memory_block_alloc :: proc(allocator: Allocator, capacity: uint, alignment: uint
 	block.base = ([^]byte)(uintptr(block) + base_offset)
 	block.capacity = uint(end - uintptr(block.base))

+	// sanitizer.address_poison(block.base, block.capacity)
+
 	// Should be zeroed
 	assert(block.used == 0)
 	assert(block.prev == nil)
 	return
 }

-memory_block_dealloc :: proc(block_to_free: ^Memory_Block, loc := #caller_location) {
+memory_block_dealloc :: proc "contextless" (block_to_free: ^Memory_Block, loc := #caller_location) {
 	if block_to_free != nil {
+
 		allocator := block_to_free.allocator
+		// sanitizer.address_unpoison(block_to_free.base, block_to_free.capacity)
+		context = default_context()
+		context.allocator = allocator
 		mem_free(block_to_free, allocator, loc)
 	}
 }
@@ -83,6 +90,7 @@ alloc_from_memory_block :: proc(block: ^Memory_Block, min_size, alignment: uint)
 		return
 	}
 	data = block.base[block.used+alignment_offset:][:min_size]
+	// sanitizer.address_unpoison(block.base[block.used:block.used+size])
 	block.used += size
 	return
 }
@@ -104,13 +112,15 @@ arena_alloc :: proc(arena: ^Arena, size, alignment: uint, loc := #caller_locatio
 	if size == 0 {
 		return
 	}
-	
-	needed := align_forward_uint(size, alignment)
-	if arena.curr_block == nil || (safe_add(arena.curr_block.used, needed) or_else 0) > arena.curr_block.capacity {
+
+	prev_used := 0 if arena.curr_block == nil else arena.curr_block.used
+	data, err = alloc_from_memory_block(arena.curr_block, size, alignment)
+	if err == .Out_Of_Memory {
 		if arena.minimum_block_size == 0 {
 			arena.minimum_block_size = DEFAULT_ARENA_GROWING_MINIMUM_BLOCK_SIZE
 		}

+		needed := align_forward_uint(size, alignment)
 		block_size := max(needed, arena.minimum_block_size)

 		if arena.backing_allocator.procedure == nil {
@@ -121,10 +131,9 @@ arena_alloc :: proc(arena: ^Arena, size, alignment: uint, loc := #caller_locatio
 		new_block.prev = arena.curr_block
 		arena.curr_block = new_block
 		arena.total_capacity += new_block.capacity
+		prev_used = 0
+		data, err = alloc_from_memory_block(arena.curr_block, size, alignment)
 	}
-
-	prev_used := arena.curr_block.used
-	data, err = alloc_from_memory_block(arena.curr_block, size, alignment)
 	arena.total_used += arena.curr_block.used - prev_used
 	return
 }
@@ -161,11 +170,12 @@ arena_free_all :: proc(arena: ^Arena, loc := #caller_location) {
 	if arena.curr_block != nil {
 		intrinsics.mem_zero(arena.curr_block.base, arena.curr_block.used)
 		arena.curr_block.used = 0
+		// sanitizer.address_poison(arena.curr_block.base, arena.curr_block.capacity)
 	}
 	arena.total_used = 0
 }

-arena_destroy :: proc(arena: ^Arena, loc := #caller_location) {
+arena_destroy :: proc "contextless" (arena: ^Arena, loc := #caller_location) {
 	for arena.curr_block != nil {
 		free_block := arena.curr_block
 		arena.curr_block = free_block.prev
@@ -177,6 +187,7 @@ arena_destroy :: proc(arena: ^Arena, loc := #caller_location) {
 	arena.total_capacity = 0
 }

+@(require_results)
 arena_allocator :: proc(arena: ^Arena) -> Allocator {
 	return Allocator{arena_allocator_proc, arena}
 }
@@ -225,6 +236,7 @@ arena_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
 					// grow data in-place, adjusting next allocation
 					block.used = uint(new_end)
 					data = block.base[start:new_end]
+					// sanitizer.address_unpoison(data)
 					return
 				}
 			}
@@ -298,6 +310,7 @@ arena_temp_end :: proc(temp: Arena_Temp, loc := #caller_location) {
 			assert(block.used >= temp.used, "out of order use of arena_temp_end", loc)
 			amount_to_zero := block.used-temp.used
 			intrinsics.mem_zero(block.base[temp.used:], amount_to_zero)
+			// sanitizer.address_poison(block.base[temp.used:block.capacity])
 			block.used = temp.used
 			arena.total_used -= amount_to_zero
 		}
--- a/base/runtime/default_temporary_allocator.odin
+++ b/base/runtime/default_temporary_allocator.odin
@@ -8,7 +8,7 @@ when NO_DEFAULT_TEMP_ALLOCATOR {
 	
 	default_temp_allocator_init :: proc(s: ^Default_Temp_Allocator, size: int, backing_allocator := context.allocator) {}
 	
-	default_temp_allocator_destroy :: proc(s: ^Default_Temp_Allocator) {}
+	default_temp_allocator_destroy :: proc "contextless" (s: ^Default_Temp_Allocator) {}
 	
 	default_temp_allocator_proc :: nil_allocator_proc

@@ -28,7 +28,7 @@ when NO_DEFAULT_TEMP_ALLOCATOR {
 		_ = arena_init(&s.arena, uint(size), backing_allocator)
 	}

-	default_temp_allocator_destroy :: proc(s: ^Default_Temp_Allocator) {
+	default_temp_allocator_destroy :: proc "contextless" (s: ^Default_Temp_Allocator) {
 		if s != nil {
 			arena_destroy(&s.arena)
 			s^ = {}
@@ -56,7 +56,7 @@ when NO_DEFAULT_TEMP_ALLOCATOR {
 	}

 	@(fini, private)
-	_destroy_temp_allocator_fini :: proc() {
+	_destroy_temp_allocator_fini :: proc "contextless" () {
 		default_temp_allocator_destroy(&global_default_temp_allocator_data)
 	}
 }
@@ -70,7 +70,7 @@ DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD :: #force_inline proc(ignore := false, loc :=
 	}
 }

-
+@(require_results)
 default_temp_allocator :: proc(allocator: ^Default_Temp_Allocator) -> Allocator {
 	return Allocator{
 		procedure = default_temp_allocator_proc,
--- a/base/runtime/doc.odin
+++ b/base/runtime/doc.odin
@@ -0,0 +1,244 @@
+
+/*
+Declarations which are required by the compiler
+
+## Descriptions of files
+
+There are a lot of files in this package. The table below roughly describes what
+kind of functionality is placed in which files:
+
+| File pattern         | Description
+|----------------------|------------------------------------------------------|
+| `core.odin`          | Contains the declarations that the compiler requires to be present. Contains context-related declarations, `Type_Info` declarations and some other types used to implement the runtime and other packages. |
+| `core_builtin*.odin` | Contain `@(builtin)` declarations that can be used without importing the package. Most of them aren't required by the compiler |
+| `default_*.odin`     | Contain default implementations for context allocators |
+| `entry_*.odin`       | Contain OS-specific entry points |
+| `os_specific_*.odin` | Contain OS-specific utility procedures |
+| `*internal*.odin`    | Contain implementations for internal procedures that can be called by the compiler |
+
+## Implementing custom runtime
+
+For embedded and kernel development it might be required to re-implement parts
+of the `base:runtime` package. This can include changing the default printing
+procedures that handle console output when the program panics, custom
+entry-points, tailored for a specific platform or execution environment, or
+simply switching up implementations of some procedures.
+
+In case this is required, the following is suggested:
+
+1. Define the `$ODIN_ROOT` environment variable to point to a directory within your
+   project that contains the following directories: `base/`, `core/` and `vendor/`.
+2. Inside the `$ODIN_ROOT/base` subdirectory, implement the *necessary
+   declarations*.
+
+What constitutes the necessary declarations is described below.
+
+### Context-related
+
+The compiler will require these declarations as they concern the `context`
+variable.
+
+* `Maybe`
+* `Source_Code_Location`
+* `Context`
+* `Allocator`
+* `Random_Generator`
+* `Logger`
+* `__init_context`
+
+### Runtime initialization/cleanup
+
+These are not strictly required for compilation, but if global variables or
+`@(init)`/`@(fini)` blocks are used, these procedures need to be called inside
+the entry point.
+
+* `_startup_runtime`
+* `_cleanup_runtime`
+
+### Type assertion check
+
+These procedures are called every time `.(Type)` expressions are used in order
+to check the union tag or the underlying type of `any` before returning the
+value of the underlying type. These are not required if `-no-type-assert` is
+specified.
+
+* `type_assertion_check`
+* `type_assertion_check2` (takes in typeid)
+
+### Bounds checking procedures
+
+These procedures are called every time an index or slicing expression is used in
+order to perform bounds-checking before the actual operation. These are not
+required if the `-no-bounds-check` option is specified.
+
+* `bounds_check_error`
+* `matrix_bounds_check_error`
+* `slice_expr_error_hi`
+* `slice_expr_error_lo_hi`
+* `multi_pointer_slice_expr_error`
+
+### cstring calls
+
+If `cstring` or `cstring16` types are used, these procedures are required.
+
+* `cstring_to_string`
+* `cstring_len`
+* `cstring16_to_string16`
+* `cstring16_len`
+
+### Comparison
+
+These procedures are required for comparison operators between strings and other
+compound types to function properly. If neither strings, structs nor unions are
+compared, only the `string_eq` procedure is required.
+
+* `memory_equal`
+* `memory_compare`
+* `memory_compare_zero`
+* `cstring_eq`
+* `cstring16_eq`
+* `cstring_ne`
+* `cstring16_ne`
+* `cstring_lt`
+* `cstring16_lt`
+* `cstring_gt`
+* `cstring16_gt`
+* `cstring_le`
+* `cstring16_le`
+* `cstring_ge`
+* `cstring16_ge`
+* `string_eq`
+* `string16_eq`
+* `string_ne`
+* `string16_ne`
+* `string_lt`
+* `string16_lt`
+* `string_gt`
+* `string16_gt`
+* `string_le`
+* `string16_le`
+* `string_ge`
+* `string16_ge`
+* `complex32_eq`
+* `complex32_ne`
+* `complex64_eq`
+* `complex64_ne`
+* `complex128_eq`
+* `complex128_ne`
+* `quaternion64_eq`
+* `quaternion64_ne`
+* `quaternion128_eq`
+* `quaternion128_ne`
+* `quaternion256_eq`
+* `quaternion256_ne`
+
+### for-in `string` type
+
+These procedures are required to iterate over strings using a `for ... in` loop. If
+this kind of loop isn't used, these procedures aren't required.
+
+* `string_decode_rune`
+* `string_decode_last_rune` (for `#reverse for`)
+
+### Required when RTTI is enabled (the vast majority of targets)
+
+These declarations are required unless the `-no-rtti` compiler option is
+specified. Note that in order to be useful, some other procedures need to be
+implemented. Those procedures aren't mentioned here as the compiler won't
+complain if they're missing.
+
+* `Type_Info`
+* `type_table`
+* `__type_info_of`
+
+### Hashing
+
+Required if maps are used
+
+* `default_hasher`
+* `default_hasher_cstring`
+* `default_hasher_string`
+
+### Pseudo-CRT procedures required due to LLVM but useful in general
+
+* `memset`
+* `memcpy`
+* `memmove`
+
+### Procedures required by the LLVM backend if u128/i128 is used
+
+* `umodti3`
+* `udivti3`
+* `modti3`
+* `divti3`
+* `fixdfti`
+* `fixunsdfti`
+* `fixunsdfdi`
+* `floattidf`
+* `floattidf_unsigned`
+* `truncsfhf2`
+* `truncdfhf2`
+* `gnu_h2f_ieee`
+* `gnu_f2h_ieee`
+* `extendhfsf2`
+
+### Procedures required by the LLVM backend if f16 is used (WASM only)
+
+* `__ashlti3`
+* `__multi3`
+
+### When -no-crt is defined (windows only)
+
+* `_tls_index`
+* `_fltused`
+
+### Arithmetic
+
+* `quo_complex32`
+* `quo_complex64`
+* `quo_complex128`
+
+* `mul_quaternion64`
+* `mul_quaternion128`
+* `mul_quaternion256`
+
+* `quo_quaternion64`
+* `quo_quaternion128`
+* `quo_quaternion256`
+
+* `abs_complex32`
+* `abs_complex64`
+* `abs_complex128`
+
+* `abs_quaternion64`
+* `abs_quaternion128`
+* `abs_quaternion256`
+
+### Map-specific calls
+
+* `map_seed_from_map_data`
+* `__dynamic_map_check_grow` (for static map calls)
+* `map_insert_hash_dynamic`  (for static map calls)
+* `__dynamic_map_get` (for dynamic map calls)
+* `__dynamic_map_set` (for dynamic map calls)
+
+### Dynamic literals (`[dynamic]T` and `map[K]V`) (can be disabled with `-no-dynamic-literals`)
+
+* `__dynamic_array_reserve`
+* `__dynamic_array_append`
+* `__dynamic_map_reserve`
+
+### Objective-C specific
+
+* `objc_lookUpClass`
+* `sel_registerName`
+* `objc_allocateClassPair`
+
+### Other required declarations
+
+This declaration is required unconditionally.
+
+* `Load_Directory_File`
+
+*/
+package runtime
--- a/base/runtime/docs.odin
+++ b/base/runtime/docs.odin
@@ -1,180 +0,0 @@
-package runtime
-
-/*
-
-package runtime has numerous entities (declarations) which are required by the compiler to function.
-
-
-## Basic types and calls (and anything they rely on)
-
-Source_Code_Location
-Context
-Allocator
-Logger
-
-__init_context
-_cleanup_runtime
-
-
-## cstring calls
-
-cstring_to_string
-cstring_len
-
-
-
-## Required when RTTI is enabled (the vast majority of targets)
-
-Type_Info
-
-type_table
-__type_info_of
-
-
-## Hashing
-
-default_hasher
-default_hasher_cstring
-default_hasher_string
-
-
-## Pseudo-CRT required procedured due to LLVM but useful in general
-memset
-memcpy
-memove
-
-
-## Procedures required by the LLVM backend if u128/i128 is used
-umodti3
-udivti3
-modti3
-divti3
-fixdfti
-fixunsdfti
-fixunsdfdi
-floattidf
-floattidf_unsigned
-truncsfhf2
-truncdfhf2
-gnu_h2f_ieee
-gnu_f2h_ieee
-extendhfsf2
-
-## Procedures required by the LLVM backend if f16 is used
-__ashlti3 // wasm specific
-__multi3  // wasm specific
-
-
-## Required an entry point is defined (i.e. 'main')
-
-args__
-
-
-## When -no-crt is defined (and not a wasm target) (mostly due to LLVM)
-_tls_index
-_fltused
-
-
-## Bounds checking procedures (when not disabled with -no-bounds-check)
-
-bounds_check_error
-matrix_bounds_check_error
-slice_expr_error_hi
-slice_expr_error_lo_hi
-multi_pointer_slice_expr_error
-
-
-## Type assertion check
-
-type_assertion_check
-type_assertion_check2 // takes in typeid
-
-
-## Arithmetic
-
-quo_complex32
-quo_complex64
-quo_complex128
-
-mul_quaternion64
-mul_quaternion128
-mul_quaternion256
-
-quo_quaternion64
-quo_quaternion128
-quo_quaternion256
-
-abs_complex32
-abs_complex64
-abs_complex128
-
-abs_quaternion64
-abs_quaternion128
-abs_quaternion256
-
-
-## Comparison
-
-memory_equal
-memory_compare
-memory_compare_zero
-
-cstring_eq
-cstring_ne
-cstring_lt
-cstring_gt
-cstring_le
-cstring_gt
-
-string_eq
-string_ne
-string_lt
-string_gt
-string_le
-string_gt
-
-complex32_eq
-complex32_ne
-complex64_eq
-complex64_ne
-complex128_eq
-complex128_ne
-
-quaternion64_eq
-quaternion64_ne
-quaternion128_eq
-quaternion128_ne
-quaternion256_eq
-quaternion256_ne
-
-
-## Map specific calls
-
-map_seed_from_map_data
-__dynamic_map_check_grow // static map calls
-map_insert_hash_dynamic  // static map calls
-__dynamic_map_get // dynamic map calls
-__dynamic_map_set // dynamic map calls
-
-
-## Dynamic literals ([dynamic]T and map[K]V) (can be disabled with -no-dynamic-literals)
-
-__dynamic_array_reserve
-__dynamic_array_append
-
-__dynamic_map_reserve
-
-
-## Objective-C specific
-
-objc_lookUpClass
-sel_registerName
-objc_allocateClassPair
-
-
-## for-in `string` type
-
-string_decode_rune
-string_decode_last_rune // #reverse for
-
-*/
--- a/base/runtime/dynamic_map_internal.odin
+++ b/base/runtime/dynamic_map_internal.odin
@@ -985,6 +985,9 @@ __dynamic_map_entry :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_
 // IMPORTANT: USED WITHIN THE COMPILER
@(private)
 __dynamic_map_reserve :: proc "odin" (#no_alias m: ^Raw_Map, #no_alias info: ^Map_Info, new_capacity: uint, loc := #caller_location) -> Allocator_Error {
+	if m == nil {
+		return nil
+	}
 	return map_reserve_dynamic(m, info, uintptr(new_capacity), loc)
 }

@@ -1029,3 +1032,32 @@ default_hasher_cstring :: proc "contextless" (data: rawptr, seed: uintptr) -> ui
 	h &= HASH_MASK
 	return uintptr(h) | uintptr(uintptr(h) == 0)
 }
+
+default_hasher_f64 :: proc "contextless" (f: f64, seed: uintptr) -> uintptr {
+	f := f
+	buf: [size_of(f)]u8
+	if f == 0 {
+		return default_hasher(&buf, seed, size_of(buf))
+	}
+	if f != f {
+		// TODO(bill): What should the logic be for NaNs?
+		return default_hasher(&f, seed, size_of(f))
+	}
+	return default_hasher(&f, seed, size_of(f))
+}
+
+default_hasher_complex128 :: proc "contextless" (x, y: f64, seed: uintptr) -> uintptr {
+	seed := seed
+	seed = default_hasher_f64(x, seed)
+	seed = default_hasher_f64(y, seed)
+	return seed
+}
+
+default_hasher_quaternion256 :: proc "contextless" (x, y, z, w: f64, seed: uintptr) -> uintptr {
+	seed := seed
+	seed = default_hasher_f64(x, seed)
+	seed = default_hasher_f64(y, seed)
+	seed = default_hasher_f64(z, seed)
+	seed = default_hasher_f64(w, seed)
+	return seed
+}
--- a/base/runtime/entry_unix_no_crt_amd64.asm
+++ b/base/runtime/entry_unix_no_crt_amd64.asm
@@ -3,6 +3,7 @@ bits 64
 extern _start_odin
 global _start

+section .note.GNU-stack
 section .text

 ;; Entry point for programs that specify -no-crt option
@@ -35,7 +36,7 @@ _start:
    xor rbp, rbp
    ;; Load argc into 1st param reg, argv into 2nd param reg
    pop rdi
-    mov rdx, rsi
+    mov rsi, rsp
    ;; Align stack pointer down to 16-bytes (sysv calling convention)
    and rsp, -16
    ;; Call into odin entry point
--- a/base/runtime/entry_windows.odin
+++ b/base/runtime/entry_windows.odin
@@ -28,7 +28,19 @@ when ODIN_BUILD_MODE == .Dynamic {
 		return true
 	}
 } else when !ODIN_TEST && !ODIN_NO_ENTRY_POINT {
-	when ODIN_ARCH == .i386 || ODIN_NO_CRT {
+	when ODIN_ARCH == .i386 && !ODIN_NO_CRT {
+		// Windows i386 with CRT: libcmt provides mainCRTStartup which calls _main
+		// Note: "c" calling convention adds underscore prefix automatically on i386
+		@(link_name="main", linkage="strong", require)
+		main :: proc "c" (argc: i32, argv: [^]cstring) -> i32 {
+			args__ = argv[:argc]
+			context = default_context()
+			#force_no_inline _startup_runtime()
+			intrinsics.__entry_point()
+			#force_no_inline _cleanup_runtime()
+			return 0
+		}
+	} else when ODIN_NO_CRT {
 		@(link_name="mainCRTStartup", linkage="strong", require)
 		mainCRTStartup :: proc "system" () -> i32 {
 			context = default_context()
--- a/base/runtime/heap_allocator.odin
+++ b/base/runtime/heap_allocator.odin
@@ -2,6 +2,7 @@ package runtime

 import "base:intrinsics"

+@(require_results)
 heap_allocator :: proc() -> Allocator {
 	return Allocator{
 		procedure = heap_allocator_proc,
@@ -70,10 +71,12 @@ heap_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,

 		new_memory = aligned_alloc(new_size, new_alignment, p, old_size, zero_memory) or_return

-		// NOTE: heap_resize does not zero the new memory, so we do it
-		if zero_memory && new_size > old_size {
-			new_region := raw_data(new_memory[old_size:])
-			intrinsics.mem_zero(new_region, new_size - old_size)
+		when ODIN_OS != .Windows {
+			// NOTE: heap_resize does not zero the new memory, so we do it
+			if zero_memory && new_size > old_size {
+				new_region := raw_data(new_memory[old_size:])
+				conditional_mem_zero(new_region, new_size - old_size)
+			}
 		}
 		return
 	}
--- a/base/runtime/heap_allocator_unix.odin
+++ b/base/runtime/heap_allocator_unix.odin
@@ -3,7 +3,7 @@
 package runtime

 when ODIN_OS == .Darwin {
-	foreign import libc "system:System.framework"
+	foreign import libc "system:System"
 } else {
 	foreign import libc "system:c"
 }
--- a/base/runtime/heap_allocator_windows.odin
+++ b/base/runtime/heap_allocator_windows.odin
@@ -1,5 +1,7 @@
 package runtime

+import "../sanitizer"
+
 foreign import kernel32 "system:Kernel32.lib"

@(private="file")
@@ -16,7 +18,10 @@ foreign kernel32 {

 _heap_alloc :: proc "contextless" (size: int, zero_memory := true) -> rawptr {
 	HEAP_ZERO_MEMORY :: 0x00000008
-	return HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY if zero_memory else 0, uint(size))
+	ptr := HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY if zero_memory else 0, uint(size))
+	// NOTE(lucas): asan not guaranteed to unpoison win32 heap out of the box, do it ourselves
+	sanitizer.address_unpoison(ptr, size)
+	return ptr
 }
 _heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
 	if new_size == 0 {
@@ -28,7 +33,10 @@ _heap_resize :: proc "contextless" (ptr: rawptr, new_size: int) -> rawptr {
 	}

 	HEAP_ZERO_MEMORY :: 0x00000008
-	return HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, ptr, uint(new_size))
+	new_ptr := HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, ptr, uint(new_size))
+	// NOTE(lucas): asan not guaranteed to unpoison win32 heap out of the box, do it ourselves
+	sanitizer.address_unpoison(new_ptr, new_size)
+	return new_ptr
 }
 _heap_free :: proc "contextless" (ptr: rawptr) {
 	if ptr == nil {
--- a/base/runtime/internal.odin
+++ b/base/runtime/internal.odin
@@ -16,6 +16,12 @@ RUNTIME_REQUIRE :: false // !ODIN_TILDE
@(private)
 __float16 :: f16 when __ODIN_LLVM_F16_SUPPORTED else u16

+HAS_HARDWARE_SIMD :: false when (ODIN_ARCH == .amd64 || ODIN_ARCH == .i386) && !intrinsics.has_target_feature("sse2") else
+	false when (ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32) && !intrinsics.has_target_feature("neon") else
+	false when (ODIN_ARCH == .wasm64p32 || ODIN_ARCH == .wasm32) && !intrinsics.has_target_feature("simd128") else
+	false when (ODIN_ARCH == .riscv64) && !intrinsics.has_target_feature("v") else
+	true
+

@(private)
 byte_slice :: #force_inline proc "contextless" (data: rawptr, len: int) -> []byte #no_bounds_check {
@@ -117,7 +123,7 @@ mem_copy_non_overlapping :: proc "contextless" (dst, src: rawptr, len: int) -> r

 DEFAULT_ALIGNMENT :: 2*align_of(rawptr)

-mem_alloc_bytes :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
+mem_alloc_bytes :: #force_no_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
 	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
 	if size == 0 || allocator.procedure == nil{
 		return nil, nil
@@ -125,7 +131,7 @@ mem_alloc_bytes :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNM
 	return allocator.procedure(allocator.data, .Alloc, size, alignment, nil, 0, loc)
 }

-mem_alloc :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
+mem_alloc :: #force_no_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
 	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
 	if size == 0 || allocator.procedure == nil {
 		return nil, nil
@@ -133,7 +139,7 @@ mem_alloc :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, a
 	return allocator.procedure(allocator.data, .Alloc, size, alignment, nil, 0, loc)
 }

-mem_alloc_non_zeroed :: #force_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
+mem_alloc_non_zeroed :: #force_no_inline proc(size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, loc := #caller_location) -> ([]byte, Allocator_Error) {
 	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
 	if size == 0 || allocator.procedure == nil {
 		return nil, nil
@@ -141,7 +147,7 @@ mem_alloc_non_zeroed :: #force_inline proc(size: int, alignment: int = DEFAULT_A
 	return allocator.procedure(allocator.data, .Alloc_Non_Zeroed, size, alignment, nil, 0, loc)
 }

-mem_free :: #force_inline proc(ptr: rawptr, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
+mem_free :: #force_no_inline proc(ptr: rawptr, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
 	if ptr == nil || allocator.procedure == nil {
 		return nil
 	}
@@ -149,7 +155,7 @@ mem_free :: #force_inline proc(ptr: rawptr, allocator := context.allocator, loc
 	return err
 }

-mem_free_with_size :: #force_inline proc(ptr: rawptr, byte_count: int, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
+mem_free_with_size :: #force_no_inline proc(ptr: rawptr, byte_count: int, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
 	if ptr == nil || allocator.procedure == nil {
 		return nil
 	}
@@ -157,7 +163,7 @@ mem_free_with_size :: #force_inline proc(ptr: rawptr, byte_count: int, allocator
 	return err
 }

-mem_free_bytes :: #force_inline proc(bytes: []byte, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
+mem_free_bytes :: #force_no_inline proc(bytes: []byte, allocator := context.allocator, loc := #caller_location) -> Allocator_Error {
 	if bytes == nil || allocator.procedure == nil {
 		return nil
 	}
@@ -166,14 +172,14 @@ mem_free_bytes :: #force_inline proc(bytes: []byte, allocator := context.allocat
 }


-mem_free_all :: #force_inline proc(allocator := context.allocator, loc := #caller_location) -> (err: Allocator_Error) {
+mem_free_all :: #force_no_inline proc(allocator := context.allocator, loc := #caller_location) -> (err: Allocator_Error) {
 	if allocator.procedure != nil {
 		_, err = allocator.procedure(allocator.data, .Free_All, 0, 0, nil, 0, loc)
 	}
 	return
 }

-_mem_resize :: #force_inline proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, should_zero: bool, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
+_mem_resize :: #force_no_inline proc(ptr: rawptr, old_size, new_size: int, alignment: int = DEFAULT_ALIGNMENT, allocator := context.allocator, should_zero: bool, loc := #caller_location) -> (data: []byte, err: Allocator_Error) {
 	assert(is_power_of_two_int(alignment), "Alignment must be a power of two", loc)
 	if allocator.procedure == nil {
 		return nil, nil
@@ -224,156 +230,296 @@ non_zero_mem_resize :: proc(ptr: rawptr, old_size, new_size: int, alignment: int
 	return _mem_resize(ptr, old_size, new_size, alignment, allocator, false, loc)
 }

+conditional_mem_zero :: proc "contextless" (data: rawptr, n_: int) #no_bounds_check {
+	// When acquiring memory from the OS for the first time it's likely that the
+	// OS already gives the zero page mapped multiple times for the request. The
+	// actual allocation does not have physical pages allocated to it until those
+	// pages are written to which causes a page-fault. This is often called COW
+	// (Copy on Write)
+	//
+	// You do not want to actually zero out memory in this case because it would
+	// cause a bunch of page faults decreasing the speed of allocations and
+	// increase the amount of actual resident physical memory used.
+	//
+	// Instead a better technique is to check if memory is zeroed before zeroing
+	// it. This turns out to be an important optimization in practice, saving
+	// nearly half (or more) the amount of physical memory used by an application.
+	// This is why every implementation of calloc in libc does this optimization.
+	//
+	// It may seem counter-intuitive but most allocations in an application are
+	// wasted and never used. When you consider something like a [dynamic]T which
+	// always doubles in capacity on resize but you rarely ever actually use the
+	// full capacity of a dynamic array it means you have a lot of resident waste
+	// if you actually zeroed the remainder of the memory.
+	//
+	// Keep in mind the OS is already guaranteed to give you zeroed memory by
+	// mapping in this zero page multiple times so in the best case there is no
+	// need to actually zero anything. As for testing all this memory for a zero
+	// value, it costs nothing because the same zero page is used for the
+	// whole allocation and will exist in L1 cache for the entire zero checking
+	// process.
+	// In short: only store to locations that are not already zero.
+	if n_ <= 0 {
+		return
+	}
+	n := uint(n_)
+
+	n_words := n / size_of(uintptr) // number of whole machine words in the range
+	p_words := ([^]uintptr)(data)[:n_words]
+	p_bytes := ([^]byte)(data)[size_of(uintptr) * n_words:n] // leftover bytes after the last whole word
+	for &p_word in p_words {
+		if p_word != 0 { // read first; write only when the word is non-zero
+			p_word = 0
+		}
+	}
+	for &p_byte in p_bytes {
+		if p_byte != 0 {
+			p_byte = 0
+		}
+	}
+}
+
 memory_equal :: proc "contextless" (x, y: rawptr, n: int) -> bool {
 	switch {
 	case n == 0: return true
 	case x == y: return true
 	}
-	a, b := ([^]byte)(x), ([^]byte)(y)
-	length := uint(n)
+	a, b := cast([^]byte)x, cast([^]byte)y
 
-	for i := uint(0); i < length; i += 1 {
+	n := uint(n)
+	i := uint(0)
+	m := uint(0)
+	// Compare with the widest loads first; `m` is the end offset each stage may read up to.
+	if n >= 8 {
+		when HAS_HARDWARE_SIMD {
+			// Avoid using 256-bit SIMD on platforms where its emulation is
+			// likely to be less than ideal.
+			when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
+				m = n / 32 * 32
+				for /**/; i < m; i += 32 {
+					load_a := intrinsics.unaligned_load(cast(^#simd[32]u8)&a[i])
+					load_b := intrinsics.unaligned_load(cast(^#simd[32]u8)&b[i])
+					ne := intrinsics.simd_lanes_ne(load_a, load_b)
+					if intrinsics.simd_reduce_or(ne) != 0 {
+						return false
+					}
+				}
+			}
+		}
+
+		m = i + (n-i) / 16 * 16 // end offset, not a count: keeps this stage live when i > 0
+		for /**/; i < m; i += 16 {
+			load_a := intrinsics.unaligned_load(cast(^#simd[16]u8)&a[i])
+			load_b := intrinsics.unaligned_load(cast(^#simd[16]u8)&b[i])
+			ne := intrinsics.simd_lanes_ne(load_a, load_b)
+			if intrinsics.simd_reduce_or(ne) != 0 {
+				return false
+			}
+		}
+
+		m = i + (n-i) / size_of(uintptr) * size_of(uintptr) // word-sized loads over what is left
+		for /**/; i < m; i += size_of(uintptr) {
+			if intrinsics.unaligned_load(cast(^uintptr)&a[i]) != intrinsics.unaligned_load(cast(^uintptr)&b[i]) {
+				return false
+			}
+		}
+	}
+
+	for /**/; i < n; i += 1 { // remaining tail, one byte at a time
 		if a[i] != b[i] {
 			return false
 		}
 	}
 	return true
-	
-/*
-
-	when size_of(uint) == 8 {
-		if word_length := length >> 3; word_length != 0 {
-			for _ in 0..<word_length {
-				if intrinsics.unaligned_load((^u64)(a)) != intrinsics.unaligned_load((^u64)(b)) {
-					return false
-				}
-				a = a[size_of(u64):]
-				b = b[size_of(u64):]
-			}
-		}
-		
-		if length & 4 != 0 {
-			if intrinsics.unaligned_load((^u32)(a)) != intrinsics.unaligned_load((^u32)(b)) {
-				return false
-			}
-			a = a[size_of(u32):]
-			b = b[size_of(u32):]
-		}
-		
-		if length & 2 != 0 {
-			if intrinsics.unaligned_load((^u16)(a)) != intrinsics.unaligned_load((^u16)(b)) {
-				return false
-			}
-			a = a[size_of(u16):]
-			b = b[size_of(u16):]
-		}
-		
-		if length & 1 != 0 && a[0] != b[0] {
-			return false	
-		}
-		return true
-	} else {
-		if word_length := length >> 2; word_length != 0 {
-			for _ in 0..<word_length {
-				if intrinsics.unaligned_load((^u32)(a)) != intrinsics.unaligned_load((^u32)(b)) {
-					return false
-				}
-				a = a[size_of(u32):]
-				b = b[size_of(u32):]
-			}
-		}
-		
-		length &= 3
-		
-		if length != 0 {
-			for i in 0..<length {
-				if a[i] != b[i] {
-					return false
-				}
-			}
-		}
-
-		return true
-	}
-*/
-
 }
-memory_compare :: proc "contextless" (a, b: rawptr, n: int) -> int #no_bounds_check {
+
+memory_compare :: proc "contextless" (x, y: rawptr, n: int) -> int #no_bounds_check {
 	switch {
-	case a == b:   return 0
-	case a == nil: return -1
-	case b == nil: return +1
+	case x == y:   return 0
+	case x == nil: return -1 // a nil pointer orders before any non-nil pointer
+	case y == nil: return +1
 	}
+	a, b := cast([^]byte)x, cast([^]byte)y
+	// Returns -1, 0 or +1 from the first differing byte; `m` is the end offset of each stage.
+	n := uint(n)
+	i := uint(0)
+	m := uint(0)
 
-	x := uintptr(a)
-	y := uintptr(b)
-	n := uintptr(n)
-
-	SU :: size_of(uintptr)
-	fast := n/SU + 1
-	offset := (fast-1)*SU
-	curr_block := uintptr(0)
-	if n < SU {
-		fast = 0
-	}
-
-	for /**/; curr_block < fast; curr_block += 1 {
-		va := (^uintptr)(x + curr_block * size_of(uintptr))^
-		vb := (^uintptr)(y + curr_block * size_of(uintptr))^
-		if va ~ vb != 0 {
-			for pos := curr_block*SU; pos < n; pos += 1 {
-				a := (^byte)(x+pos)^
-				b := (^byte)(y+pos)^
-				if a ~ b != 0 {
-					return -1 if (int(a) - int(b)) < 0 else +1
+	when HAS_HARDWARE_SIMD {
+		when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
+			m = n / 32 * 32
+			for /**/; i < m; i += 32 {
+				load_a := intrinsics.unaligned_load(cast(^#simd[32]u8)&a[i])
+				load_b := intrinsics.unaligned_load(cast(^#simd[32]u8)&b[i])
+				comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+				if intrinsics.simd_reduce_or(comparison) != 0 {
+					sentinel: #simd[32]u8 = u8(0xFF)
+					indices := intrinsics.simd_indices(#simd[32]u8)
+					index_select := intrinsics.simd_select(comparison, indices, sentinel)
+					index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select) // lowest differing lane
+					return -1 if a[i+index_reduce] < b[i+index_reduce] else +1
 				}
 			}
 		}
 	}
 
-	for /**/; offset < n; offset += 1 {
-		a := (^byte)(x+offset)^
-		b := (^byte)(y+offset)^
-		if a ~ b != 0 {
-			return -1 if (int(a) - int(b)) < 0 else +1
+	m = i + (n-i) / 16 * 16 // end offset, not a count: keeps this stage live when i > 0
+	for /**/; i < m; i += 16 {
+		load_a := intrinsics.unaligned_load(cast(^#simd[16]u8)&a[i])
+		load_b := intrinsics.unaligned_load(cast(^#simd[16]u8)&b[i])
+		comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+		if intrinsics.simd_reduce_or(comparison) != 0 {
+			sentinel: #simd[16]u8 = u8(0xFF)
+			indices := intrinsics.simd_indices(#simd[16]u8)
+			index_select := intrinsics.simd_select(comparison, indices, sentinel)
+			index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+			return -1 if a[i+index_reduce] < b[i+index_reduce] else +1
 		}
 	}
 
+	// 64-bit SIMD is faster than using a `uintptr` to detect a difference then
+	// re-iterating with the byte-by-byte loop, at least on AMD64.
+	m = i + (n-i) / 8 * 8 // end offset relative to the current position
+	for /**/; i < m; i += 8 {
+		load_a := intrinsics.unaligned_load(cast(^#simd[8]u8)&a[i])
+		load_b := intrinsics.unaligned_load(cast(^#simd[8]u8)&b[i])
+		comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+		if intrinsics.simd_reduce_or(comparison) != 0 {
+			sentinel: #simd[8]u8 = u8(0xFF)
+			indices := intrinsics.simd_indices(#simd[8]u8)
+			index_select := intrinsics.simd_select(comparison, indices, sentinel)
+			index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+			return -1 if a[i+index_reduce] < b[i+index_reduce] else +1
+		}
+	}
+
+	for /**/; i < n; i += 1 { // remaining tail, one byte at a time
+		if a[i] ~ b[i] != 0 {
+			return -1 if int(a[i]) - int(b[i]) < 0 else +1
+		}
+	}
 	return 0
 }

 memory_compare_zero :: proc "contextless" (a: rawptr, n: int) -> int #no_bounds_check {
-	x := uintptr(a)
-	n := uintptr(n)
+	n := uint(n)
+	i := uint(0)
+	m := uint(0) // end offset of the stage currently running
 
-	SU :: size_of(uintptr)
-	fast := n/SU + 1
-	offset := (fast-1)*SU
-	curr_block := uintptr(0)
-	if n < SU {
-		fast = 0
+	// Because we're comparing against zero, we never return -1, as that would
+	// indicate the compared value is less than zero.
+	//
+	// Note that a zero return value here means equality.
+
+	bytes := ([^]u8)(a)
+
+	if n >= 8 {
+		when HAS_HARDWARE_SIMD {
+			when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
+				scanner32: #simd[32]u8
+				m = n / 32 * 32
+				for /**/; i < m; i += 32 {
+					load := intrinsics.unaligned_load(cast(^#simd[32]u8)&bytes[i])
+					ne := intrinsics.simd_lanes_ne(scanner32, load)
+					if intrinsics.simd_reduce_or(ne) != 0 {
+						return 1
+					}
+				}
+			}
+		}
+
+		scanner16: #simd[16]u8
+		m = i + (n-i) / 16 * 16 // end offset, not a count: keeps this stage live when i > 0
+		for /**/; i < m; i += 16 {
+			load := intrinsics.unaligned_load(cast(^#simd[16]u8)&bytes[i])
+			ne := intrinsics.simd_lanes_ne(scanner16, load)
+			if intrinsics.simd_reduce_or(ne) != 0 {
+				return 1
+			}
+		}
+
+		m = i + (n-i) / size_of(uintptr) * size_of(uintptr) // word-sized loads over what is left
+		for /**/; i < m; i += size_of(uintptr) {
+			if intrinsics.unaligned_load(cast(^uintptr)&bytes[i]) != 0 {
+				return 1
+			}
+		}
 	}
 
-	for /**/; curr_block < fast; curr_block += 1 {
-		va := (^uintptr)(x + curr_block * size_of(uintptr))^
-		if va ~ 0 != 0 {
-			for pos := curr_block*SU; pos < n; pos += 1 {
-				a := (^byte)(x+pos)^
-				if a ~ 0 != 0 {
-					return -1 if int(a) < 0 else +1
+	for /**/; i < n; i += 1 { // remaining tail, one byte at a time
+		if bytes[i] != 0 {
+			return 1
+		}
+	}
+	return 0
+}
+
+memory_prefix_length :: proc "contextless" (x, y: rawptr, n: int) -> (idx: int) #no_bounds_check {
+	switch {
+	case x == y:   return n // same memory: all n bytes match
+	case x == nil: return 0
+	case y == nil: return 0
+	}
+	a, b := cast([^]byte)x, cast([^]byte)y
+
+	n := uint(n)
+	i := uint(0)
+	m := uint(0)
+	// Returns the count of leading bytes that match; `m` is the end offset of each stage.
+	when HAS_HARDWARE_SIMD {
+		when ODIN_ARCH == .amd64 && intrinsics.has_target_feature("avx2") {
+			m = n / 32 * 32
+			for /**/; i < m; i += 32 {
+				load_a := intrinsics.unaligned_load(cast(^#simd[32]u8)&a[i])
+				load_b := intrinsics.unaligned_load(cast(^#simd[32]u8)&b[i])
+				comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+				if intrinsics.simd_reduce_or(comparison) != 0 {
+					sentinel: #simd[32]u8 = u8(0xFF)
+					indices := intrinsics.simd_indices(#simd[32]u8)
+					index_select := intrinsics.simd_select(comparison, indices, sentinel)
+					index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select) // lowest differing lane
+					return int(i + index_reduce)
 				}
 			}
 		}
 	}
 
-	for /**/; offset < n; offset += 1 {
-		a := (^byte)(x+offset)^
-		if a ~ 0 != 0 {
-			return -1 if int(a) < 0 else +1
+	m = i + (n-i) / 16 * 16 // end offset, not a count: keeps this stage live when i > 0
+	for /**/; i < m; i += 16 {
+		load_a := intrinsics.unaligned_load(cast(^#simd[16]u8)&a[i])
+		load_b := intrinsics.unaligned_load(cast(^#simd[16]u8)&b[i])
+		comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+		if intrinsics.simd_reduce_or(comparison) != 0 {
+			sentinel: #simd[16]u8 = u8(0xFF)
+			indices := intrinsics.simd_indices(#simd[16]u8)
+			index_select := intrinsics.simd_select(comparison, indices, sentinel)
+			index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+			return int(i + index_reduce)
 		}
 	}
 
-	return 0
+	// 64-bit SIMD is faster than using a `uintptr` to detect a difference then
+	// re-iterating with the byte-by-byte loop, at least on AMD64.
+	m = i + (n-i) / 8 * 8 // end offset relative to the current position
+	for /**/; i < m; i += 8 {
+		load_a := intrinsics.unaligned_load(cast(^#simd[8]u8)&a[i])
+		load_b := intrinsics.unaligned_load(cast(^#simd[8]u8)&b[i])
+		comparison := intrinsics.simd_lanes_ne(load_a, load_b)
+		if intrinsics.simd_reduce_or(comparison) != 0 {
+			sentinel: #simd[8]u8 = u8(0xFF)
+			indices := intrinsics.simd_indices(#simd[8]u8)
+			index_select := intrinsics.simd_select(comparison, indices, sentinel)
+			index_reduce := cast(uint)intrinsics.simd_reduce_min(index_select)
+			return int(i + index_reduce)
+		}
+	}
+
+	for /**/; i < n; i += 1 { // remaining tail, one byte at a time
+		if a[i] ~ b[i] != 0 {
+			return int(i)
+		}
+	}
+	return int(n)
 }

 string_eq :: proc "contextless" (lhs, rhs: string) -> bool {
@@ -396,12 +542,40 @@ string_cmp :: proc "contextless" (a, b: string) -> int {
 	return ret
 }

+
+string16_eq :: proc "contextless" (lhs, rhs: string16) -> bool { // true when both strings hold the same code units
+	x := transmute(Raw_String16)lhs
+	y := transmute(Raw_String16)rhs
+	if x.len != y.len { // different lengths can never be equal; skips the memory comparison
+		return false
+	}
+	return #force_inline memory_equal(x.data, y.data, x.len*size_of(u16)) // len counts u16 units, memory_equal takes bytes
+}
+
+string16_cmp :: proc "contextless" (a, b: string16) -> int { // three-way comparison returning -1, 0 or +1
+	x := transmute(Raw_String16)a
+	y := transmute(Raw_String16)b
+	// NOTE(review): memory_compare orders byte by byte, not by u16 code unit, so on little-endian targets the result can differ from code-unit order — confirm this is intended.
+	ret := memory_compare(x.data, y.data, min(x.len, y.len)*size_of(u16))
+	if ret == 0 && x.len != y.len { // common prefix is identical: the shorter string orders first
+		return -1 if x.len < y.len else +1
+	}
+	return ret
+}
+
 string_ne :: #force_inline proc "contextless" (a, b: string) -> bool { return !string_eq(a, b) }
 string_lt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) < 0 }
 string_gt :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) > 0 }
 string_le :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) <= 0 }
 string_ge :: #force_inline proc "contextless" (a, b: string) -> bool { return string_cmp(a, b) >= 0 }

+string16_ne :: #force_inline proc "contextless" (a, b: string16) -> bool { return !string16_eq(a, b) }
+string16_lt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) < 0 }
+string16_gt :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) > 0 }
+string16_le :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) <= 0 }
+string16_ge :: #force_inline proc "contextless" (a, b: string16) -> bool { return string16_cmp(a, b) >= 0 }
+
+
 cstring_len :: proc "contextless" (s: cstring) -> int {
 	p0 := uintptr((^byte)(s))
 	p := p0
@@ -411,6 +585,16 @@ cstring_len :: proc "contextless" (s: cstring) -> int {
 	return int(p - p0)
 }

+cstring16_len :: proc "contextless" (s: cstring16) -> int { // number of u16 units before the zero terminator; 0 for nil
+	p := ([^]u16)(s)
+	n := 0
+	for p != nil && p[0] != 0 { // the nil check makes a nil `s` yield 0 instead of dereferencing it
+		p = p[1:]
+		n += 1
+	}
+	return n
+}
+
 cstring_to_string :: proc "contextless" (s: cstring) -> string {
 	if s == nil {
 		return ""
@@ -420,6 +604,15 @@ cstring_to_string :: proc "contextless" (s: cstring) -> string {
 	return transmute(string)Raw_String{ptr, n}
 }

+cstring16_to_string16 :: proc "contextless" (s: cstring16) -> string16 { // views `s` as a string16 without copying
+	if s == nil {
+		return ""
+	}
+	ptr := (^u16)(s)
+	n := cstring16_len(s) // length excludes the zero terminator
+	return transmute(string16)Raw_String16{ptr, n} // result points at the same memory as `s`
+}
+

 cstring_eq :: proc "contextless" (lhs, rhs: cstring) -> bool {
 	x := ([^]byte)(lhs)
@@ -462,6 +655,46 @@ cstring_gt :: #force_inline proc "contextless" (a, b: cstring) -> bool { return
 cstring_le :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) <= 0 }
 cstring_ge :: #force_inline proc "contextless" (a, b: cstring) -> bool { return cstring_cmp(a, b) >= 0 }

+cstring16_eq :: proc "contextless" (lhs, rhs: cstring16) -> bool { // true when both hold the same code units, or are the same pointer
+	x := ([^]u16)(lhs)
+	y := ([^]u16)(rhs)
+	if x == y { // same pointer, which includes the case where both are nil
+		return true
+	}
+	if (x == nil) ~ (y == nil) { // exactly one side is nil
+		return false
+	}
+	xn := cstring16_len(lhs)
+	yn := cstring16_len(rhs)
+	if xn != yn {
+		return false
+	}
+	return #force_inline memory_equal(x, y, xn*size_of(u16)) // lengths are in u16 units, memory_equal takes bytes
+}
+
+cstring16_cmp :: proc "contextless" (lhs, rhs: cstring16) -> int { // three-way comparison returning -1, 0 or +1
+	x := ([^]u16)(lhs)
+	y := ([^]u16)(rhs)
+	if x == y { // same pointer, which includes the case where both are nil
+		return 0
+	}
+	if (x == nil) ~ (y == nil) { // exactly one side is nil: nil orders first
+		return -1 if x == nil else +1
+	}
+	xn := cstring16_len(lhs)
+	yn := cstring16_len(rhs)
+	ret := memory_compare(x, y, min(xn, yn)*size_of(u16)) // NOTE(review): byte-wise order, not u16 code-unit order — confirm intended
+	if ret == 0 && xn != yn { // common prefix is identical: the shorter string orders first
+		return -1 if xn < yn else +1
+	}
+	return ret
+}
+
+cstring16_ne :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return !cstring16_eq(a, b) }
+cstring16_lt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) < 0 }
+cstring16_gt :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) > 0 }
+cstring16_le :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) <= 0 }
+cstring16_ge :: #force_inline proc "contextless" (a, b: cstring16) -> bool { return cstring16_cmp(a, b) >= 0 }

 complex32_eq :: #force_inline proc "contextless"  (a, b: complex32)  -> bool { return real(a) == real(b) && imag(a) == imag(b) }
 complex32_ne :: #force_inline proc "contextless"  (a, b: complex32)  -> bool { return real(a) != real(b) || imag(a) != imag(b) }
@@ -483,7 +716,7 @@ quaternion256_eq :: #force_inline proc "contextless" (a, b: quaternion256) -> bo
 quaternion256_ne :: #force_inline proc "contextless" (a, b: quaternion256) -> bool { return real(a) != real(b) || imag(a) != imag(b) || jmag(a) != jmag(b) || kmag(a) != kmag(b) }


-string_decode_rune :: #force_inline proc "contextless" (s: string) -> (rune, int) {
+string_decode_rune :: proc "contextless" (s: string) -> (rune, int) {
 	// NOTE(bill): Duplicated here to remove dependency on package unicode/utf8

 	@(static, rodata) accept_sizes := [256]u8{
@@ -597,6 +830,68 @@ string_decode_last_rune :: proc "contextless" (s: string) -> (rune, int) {
 	return r, size
 }

+
+string16_decode_rune :: proc "contextless" (s: string16) -> (rune, int) {
+	REPLACEMENT_CHAR :: '\ufffd'
+	_surr1           :: 0xd800  // start of the range accepted as the first unit of a pair
+	_surr2           :: 0xdc00  // start of the range accepted as the second unit of a pair
+	_surr3           :: 0xe000  // one past the end of the second-unit range
+	_surr_self       :: 0x10000 // offset added when combining a pair
+	// Decodes the first rune of `s`; returns it with the number of u16 units consumed.
+	r := rune(REPLACEMENT_CHAR)
+
+	if len(s) < 1 { // empty input: replacement character with width 0
+		return r, 0
+	}
+
+	w := 1
+	switch c := s[0]; {
+	case c < _surr1, _surr3 <= c: // not a surrogate: the unit is the rune
+		r = rune(c)
+	case _surr1 <= c && c < _surr2 && 1 < len(s) &&
+		_surr2 <= s[1] && s[1] < _surr3:
+		r1, r2 := rune(c), rune(s[1])
+		if _surr1 <= r1 && r1 < _surr2 && _surr2 <= r2 && r2 < _surr3 { // repeats the ranges already tested by the case
+			r = (r1-_surr1)<<10 | (r2 - _surr2) + _surr_self
+		}
+		w += 1
+	} // any other unit falls through with the replacement character and width 1
+	return r, w
+}
+
+// Decodes the last rune of `s`, returning it and its width in UTF-16 code units.
+// Empty input yields (U+FFFD, 0); an unpaired surrogate yields (U+FFFD, 1), the
+// same width `string16_decode_rune` reports, so a valid preceding unit is kept.
+string16_decode_last_rune :: proc "contextless" (s: string16) -> (rune, int) {
+	REPLACEMENT_CHAR :: '\ufffd'
+	_surr1           :: 0xd800
+	_surr2           :: 0xdc00
+	_surr3           :: 0xe000
+	_surr_self       :: 0x10000
+
+	r := rune(REPLACEMENT_CHAR)
+	if len(s) < 1 {
+		return r, 0
+	}
+
+	n := len(s)-1
+	c := s[n]
+	w := 1
+	if _surr2 <= c && c < _surr3 {
+		// Low surrogate: consume two units only when a high surrogate precedes it.
+		if n >= 1 && _surr1 <= s[n-1] && s[n-1] < _surr2 {
+			r1, r2 := rune(s[n-1]), rune(c)
+			r = (r1-_surr1)<<10 | (r2 - _surr2) + _surr_self
+			w = 2
+		}
+	} else if c < _surr1 || _surr3 <= c {
+		r = rune(c)
+	}
+	return r, w
+}
+
+
+
 abs_complex32 :: #force_inline proc "contextless" (x: complex32) -> f16 {
 	p, q := abs(real(x)), abs(imag(x))
 	if p < q {
@@ -1106,3 +1401,11 @@ __read_bits :: proc "contextless" (dst, src: [^]byte, offset: uintptr, size: uin
 		dst[j>>3]  |= the_bit<<(j&7)
 	}
 }
+
+when .Address in ODIN_SANITIZER_FLAGS {
+	foreign {
+		@(require)
+		__asan_unpoison_memory_region :: proc "system" (address: rawptr, size: uint) ---
+	}
+}
+
--- a/base/runtime/os_specific_bsd.odin
+++ b/base/runtime/os_specific_bsd.odin
@@ -9,7 +9,7 @@ foreign libc {
 	@(link_name="write")
 	_unix_write :: proc(fd: i32, buf: rawptr, size: int) -> int ---

-	when ODIN_OS == .NetBSD {
+	when ODIN_OS == .NetBSD || ODIN_OS == .OpenBSD {
 		@(link_name="__errno") __error :: proc() -> ^i32 ---
 	} else {
 		__error :: proc() -> ^i32 ---
--- a/base/runtime/print.odin
+++ b/base/runtime/print.odin
@@ -293,7 +293,14 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) {
 		print_string("quaternion")
 		print_u64(u64(8*ti.size))
 	case Type_Info_String:
+		if info.is_cstring {
+			print_byte('c')
+		}
 		print_string("string")
+		switch info.encoding {
+		case .UTF_8:  /**/
+		case .UTF_16: print_string("16")
+		}
 	case Type_Info_Boolean:
 		switch ti.id {
 		case bool: print_string("bool")
@@ -403,7 +410,7 @@ print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) {
 		print_string("struct ")
 		if .packed    in info.flags { print_string("#packed ") }
 		if .raw_union in info.flags { print_string("#raw_union ") }
-		if .no_copy   in info.flags { print_string("#no_copy ") }
+		// if .no_copy   in info.flags { print_string("#no_copy ") }
 		if .align in info.flags {
 			print_string("#align(")
 			print_u64(u64(ti.align))
--- a/base/runtime/procs_darwin.odin
+++ b/base/runtime/procs_darwin.odin
@@ -1,22 +1,56 @@
 #+private
 package runtime

+@(priority_index=-1e5)
+foreign import ObjC "system:objc"
+
@(priority_index=-1e6)
-foreign import "system:Foundation.framework"
+foreign import libSystem "system:System"

 import "base:intrinsics"

-objc_id :: ^intrinsics.objc_object
+objc_id    :: ^intrinsics.objc_object
 objc_Class :: ^intrinsics.objc_class
-objc_SEL :: ^intrinsics.objc_selector
+objc_SEL   :: ^intrinsics.objc_selector
+objc_Ivar  :: ^intrinsics.objc_ivar
+objc_BOOL  :: bool

-foreign Foundation {
-	objc_lookUpClass :: proc "c" (name: cstring) -> objc_Class ---
-	sel_registerName :: proc "c" (name: cstring) -> objc_SEL ---
-	objc_allocateClassPair :: proc "c" (superclass: objc_Class, name: cstring, extraBytes: uint) -> objc_Class ---
-
-	objc_msgSend        :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) ---
-	objc_msgSend_fpret  :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) -> f64 ---
-	objc_msgSend_fp2ret :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) -> complex128 ---
-	objc_msgSend_stret  :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) ---
+objc_super :: struct {
+	receiver: 	 objc_id,
+	super_class: objc_Class,
+}
+
+objc_IMP :: proc "c" (object: objc_id, sel: objc_SEL, #c_vararg args: ..any) -> objc_id
+
+foreign ObjC {
+	sel_registerName :: proc "c" (name: cstring) -> objc_SEL ---
+
+	objc_msgSend             :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) ---
+	objc_msgSend_fpret       :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) -> f64 ---
+	objc_msgSend_fp2ret      :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) -> complex128 ---
+	objc_msgSend_stret       :: proc "c" (self: objc_id, op: objc_SEL, #c_vararg args: ..any) ---
+
+	// See: https://github.com/opensource-apple/objc4/blob/cd5e62a5597ea7a31dccef089317abb3a661c154/runtime/objc-abi.h#L111
+	objc_msgSendSuper2       :: proc "c" (super: rawptr, op: objc_SEL, #c_vararg args: ..any) -> objc_id ---
+	objc_msgSendSuper2_stret :: proc "c" (super: ^objc_super, op: objc_SEL, #c_vararg args: ..any) ---
+
+
+	objc_lookUpClass          :: proc "c" (name: cstring) -> objc_Class ---
+	objc_allocateClassPair    :: proc "c" (superclass: objc_Class, name: cstring, extraBytes: uint) -> objc_Class ---
+	objc_registerClassPair    :: proc "c" (cls : objc_Class) ---
+	class_addMethod           :: proc "c" (cls: objc_Class, name: objc_SEL, imp: objc_IMP, types: cstring) -> objc_BOOL ---
+	class_addIvar             :: proc "c" (cls: objc_Class, name: cstring, size: uint, alignment: u8, types: cstring) -> objc_BOOL ---
+	class_getInstanceVariable :: proc "c" (cls : objc_Class, name: cstring) -> objc_Ivar ---
+	class_getInstanceSize     :: proc "c" (cls : objc_Class) -> uint ---
+	class_getSuperclass       :: proc "c" (cls : objc_Class) -> objc_Class ---
+	ivar_getOffset            :: proc "c" (v: objc_Ivar) -> uintptr ---
+	object_getClass           :: proc "c" (obj: objc_id) -> objc_Class ---
+}
+
+foreign libSystem {
+	_NSConcreteGlobalBlock: intrinsics.objc_class
+	_NSConcreteStackBlock:  intrinsics.objc_class
+
+	_Block_object_assign  :: proc "c" (rawptr, rawptr, i32) ---
+	_Block_object_dispose :: proc "c" (rawptr, i32) ---
 }
--- a/base/runtime/procs_js.odin
+++ b/base/runtime/procs_js.odin
@@ -3,8 +3,8 @@ package runtime

 init_default_context_for_js: Context
@(init, private="file")
-init_default_context :: proc() {
-	init_default_context_for_js = context
+init_default_context :: proc "contextless" () {
+	__init_context(&init_default_context_for_js)
 }

@(export)
--- a/base/runtime/random_generator.odin
+++ b/base/runtime/random_generator.odin
@@ -97,7 +97,7 @@ default_random_generator_proc :: proc(data: rawptr, mode: Random_Generator_Mode,
 			for &v in p {
 				if pos == 0 {
 					val = read_u64(r)
-					pos = 7
+					pos = 8
 				}
 				v = byte(val)
 				val >>= 8
--- a/base/runtime/thread_management.odin
+++ b/base/runtime/thread_management.odin
@@ -1,10 +1,14 @@
 package runtime

-Thread_Local_Cleaner :: #type proc "odin" ()
+Thread_Local_Cleaner_Odin :: #type proc "odin" ()
+Thread_Local_Cleaner_Contextless :: #type proc "contextless" ()
+
+Thread_Local_Cleaner :: union #shared_nil {Thread_Local_Cleaner_Odin, Thread_Local_Cleaner_Contextless}

@(private="file")
 thread_local_cleaners: [8]Thread_Local_Cleaner

+
 // Add a procedure that will be run at the end of a thread for the purpose of
 // deallocating state marked as `thread_local`.
 //
@@ -29,6 +33,9 @@ run_thread_local_cleaners :: proc "odin" () {
 		if p == nil {
 			break
 		}
-		p()
+		switch v in p {
+		case Thread_Local_Cleaner_Odin:        v()
+		case Thread_Local_Cleaner_Contextless: v()
+		}
 	}
 }
--- a/base/runtime/wasm_allocator.odin
+++ b/base/runtime/wasm_allocator.odin
@@ -89,10 +89,12 @@ wasm_allocator_init :: proc(a: ^WASM_Allocator, alignment: uint = 8) {

 global_default_wasm_allocator_data: WASM_Allocator

+@(require_results)
 default_wasm_allocator :: proc() -> Allocator {
 	return wasm_allocator(&global_default_wasm_allocator_data)
 }

+@(require_results)
 wasm_allocator :: proc(a: ^WASM_Allocator) -> Allocator {
 	return {
 		data      = a,
--- a/base/sanitizer/address.odin
+++ b/base/sanitizer/address.odin
@@ -0,0 +1,601 @@
+#+no-instrumentation
+package sanitizer
+
+Address_Death_Callback :: #type proc "c" (pc: rawptr, bp: rawptr, sp: rawptr, addr: rawptr, is_write: i32, access_size: uint)
+
+@(private="file")
+ASAN_ENABLED :: .Address in ODIN_SANITIZER_FLAGS
+
+@(private="file")
+@(default_calling_convention="system")
+foreign {
+	__asan_poison_memory_region      :: proc(address: rawptr, size: uint) ---
+	__asan_unpoison_memory_region    :: proc(address: rawptr, size: uint) ---
+	__sanitizer_set_death_callback   :: proc(callback: Address_Death_Callback) ---
+	__asan_region_is_poisoned        :: proc(begin: rawptr, size: uint) -> rawptr ---
+	__asan_address_is_poisoned       :: proc(addr: rawptr) -> i32 ---
+	__asan_describe_address          :: proc(addr: rawptr) ---
+	__asan_report_present            :: proc() -> i32 ---
+	__asan_get_report_pc             :: proc() -> rawptr ---
+	__asan_get_report_bp             :: proc() -> rawptr ---
+	__asan_get_report_sp             :: proc() -> rawptr ---
+	__asan_get_report_address        :: proc() -> rawptr ---
+	__asan_get_report_access_type    :: proc() -> i32 ---
+	__asan_get_report_access_size    :: proc() -> uint ---
+	__asan_get_report_description    :: proc() -> cstring ---
+	__asan_locate_address            :: proc(addr: rawptr, name: rawptr, name_size: uint, region_address: ^rawptr, region_size: ^uint) -> cstring ---
+	__asan_get_alloc_stack           :: proc(addr: rawptr, trace: rawptr, size: uint, thread_id: ^i32) -> uint ---
+	__asan_get_free_stack            :: proc(addr: rawptr, trace: rawptr, size: uint, thread_id: ^i32) -> uint ---
+	__asan_get_shadow_mapping        :: proc(shadow_scale: ^uint, shadow_offset: ^uint) ---
+	__asan_print_accumulated_stats   :: proc() ---
+	__asan_get_current_fake_stack    :: proc() -> rawptr ---
+	__asan_addr_is_in_fake_stack     :: proc(fake_stack: rawptr, addr: rawptr, beg: ^rawptr, end: ^rawptr) -> rawptr ---
+	__asan_handle_no_return          :: proc() ---
+	__asan_update_allocation_context :: proc(addr: rawptr) -> i32 ---
+}
+
+Address_Access_Type :: enum {
+	none,
+	read,
+	write,
+}
+
+Address_Located_Address :: struct {
+	category: string,
+	name: string,
+	region: []byte,
+}
+
+Address_Shadow_Mapping :: struct {
+	scale: uint,
+	offset: uint,
+}
+
+/*
+Marks a slice as unaddressable
+
+Code instrumented with `-sanitize:address` is forbidden from accessing any address
+within the slice. This procedure is not thread-safe because no two threads can
+poison or unpoison memory in the same memory region simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_poison_slice :: proc "contextless" (region: $T/[]$E) {
+	when ASAN_ENABLED {
+		__asan_poison_memory_region(raw_data(region), size_of(E) * len(region))
+	}
+}
+
+/*
+Marks a slice as addressable
+
+Code instrumented with `-sanitize:address` is allowed to access any address
+within the slice again. This procedure is not thread-safe because no two threads
+can poison or unpoison memory in the same memory region simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_unpoison_slice :: proc "contextless" (region: $T/[]$E) {
+	when ASAN_ENABLED {
+		__asan_unpoison_memory_region(raw_data(region), size_of(E) * len(region))
+	}
+}
+
+/*
+Marks a pointer as unaddressable
+
+Code instrumented with `-sanitize:address` is forbidden from accessing any address
+within the region the pointer points to. This procedure is not thread-safe because no
+two threads can poison or unpoison memory in the same memory region simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_poison_ptr :: proc "contextless" (ptr: ^$T) {
+	when ASAN_ENABLED {
+		__asan_poison_memory_region(ptr, size_of(T))
+	}
+}
+
+/*
+Marks a pointer as addressable
+
+Code instrumented with `-sanitize:address` is allowed to access any address
+within the region the pointer points to again. This procedure is not thread-safe
+because no two threads can poison or unpoison memory in the same memory region
+simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_unpoison_ptr :: proc "contextless" (ptr: ^$T) {
+	when ASAN_ENABLED {
+		__asan_unpoison_memory_region(ptr, size_of(T))
+	}
+}
+
+/*
+Marks the region covering `[ptr, ptr+len)` as unaddressable
+
+Code instrumented with `-sanitize:address` is forbidden from accessing any address
+within the region. This procedure is not thread-safe because no two threads can
+poison or unpoison memory in the same memory region simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_poison_rawptr :: proc "contextless" (ptr: rawptr, len: int) {
+	when ASAN_ENABLED {
+		assert_contextless(len >= 0)
+		__asan_poison_memory_region(ptr, uint(len))
+	}
+}
+
+/*
+Marks the region covering `[ptr, ptr+len)` as unaddressable
+
+Code instrumented with `-sanitize:address` is forbidden from accessing any address
+within the region. This procedure is not thread-safe because no two threads can
+poison or unpoison memory in the same memory region simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_poison_rawptr_uint :: proc "contextless" (ptr: rawptr, len: uint) {
+	when ASAN_ENABLED {
+		__asan_poison_memory_region(ptr, len)
+	}
+}
+
+/*
+Marks the region covering `[ptr, ptr+len)` as addressable
+
+Code instrumented with `-sanitize:address` is allowed to access any address
+within the region again. This procedure is not thread-safe because no two
+threads can poison or unpoison memory in the same memory region simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_unpoison_rawptr :: proc "contextless" (ptr: rawptr, len: int) {
+	when ASAN_ENABLED {
+		assert_contextless(len >= 0)
+		__asan_unpoison_memory_region(ptr, uint(len))
+	}
+}
+
+/*
+Marks the region covering `[ptr, ptr+len)` as addressable
+
+Code instrumented with `-sanitize:address` is allowed to access any address
+within the region again. This procedure is not thread-safe because no two
+threads can poison or unpoison memory in the same memory region simultaneously.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_unpoison_rawptr_uint :: proc "contextless" (ptr: rawptr, len: uint) {
+	when ASAN_ENABLED {
+		__asan_unpoison_memory_region(ptr, len)
+	}
+}
+
+address_poison :: proc {
+	address_poison_slice,
+	address_poison_ptr,
+	address_poison_rawptr,
+	address_poison_rawptr_uint,
+}
+
+address_unpoison :: proc {
+	address_unpoison_slice,
+	address_unpoison_ptr,
+	address_unpoison_rawptr,
+	address_unpoison_rawptr_uint,
+}
+
+/*
+Registers a callback to be run when asan detects a memory error right before terminating
+the process.
+
+This can be used for logging and/or debugging purposes.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_set_death_callback :: proc "contextless" (callback: Address_Death_Callback) {
+	when ASAN_ENABLED {
+		__sanitizer_set_death_callback(callback)
+	}
+}
+
+/*
+Checks if the memory region covered by the slice is poisoned.
+
+If it is poisoned this procedure returns the address which would result
+in an asan error.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_region_is_poisoned_slice :: proc "contextless" (region: $T/[]$E) -> rawptr {
+	when ASAN_ENABLED {
+		return __asan_region_is_poisoned(raw_data(region), size_of(E) * len(region))
+	} else {
+		return nil
+	}
+}
+
+/*
+Checks if the memory region pointed to by the pointer is poisoned.
+
+If it is poisoned this procedure returns the address which would result
+in an asan error.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_region_is_poisoned_ptr :: proc "contextless" (ptr: ^$T) -> rawptr {
+	when ASAN_ENABLED {
+		return __asan_region_is_poisoned(ptr, size_of(T))
+	} else {
+		return nil
+	}
+}
+
+/*
+Checks if the memory region covered by `[ptr, ptr+len)` is poisoned.
+
+If it is poisoned this procedure returns the address which would result
+in an asan error.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_region_is_poisoned_rawptr :: proc "contextless" (region: rawptr, len: int) -> rawptr {
+	when ASAN_ENABLED {
+		assert_contextless(len >= 0)
+		return __asan_region_is_poisoned(region, uint(len))
+	} else {
+		return nil
+	}
+}
+
+/*
+Checks if the memory region covered by `[ptr, ptr+len)` is poisoned.
+
+If it is poisoned this procedure returns the address which would result
+in an asan error.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_region_is_poisoned_rawptr_uint :: proc "contextless" (region: rawptr, len: uint) -> rawptr {
+	when ASAN_ENABLED {
+		return __asan_region_is_poisoned(region, len)
+	} else {
+		return nil
+	}
+}
+
+
+address_region_is_poisoned :: proc {
+	address_region_is_poisoned_slice,
+	address_region_is_poisoned_ptr,
+	address_region_is_poisoned_rawptr,
+	address_region_is_poisoned_rawptr_uint,
+}
+
+/*
+Checks if the address is poisoned.
+
+If it is poisoned this procedure returns `true`, otherwise it returns
+`false`.
+
+When asan is not enabled this procedure returns `false`.
+*/
+@(no_sanitize_address)
+address_is_poisoned :: proc "contextless" (address: rawptr) -> bool {
+	when ASAN_ENABLED {
+		return __asan_address_is_poisoned(address) != 0
+	} else {
+		return false
+	}
+}
+
+/*
+Describes the sanitizer state for an address.
+
+This procedure prints the description to the asan report output (`stderr` by default).
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_describe_address :: proc "contextless" (address: rawptr) {
+	when ASAN_ENABLED {
+		__asan_describe_address(address)
+	}
+}
+
+/*
+Returns `true` if an asan error has occurred, otherwise it returns
+`false`.
+
+When asan is not enabled this procedure returns `false`.
+*/
+@(no_sanitize_address)
+address_report_present :: proc "contextless" () -> bool {
+	when ASAN_ENABLED {
+		return __asan_report_present() != 0
+	} else {
+		return false
+	}
+}
+
+/*
+Returns the program counter register value of an asan error.
+
+If no asan error has occurred `nil` is returned.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_get_report_pc :: proc "contextless" () -> rawptr {
+	when ASAN_ENABLED {
+		return __asan_get_report_pc()
+	} else {
+		return nil
+	}
+}
+
+/*
+Returns the base pointer register value of an asan error.
+
+If no asan error has occurred `nil` is returned.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_get_report_bp :: proc "contextless" () -> rawptr {
+	when ASAN_ENABLED {
+		return __asan_get_report_bp()
+	} else {
+		return nil
+	}
+}
+
+/*
+Returns the stack pointer register value of an asan error.
+
+If no asan error has occurred `nil` is returned.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_get_report_sp :: proc "contextless" () -> rawptr {
+	when ASAN_ENABLED {
+		return __asan_get_report_sp()
+	} else {
+		return nil
+	}
+}
+
+/*
+Returns the report buffer address of an asan error.
+
+If no asan error has occurred `nil` is returned.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_get_report_address :: proc "contextless" () -> rawptr {
+	when ASAN_ENABLED {
+		return __asan_get_report_address()
+	} else {
+		return nil
+	}
+}
+
+/*
+Returns the address access type of an asan error.
+
+If no asan error has occurred `.none` is returned.
+
+When asan is not enabled this procedure returns `.none`.
+*/
+@(no_sanitize_address)
+address_get_report_access_type :: proc "contextless" () -> Address_Access_Type {
+	when !ASAN_ENABLED {
+		return .none
+	} else {
+		if !address_report_present() { // nothing has been reported, so there is no access to classify
+			return .none
+		}
+		return .read if __asan_get_report_access_type() == 0 else .write // zero maps to `.read`, anything else to `.write`
+	}
+}
+
+/*
+Returns the access size of an asan error.
+
+If no asan error has occurred `0` is returned.
+
+When asan is not enabled this procedure returns `0`.
+*/
+@(no_sanitize_address)
+address_get_report_access_size :: proc "contextless" () -> uint {
+	when ASAN_ENABLED {
+		return __asan_get_report_access_size()
+	} else {
+		return 0
+	}
+}
+
+/*
+Returns the bug description of an asan error.
+
+If no asan error has occurred an empty string is returned.
+
+When asan is not enabled this procedure returns an empty string.
+*/
+@(no_sanitize_address)
+address_get_report_description :: proc "contextless" () -> string {
+	when ASAN_ENABLED {
+		return string(__asan_get_report_description())
+	} else {
+		return ""
+	}
+}
+
+/*
+Returns asan information about the address provided, writing the variable name into `data`.
+
+The information provided includes:
+* The category of the address, i.e. stack, global, heap, etc.
+* The name of the variable this address belongs to (the returned string aliases `data`)
+* The memory region of the address
+
+When asan is not enabled this procedure returns zero initialised values.
+*/
+@(no_sanitize_address)
+address_locate_address :: proc "contextless" (addr: rawptr, data: []byte) -> Address_Located_Address {
+	when ASAN_ENABLED {
+		out_addr: rawptr
+		out_size: uint
+		str := __asan_locate_address(addr, raw_data(data), len(data), &out_addr, &out_size)
+		return { string(str), string(cstring(raw_data(data))), (cast([^]byte)out_addr)[:out_size] }
+	} else {
+		return { "", "", {} }
+	}
+}
+
+/*
+Returns the allocation stack trace and thread id for a heap address.
+
+The stack trace is filled into the `data` slice; the returned slice covers only the frames that were stored.
+
+When asan is not enabled this procedure returns zero initialised values.
+*/
+@(no_sanitize_address)
+address_get_alloc_stack_trace :: proc "contextless" (addr: rawptr, data: []rawptr) -> ([]rawptr, int) {
+	when ASAN_ENABLED {
+		out_thread: i32
+		frame_count := __asan_get_alloc_stack(addr, raw_data(data), len(data), &out_thread)
+		return data[:frame_count], int(out_thread) // entries past `frame_count` were never written
+	} else {
+		return {}, 0
+	}
+}
+
+/*
+Returns the free stack trace and thread id for a heap address.
+
+The stack trace is filled into the `data` slice; the returned slice covers only the frames that were stored.
+
+When asan is not enabled this procedure returns zero initialised values.
+*/
+@(no_sanitize_address)
+address_get_free_stack_trace :: proc "contextless" (addr: rawptr, data: []rawptr) -> ([]rawptr, int) {
+	when ASAN_ENABLED {
+		out_thread: i32
+		frame_count := __asan_get_free_stack(addr, raw_data(data), len(data), &out_thread)
+		return data[:frame_count], int(out_thread) // entries past `frame_count` were never written
+	} else {
+		return {}, 0
+	}
+}
+
+/*
+Returns the current asan shadow memory mapping.
+
+When asan is not enabled this procedure returns a zero initialised value.
+*/
+@(no_sanitize_address)
+address_get_shadow_mapping :: proc "contextless" () -> Address_Shadow_Mapping {
+	when !ASAN_ENABLED {
+		return {}
+	} else {
+		mapping: Address_Shadow_Mapping
+		__asan_get_shadow_mapping(&mapping.scale, &mapping.offset) // both fields are passed by pointer as outputs
+		return mapping
+	}
+}
+
+/*
+Prints asan statistics to `stderr`
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_print_accumulated_stats :: proc "contextless" () {
+	when ASAN_ENABLED {
+		__asan_print_accumulated_stats()
+	}
+}
+
+/*
+Returns the address of the current fake stack used by asan.
+
+This pointer can be then used for `address_is_in_fake_stack`.
+
+When asan is not enabled this procedure returns `nil`.
+*/
+@(no_sanitize_address)
+address_get_current_fake_stack :: proc "contextless" () -> rawptr {
+	when ASAN_ENABLED {
+		return __asan_get_current_fake_stack()
+	} else {
+		return nil
+	}
+}
+
+/*
+Returns if an address belongs to a given fake stack and if so the region of the fake frame.
+
+When asan is not enabled this procedure returns zero initialised values.
+*/
+@(no_sanitize_address)
+address_is_in_fake_stack :: proc "contextless" (fake_stack: rawptr, addr: rawptr) -> ([]byte, bool) {
+	when !ASAN_ENABLED {
+		return {}, false
+	} else {
+		frame_lo, frame_hi: rawptr
+		if __asan_addr_is_in_fake_stack(fake_stack, addr, &frame_lo, &frame_hi) != nil {
+			frame_len := uintptr(frame_hi) - uintptr(frame_lo) // byte length of [frame_lo, frame_hi)
+			return (cast([^]byte)frame_lo)[:frame_len], true
+		}
+		return {}, false
+	}
+}
+
+/*
+Performs shadow memory cleanup for the current thread before a procedure with no return is called
+i.e. a procedure such as `panic` and `os.exit`.
+
+When asan is not enabled this procedure does nothing.
+*/
+@(no_sanitize_address)
+address_handle_no_return :: proc "contextless" () {
+	when ASAN_ENABLED {
+		__asan_handle_no_return()
+	}
+}
+
+/*
+Updates the allocation stack trace for the given address.
+
+Returns `true` if successful, otherwise it returns `false`.
+
+When asan is not enabled this procedure returns `false`.
+*/
+@(no_sanitize_address)
+address_update_allocation_context :: proc "contextless" (addr: rawptr) -> bool {
+	when ASAN_ENABLED {
+		return __asan_update_allocation_context(addr) != 0
+	} else {
+		return false
+	}
+}
+
--- a/base/sanitizer/doc.odin
+++ b/base/sanitizer/doc.odin
@@ -0,0 +1,38 @@
+/*
+The `sanitizer` package implements various procedures for interacting with sanitizers
+from user code.
+
+An odin project can be linked with various sanitizers to help identify various different
+bugs. These sanitizers are:
+
+## Address
+
+Enabled with `-sanitize:address` when building an odin project.
+
+The address sanitizer (asan) is a runtime memory error detector used to help find common memory
+related bugs. Typically asan interacts with libc but Odin code can be marked up to interact
+with the asan runtime to extend the memory error detection outside of libc using this package.
+For more information about asan see: https://clang.llvm.org/docs/AddressSanitizer.html
+
+Procedures can be made exempt from asan when marked up with `@(no_sanitize_address)`.
+
+## Memory
+
+Enabled with `-sanitize:memory` when building an odin project.
+
+The memory sanitizer is another runtime memory error detector with the sole purpose to catch the
+use of uninitialized memory. This is not a very common bug in Odin as by default everything is
+set to zero when initialised (ZII).
+For more information about the memory sanitizer see: https://clang.llvm.org/docs/MemorySanitizer.html
+
+## Thread
+
+Enabled with `-sanitize:thread` when building an odin project.
+
+The thread sanitizer is a runtime data race detector. It can be used to detect if multiple threads
+are concurrently writing and accessing a memory location without proper synchronisation.
+For more information about the thread sanitizer see: https://clang.llvm.org/docs/ThreadSanitizer.html
+
+*/
+package sanitizer
+
--- a/base/sanitizer/memory.odin
+++ b/base/sanitizer/memory.odin
@@ -0,0 +1,74 @@
+#+no-instrumentation
+package sanitizer
+
+@(private="file")
+MSAN_ENABLED :: .Memory in ODIN_SANITIZER_FLAGS
+
+@(private="file")
+@(default_calling_convention="system")
+foreign {
+	__msan_unpoison :: proc(addr: rawptr, size: uint) ---
+}
+
+/*
+Marks a slice as fully initialized.
+
+Code instrumented with `-sanitize:memory` will be permitted to access any
+address within the slice as if it had already been initialized.
+
+When msan is not enabled this procedure does nothing.
+*/
+memory_unpoison_slice :: proc "contextless" (region: $T/[]$E) {
+	when MSAN_ENABLED {
+		__msan_unpoison(raw_data(region),  size_of(E) * len(region))
+	}
+}
+
+/*
+Marks a pointer as fully initialized.
+
+Code instrumented with `-sanitize:memory` will be permitted to access memory
+within the region the pointer points to as if it had already been initialized.
+
+When msan is not enabled this procedure does nothing.
+*/
+memory_unpoison_ptr :: proc "contextless" (ptr: ^$T) {
+	when MSAN_ENABLED {
+		__msan_unpoison(ptr, size_of(T))
+	}
+}
+
+/*
+Marks the region covering `[ptr, ptr+len)` as fully initialized.
+
+Code instrumented with `-sanitize:memory` will be permitted to access memory
+within this range as if it had already been initialized.
+When msan is not enabled this procedure does nothing; otherwise `len` is asserted to be non-negative.
+*/
+memory_unpoison_rawptr :: proc "contextless" (ptr: rawptr, len: int) {
+	when MSAN_ENABLED {
+		assert_contextless(len >= 0)
+		__msan_unpoison(ptr, uint(len))
+	}
+}
+
+/*
+Marks the region covering `[ptr, ptr+len)` as fully initialized.
+
+Code instrumented with `-sanitize:memory` will be permitted to access memory
+within this range as if it had already been initialized.
+
+When msan is not enabled this procedure does nothing.
+*/
+memory_unpoison_rawptr_uint :: proc "contextless" (ptr: rawptr, len: uint) {
+	when MSAN_ENABLED {
+		__msan_unpoison(ptr, len)
+	}
+}
+
+memory_unpoison :: proc {
+	memory_unpoison_slice,
+	memory_unpoison_ptr,
+	memory_unpoison_rawptr,
+	memory_unpoison_rawptr_uint,
+}
--- a/bin/lld-link.exe
+++ b/bin/lld-link.exe
--- a/bin/llvm/windows/LLVM-C.lib
+++ b/bin/llvm/windows/LLVM-C.lib
--- a/bin/wasm-ld.exe
+++ b/bin/wasm-ld.exe
--- a/build.bat
+++ b/build.bat
@@ -4,12 +4,12 @@ setlocal EnableDelayedExpansion

 where /Q cl.exe || (
 	set __VSCMD_ARG_NO_LOGO=1
-	for /f "tokens=*" %%i in ('"C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -latest -requires Microsoft.VisualStudio.Workload.NativeDesktop -property installationPath') do set VS=%%i
+	for /f "tokens=*" %%i in ('"C:\Program Files (x86)\Microsoft Visual Studio\Installer\vswhere.exe" -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath') do set VS=%%i
 	if "!VS!" equ "" (
-		echo ERROR: Visual Studio installation not found
+		echo ERROR: MSVC installation not found
 		exit /b 1
 	)
-	call "!VS!\VC\Auxiliary\Build\vcvarsall.bat" amd64 || exit /b 1
+	call "!VS!\Common7\Tools\vsdevcmd.bat" -arch=x64 -host_arch=x64 || exit /b 1
 )

 if "%VSCMD_ARG_TGT_ARCH%" neq "x64" (
@@ -19,16 +19,27 @@ if "%VSCMD_ARG_TGT_ARCH%" neq "x64" (
 	)
 )

+where /Q git.exe || goto skip_git_hash
+if not exist .git\ goto skip_git_hash
+for /f "tokens=1,2" %%i IN ('git show "--pretty=%%cd %%h" "--date=format:%%Y-%%m-%%d" --no-patch --no-notes HEAD') do (
+	set CURR_DATE_TIME=%%i
+	set GIT_SHA=%%j
+)
+if %ERRORLEVEL% equ 0 (
+	goto have_git_hash_and_date
+)
+:skip_git_hash
 pushd misc
 cl /nologo get-date.c
-popd
-
-for /f %%i in ('misc\get-date') do (
+for /f %%i in ('get-date') do (
 	set CURR_DATE_TIME=%%i
+	rem Don't set GIT_SHA
 )
+popd
+:have_git_hash_and_date
 set curr_year=%CURR_DATE_TIME:~0,4%
-set curr_month=%CURR_DATE_TIME:~4,2%
-set curr_day=%CURR_DATE_TIME:~6,2%
+set curr_month=%CURR_DATE_TIME:~5,2%
+set curr_day=%CURR_DATE_TIME:~8,2%

 :: Make sure this is a decent name and not generic
 set exe_name=odin.exe
@@ -61,31 +72,14 @@ if %release_mode% equ 0 (
 set V4=0
 set odin_version_full="%V1%.%V2%.%V3%.%V4%"
 set odin_version_raw="dev-%V1%-%V2%"
-
 set compiler_flags= -nologo -Oi -TP -fp:precise -Gm- -MP -FC -EHsc- -GR- -GF
 rem Parse source code as utf-8 even on shift-jis and other codepages
 rem See https://learn.microsoft.com/en-us/cpp/build/reference/utf-8-set-source-and-executable-character-sets-to-utf-8?view=msvc-170
 set compiler_flags= %compiler_flags% /utf-8
-set compiler_defines= -DODIN_VERSION_RAW=\"%odin_version_raw%\"
+set compiler_defines= -DODIN_VERSION_RAW=\"%odin_version_raw%\" -DGIT_SHA=\"%GIT_SHA%\"

 rem fileversion is defined as {Major,Minor,Build,Private: u16} so a bit limited
-set rc_flags=-nologo ^
-DV1=%V1% -DV2=%V2% -DV3=%V3% -DV4=%V4% ^
-DVF=%odin_version_full% -DNIGHTLY=%nightly%
-
-where /Q git.exe || goto skip_git_hash
-if not exist .git\ goto skip_git_hash
-for /f "tokens=1,2" %%i IN ('git show "--pretty=%%cd %%h" "--date=format:%%Y-%%m" --no-patch --no-notes HEAD') do (
-	set odin_version_raw=dev-%%i
-	set GIT_SHA=%%j
-)
-if %ERRORLEVEL% equ 0 (
-	set compiler_defines=%compiler_defines% -DGIT_SHA=\"%GIT_SHA%\"
-	set rc_flags=%rc_flags% -DGIT_SHA=%GIT_SHA% -DVP=%odin_version_raw%:%GIT_SHA%
-) else (
-	set rc_flags=%rc_flags% -DVP=%odin_version_raw%
-)
-:skip_git_hash
+set rc_flags=-nologo -DGIT_SHA=%GIT_SHA% -DVP=dev-%V1%-%V2%:%GIT_SHA% -DV1=%V1% -DV2=%V2% -DV3=%V3% -DV4=%V4% -DVF=%odin_version_full% -DNIGHTLY=%nightly%

 if %nightly% equ 1 set compiler_defines=%compiler_defines% -DNIGHTLY

@@ -138,6 +132,7 @@ del *.ilk > NUL 2> NUL

 rc %rc_flags% %odin_rc%
 cl %compiler_settings% "src\main.cpp" "src\libtommath.cpp" /link %linker_settings% -OUT:%exe_name%
+if %errorlevel% neq 0 goto end_of_build
 mt -nologo -inputresource:%exe_name%;#1 -manifest misc\odin.manifest -outputresource:%exe_name%;#1 -validate_manifest -identity:"odin, processorArchitecture=amd64, version=%odin_version_full%, type=win32"
 if %errorlevel% neq 0 goto end_of_build

--- a/build_odin.sh
+++ b/build_odin.sh
@@ -6,7 +6,6 @@ set -eu
 : ${LDFLAGS=}
 : ${LLVM_CONFIG=}

-CPPFLAGS="$CPPFLAGS -DODIN_VERSION_RAW=\"dev-$(date +"%Y-%m")\""
 CXXFLAGS="$CXXFLAGS -std=c++14"
 DISABLED_WARNINGS="-Wno-switch -Wno-macro-redefined -Wno-unused-value"
 LDFLAGS="$LDFLAGS -pthread -lm"
@@ -15,8 +14,12 @@ OS_NAME="$(uname -s)"

 if [ -d ".git" ] && [ -n "$(command -v git)" ]; then
 	GIT_SHA=$(git show --pretty='%h' --no-patch --no-notes HEAD)
+	GIT_DATE=$(git show "--pretty=%cd" "--date=format:%Y-%m" --no-patch --no-notes HEAD)
 	CPPFLAGS="$CPPFLAGS -DGIT_SHA=\"$GIT_SHA\""
+else
+	GIT_DATE=$(date +"%Y-%m")
 fi
+CPPFLAGS="$CPPFLAGS -DODIN_VERSION_RAW=\"dev-$GIT_DATE\""

 error() {
 	printf "ERROR: %s\n" "$1"
@@ -25,7 +28,8 @@ error() {

 # Brew advises people not to add llvm to their $PATH, so try and use brew to find it.
 if [ -z "$LLVM_CONFIG" ] &&  [ -n "$(command -v brew)" ]; then
-    if   [ -n "$(command -v $(brew --prefix llvm@19)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@19)/bin/llvm-config"
+    if   [ -n "$(command -v $(brew --prefix llvm@20)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@20)/bin/llvm-config"
+    elif [ -n "$(command -v $(brew --prefix llvm@19)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@19)/bin/llvm-config"
    elif [ -n "$(command -v $(brew --prefix llvm@18)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@18)/bin/llvm-config"
    elif [ -n "$(command -v $(brew --prefix llvm@17)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@17)/bin/llvm-config"
    elif [ -n "$(command -v $(brew --prefix llvm@14)/bin/llvm-config)" ]; then LLVM_CONFIG="$(brew --prefix llvm@14)/bin/llvm-config"
@@ -34,7 +38,8 @@ fi

 if [ -z "$LLVM_CONFIG" ]; then
 	# darwin, linux, openbsd
-	if   [ -n "$(command -v llvm-config-19)" ]; then LLVM_CONFIG="llvm-config-19"
+	if   [ -n "$(command -v llvm-config-20)" ]; then LLVM_CONFIG="llvm-config-20"
+	elif [ -n "$(command -v llvm-config-19)" ]; then LLVM_CONFIG="llvm-config-19"
 	elif [ -n "$(command -v llvm-config-18)" ]; then LLVM_CONFIG="llvm-config-18"
 	elif [ -n "$(command -v llvm-config-17)" ]; then LLVM_CONFIG="llvm-config-17"
 	elif [ -n "$(command -v llvm-config-14)" ]; then LLVM_CONFIG="llvm-config-14"
@@ -42,6 +47,7 @@ if [ -z "$LLVM_CONFIG" ]; then
 	elif [ -n "$(command -v llvm-config-12)" ]; then LLVM_CONFIG="llvm-config-12"
 	elif [ -n "$(command -v llvm-config-11)" ]; then LLVM_CONFIG="llvm-config-11"
 	# freebsd
+	elif [ -n "$(command -v llvm-config20)" ]; then  LLVM_CONFIG="llvm-config20"
 	elif [ -n "$(command -v llvm-config19)" ]; then  LLVM_CONFIG="llvm-config19"
 	elif [ -n "$(command -v llvm-config18)" ]; then  LLVM_CONFIG="llvm-config18"
 	elif [ -n "$(command -v llvm-config17)" ]; then  LLVM_CONFIG="llvm-config17"
@@ -69,15 +75,15 @@ LLVM_VERSION_MAJOR="$(echo $LLVM_VERSION | awk -F. '{print $1}')"
 LLVM_VERSION_MINOR="$(echo $LLVM_VERSION | awk -F. '{print $2}')"
 LLVM_VERSION_PATCH="$(echo $LLVM_VERSION | awk -F. '{print $3}')"

-if [ $LLVM_VERSION_MAJOR -lt 11 ] || ([ $LLVM_VERSION_MAJOR -gt 14 ] && [ $LLVM_VERSION_MAJOR -lt 17 ]) || [ $LLVM_VERSION_MAJOR -gt 19 ]; then
-	error "Invalid LLVM version $LLVM_VERSION: must be 11, 12, 13, 14, 17, 18 or 19"
+if [ $LLVM_VERSION_MAJOR -lt 11 ] || ([ $LLVM_VERSION_MAJOR -gt 14 ] && [ $LLVM_VERSION_MAJOR -lt 17 ]) || [ $LLVM_VERSION_MAJOR -gt 20 ]; then
+	error "Invalid LLVM version $LLVM_VERSION: must be 11, 12, 13, 14, 17, 18, 19 or 20"
 fi

 case "$OS_NAME" in
 Darwin)
 	if [ "$OS_ARCH" = "arm64" ]; then
 		if [ $LLVM_VERSION_MAJOR -lt 13 ]; then
-			error "Invalid LLVM version $LLVM_VERSION: Darwin Arm64 requires LLVM 13, 14, 17, 18 or 19"
+			error "Invalid LLVM version $LLVM_VERSION: Darwin Arm64 requires LLVM 13, 14, 17, 18, 19 or 20"
 		fi
 	fi

--- a/check_all.bat
+++ b/check_all.bat
@@ -0,0 +1,75 @@
+@echo off
+
+if "%1" == "" (
+	echo Checking darwin_amd64 - expect vendor:cgltf panic
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:darwin_amd64
+	echo Checking darwin_arm64 - expect vendor:cgltf panic
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:darwin_arm64
+	echo Checking linux_i386
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_i386
+	echo Checking linux_amd64
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_amd64
+	echo Checking linux_arm64
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_arm64
+	echo Checking linux_arm32
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_arm32
+	echo Checking linux_riscv64
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_riscv64
+	echo Checking windows_i386
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:windows_i386
+	echo Checking windows_amd64
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:windows_amd64
+	echo Checking freebsd_amd64
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freebsd_amd64
+	echo Checking freebsd_arm64
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freebsd_arm64
+	echo Checking netbsd_amd64
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:netbsd_amd64
+	echo Checking netbsd_arm64
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:netbsd_arm64
+	echo Checking openbsd_amd64
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:openbsd_amd64
+)
+
+if "%1" == "freestanding" (
+	echo Checking freestanding_wasm32
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_wasm32
+	echo Checking freestanding_wasm64p32
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_wasm64p32
+	echo Checking freestanding_amd64_sysv
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_amd64_sysv
+	echo Checking freestanding_amd64_win64
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_amd64_win64
+	echo Checking freestanding_arm64
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_arm64
+	echo Checking freestanding_arm32
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_arm32
+	echo Checking freestanding_riscv64
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_riscv64
+)
+
+if "%1" == "rare" (
+	echo Checking essence_amd64
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:essence_amd64
+	echo Checking freebsd_i386
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freebsd_i386
+	echo Checking haiku_amd64
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:haiku_amd64
+)
+
+if "%1" == "wasm" (
+	echo Checking freestanding_wasm32
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_wasm32
+	echo Checking freestanding_wasm64p32
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_wasm64p32
+	echo Checking wasi_wasm64p32
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:wasi_wasm64p32
+	echo Checking wasi_wasm32
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:wasi_wasm32
+	echo Checking js_wasm32
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:js_wasm32
+	echo Checking orca_wasm32
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:orca_wasm32
+	echo Checking js_wasm64p32
+	odin check examples\all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:js_wasm64p32
+)
--- a/check_all.sh
+++ b/check_all.sh
@@ -0,0 +1,78 @@
+#!/bin/sh
+
+case $1 in
+freestanding)
+	echo Checking freestanding_wasm32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_wasm32
+	echo Checking freestanding_wasm64p32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_wasm64p32
+	echo Checking freestanding_amd64_sysv
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_amd64_sysv
+	echo Checking freestanding_amd64_win64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_amd64_win64
+	echo Checking freestanding_arm64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_arm64
+	echo Checking freestanding_arm32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_arm32
+	echo Checking freestanding_riscv64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_riscv64
+	;;
+
+rare)
+	echo Checking essence_amd64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:essence_amd64
+	echo Checking freebsd_i386
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freebsd_i386
+	echo Checking haiku_amd64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:haiku_amd64
+	;;
+
+wasm)
+	echo Checking freestanding_wasm32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_wasm32
+	echo Checking freestanding_wasm64p32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freestanding_wasm64p32
+	echo Checking wasi_wasm64p32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:wasi_wasm64p32
+	echo Checking wasi_wasm32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:wasi_wasm32
+	echo Checking js_wasm32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:js_wasm32
+	echo Checking orca_wasm32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:orca_wasm32
+	echo Checking js_wasm64p32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:js_wasm64p32
+	;;
+
+*)
+	echo Checking darwin_amd64 - expect vendor:cgltf panic
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:darwin_amd64
+	echo Checking darwin_arm64 - expect vendor:cgltf panic
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:darwin_arm64
+	echo Checking linux_i386
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_i386
+	echo Checking linux_amd64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_amd64
+	echo Checking linux_arm64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_arm64
+	echo Checking linux_arm32
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_arm32
+	echo Checking linux_riscv64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:linux_riscv64
+	echo Checking windows_i386
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:windows_i386
+	echo Checking windows_amd64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:windows_amd64
+	echo Checking freebsd_amd64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freebsd_amd64
+	echo Checking freebsd_arm64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:freebsd_arm64
+	echo Checking netbsd_amd64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:netbsd_amd64
+	echo Checking netbsd_arm64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:netbsd_arm64
+	echo Checking openbsd_amd64
+	odin check examples/all -vet -vet-tabs -strict-style -vet-style -warnings-as-errors -disallow-do -target:openbsd_amd64
+	;;
+
+esac
--- a/ci/build_linux_static.sh
+++ b/ci/build_linux_static.sh
@@ -1,8 +1,8 @@
 #!/usr/bin/env sh
 # Intended for use in Alpine containers, see the "nightly" Github action for a list of dependencies

-CXX="clang++-18"
-LLVM_CONFIG="llvm-config-18"
+CXX="clang++-20"
+LLVM_CONFIG="llvm-config-20"

 DISABLED_WARNINGS="-Wno-switch -Wno-macro-redefined -Wno-unused-value"

--- a/ci/remove_windows_binaries.sh
+++ b/ci/remove_windows_binaries.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env sh
+
+find "$1" -type f \(\
+	-iname "*.exe"    \
+	-o -iname "*.dll" \
+	-o -iname "*.lib" \
+	-o -iname "*.pdb" \
+    \) -delete
--- a/codecov.yml
+++ b/codecov.yml
@@ -0,0 +1,6 @@
+comment: false
+coverage:
+  status:
+    project:
+      default:
+        threshold: 1%
--- a/core/bufio/reader.odin
+++ b/core/bufio/reader.odin
@@ -257,7 +257,7 @@ reader_read_rune :: proc(b: ^Reader) -> (r: rune, size: int, err: io.Error) {
 	for b.r+utf8.UTF_MAX > b.w &&
 	    !utf8.full_rune(b.buf[b.r:b.w]) &&
 	    b.err == nil &&
-	    b.w-b.w < len(b.buf) {
+	    b.w-b.r < len(b.buf) {
 		_reader_read_new_chunk(b) or_return
 	}

--- a/core/bytes/bytes.odin
+++ b/core/bytes/bytes.odin
@@ -350,7 +350,7 @@ index_byte :: proc "contextless" (s: []byte, c: byte) -> (index: int) #no_bounds
 	}

 	c_vec: simd.u8x16 = c
-	when !simd.IS_EMULATED {
+	when simd.HAS_HARDWARE_SIMD {
 		// Note: While this is something that could also logically take
 		// advantage of AVX512, the various downclocking and power
 		// consumption related woes make premature to have a dedicated
@@ -485,7 +485,7 @@ last_index_byte :: proc "contextless" (s: []byte, c: byte) -> int #no_bounds_che
 	}

 	c_vec: simd.u8x16 = c
-	when !simd.IS_EMULATED {
+	when simd.HAS_HARDWARE_SIMD {
 		// Note: While this is something that could also logically take
 		// advantage of AVX512, the various downclocking and power
 		// consumption related woes make premature to have a dedicated
--- a/core/c/libc/complex.odin
+++ b/core/c/libc/complex.odin
@@ -5,7 +5,7 @@ package libc
 when ODIN_OS == .Windows {
 	foreign import libc "system:libucrt.lib"
 } else when ODIN_OS == .Darwin {
-	foreign import libc "system:System.framework"
+	foreign import libc "system:System"
 } else {
 	foreign import libc "system:c"
 }
--- a/core/c/libc/ctype.odin
+++ b/core/c/libc/ctype.odin
@@ -3,7 +3,7 @@ package libc
 when ODIN_OS == .Windows {
 	foreign import libc "system:libucrt.lib"
 } else when ODIN_OS == .Darwin {
-	foreign import libc "system:System.framework"
+	foreign import libc "system:System"
 } else {
 	foreign import libc "system:c"
 }
--- a/core/c/libc/errno.odin
+++ b/core/c/libc/errno.odin
@@ -5,7 +5,7 @@ package libc
 when ODIN_OS == .Windows {
 	foreign import libc "system:libucrt.lib"
 } else when ODIN_OS == .Darwin {
-	foreign import libc "system:System.framework"
+	foreign import libc "system:System"
 } else {
 	foreign import libc "system:c"
 }
--- a/core/c/libc/locale.odin
+++ b/core/c/libc/locale.odin
@@ -5,7 +5,7 @@ import "core:c"
 when ODIN_OS == .Windows {
 	foreign import libc "system:libucrt.lib"
 } else when ODIN_OS == .Darwin {
-	foreign import libc "system:System.framework"
+	foreign import libc "system:System"
 } else {
 	foreign import libc "system:c"
 }
@@ -72,14 +72,14 @@ when ODIN_OS == .Windows {
 		n_sep_by_space:       c.char,
 		p_sign_posn:          c.char,
 		n_sign_posn:          c.char,
-		_W_decimal_point:     [^]u16 `fmt:"s,0"`,
-		_W_thousands_sep:     [^]u16 `fmt:"s,0"`,
-		_W_int_curr_symbol:   [^]u16 `fmt:"s,0"`,
-		_W_currency_symbol:   [^]u16 `fmt:"s,0"`,
-		_W_mon_decimal_point: [^]u16 `fmt:"s,0"`,
-		_W_mon_thousands_sep: [^]u16 `fmt:"s,0"`,
-		_W_positive_sign:     [^]u16 `fmt:"s,0"`,
-		_W_negative_sign:     [^]u16 `fmt:"s,0"`,
+		_W_decimal_point:     cstring16,
+		_W_thousands_sep:     cstring16,
+		_W_int_curr_symbol:   cstring16,
+		_W_currency_symbol:   cstring16,
+		_W_mon_decimal_point: cstring16,
+		_W_mon_thousands_sep: cstring16,
+		_W_positive_sign:     cstring16,
+		_W_negative_sign:     cstring16,
 	}
 } else {
 	lconv :: struct {
--- a/core/c/libc/math.odin
+++ b/core/c/libc/math.odin
@@ -7,7 +7,7 @@ import "base:intrinsics"
 when ODIN_OS == .Windows {
 	foreign import libc "system:libucrt.lib"
 } else when ODIN_OS == .Darwin {
-	foreign import libc "system:System.framework"
+	foreign import libc "system:System"
 } else {
 	foreign import libc "system:c"
 }
--- a/core/c/libc/setjmp.odin
+++ b/core/c/libc/setjmp.odin
@@ -5,7 +5,7 @@ package libc
 when ODIN_OS == .Windows {
 	foreign import libc "system:libucrt.lib"
 } else when ODIN_OS == .Darwin {
-	foreign import libc "system:System.framework"
+	foreign import libc "system:System"
 } else {
 	foreign import libc "system:c"
 }
--- a/core/c/libc/signal.odin
+++ b/core/c/libc/signal.odin
@@ -5,7 +5,7 @@ package libc
 when ODIN_OS == .Windows {
 	foreign import libc "system:libucrt.lib"
 } else when ODIN_OS == .Darwin {
-	foreign import libc "system:System.framework"
+	foreign import libc "system:System"
 } else {
 	foreign import libc "system:c"
 }
--- a/core/c/libc/stdio.odin
+++ b/core/c/libc/stdio.odin
@@ -9,7 +9,7 @@ when ODIN_OS == .Windows {
 		"system:legacy_stdio_definitions.lib",
 	}
 } else when ODIN_OS == .Darwin {
-	foreign import libc "system:System.framework"
+	foreign import libc "system:System"
 } else {
 	foreign import libc "system:c"
 }
@@ -275,7 +275,7 @@ foreign libc {
 	// 7.21.7 Character input/output functions
 	fgetc     :: proc(stream: ^FILE) -> int ---
 	fgets     :: proc(s: [^]char, n: int, stream: ^FILE) -> [^]char ---
-	fputc     :: proc(s: cstring, stream: ^FILE) -> int ---
+	fputc     :: proc(s: c.int, stream: ^FILE) -> int ---
 	getc      :: proc(stream: ^FILE) -> int ---
 	getchar   :: proc() -> int ---
 	putc      :: proc(c: int, stream: ^FILE) -> int ---
--- a/core/c/libc/stdlib.odin
+++ b/core/c/libc/stdlib.odin
@@ -5,7 +5,7 @@ package libc
 when ODIN_OS == .Windows {
 	foreign import libc "system:libucrt.lib"
 } else when ODIN_OS == .Darwin {
-	foreign import libc "system:System.framework"
+	foreign import libc "system:System"
 } else {
 	foreign import libc "system:c"
 }
--- a/core/c/libc/string.odin
+++ b/core/c/libc/string.odin
@@ -7,7 +7,7 @@ import "base:runtime"
 when ODIN_OS == .Windows {
 	foreign import libc "system:libucrt.lib"
 } else when ODIN_OS == .Darwin {
-	foreign import libc "system:System.framework"
+	foreign import libc "system:System"
 } else {
 	foreign import libc "system:c"
 }
--- a/core/c/libc/time.odin
+++ b/core/c/libc/time.odin
@@ -5,7 +5,7 @@ package libc
 when ODIN_OS == .Windows {
 	foreign import libc "system:libucrt.lib"
 } else when ODIN_OS == .Darwin {
-	foreign import libc "system:System.framework"
+	foreign import libc "system:System"
 } else {
 	foreign import libc "system:c"
 }
--- a/core/c/libc/uchar.odin
+++ b/core/c/libc/uchar.odin
@@ -5,7 +5,7 @@ package libc
 when ODIN_OS == .Windows {
 	foreign import libc "system:libucrt.lib"
 } else when ODIN_OS == .Darwin {
-	foreign import libc "system:System.framework"
+	foreign import libc "system:System"
 } else {
 	foreign import libc "system:c"
 }
--- a/core/c/libc/wchar.odin
+++ b/core/c/libc/wchar.odin
@@ -5,7 +5,7 @@ package libc
 when ODIN_OS == .Windows {
 	foreign import libc "system:libucrt.lib"
 } else when ODIN_OS == .Darwin {
-	foreign import libc "system:System.framework"
+	foreign import libc "system:System"
 } else {
 	foreign import libc "system:c"
 }
--- a/core/c/libc/wctype.odin
+++ b/core/c/libc/wctype.odin
@@ -5,7 +5,7 @@ package libc
 when ODIN_OS == .Windows {
 	foreign import libc "system:libucrt.lib"
 } else when ODIN_OS == .Darwin {
-	foreign import libc "system:System.framework"
+	foreign import libc "system:System"
 } else {
 	foreign import libc "system:c"
 }
--- a/core/compress/common.odin
+++ b/core/compress/common.odin
@@ -139,9 +139,6 @@ Context_Memory_Input :: struct #packed {
 }
 when size_of(rawptr) == 8 {
 	#assert(size_of(Context_Memory_Input) == 64)
-} else {
-	// e.g. `-target:windows_i386`
-	#assert(size_of(Context_Memory_Input) == 52)
 }

 Context_Stream_Input :: struct #packed {
--- a/core/container/intrusive/list/intrusive_list.odin
+++ b/core/container/intrusive/list/intrusive_list.odin
@@ -278,19 +278,19 @@ Example:
 	iterate_next_example :: proc() {
 		l: list.List

-		one := My_Struct{value=1}
-		two := My_Struct{value=2}
+		one := My_Next_Struct{value=1}
+		two := My_Next_Struct{value=2}

 		list.push_back(&l, &one.node)
 		list.push_back(&l, &two.node)

-		it := list.iterator_head(l, My_Struct, "node")
+		it := list.iterator_head(l, My_Next_Struct, "node")
 		for num in list.iterate_next(&it) {
 			fmt.println(num.value)
 		}
 	}

-	My_Struct :: struct {
+	My_Next_Struct :: struct {
 		node : list.Node,
 		value: int,
 	}
@@ -325,22 +325,22 @@ Example:
 	import "core:fmt"
 	import "core:container/intrusive/list"

-	iterate_next_example :: proc() {
+	iterate_prev_example :: proc() {
 		l: list.List

-		one := My_Struct{value=1}
-		two := My_Struct{value=2}
+		one := My_Prev_Struct{value=1}
+		two := My_Prev_Struct{value=2}

 		list.push_back(&l, &one.node)
 		list.push_back(&l, &two.node)

-		it := list.iterator_tail(l, My_Struct, "node")
+		it := list.iterator_tail(l, My_Prev_Struct, "node")
 		for num in list.iterate_prev(&it) {
 			fmt.println(num.value)
 		}
 	}

-	My_Struct :: struct {
+	My_Prev_Struct :: struct {
 		node : list.Node,
 		value: int,
 	}
--- a/core/container/lru/lru_cache.odin
+++ b/core/container/lru/lru_cache.odin
@@ -129,7 +129,7 @@ remove :: proc(c: ^$C/Cache($Key, $Value), key: Key) -> bool {
 		return false
 	}
 	_remove_node(c, e)
-	free(node, c.node_allocator)
+	free(e, c.node_allocator)
 	c.count -= 1
 	return true
 }
--- a/core/container/priority_queue/priority_queue.odin
+++ b/core/container/priority_queue/priority_queue.odin
@@ -1,6 +1,7 @@
 package container_priority_queue

 import "base:builtin"
+import "base:runtime"

 Priority_Queue :: struct($T: typeid) {
 	queue: [dynamic]T,
@@ -17,13 +18,14 @@ default_swap_proc :: proc($T: typeid) -> proc(q: []T, i, j: int) {
 	}
 }

-init :: proc(pq: ^$Q/Priority_Queue($T), less: proc(a, b: T) -> bool, swap: proc(q: []T, i, j: int), capacity := DEFAULT_CAPACITY, allocator := context.allocator) {
+init :: proc(pq: ^$Q/Priority_Queue($T), less: proc(a, b: T) -> bool, swap: proc(q: []T, i, j: int), capacity := DEFAULT_CAPACITY, allocator := context.allocator) -> (err: runtime.Allocator_Error) {
 	if pq.queue.allocator.procedure == nil {
 		pq.queue.allocator = allocator
 	}
-	reserve(pq, capacity)
+	reserve(pq, capacity) or_return
 	pq.less = less
 	pq.swap = swap
+	return .None
 }

 init_from_dynamic_array :: proc(pq: ^$Q/Priority_Queue($T), queue: [dynamic]T, less: proc(a, b: T) -> bool, swap: proc(q: []T, i, j: int)) {
@@ -41,8 +43,8 @@ destroy :: proc(pq: ^$Q/Priority_Queue($T)) {
 	delete(pq.queue)
 }

-reserve :: proc(pq: ^$Q/Priority_Queue($T), capacity: int) {
-	builtin.reserve(&pq.queue, capacity)
+reserve :: proc(pq: ^$Q/Priority_Queue($T), capacity: int) -> (err: runtime.Allocator_Error) {
+	return builtin.reserve(&pq.queue, capacity)
 }
 clear :: proc(pq: ^$Q/Priority_Queue($T)) {
 	builtin.clear(&pq.queue)
@@ -103,9 +105,10 @@ fix :: proc(pq: ^$Q/Priority_Queue($T), i: int) {
 	}
 }

-push :: proc(pq: ^$Q/Priority_Queue($T), value: T) {
-	append(&pq.queue, value)
+push :: proc(pq: ^$Q/Priority_Queue($T), value: T) -> (err: runtime.Allocator_Error) {
+	append(&pq.queue, value) or_return
 	_shift_up(pq, builtin.len(pq.queue)-1)
+	return .None
 }

 pop :: proc(pq: ^$Q/Priority_Queue($T), loc := #caller_location) -> (value: T) {
@@ -130,12 +133,10 @@ pop_safe :: proc(pq: ^$Q/Priority_Queue($T), loc := #caller_location) -> (value:
 remove :: proc(pq: ^$Q/Priority_Queue($T), i: int) -> (value: T, ok: bool) {
 	n := builtin.len(pq.queue)
 	if 0 <= i && i < n {
-		if n != i {
-			pq.swap(pq.queue[:], i, n)
-			_shift_down(pq, i, n)
-			_shift_up(pq, i)
-		}
-		value, ok = builtin.pop_safe(&pq.queue)
+		pq.swap(pq.queue[:], i, n-1)
+		_shift_down(pq, i, n-1)
+		_shift_up(pq, i)
+		value, ok = builtin.pop(&pq.queue), true
 	}
 	return
 }
--- a/core/container/queue/queue.odin
+++ b/core/container/queue/queue.odin
@@ -4,7 +4,13 @@ import "base:builtin"
 import "base:runtime"
 _ :: runtime

-// Dynamically resizable double-ended queue/ring-buffer
+/*
+`Queue` is a dynamically resizable double-ended queue/ring-buffer.
+
+Being double-ended means that either end may be pushed onto or popped from
+across the same block of memory, in any order, thus providing both stack and
+queue-like behaviors in the same data structure.
+*/
 Queue :: struct($T: typeid) {
 	data:   [dynamic]T,
 	len:    uint,
@@ -13,18 +19,31 @@ Queue :: struct($T: typeid) {

 DEFAULT_CAPACITY :: 16

-// Procedure to initialize a queue
-init :: proc(q: ^$Q/Queue($T), capacity := DEFAULT_CAPACITY, allocator := context.allocator) -> runtime.Allocator_Error {
-	if q.data.allocator.procedure == nil {
-		q.data.allocator = allocator
-	}
+/*
+Initialize a `Queue` with a starting `capacity` and an `allocator`.
+*/
+init :: proc(q: ^$Q/Queue($T), capacity := DEFAULT_CAPACITY, allocator := context.allocator, loc := #caller_location) -> runtime.Allocator_Error {
 	clear(q)
-	return reserve(q, capacity)
+	q.data = transmute([dynamic]T)runtime.Raw_Dynamic_Array{
+		data = nil,
+		len = 0,
+		cap = 0,
+		allocator = allocator,
+	}
+	return reserve(q, capacity, loc)
 }

-// Procedure to initialize a queue from a fixed backing slice.
-// The contents of the `backing` will be overwritten as items are pushed onto the `Queue`.
-// Any previous contents are not available.
+/*
+Initialize a `Queue` from a fixed `backing` slice into which modifications are
+made directly.
+
+The contents of the `backing` will be overwritten as items are pushed onto the
+`Queue`. Any previous contents will not be available through the API but are
+not explicitly zeroed either.
+
+Note that procedures which need space to work (`push_back`, ...) will fail if
+the backing slice runs out of space.
+*/
 init_from_slice :: proc(q: ^$Q/Queue($T), backing: []T) -> bool {
 	clear(q)
 	q.data = transmute([dynamic]T)runtime.Raw_Dynamic_Array{
@@ -36,8 +55,14 @@ init_from_slice :: proc(q: ^$Q/Queue($T), backing: []T) -> bool {
 	return true
 }

-// Procedure to initialize a queue from a fixed backing slice.
-// Existing contents are preserved and available on the queue.
+/*
+Initialize a `Queue` from a fixed `backing` slice into which modifications are
+made directly.
+
+The contents of the queue will start out with all of the elements in `backing`,
+effectively creating a full queue from the slice. As such, no procedures will
+be able to add more elements to the queue until some are taken off.
+*/
 init_with_contents :: proc(q: ^$Q/Queue($T), backing: []T) -> bool {
 	clear(q)
 	q.data = transmute([dynamic]T)runtime.Raw_Dynamic_Array{
@@ -50,87 +75,203 @@ init_with_contents :: proc(q: ^$Q/Queue($T), backing: []T) -> bool {
 	return true
 }

-// Procedure to destroy a queue
+/*
+Delete memory that has been dynamically allocated from a `Queue` that was set up with `init`.
+
+Note that this procedure should not be used on queues set up with
+`init_from_slice` or `init_with_contents`, as neither of those procedures keeps
+track of the allocator state of the underlying `backing` slice.
+*/
 destroy :: proc(q: ^$Q/Queue($T)) {
 	delete(q.data)
 }

-// The length of the queue
+/*
+Return the length of the queue.
+*/
 len :: proc(q: $Q/Queue($T)) -> int {
 	return int(q.len)
 }

-// The current capacity of the queue
+/*
+Return the capacity of the queue.
+*/
 cap :: proc(q: $Q/Queue($T)) -> int {
 	return builtin.len(q.data)
 }

-// Remaining space in the queue (cap-len)
+/*
+Return the remaining space in the queue.
+
+This will be `cap() - len()`.
+*/
 space :: proc(q: $Q/Queue($T)) -> int {
 	return builtin.len(q.data) - int(q.len)
 }

-// Reserve enough space for at least the specified capacity
-reserve :: proc(q: ^$Q/Queue($T), capacity: int) -> runtime.Allocator_Error {
+/*
+Reserve enough space in the queue for at least the specified capacity.
+
+This may return an error if allocation failed.
+*/
+reserve :: proc(q: ^$Q/Queue($T), capacity: int, loc := #caller_location) -> runtime.Allocator_Error {
 	if capacity > space(q^) {
-		return _grow(q, uint(capacity)) 
+		return _grow(q, uint(capacity), loc)
 	}
 	return nil
 }

+/*
+Shrink a queue's dynamically allocated array.

+This has no effect if the queue was initialized with a backing slice.
+*/
+shrink :: proc(q: ^$Q/Queue($T), temp_allocator := context.temp_allocator, loc := #caller_location) {
+	if q.data.allocator.procedure == runtime.nil_allocator_proc {
+		return
+	}
+
+	if q.len > 0 && q.offset > 0 {
+		// Make the array contiguous again by staging the live elements in a temporary buffer.
+		buffer := make([]T, q.len, temp_allocator)
+		defer delete(buffer, temp_allocator)
+
+		right := min(uint(builtin.len(q.data)) - q.offset, q.len) // Count of live elements stored from `offset` onward; clamped so a non-wrapped queue never slices past `buffer`.
+		copy(buffer[:],      q.data[q.offset:])
+		copy(buffer[right:], q.data[:q.offset]) // Wrapped-around remainder; copies nothing when the queue does not wrap.
+
+		copy(q.data[:], buffer[:])
+
+		q.offset = 0
+	}
+
+	builtin.shrink(&q.data, q.len, loc)
+}
+
+/*
+Get the element at index `i`.
+
+This will raise a bounds checking error if `i` is an invalid index.
+*/
 get :: proc(q: ^$Q/Queue($T), #any_int i: int, loc := #caller_location) -> T {
-	runtime.bounds_check_error_loc(loc, i, builtin.len(q.data))
+	runtime.bounds_check_error_loc(loc, i, int(q.len))

 	idx := (uint(i)+q.offset)%builtin.len(q.data)
 	return q.data[idx]
 }

-front :: proc(q: ^$Q/Queue($T)) -> T {
-	return q.data[q.offset]
-}
-front_ptr :: proc(q: ^$Q/Queue($T)) -> ^T {
-	return &q.data[q.offset]
-}
+/*
+Get a pointer to the element at index `i`.

-back :: proc(q: ^$Q/Queue($T)) -> T {
-	idx := (q.offset+uint(q.len - 1))%builtin.len(q.data)
-	return q.data[idx]
-}
-back_ptr :: proc(q: ^$Q/Queue($T)) -> ^T {
-	idx := (q.offset+uint(q.len - 1))%builtin.len(q.data)
+This will raise a bounds checking error if `i` is an invalid index.
+*/
+get_ptr :: proc(q: ^$Q/Queue($T), #any_int i: int, loc := #caller_location) -> ^T {
+	runtime.bounds_check_error_loc(loc, i, int(q.len))
+
+	idx := (uint(i)+q.offset)%builtin.len(q.data)
 	return &q.data[idx]
 }

+/*
+Set the element at index `i` to `val`.
+
+This will raise a bounds checking error if `i` is an invalid index.
+*/
 set :: proc(q: ^$Q/Queue($T), #any_int i: int, val: T, loc := #caller_location) {
-	runtime.bounds_check_error_loc(loc, i, builtin.len(q.data))
-	
+	runtime.bounds_check_error_loc(loc, i, int(q.len))
+
 	idx := (uint(i)+q.offset)%builtin.len(q.data)
 	q.data[idx] = val
 }
-get_ptr :: proc(q: ^$Q/Queue($T), #any_int i: int, loc := #caller_location) -> ^T {
-	runtime.bounds_check_error_loc(loc, i, builtin.len(q.data))
-	
-	idx := (uint(i)+q.offset)%builtin.len(q.data)
+
+/*
+Get the element at the front of the queue.
+
+This will raise a bounds checking error if the queue is empty.
+*/
+front :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> T {
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len > 0, "Queue is empty.", loc)
+	}
+	return q.data[q.offset]
+}
+
+/*
+Get a pointer to the element at the front of the queue.
+
+This will raise a bounds checking error if the queue is empty.
+*/
+front_ptr :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> ^T {
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len > 0, "Queue is empty.", loc)
+	}
+	return &q.data[q.offset]
+}
+
+/*
+Get the element at the back of the queue.
+
+This will raise a bounds checking error if the queue is empty.
+*/
+back :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> T {
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len > 0, "Queue is empty.", loc)
+	}
+	idx := (q.offset+uint(q.len - 1))%builtin.len(q.data)
+	return q.data[idx]
+}
+
+/*
+Get a pointer to the element at the back of the queue.
+
+This will raise a bounds checking error if the queue is empty.
+*/
+back_ptr :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> ^T {
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len > 0, "Queue is empty.", loc)
+	}
+	idx := (q.offset+uint(q.len - 1))%builtin.len(q.data)
 	return &q.data[idx]
 }

+
+@(deprecated="Use `front_ptr` instead")
 peek_front :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> ^T {
-	runtime.bounds_check_error_loc(loc, 0, builtin.len(q.data))
-	idx := q.offset%builtin.len(q.data)
-	return &q.data[idx]
+	return front_ptr(q, loc)
 }

+@(deprecated="Use `back_ptr` instead")
 peek_back :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> ^T {
-	runtime.bounds_check_error_loc(loc, int(q.len - 1), builtin.len(q.data))
-	idx := (uint(q.len - 1)+q.offset)%builtin.len(q.data)
-	return &q.data[idx]
+	return back_ptr(q, loc)
 }

-// Push an element to the back of the queue
-push_back :: proc(q: ^$Q/Queue($T), elem: T) -> (ok: bool, err: runtime.Allocator_Error) {
+/*
+Push an element to the back of the queue.
+
+If there is no more space left and allocation fails to get more, this will
+return false with an `Allocator_Error`.
+
+Example:
+
+	import "base:runtime"
+	import "core:container/queue"
+
+	// This demonstrates typical queue behavior (First-In First-Out).
+	main :: proc() {
+		q: queue.Queue(int)
+		queue.init(&q)
+		queue.push_back(&q, 1)
+		queue.push_back(&q, 2)
+		queue.push_back(&q, 3)
+		// q.data is now [1, 2, 3, ...]
+		assert(queue.pop_front(&q) == 1)
+		assert(queue.pop_front(&q) == 2)
+		assert(queue.pop_front(&q) == 3)
+	}
+*/
+push_back :: proc(q: ^$Q/Queue($T), elem: T, loc := #caller_location) -> (ok: bool, err: runtime.Allocator_Error) {
 	if space(q^) == 0 {
-		_grow(q) or_return
+		_grow(q, loc = loc) or_return
 	}
 	idx := (q.offset+uint(q.len))%builtin.len(q.data)
 	q.data[idx] = elem
@@ -138,27 +279,78 @@ push_back :: proc(q: ^$Q/Queue($T), elem: T) -> (ok: bool, err: runtime.Allocato
 	return true, nil
 }

-// Push an element to the front of the queue
-push_front :: proc(q: ^$Q/Queue($T), elem: T) -> (ok: bool, err: runtime.Allocator_Error)  {
+/*
+Push an element to the front of the queue.
+
+If there is no more space left and allocation fails to get more, this will
+return false with an `Allocator_Error`.
+
+Example:
+
+	import "base:runtime"
+	import "core:container/queue"
+
+	// This demonstrates stack behavior (First-In Last-Out).
+	main :: proc() {
+		q: queue.Queue(int)
+		queue.init(&q)
+		queue.push_back(&q, 1)
+		queue.push_back(&q, 2)
+		queue.push_back(&q, 3)
+		// q.data is now [1, 2, 3, ...]
+		assert(queue.pop_back(&q) == 3)
+		assert(queue.pop_back(&q) == 2)
+		assert(queue.pop_back(&q) == 1)
+	}
+*/
+push_front :: proc(q: ^$Q/Queue($T), elem: T, loc := #caller_location) -> (ok: bool, err: runtime.Allocator_Error)  {
 	if space(q^) == 0 {
-		_grow(q) or_return
-	}	
+		_grow(q, loc = loc) or_return
+	}
 	q.offset = uint(q.offset - 1 + builtin.len(q.data)) % builtin.len(q.data)
 	q.len += 1
 	q.data[q.offset] = elem
 	return true, nil
 }

+/*
+Pop an element from the back of the queue.

-// Pop an element from the back of the queue
+This will raise a bounds checking error if the queue is empty.
+
+Example:
+
+	import "core:log"
+	import "core:container/queue"
+
+	// This demonstrates stack behavior (First-In Last-Out) at the far end of the data array.
+	main :: proc() {
+		q: queue.Queue(int)
+		queue.init(&q)
+		queue.push_front(&q, 1)
+		queue.push_front(&q, 2)
+		queue.push_front(&q, 3)
+		// q.data is now [..., 3, 2, 1]
+		log.infof("%#v", q)
+		assert(queue.pop_front(&q) == 3)
+		assert(queue.pop_front(&q) == 2)
+		assert(queue.pop_front(&q) == 1)
+	}
+*/
 pop_back :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> (elem: T) {
-	assert(condition=q.len > 0, loc=loc)
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len > 0, "Queue is empty.", loc)
+	}
 	q.len -= 1
 	idx := (q.offset+uint(q.len))%builtin.len(q.data)
 	elem = q.data[idx]
 	return
 }
-// Safely pop an element from the back of the queue
+
+/*
+Pop an element from the back of the queue if one exists and return true.
+Otherwise, return a nil element and false.
+*/
 pop_back_safe :: proc(q: ^$Q/Queue($T)) -> (elem: T, ok: bool) {
 	if q.len > 0 {
 		q.len -= 1
@@ -169,15 +361,25 @@ pop_back_safe :: proc(q: ^$Q/Queue($T)) -> (elem: T, ok: bool) {
 	return
 }

-// Pop an element from the front of the queue
+/*
+Pop an element from the front of the queue.
+
+This will raise a bounds checking error if the queue is empty.
+*/
 pop_front :: proc(q: ^$Q/Queue($T), loc := #caller_location) -> (elem: T) {
-	assert(condition=q.len > 0, loc=loc)
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len > 0, "Queue is empty.", loc)
+	}
 	elem = q.data[q.offset]
 	q.offset = (q.offset+1)%builtin.len(q.data)
 	q.len -= 1
 	return
 }
-// Safely pop an element from the front of the queue
+
+/*
+Pop an element from the front of the queue if one exists and return true.
+Otherwise, return a nil element and false.
+*/
 pop_front_safe :: proc(q: ^$Q/Queue($T)) -> (elem: T, ok: bool) {
 	if q.len > 0 {
 		elem = q.data[q.offset]
@@ -188,13 +390,18 @@ pop_front_safe :: proc(q: ^$Q/Queue($T)) -> (elem: T, ok: bool) {
 	return
 }

-// Push multiple elements to the back of the queue
-push_back_elems :: proc(q: ^$Q/Queue($T), elems: ..T) -> (ok: bool, err: runtime.Allocator_Error)  {
+/*
+Push many elements at once to the back of the queue.
+
+If there is not enough space left and allocation fails to get more, this will
+return false with an `Allocator_Error`.
+*/
+push_back_elems :: proc(q: ^$Q/Queue($T), elems: ..T, loc := #caller_location) -> (ok: bool, err: runtime.Allocator_Error)  {
 	n := uint(builtin.len(elems))
 	if space(q^) < int(n) {
-		_grow(q, q.len + n) or_return
+		_grow(q, q.len + n, loc) or_return
 	}
-	
+
 	sz := uint(builtin.len(q.data))
 	insert_from := (q.offset + q.len) % sz
 	insert_to := n
@@ -207,19 +414,31 @@ push_back_elems :: proc(q: ^$Q/Queue($T), elems: ..T) -> (ok: bool, err: runtime
 	return true, nil
 }

-// Consume `n` elements from the front of the queue
+/*
+Consume `n` elements from the front of the queue.
+
+This will raise a bounds checking error if the queue does not have enough elements.
+*/
 consume_front :: proc(q: ^$Q/Queue($T), n: int, loc := #caller_location) {
-	assert(condition=int(q.len) >= n, loc=loc)
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len >= uint(n), "Queue does not have enough elements to consume.", loc)
+	}
 	if n > 0 {
 		nu := uint(n)
 		q.offset = (q.offset + nu) % builtin.len(q.data)
-		q.len -= nu	
+		q.len -= nu
 	}
 }

-// Consume `n` elements from the back of the queue
+/*
+Consume `n` elements from the back of the queue.
+
+This will raise a bounds checking error if the queue does not have enough elements.
+*/
 consume_back :: proc(q: ^$Q/Queue($T), n: int, loc := #caller_location) {
-	assert(condition=int(q.len) >= n, loc=loc)
+	when !ODIN_NO_BOUNDS_CHECK {
+		ensure(q.len >= uint(n), "Queue does not have enough elements to consume.", loc)
+	}
 	if n > 0 {
 		q.len -= uint(n)
 	}
@@ -231,9 +450,14 @@ append_elem  :: push_back
 append_elems :: push_back_elems
 push   :: proc{push_back, push_back_elems}
 append :: proc{push_back, push_back_elems}
+enqueue :: push_back
+dequeue :: pop_front


-// Clear the contents of the queue
+/*
+Reset the queue's length and offset to zero, letting it write new elements over
+old memory, in effect clearing the accessible contents.
+*/
 clear :: proc(q: ^$Q/Queue($T)) {
 	q.len = 0
 	q.offset = 0
@@ -241,10 +465,10 @@ clear :: proc(q: ^$Q/Queue($T)) {


 // Internal growing procedure
-_grow :: proc(q: ^$Q/Queue($T), min_capacity: uint = 0) -> runtime.Allocator_Error {
+_grow :: proc(q: ^$Q/Queue($T), min_capacity: uint = 0, loc := #caller_location) -> runtime.Allocator_Error {
 	new_capacity := max(min_capacity, uint(8), uint(builtin.len(q.data))*2)
 	n := uint(builtin.len(q.data))
-	builtin.resize(&q.data, int(new_capacity)) or_return
+	builtin.resize(&q.data, int(new_capacity), loc) or_return
 	if q.offset + q.len > n {
 		diff := n - q.offset
 		copy(q.data[new_capacity-diff:], q.data[q.offset:][:diff])
--- a/core/container/small_array/doc.odin
+++ b/core/container/small_array/doc.odin
@@ -0,0 +1,55 @@
+/*
+Package small_array implements a dynamic array like
+interface on a stack-allocated, fixed-size array.
+
+The Small_Array type is optimal for scenarios where you need
+a container for a fixed number of elements of a specific type,
+with the total number known at compile time but the exact
+number to be used determined at runtime.
+
+Example:
+	import "core:fmt"
+	import "core:container/small_array"
+
+	create :: proc() -> (result: small_array.Small_Array(10, rune)) {
+		// appending single elements
+		small_array.push(&result, 'e')
+		// pushing a bunch of elements at once
+		small_array.push(&result, 'l', 'i', 'x', '-', 'e')
+		// pre-pending
+		small_array.push_front(&result, 'H')
+		// removing elements
+		small_array.ordered_remove(&result, 4)
+		// resizing to the desired length (the capacity will stay unchanged)
+		small_array.resize(&result, 7)
+		// inserting elements
+		small_array.inject_at(&result, 'p', 5)
+		// updating elements
+		small_array.set(&result, 3, 'l')
+		// getting pointers to elements
+		o := small_array.get_ptr(&result, 4)
+		o^ = 'o'
+		// and much more ....
+		return
+	}
+
+	// the Small_Array can be an ordinary parameter 'generic' over
+	// the actual length to be usable with different sizes
+	print_elements :: proc(arr: ^small_array.Small_Array($N, rune)) {
+		for r in small_array.slice(arr) {
+			fmt.print(r)
+		}
+	}
+
+	main :: proc() {
+		arr := create()
+		// ...
+		print_elements(&arr)
+	}
+
+Output:
+
+	Hellope
+
+*/
+package container_small_array
--- a/core/container/small_array/small_array.odin
+++ b/core/container/small_array/small_array.odin
@@ -1,62 +1,367 @@
 package container_small_array

 import "base:builtin"
-import "base:runtime"
-_ :: runtime
+@require import "base:intrinsics"
+@require import "base:runtime"

+/*
+A fixed-size stack-allocated array operated on in a dynamic fashion.
+
+Fields:
+- `data`: The underlying array
+- `len`: Amount of items that the `Small_Array` currently holds
+
+Example:
+
+	import "core:container/small_array"
+
+	example :: proc() {
+		a: small_array.Small_Array(100, int)
+		small_array.push_back(&a, 10)
+	}
+*/
 Small_Array :: struct($N: int, $T: typeid) where N >= 0 {
 	data: [N]T,
 	len:  int,
 }

+/*
+Returns the amount of items in the small-array.

+**Inputs**
+- `a`: The small-array
+
+**Returns**
+- the amount of items in the array
+*/
 len :: proc "contextless" (a: $A/Small_Array) -> int {
 	return a.len
 }

+/*
+Returns the capacity of the small-array.
+
+**Inputs**
+- `a`: The small-array
+
+**Returns** the capacity
+*/
 cap :: proc "contextless" (a: $A/Small_Array) -> int {
 	return builtin.len(a.data)
 }

+/*
+Returns how many more items the small-array could fit.
+
+**Inputs**
+- `a`: The small-array
+
+**Returns**
+- the number of unused slots
+*/
 space :: proc "contextless" (a: $A/Small_Array) -> int {
 	return builtin.len(a.data) - a.len
 }

+/*
+Returns a slice of the data.
+
+**Inputs**
+- `a`: The pointer to the small-array
+
+**Returns**
+- the slice
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	slice_example :: proc() {
+		print :: proc(a: ^small_array.Small_Array($N, int)) {
+			for item in small_array.slice(a) {
+				fmt.println(item)
+			}
+		}
+
+		a: small_array.Small_Array(5, int)
+		small_array.push_back(&a, 1)
+		small_array.push_back(&a, 2)
+		print(&a)
+	}
+
+Output:
+
+	1
+	2
+*/
 slice :: proc "contextless" (a: ^$A/Small_Array($N, $T)) -> []T {
 	return a.data[:a.len]
 }

+/*
+Get a copy of the item at the specified position.
+This operation assumes that the small-array is large enough.

+This will result in:
+	- the value if 0 <= index < len
+	- the zero value of the type if len <= index < capacity
+	- 'crash' if capacity <= index or index < 0
+
+**Inputs**
+- `a`: The small-array
+- `index`: The position of the item to get
+
+**Returns**
+- the element at the specified position
+*/
 get :: proc "contextless" (a: $A/Small_Array($N, $T), index: int) -> T {
 	return a.data[index]
 }
+
+/*
+Get a pointer to the item at the specified position.
+This operation assumes that the small-array is large enough.
+
+This will result in:
+	- the pointer if 0 <= index < len
+	- the pointer to the zero value if len <= index < capacity
+	- 'crash' if capacity <= index or index < 0
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `index`: The position of the item to get
+
+**Returns**
+- the pointer to the element at the specified position
+*/
 get_ptr :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int) -> ^T {
 	return &a.data[index]
 }

-get_safe :: proc(a: $A/Small_Array($N, $T), index: int) -> (T, bool) #no_bounds_check {
+/*
+Attempt to get a copy of the item at the specified position.
+
+**Inputs**
+- `a`: The small-array
+- `index`: The position of the item to get
+
+**Returns**
+- the element at the specified position
+- true if element exists, false otherwise
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	get_safe_example :: proc() {
+		a: small_array.Small_Array(5, rune)
+		small_array.push_back(&a, 'A')
+		
+		fmt.println(small_array.get_safe(a, 0) or_else 'x')
+		fmt.println(small_array.get_safe(a, 1) or_else 'x')
+	}
+
+Output:
+
+	A
+	x
+
+*/
+get_safe :: proc "contextless" (a: $A/Small_Array($N, $T), index: int) -> (T, bool) #no_bounds_check {
 	if index < 0 || index >= a.len {
 		return {}, false
 	}
 	return a.data[index], true
 }

-get_ptr_safe :: proc(a: ^$A/Small_Array($N, $T), index: int) -> (^T, bool) #no_bounds_check {
+/*
+Get a pointer to the item at the specified position.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `index`: The position of the item to get
+
+**Returns**
+- the pointer to the element at the specified position
+- true if element exists, false otherwise
+*/
+get_ptr_safe :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int) -> (^T, bool) #no_bounds_check {
 	if index < 0 || index >= a.len {
 		return {}, false
 	}
 	return &a.data[index], true
 }

+/*
+Set the element at the specified position to the given value.
+This operation assumes that the small-array is large enough.
+
+This will result in:
+	- the value being set if 0 <= index < capacity
+	- 'crash' otherwise
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `index`: The position of the item to set
+- `value`: The value to set the element to
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	set_example :: proc() {
+		a: small_array.Small_Array(5, rune)
+		small_array.push_back(&a, 'A')
+		small_array.push_back(&a, 'B')
+		fmt.println(small_array.slice(&a))
+
+		// updates index 0
+		small_array.set(&a, 0, 'Z')
+		fmt.println(small_array.slice(&a))
+
+		// updates to a position x, where
+		// len <= x < cap are not visible since
+		// the length of the small-array remains unchanged
+		small_array.set(&a, 2, 'X')
+		small_array.set(&a, 3, 'Y')
+		small_array.set(&a, 4, 'Z')
+		fmt.println(small_array.slice(&a))
+
+		// resizing makes the change visible
+		small_array.non_zero_resize(&a, 100)
+		fmt.println(small_array.slice(&a))
+	}
+
+Output:
+
+	[A, B]
+	[Z, B]
+	[Z, B]
+	[Z, B, X, Y, Z]
+
+*/
 set :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int, item: T) {
 	a.data[index] = item
 }

-resize :: proc "contextless" (a: ^$A/Small_Array, length: int) {
+/*
+Tries to resize the small-array to the specified length.
+
+The memory of added elements will be zeroed out.
+
+The new length will be:
+	- `length` if `length` <= capacity
+	- capacity if length > capacity
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `length`: The new desired length
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	resize_example :: proc() {
+		a: small_array.Small_Array(5, int)
+
+		small_array.push_back(&a, 1)
+		small_array.push_back(&a, 2)
+		fmt.println(small_array.slice(&a))
+
+		small_array.resize(&a, 1)
+		fmt.println(small_array.slice(&a))
+
+		small_array.resize(&a, 100)
+		fmt.println(small_array.slice(&a))
+	}
+
+Output:
+
+	[1, 2]
+	[1]
+	[1, 0, 0, 0, 0]
+*/
+resize :: proc "contextless" (a: ^$A/Small_Array($N, $T), length: int) {
+	prev_len := a.len
+	a.len = min(length, builtin.len(a.data))
+	if prev_len < a.len {
+		intrinsics.mem_zero(&a.data[prev_len], size_of(T)*(a.len-prev_len))
+	}
+}
+
+/*
+Tries to resize the small-array to the specified length.
+
+The new length will be:
+	- `length` if `length` <= capacity
+	- capacity if length > capacity
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `length`: The new desired length
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	non_zero_resize_example :: proc() {
+		a: small_array.Small_Array(5, int)
+
+		small_array.push_back(&a, 1)
+		small_array.push_back(&a, 2)
+		fmt.println(small_array.slice(&a))
+
+		small_array.non_zero_resize(&a, 1)
+		fmt.println(small_array.slice(&a))
+
+		small_array.non_zero_resize(&a, 100)
+		fmt.println(small_array.slice(&a))
+	}
+
+Output:
+
+	[1, 2]
+	[1]
+	[1, 2, 0, 0, 0]
+*/
+non_zero_resize :: proc "contextless" (a: ^$A/Small_Array, length: int) {
 	a.len = min(length, builtin.len(a.data))
 }

+/*
+Attempts to add the given element to the end.

+**Inputs**
+- `a`: A pointer to the small-array
+- `item`: The item to append
+
+**Returns** 
+- true if there was enough space to fit the element, false otherwise
+
+Example:
+	
+	import "core:container/small_array"
+	import "core:fmt"
+
+	push_back_example :: proc() {
+		a: small_array.Small_Array(2, int)
+
+		assert(small_array.push_back(&a, 1), "this should fit")
+		assert(small_array.push_back(&a, 2), "this should fit")
+		assert(!small_array.push_back(&a, 3), "this should not fit")
+
+		fmt.println(small_array.slice(&a))
+	}
+
+Output:
+
+	[1, 2]
+*/
 push_back :: proc "contextless" (a: ^$A/Small_Array($N, $T), item: T) -> bool {
 	if a.len < cap(a^) {
 		a.data[a.len] = item
@@ -66,6 +371,39 @@ push_back :: proc "contextless" (a: ^$A/Small_Array($N, $T), item: T) -> bool {
 	return false
 }

+/*
+Attempts to add the given element at the beginning.
+This operation also works on an empty small-array; it only fails when the array is full.
+
+Note: Performing this operation will cause pointers obtained
+through get_ptr(_safe) to reference incorrect elements.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `item`: The item to prepend
+
+**Returns** 
+- true if there was enough space to fit the element, false otherwise
+
+Example:
+	
+	import "core:container/small_array"
+	import "core:fmt"
+
+	push_front_example :: proc() {
+		a: small_array.Small_Array(2, int)
+
+		assert(small_array.push_front(&a, 2), "this should fit")
+		assert(small_array.push_front(&a, 1), "this should fit")
+		assert(!small_array.push_back(&a, 0), "this should not fit")
+
+		fmt.println(small_array.slice(&a))
+	}
+
+Output:
+
+	[1, 2]
+*/
 push_front :: proc "contextless" (a: ^$A/Small_Array($N, $T), item: T) -> bool {
 	if a.len < cap(a^) {
 		a.len += 1
@@ -77,6 +415,35 @@ push_front :: proc "contextless" (a: ^$A/Small_Array($N, $T), item: T) -> bool {
 	return false
 }

+/*
+Removes and returns the last element of the small-array.
+This operation assumes that the small-array is not empty.
+
+**Inputs**
+- `a`: A pointer to the small-array
+
+**Returns** 
+- a copy of the element removed from the end of the small-array
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	pop_back_example :: proc() {
+		a: small_array.Small_Array(5, int)
+		small_array.push(&a, 0, 1, 2)
+
+		fmt.println("BEFORE:", small_array.slice(&a))
+		small_array.pop_back(&a)
+		fmt.println("AFTER: ", small_array.slice(&a))
+	}
+
+Output:
+
+	BEFORE: [0, 1, 2]
+	AFTER:  [0, 1]
+*/
 pop_back :: proc "odin" (a: ^$A/Small_Array($N, $T), loc := #caller_location) -> T {
 	assert(condition=(N > 0 && a.len > 0), loc=loc)
 	item := a.data[a.len-1]
@@ -84,6 +451,38 @@ pop_back :: proc "odin" (a: ^$A/Small_Array($N, $T), loc := #caller_location) ->
 	return item
 }

+/*
+Removes and returns the first element of the small-array.
+This operation assumes that the small-array is not empty.
+
+Note: Performing this operation will cause pointers obtained
+through get_ptr(_safe) to reference incorrect elements.
+
+**Inputs**
+- `a`: A pointer to the small-array
+
+**Returns** 
+- a copy of the element removed from the beginning of the small-array
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	pop_front_example :: proc() {
+		a: small_array.Small_Array(5, int)
+		small_array.push(&a, 0, 1, 2)
+
+		fmt.println("BEFORE:", small_array.slice(&a))
+		small_array.pop_front(&a)
+		fmt.println("AFTER: ", small_array.slice(&a))
+	}
+
+Output:
+
+	BEFORE: [0, 1, 2]
+	AFTER:  [1, 2]
+*/
 pop_front :: proc "odin" (a: ^$A/Small_Array($N, $T), loc := #caller_location) -> T {
 	assert(condition=(N > 0 && a.len > 0), loc=loc)
 	item := a.data[0]
@@ -93,6 +492,32 @@ pop_front :: proc "odin" (a: ^$A/Small_Array($N, $T), loc := #caller_location) -
 	return item
 }

+/*
+Attempts to remove and return the last element of the small array.
+Unlike `pop_back`, it does not assume that the array is non-empty.
+
+**Inputs**
+- `a`: A pointer to the small-array
+
+**Returns** 
+- a copy of the element removed from the end of the small-array
+- true if the small-array was not empty, false otherwise
+
+Example:
+
+	import "core:container/small_array"
+
+	pop_back_safe_example :: proc() {
+		a: small_array.Small_Array(3, int)
+		small_array.push(&a, 1)
+
+		el, ok := small_array.pop_back_safe(&a)
+		assert(ok, "there was an element in the array")
+
+		el, ok = small_array.pop_back_safe(&a)
+		assert(!ok, "there was NO element in the array")
+	}
+*/
 pop_back_safe :: proc "contextless" (a: ^$A/Small_Array($N, $T)) -> (item: T, ok: bool) {
 	if N > 0 && a.len > 0 {
 		item = a.data[a.len-1]
@@ -102,6 +527,35 @@ pop_back_safe :: proc "contextless" (a: ^$A/Small_Array($N, $T)) -> (item: T, ok
 	return
 }

+/*
+Attempts to remove and return the first element of the small array.
+Unlike `pop_front`, it does not assume that the array is non-empty.
+
+Note: Performing this operation will cause pointers obtained
+through get_ptr(_safe) to reference incorrect elements.
+
+**Inputs**
+- `a`: A pointer to the small-array
+
+**Returns** 
+- a copy of the element removed from the beginning of the small-array
+- true if the small-array was not empty, false otherwise
+
+Example:
+
+	import "core:container/small_array"
+
+	pop_front_safe_example :: proc() {
+		a: small_array.Small_Array(3, int)
+		small_array.push(&a, 1)
+
+		el, ok := small_array.pop_front_safe(&a)
+		assert(ok, "there was an element in the array")
+
+		el, ok = small_array.pop_front_safe(&a)
+		assert(!ok, "there was NO element in the array")
+	}
+*/
 pop_front_safe :: proc "contextless" (a: ^$A/Small_Array($N, $T)) -> (item: T, ok: bool) {
 	if N > 0 && a.len > 0 {
 		item = a.data[0]
@@ -113,11 +567,70 @@ pop_front_safe :: proc "contextless" (a: ^$A/Small_Array($N, $T)) -> (item: T, o
 	return
 }

+/*
+Decreases the length of the small-array by the given amount.
+The elements are therefore not really removed and can be
+recovered by calling `non_zero_resize` (`resize` would zero them).
+
+Note: This procedure assumes that the array has a sufficient length.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `count`: The amount the length should be reduced by
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	consume_example :: proc() {
+		a: small_array.Small_Array(3, int)
+		small_array.push(&a, 0, 1, 2)
+
+		fmt.println("BEFORE:", small_array.slice(&a))
+		small_array.consume(&a, 2)
+		fmt.println("AFTER :", small_array.slice(&a))
+	}
+
+Output:
+
+	BEFORE: [0, 1, 2]
+	AFTER : [0]
+*/
 consume :: proc "odin" (a: ^$A/Small_Array($N, $T), count: int, loc := #caller_location) {
 	assert(condition=a.len >= count, loc=loc)
 	a.len -= count
 }

+/*
+Removes the element at the specified index while retaining order.
+
+Note: Performing this operation will cause pointers obtained
+through get_ptr(_safe) to reference incorrect elements.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `index`: The position of the element to remove
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	ordered_remove_example :: proc() {
+		a: small_array.Small_Array(4, int)
+		small_array.push(&a, 0, 1, 2, 3)
+
+		fmt.println("BEFORE:", small_array.slice(&a))
+		small_array.ordered_remove(&a, 1)
+		fmt.println("AFTER :", small_array.slice(&a))
+	}
+
+Output:
+
+	BEFORE: [0, 1, 2, 3]
+	AFTER : [0, 2, 3]
+*/
 ordered_remove :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int, loc := #caller_location) #no_bounds_check {
 	runtime.bounds_check_error_loc(loc, index, a.len)
 	if index+1 < a.len {
@@ -126,6 +639,32 @@ ordered_remove :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int, lo
 	a.len -= 1
 }

+/*
+Removes the element at the specified index without retaining order.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `index`: The position of the element to remove
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	unordered_remove_example :: proc() {
+		a: small_array.Small_Array(4, int)
+		small_array.push(&a, 0, 1, 2, 3)
+
+		fmt.println("BEFORE:", small_array.slice(&a))
+		small_array.unordered_remove(&a, 1)
+		fmt.println("AFTER :", small_array.slice(&a))
+	}
+
+Output:
+
+	BEFORE: [0, 1, 2, 3]
+	AFTER : [0, 3, 2]
+*/
 unordered_remove :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int, loc := #caller_location) #no_bounds_check {
 	runtime.bounds_check_error_loc(loc, index, a.len)
 	n := a.len-1
@@ -135,10 +674,63 @@ unordered_remove :: proc "contextless" (a: ^$A/Small_Array($N, $T), index: int,
 	a.len -= 1
 }

+/*
+Sets the length of the small-array to 0.
+
+**Inputs**
+- `a`: A pointer to the small-array
+
+Example:
+	
+	import "core:container/small_array"
+	import "core:fmt"
+
+	clear_example :: proc() {
+		a: small_array.Small_Array(4, int)
+		small_array.push(&a, 0, 1, 2, 3)
+
+		fmt.println("BEFORE:", small_array.slice(&a))
+		small_array.clear(&a)
+		fmt.println("AFTER :", small_array.slice(&a))
+	}
+
+Output:
+
+	BEFORE: [0, 1, 2, 3]
+	AFTER : []
+
+*/
 clear :: proc "contextless" (a: ^$A/Small_Array($N, $T)) {
 	resize(a, 0)
 }

+/*
+Attempts to append all elements to the small-array returning
+false if there is not enough space to fit all of them.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `items`: The items to append, passed as
+  variadic arguments
+
+**Returns**
+- true if there was enough space to fit all the elements, false otherwise
+
+Example:
+	
+	import "core:container/small_array"
+	import "core:fmt"
+
+	push_back_elems_example :: proc() {
+		a: small_array.Small_Array(100, int)
+		small_array.push_back_elems(&a, 0, 1, 2, 3, 4)
+		fmt.println(small_array.slice(&a))
+	}
+
+Output:
+
+	[0, 1, 2, 3, 4]
+*/
 push_back_elems :: proc "contextless" (a: ^$A/Small_Array($N, $T), items: ..T) -> bool {
 	if a.len + builtin.len(items) <= cap(a^) {
 		n := copy(a.data[a.len:], items[:])
@@ -148,6 +740,36 @@ push_back_elems :: proc "contextless" (a: ^$A/Small_Array($N, $T), items: ..T) -
 	return false
 }

+/*
+Tries to insert an element at the specified position.
+
+Note: Performing this operation will cause pointers obtained
+through get_ptr(_safe) to reference incorrect elements.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `item`: The item to insert
+- `index`: The index to insert the item at
+
+**Returns**
+- true if there was enough space to fit the element, false otherwise
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	inject_at_example :: proc() {
+		arr: small_array.Small_Array(100, rune)
+		small_array.push(&arr,  'A', 'C', 'D')
+		small_array.inject_at(&arr, 'B', 1)
+		fmt.println(small_array.slice(&arr))
+	}
+
+Output:
+
+	[A, B, C, D]
+*/
 inject_at :: proc "contextless" (a: ^$A/Small_Array($N, $T), item: T, index: int) -> bool #no_bounds_check {
 	if a.len < cap(a^) && index >= 0 && index <= len(a^) {
 		a.len += 1
@@ -160,7 +782,38 @@ inject_at :: proc "contextless" (a: ^$A/Small_Array($N, $T), item: T, index: int
 	return false
 }

+// Alias for `push_back`
 append_elem  :: push_back
+// Alias for `push_back_elems`
 append_elems :: push_back_elems
+
+/*
+Tries to append the element(s) to the small-array.
+
+**Inputs**
+- `a`: A pointer to the small-array
+- `item`: The item to append
+- ..:
+
+**Returns**
+- true if there was enough space to fit the element, false otherwise
+
+Example:
+
+	import "core:container/small_array"
+	import "core:fmt"
+
+	push_example :: proc() {
+		a: small_array.Small_Array(100, int)
+		small_array.push(&a, 0)
+		small_array.push(&a, 1, 2, 3, 4)
+		fmt.println(small_array.slice(&a))
+	}
+
+Output:
+
+	[0, 1, 2, 3, 4]
+*/
 push   :: proc{push_back, push_back_elems}
+// Alias for `push`
 append :: proc{push_back, push_back_elems}
--- a/core/crypto/_aes/aes.odin
+++ b/core/crypto/_aes/aes.odin
@@ -25,4 +25,5 @@ GHASH_BLOCK_SIZE :: 16
 GHASH_TAG_SIZE :: 16

 // RCON is the AES keyschedule round constants.
+@(rodata)
 RCON := [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36}
--- a/core/crypto/_aes/ct64/ct64.odin
+++ b/core/crypto/_aes/ct64/ct64.odin
@@ -22,8 +22,6 @@

 package aes_ct64

-import "base:intrinsics"
-
 // Bitsliced AES for 64-bit general purpose (integer) registers.  Each
 // invocation will process up to 4 blocks at a time.  This implementation
 // is derived from the BearSSL ct64 code, and distributed under a 1-clause
@@ -212,11 +210,8 @@ orthogonalize :: proc "contextless" (q: ^[8]u64) {
 }

@(require_results)
-interleave_in :: proc "contextless" (w: []u32) -> (q0, q1: u64) #no_bounds_check {
-	if len(w) < 4 {
-		intrinsics.trap()
-	}
-	x0, x1, x2, x3 := u64(w[0]), u64(w[1]), u64(w[2]), u64(w[3])
+interleave_in :: proc "contextless" (w0, w1, w2, w3: u32) -> (q0, q1: u64) #no_bounds_check {
+	x0, x1, x2, x3 := u64(w0), u64(w1), u64(w2), u64(w3)
 	x0 |= (x0 << 16)
 	x1 |= (x1 << 16)
 	x2 |= (x2 << 16)
--- a/core/crypto/_aes/ct64/ct64_enc.odin
+++ b/core/crypto/_aes/ct64/ct64_enc.odin
@@ -22,12 +22,8 @@

 package aes_ct64

-import "base:intrinsics"
-
 add_round_key :: proc "contextless" (q: ^[8]u64, sk: []u64) #no_bounds_check {
-	if len(sk) < 8 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(sk) >= 8, "aes/ct64: invalid round key size")

 	q[0] ~= sk[0]
 	q[1] ~= sk[1]
--- a/core/crypto/_aes/ct64/ct64_keysched.odin
+++ b/core/crypto/_aes/ct64/ct64_keysched.odin
@@ -22,7 +22,6 @@

 package aes_ct64

-import "base:intrinsics"
 import "core:crypto/_aes"
 import "core:encoding/endian"
 import "core:mem"
@@ -42,7 +41,7 @@ sub_word :: proc "contextless" (x: u32) -> u32 {
 }

@(private, require_results)
-keysched :: proc(comp_skey: []u64, key: []byte) -> int {
+keysched :: proc "contextless" (comp_skey: []u64, key: []byte) -> int {
 	num_rounds, key_len := 0, len(key)
 	switch key_len {
 	case _aes.KEY_SIZE_128:
@@ -52,7 +51,7 @@ keysched :: proc(comp_skey: []u64, key: []byte) -> int {
 	case _aes.KEY_SIZE_256:
 		num_rounds = _aes.ROUNDS_256
 	case:
-		panic("crypto/aes: invalid AES key size")
+		panic_contextless("crypto/aes: invalid AES key size")
 	}

 	skey: [60]u32 = ---
@@ -78,7 +77,7 @@ keysched :: proc(comp_skey: []u64, key: []byte) -> int {

 	q: [8]u64 = ---
 	for i, j := 0, 0; i < nkf; i, j = i + 4, j + 2 {
-		q[0], q[4] = interleave_in(skey[i:])
+		q[0], q[4] = interleave_in(skey[i], skey[i+1], skey[i+2], skey[i+3])
 		q[1] = q[0]
 		q[2] = q[0]
 		q[3] = q[0]
@@ -123,57 +122,3 @@ skey_expand :: proc "contextless" (skey, comp_skey: []u64, num_rounds: int) {
 		skey[v + 3] = (x3 << 4) - x3
 	}
 }
-
-orthogonalize_roundkey :: proc "contextless" (qq: []u64, key: []byte) {
-	if len(qq) < 8 || len(key) != 16 {
-		intrinsics.trap()
-	}
-
-	skey: [4]u32 = ---
-	skey[0] = endian.unchecked_get_u32le(key[0:])
-	skey[1] = endian.unchecked_get_u32le(key[4:])
-	skey[2] = endian.unchecked_get_u32le(key[8:])
-	skey[3] = endian.unchecked_get_u32le(key[12:])
-
-	q: [8]u64 = ---
-	q[0], q[4] = interleave_in(skey[:])
-	q[1] = q[0]
-	q[2] = q[0]
-	q[3] = q[0]
-	q[5] = q[4]
-	q[6] = q[4]
-	q[7] = q[4]
-	orthogonalize(&q)
-
-	comp_skey: [2]u64 = ---
-	comp_skey[0] =
-		(q[0] & 0x1111111111111111) |
-		(q[1] & 0x2222222222222222) |
-		(q[2] & 0x4444444444444444) |
-		(q[3] & 0x8888888888888888)
-	comp_skey[1] =
-		(q[4] & 0x1111111111111111) |
-		(q[5] & 0x2222222222222222) |
-		(q[6] & 0x4444444444444444) |
-		(q[7] & 0x8888888888888888)
-
-	for x, u in comp_skey {
-		x0 := x
-		x1, x2, x3 := x0, x0, x0
-		x0 &= 0x1111111111111111
-		x1 &= 0x2222222222222222
-		x2 &= 0x4444444444444444
-		x3 &= 0x8888888888888888
-		x1 >>= 1
-		x2 >>= 2
-		x3 >>= 3
-		qq[u * 4 + 0] = (x0 << 4) - x0
-		qq[u * 4 + 1] = (x1 << 4) - x1
-		qq[u * 4 + 2] = (x2 << 4) - x2
-		qq[u * 4 + 3] = (x3 << 4) - x3
-	}
-
-	mem.zero_explicit(&skey, size_of(skey))
-	mem.zero_explicit(&q, size_of(q))
-	mem.zero_explicit(&comp_skey, size_of(comp_skey))
-}
--- a/core/crypto/_aes/ct64/ghash.odin
+++ b/core/crypto/_aes/ct64/ghash.odin
@@ -22,7 +22,6 @@

 package aes_ct64

-import "base:intrinsics"
 import "core:crypto/_aes"
 import "core:encoding/endian"

@@ -64,9 +63,8 @@ rev64 :: proc "contextless" (x: u64) -> u64 {
 // Note: `dst` is both an input and an output, to support easy implementation
 // of GCM.
 ghash :: proc "contextless" (dst, key, data: []byte) {
-	if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(dst) == _aes.GHASH_BLOCK_SIZE)
+	ensure_contextless(len(key) == _aes.GHASH_BLOCK_SIZE)

 	buf := data
 	l := len(buf)
--- a/core/crypto/_aes/ct64/helpers.odin
+++ b/core/crypto/_aes/ct64/helpers.odin
@@ -1,60 +1,61 @@
 package aes_ct64

-import "base:intrinsics"
 import "core:crypto/_aes"
 import "core:encoding/endian"

-load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) {
-	if len(src) != _aes.BLOCK_SIZE {
-		intrinsics.trap()
-	}
-
-	w: [4]u32 = ---
-	w[0] = endian.unchecked_get_u32le(src[0:])
-	w[1] = endian.unchecked_get_u32le(src[4:])
-	w[2] = endian.unchecked_get_u32le(src[8:])
-	w[3] = endian.unchecked_get_u32le(src[12:])
-	q[0], q[4] = interleave_in(w[:])
-	orthogonalize(q)
+@(require_results)
+load_interleaved :: proc "contextless" (src: []byte) -> (u64, u64) #no_bounds_check {
+	w0 := endian.unchecked_get_u32le(src[0:])
+	w1 := endian.unchecked_get_u32le(src[4:])
+	w2 := endian.unchecked_get_u32le(src[8:])
+	w3 := endian.unchecked_get_u32le(src[12:])
+	return interleave_in(w0, w1, w2, w3)
 }

-store_blockx1 :: proc "contextless" (dst: []byte, q: ^[8]u64) {
-	if len(dst) != _aes.BLOCK_SIZE {
-		intrinsics.trap()
-	}
-
-	orthogonalize(q)
-	w0, w1, w2, w3 := interleave_out(q[0], q[4])
+store_interleaved :: proc "contextless" (dst: []byte, a0, a1: u64) #no_bounds_check {
+	w0, w1, w2, w3 := interleave_out(a0, a1)
 	endian.unchecked_put_u32le(dst[0:], w0)
 	endian.unchecked_put_u32le(dst[4:], w1)
 	endian.unchecked_put_u32le(dst[8:], w2)
 	endian.unchecked_put_u32le(dst[12:], w3)
 }

+@(require_results)
+xor_interleaved :: #force_inline proc "contextless" (a0, a1, b0, b1: u64) -> (u64, u64) {
+	return a0 ~ b0, a1 ~ b1
+}
+
+@(require_results)
+and_interleaved :: #force_inline proc "contextless" (a0, a1, b0, b1: u64) -> (u64, u64) {
+	return a0 & b0, a1 & b1
+}
+
+load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) {
+	ensure_contextless(len(src) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")
+
+	q[0], q[4] = #force_inline load_interleaved(src)
+	orthogonalize(q)
+}
+
+store_blockx1 :: proc "contextless" (dst: []byte, q: ^[8]u64) {
+	ensure_contextless(len(dst) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")
+
+	orthogonalize(q)
+	#force_inline store_interleaved(dst, q[0], q[4])
+}
+
 load_blocks :: proc "contextless" (q: ^[8]u64, src: [][]byte) {
-	if n := len(src); n > STRIDE || n == 0 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(src) == 0 || len(src) <= STRIDE, "aes/ct64: invalid block(s) size")

-	w: [4]u32 = ---
 	for s, i in src {
-		if len(s) != _aes.BLOCK_SIZE {
-			intrinsics.trap()
-		}
-
-		w[0] = endian.unchecked_get_u32le(s[0:])
-		w[1] = endian.unchecked_get_u32le(s[4:])
-		w[2] = endian.unchecked_get_u32le(s[8:])
-		w[3] = endian.unchecked_get_u32le(s[12:])
-		q[i], q[i + 4] = interleave_in(w[:])
+		ensure_contextless(len(s) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")
+		q[i], q[i + 4] = #force_inline load_interleaved(s)
 	}
 	orthogonalize(q)
 }

 store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) {
-	if n := len(dst); n > STRIDE || n == 0 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(dst) == 0 || len(dst) <= STRIDE, "aes/ct64: invalid block(s) size")

 	orthogonalize(q)
 	for d, i in dst {
@@ -62,14 +63,7 @@ store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) {
 		if d == nil {
 			break
 		}
-		if len(d) != _aes.BLOCK_SIZE {
-			intrinsics.trap()
-		}
-
-		w0, w1, w2, w3 := interleave_out(q[i], q[i + 4])
-		endian.unchecked_put_u32le(d[0:], w0)
-		endian.unchecked_put_u32le(d[4:], w1)
-		endian.unchecked_put_u32le(d[8:], w2)
-		endian.unchecked_put_u32le(d[12:], w3)
+		ensure_contextless(len(d) == _aes.BLOCK_SIZE, "aes/ct64: invalid block size")
+		#force_inline store_interleaved(d, q[i], q[i + 4])
 	}
 }
--- a/core/crypto/_aes/hw_intel/api.odin
+++ b/core/crypto/_aes/hw_intel/api.odin
@@ -6,7 +6,7 @@ import "core:sys/info"
 // is_supported returns true iff hardware accelerated AES
 // is supported.
 is_supported :: proc "contextless" () -> bool {
-	features, ok := info.cpu_features.?
+	features, ok := info.cpu.features.?
 	if !ok {
 		return false
 	}
--- a/core/crypto/_aes/hw_intel/ghash.odin
+++ b/core/crypto/_aes/hw_intel/ghash.odin
@@ -52,7 +52,7 @@ GHASH_STRIDE_BYTES_HW :: GHASH_STRIDE_HW * _aes.GHASH_BLOCK_SIZE
 // that it is right-shifted by 1 bit. The left-shift is relatively
 // inexpensive, and it can be mutualised.
 //
-// Since SSE2 opcodes do not have facilities for shitfting full 128-bit
+// Since SSE2 opcodes do not have facilities for shifting full 128-bit
 // values with bit precision, we have to break down values into 64-bit
 // chunks. We number chunks from 0 to 3 in left to right order.

@@ -155,7 +155,7 @@ square_f128 :: #force_inline proc "contextless" (kw: x86.__m128i) -> (x86.__m128
@(enable_target_feature = "sse2,ssse3,pclmul")
 ghash :: proc "contextless" (dst, key, data: []byte) #no_bounds_check {
 	if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE {
-		intrinsics.trap()
+		panic_contextless("aes/ghash: invalid dst or key size")
 	}

 	// Note: BearSSL opts to copy the remainder into a zero-filled
--- a/core/crypto/_blake2/blake2.odin
+++ b/core/crypto/_blake2/blake2.odin
@@ -18,6 +18,8 @@ BLAKE2S_SIZE :: 32
 BLAKE2B_BLOCK_SIZE :: 128
 BLAKE2B_SIZE :: 64

+MAX_SIZE :: 255
+
 Blake2s_Context :: struct {
 	h:            [8]u32,
 	t:            [2]u32,
@@ -68,13 +70,13 @@ Blake2_Tree :: struct {
 	is_last_node:    bool,
 }

-@(private)
+@(private, rodata)
 BLAKE2S_IV := [8]u32 {
 	0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
 	0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
 }

-@(private)
+@(private, rodata)
 BLAKE2B_IV := [8]u64 {
 	0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
 	0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
@@ -82,16 +84,13 @@ BLAKE2B_IV := [8]u64 {
 	0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
 }

-init :: proc(ctx: ^$T, cfg: ^Blake2_Config) {
+init :: proc "contextless" (ctx: ^$T, cfg: ^Blake2_Config) {
 	when T == Blake2s_Context {
 		max_size :: BLAKE2S_SIZE
 	} else when T == Blake2b_Context {
 		max_size :: BLAKE2B_SIZE
 	}
-
-	if cfg.size > max_size {
-		panic("blake2: requested output size exceeeds algorithm max")
-	}
+	ensure_contextless(cfg.size <= max_size, "blake2: requested output size exceeeds algorithm max")

 	// To save having to allocate a scratch buffer, use the internal
 	// data buffer (`ctx.x`), as it is exactly the correct size.
@@ -167,8 +166,8 @@ init :: proc(ctx: ^$T, cfg: ^Blake2_Config) {
 	ctx.is_initialized = true
 }

-update :: proc(ctx: ^$T, p: []byte) {
-	assert(ctx.is_initialized)
+update :: proc "contextless" (ctx: ^$T, p: []byte) {
+	ensure_contextless(ctx.is_initialized)

 	p := p
 	when T == Blake2s_Context {
@@ -195,8 +194,8 @@ update :: proc(ctx: ^$T, p: []byte) {
 	ctx.nx += copy(ctx.x[ctx.nx:], p)
 }

-final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
-	assert(ctx.is_initialized)
+final :: proc "contextless" (ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
+	ensure_contextless(ctx.is_initialized)

 	ctx := ctx
 	if finalize_clone {
@@ -206,24 +205,19 @@ final :: proc(ctx: ^$T, hash: []byte, finalize_clone: bool = false) {
 	}
 	defer(reset(ctx))

+	ensure_contextless(len(hash) >= int(ctx.size), "crypto/blake2: invalid destination digest size")
 	when T == Blake2s_Context {
-		if len(hash) < int(ctx.size) {
-			panic("crypto/blake2s: invalid destination digest size")
-		}
 		blake2s_final(ctx, hash)
 	} else when T == Blake2b_Context {
-		if len(hash) < int(ctx.size) {
-			panic("crypto/blake2b: invalid destination digest size")
-		}
 		blake2b_final(ctx, hash)
 	}
 }

-clone :: proc(ctx, other: ^$T) {
+clone :: proc "contextless" (ctx, other: ^$T) {
 	ctx^ = other^
 }

-reset :: proc(ctx: ^$T) {
+reset :: proc "contextless" (ctx: ^$T) {
 	if !ctx.is_initialized {
 		return
 	}
--- a/core/crypto/_chacha20/chacha20.odin
+++ b/core/crypto/_chacha20/chacha20.odin
@@ -1,6 +1,5 @@
 package _chacha20

-import "base:intrinsics"
 import "core:encoding/endian"
 import "core:math/bits"
 import "core:mem"
@@ -46,9 +45,8 @@ Context :: struct {
 // derivation is expected to be handled by the caller, so that the
 // HChaCha call can be suitably accelerated.
 init :: proc "contextless" (ctx: ^Context, key, iv: []byte, is_xchacha: bool) {
-	if len(key) != KEY_SIZE || len(iv) != IV_SIZE {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(key) == KEY_SIZE, "chacha20: invalid key size")
+	ensure_contextless(len(iv) == IV_SIZE, "chacha20: invalid IV size")

 	k, n := key, iv

@@ -76,12 +74,10 @@ init :: proc "contextless" (ctx: ^Context, key, iv: []byte, is_xchacha: bool) {

 // seek seeks the (X)ChaCha20 stream counter to the specified block.
 seek :: proc(ctx: ^Context, block_nr: u64) {
-	assert(ctx._is_initialized)
+	ensure(ctx._is_initialized)

 	if ctx._is_ietf_flavor {
-		if block_nr > MAX_CTR_IETF {
-			panic("crypto/chacha20: attempted to seek past maximum counter")
-		}
+		ensure(block_nr <= MAX_CTR_IETF, "crypto/chacha20: attempted to seek past maximum counter")
 	} else {
 		ctx._s[13] = u32(block_nr >> 32)
 	}
@@ -102,7 +98,7 @@ check_counter_limit :: proc(ctx: ^Context, nr_blocks: int) {
 	// Enforce the maximum consumed keystream per IV.
 	//
 	// While all modern "standard" definitions of ChaCha20 use
-	// the IETF 32-bit counter, for XChaCha20 most common
+	// the IETF 32-bit counter, for XChaCha20 historical
 	// implementations allow for a 64-bit counter.
 	//
 	// Honestly, the answer here is "use a MRAE primitive", but
@@ -110,14 +106,14 @@ check_counter_limit :: proc(ctx: ^Context, nr_blocks: int) {

 	ERR_CTR_EXHAUSTED :: "crypto/chacha20: maximum (X)ChaCha20 keystream per IV reached"

+	ctr_ok: bool
 	if ctx._is_ietf_flavor {
-		if u64(ctx._s[12]) + u64(nr_blocks) > MAX_CTR_IETF {
-			panic(ERR_CTR_EXHAUSTED)
-		}
+		ctr_ok = u64(ctx._s[12]) + u64(nr_blocks) <= MAX_CTR_IETF
 	} else {
 		ctr := (u64(ctx._s[13]) << 32) | u64(ctx._s[12])
-		if _, carry := bits.add_u64(ctr, u64(nr_blocks), 0); carry != 0 {
-			panic(ERR_CTR_EXHAUSTED)
-		}
+		_, carry := bits.add_u64(ctr, u64(nr_blocks), 0)
+		ctr_ok = carry == 0
 	}
+
+	ensure(ctr_ok, ERR_CTR_EXHAUSTED) // Single failure point for both counter flavors.
 }
--- a/core/crypto/_chacha20/simd128/chacha20_simd128.odin
+++ b/core/crypto/_chacha20/simd128/chacha20_simd128.odin
@@ -29,11 +29,24 @@ when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 {
 	// explicitly using simd.u8x16 shuffles.
 	@(private = "file")
 	TARGET_SIMD_FEATURES :: "sse2,ssse3"
+} else when ODIN_ARCH == .riscv64 {
+	@(private = "file")
+	TARGET_SIMD_FEATURES :: "v"
 } else {
 	@(private = "file")
 	TARGET_SIMD_FEATURES :: ""
 }

+// Some targets lack runtime feature detection, and will flat out refuse
+// to load binaries that have unknown instructions.  This is distinct from
+// `simd.HAS_HARDWARE_SIMD` as actually good designs support runtime feature
+// detection and that constant establishes a baseline.
+//
+// See:
+// - https://github.com/WebAssembly/design/issues/1161
+@(private = "file")
+TARGET_IS_DESIGNED_BY_IDIOTS :: (ODIN_ARCH == .wasm64p32 || ODIN_ARCH == .wasm32) && !intrinsics.has_target_feature("simd128")
+
@(private = "file")
 _ROT_7L: simd.u32x4 : {7, 7, 7, 7}
@(private = "file")
@@ -205,14 +218,16 @@ _store_simd128 :: #force_inline proc "contextless" (
 // is_performant returns true iff the target and current host both support
 // "enough" 128-bit SIMD to make this implementation performant.
 is_performant :: proc "contextless" () -> bool {
-	when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 || ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
+	when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 || ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 || ODIN_ARCH == .riscv64 {
 		when ODIN_ARCH == .arm64 || ODIN_ARCH == .arm32 {
 			req_features :: info.CPU_Features{.asimd}
 		} else when ODIN_ARCH == .amd64 || ODIN_ARCH == .i386 {
 			req_features :: info.CPU_Features{.sse2, .ssse3}
+		} else when ODIN_ARCH == .riscv64 {
+			req_features :: info.CPU_Features{.V}
 		}

-		features, ok := info.cpu_features.?
+		features, ok := info.cpu.features.?
 		if !ok {
 			return false
 		}
@@ -245,8 +260,17 @@ stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int)

 	// 8 blocks at a time.
 	//
-	// Note: This is only worth it on Aarch64.
-	when ODIN_ARCH == .arm64 {
+	// Note:
+	// This uses a ton of registers so it is only worth it on targets
+	// that have something like 32 128-bit registers.  This is currently
+	// all ARMv8 targets, and RISC-V Zvl128b (`V` application profile)
+	// targets.
+	//
+	// While our current definition of `.arm32` is 32-bit ARMv8, this
+	// may change in the future (ARMv7 is still relevant), and things
+	// like Cortex-A8/A9 do "pretend" 128-bit SIMD 64 bits at a time,
+	// and thus need benchmarking.
+	when ODIN_ARCH == .arm64 || ODIN_ARCH == .riscv64 {
 		for ; n >= 8; n = n - 8 {
 			v0, v1, v2, v3 := s0, s1, s2, s3

@@ -354,9 +378,11 @@ stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int)

 	// 4 blocks at a time.
 	//
-	// Note: The i386 target lacks the required number of registers
-	// for this to be performant, so it is skipped.
-	when ODIN_ARCH != .i386 {
+	// Note: This is skipped on several targets for various reasons.
+	// - i386 lacks the required number of registers
+	// - Generating code when runtime "hardware" SIMD support is impossible
+	//   to detect is pointless, since this will be emulated using GP regs.
+	when ODIN_ARCH != .i386 && !TARGET_IS_DESIGNED_BY_IDIOTS {
 		for ; n >= 4; n = n - 4 {
 			v0, v1, v2, v3 := s0, s1, s2, s3

--- a/core/crypto/_chacha20/simd256/chacha20_simd256.odin
+++ b/core/crypto/_chacha20/simd256/chacha20_simd256.odin
@@ -41,7 +41,7 @@ _VEC_TWO: simd.u64x4 : {2, 0, 2, 0}
 is_performant :: proc "contextless" () -> bool {
 	req_features :: info.CPU_Features{.avx, .avx2}

-	features, ok := info.cpu_features.?
+	features, ok := info.cpu.features.?
 	if !ok {
 		return false
 	}
--- a/core/crypto/_chacha20/simd256/chacha20_simd256_stub.odin
+++ b/core/crypto/_chacha20/simd256/chacha20_simd256_stub.odin
@@ -13,5 +13,5 @@ stream_blocks :: proc(ctx: ^_chacha20.Context, dst, src: []byte, nr_blocks: int)
 }

 hchacha20 :: proc "contextless" (dst, key, iv: []byte) {
-	intrinsics.trap()
+	panic_contextless("crypto/chacha20: simd256 implementation unsupported")
 }
--- a/core/crypto/_edwards25519/edwards25519.odin
+++ b/core/crypto/_edwards25519/edwards25519.odin
@@ -11,7 +11,6 @@ See:
 - https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html
 */

-import "base:intrinsics"
 import "core:crypto"
 import field "core:crypto/_fiat/field_curve25519"
 import "core:mem"
@@ -32,6 +31,7 @@ import "core:mem"
 // - The group element decoding routine takes the opinionated stance of
 //   rejecting non-canonical encodings.

+@(rodata)
 FE_D := field.Tight_Field_Element {
 	929955233495203,
 	466365720129213,
@@ -39,7 +39,7 @@ FE_D := field.Tight_Field_Element {
 	2033849074728123,
 	1442794654840575,
 }
-@(private)
+@(private, rodata)
 FE_A := field.Tight_Field_Element {
 	2251799813685228,
 	2251799813685247,
@@ -47,7 +47,7 @@ FE_A := field.Tight_Field_Element {
 	2251799813685247,
 	2251799813685247,
 }
-@(private)
+@(private, rodata)
 FE_D2 := field.Tight_Field_Element {
 	1859910466990425,
 	932731440258426,
@@ -55,7 +55,7 @@ FE_D2 := field.Tight_Field_Element {
 	1815898335770999,
 	633789495995903,
 }
-@(private)
+@(private, rodata)
 GE_BASEPOINT := Group_Element {
 	field.Tight_Field_Element {
 		1738742601995546,
@@ -80,6 +80,7 @@ GE_BASEPOINT := Group_Element {
 		1821297809914039,
 	},
 }
+@(rodata)
 GE_IDENTITY := Group_Element {
 	field.Tight_Field_Element{0, 0, 0, 0, 0},
 	field.Tight_Field_Element{1, 0, 0, 0, 0},
@@ -107,9 +108,7 @@ ge_set :: proc "contextless" (ge, a: ^Group_Element) {

@(require_results)
 ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
-	if len(b) != 32 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(b) == 32, "edwards25519: invalid group element size")
 	b_ := (^[32]byte)(raw_data(b))

 	// Do the work in a scratch element, so that ge is unchanged on
@@ -166,9 +165,7 @@ ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
 }

 ge_bytes :: proc "contextless" (ge: ^Group_Element, dst: []byte) {
-	if len(dst) != 32 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(dst) == 32, "edwards25519: invalid group element size")
 	dst_ := (^[32]byte)(raw_data(dst))

 	// Convert the element to affine (x, y) representation.
--- a/core/crypto/_edwards25519/edwards25519_scalar.odin
+++ b/core/crypto/_edwards25519/edwards25519_scalar.odin
@@ -1,6 +1,5 @@
 package _edwards25519

-import "base:intrinsics"
 import field "core:crypto/_fiat/field_scalar25519"
 import "core:mem"

@@ -8,7 +7,7 @@ Scalar :: field.Montgomery_Domain_Field_Element

 // WARNING: This is non-canonical and only to be used when checking if
 // a group element is on the prime-order subgroup.
-@(private)
+@(private, rodata)
 SC_ELL := field.Non_Montgomery_Domain_Field_Element {
 	field.ELL[0],
 	field.ELL[1],
@@ -25,17 +24,13 @@ sc_set_u64 :: proc "contextless" (sc: ^Scalar, i: u64) {

@(require_results)
 sc_set_bytes :: proc "contextless" (sc: ^Scalar, b: []byte) -> bool {
-	if len(b) != 32 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(b) == 32, "edwards25519: invalid scalar size")
 	b_ := (^[32]byte)(raw_data(b))
 	return field.fe_from_bytes(sc, b_)
 }

 sc_set_bytes_rfc8032 :: proc "contextless" (sc: ^Scalar, b: []byte) {
-	if len(b) != 32 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(b) == 32, "edwards25519: invalid scalar size")
 	b_ := (^[32]byte)(raw_data(b))
 	field.fe_from_bytes_rfc8032(sc, b_)
 }
--- a/core/crypto/_fiat/field_curve25519/field51.odin
+++ b/core/crypto/_fiat/field_curve25519/field51.odin
@@ -42,9 +42,12 @@ import "core:math/bits"
 Loose_Field_Element :: distinct [5]u64
 Tight_Field_Element :: distinct [5]u64

+@(rodata)
 FE_ZERO := Tight_Field_Element{0, 0, 0, 0, 0}
+@(rodata)
 FE_ONE := Tight_Field_Element{1, 0, 0, 0, 0}

+@(rodata)
 FE_SQRT_M1 := Tight_Field_Element {
 	1718705420411056,
 	234908883556509,
--- a/core/crypto/_fiat/field_curve448/field.odin
+++ b/core/crypto/_fiat/field_curve448/field.odin
@@ -0,0 +1,235 @@
+package field_curve448
+
+import "core:mem"
+
+fe_relax_cast :: #force_inline proc "contextless" (
+	arg1: ^Tight_Field_Element,
+) -> ^Loose_Field_Element {
+	return (^Loose_Field_Element)(arg1) // Pointer reinterpretation only; the element is neither read nor modified.
+}
+
+fe_tighten_cast :: #force_inline proc "contextless" (
+	arg1: ^Loose_Field_Element,
+) -> ^Tight_Field_Element {
+	return (^Tight_Field_Element)(arg1) // Pointer reinterpretation only, no carry/reduction. NOTE(review): presumably the caller guarantees tight bounds - confirm.
+}
+
+fe_clear :: proc "contextless" (
+	arg1: $T,
+) where T == ^Tight_Field_Element || T == ^Loose_Field_Element {
+	mem.zero_explicit(arg1, size_of(arg1^)) // Wipes every byte of the pointed-to element (either element flavor).
+}
+
+fe_clear_vec :: proc "contextless" (
+	arg1: $T,
+) where T == []^Tight_Field_Element || T == []^Loose_Field_Element {
+	for fe in arg1 {
+		fe_clear(fe) // Entries are pointers, so each referenced element is wiped in place.
+	}
+}
+
+fe_carry_mul_small :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+	arg2: u64,
+) {
+	arg2_ := Loose_Field_Element{arg2, 0, 0, 0, 0, 0, 0, 0} // Widen the scalar: limb 0 = arg2, the other seven limbs are zero.
+	fe_carry_mul(out1, arg1, &arg2_) // out1 = arg1 * arg2 via the general multiply.
+}
+
+fe_carry_pow2k :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+	arg2: uint,
+) {
+	// Special case: `arg2 == 0` yields 1, though this should never happen. NOTE(review): `arg1^(2^0)` would be `arg1`.
+	if arg2 == 0 {
+		fe_one(out1)
+		return
+	}
+
+	fe_carry_square(out1, arg1) // out1 = arg1^2
+	for _ in 1 ..< arg2 {
+		fe_carry_square(out1, fe_relax_cast(out1)) // arg2 - 1 further squarings, ending at arg1^(2^arg2).
+	}
+}
+
+fe_carry_inv :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+) {
+	// Inversion computation is derived from the addition chain:
+	//
+	//	_10     = 2*1
+	//	_11     = 1 + _10
+	//	_110    = 2*_11
+	//	_111    = 1 + _110
+	//	_111000 = _111 << 3
+	//	_111111 = _111 + _111000
+	//	x12     = _111111 << 6 + _111111
+	//	x24     = x12 << 12 + x12
+	//	i34     = x24 << 6
+	//	x30     = _111111 + i34
+	//	x48     = i34 << 18 + x24
+	//	x96     = x48 << 48 + x48
+	//	x192    = x96 << 96 + x96
+	//	x222    = x192 << 30 + x30
+	//	x223    = 2*x222 + 1
+	//	return    (x223 << 223 + x222) << 2 + 1
+	//
+	// Operations: 447 squares 13 multiplies
+	//
+	// Generated by github.com/mmcloughlin/addchain v0.4.0.
+
+	t0, t1, t2: Tight_Field_Element = ---, ---, ---
+
+	// Step 1: t0 = x^0x2
+	fe_carry_square(&t0, arg1)
+
+	// Step 2: t0 = x^0x3
+	fe_carry_mul(&t0, arg1, fe_relax_cast(&t0))
+
+	// Step 3: t0 = x^0x6
+	fe_carry_square(&t0, fe_relax_cast(&t0))
+
+	// Step 4: t0 = x^0x7
+	fe_carry_mul(&t0, arg1, fe_relax_cast(&t0))
+
+	// Step 7: t1 = x^0x38
+	fe_carry_pow2k(&t1, fe_relax_cast(&t0), 3)
+
+	// Step 8: t0 = x^0x3f
+	fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1))
+
+	// Step 14: t1 = x^0xfc0
+	fe_carry_pow2k(&t1, fe_relax_cast(&t0), 6)
+
+	// Step 15: t1 = x^0xfff
+	fe_carry_mul(&t1, fe_relax_cast(&t0), fe_relax_cast(&t1))
+
+	// Step 27: t2 = x^0xfff000
+	fe_carry_pow2k(&t2, fe_relax_cast(&t1), 12)
+
+	// Step 28: t1 = x^0xffffff
+	fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
+
+	// Step 34: t2 = x^0x3fffffc0
+	fe_carry_pow2k(&t2, fe_relax_cast(&t1), 6)
+
+	// Step 35: t0 = x^0x3fffffff
+	fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t2))
+
+	// Step 53: t2 = x^0xffffff000000
+	fe_carry_pow2k(&t2, fe_relax_cast(&t2), 18)
+
+	// Step 54: t1 = x^0xffffffffffff
+	fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
+
+	// Step 102: t2 = x^0xffffffffffff000000000000
+	fe_carry_pow2k(&t2, fe_relax_cast(&t1), 48)
+
+	// Step 103: t1 = x^0xffffffffffffffffffffffff
+	fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
+
+	// Step 199: t2 = x^0xffffffffffffffffffffffff000000000000000000000000
+	fe_carry_pow2k(&t2, fe_relax_cast(&t1), 96)
+
+	// Step 200: t1 = x^0xffffffffffffffffffffffffffffffffffffffffffffffff
+	fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
+
+	// Step 230: t1 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffc0000000
+	fe_carry_pow2k(&t1, fe_relax_cast(&t1), 30)
+
+	// Step 231: t0 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffff
+	fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1))
+
+	// Step 232: t1 = x^0x7ffffffffffffffffffffffffffffffffffffffffffffffffffffffe
+	fe_carry_square(&t1, fe_relax_cast(&t0))
+
+	// Step 233: t1 = x^0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffff
+	fe_carry_mul(&t1, arg1, fe_relax_cast(&t1))
+
+	// Step 456: t1 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000000000000000000000000000000000000000000000000000
+	fe_carry_pow2k(&t1, fe_relax_cast(&t1), 223)
+
+	// Step 457: t0 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff
+	fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1))
+
+	// Step 459: t0 = x^0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffc
+	fe_carry_pow2k(&t0, fe_relax_cast(&t0), 2)
+
+	// Step 460: z = x^0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd
+	fe_carry_mul(out1, arg1, fe_relax_cast(&t0))
+
+	fe_clear_vec([]^Tight_Field_Element{&t0, &t1, &t2}) // Wipe the temporaries, which hold intermediate powers of the input.
+}
+
+fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) { // Sets out1 to 0: all eight limbs are zeroed.
+	out1[0] = 0
+	out1[1] = 0
+	out1[2] = 0
+	out1[3] = 0
+	out1[4] = 0
+	out1[5] = 0
+	out1[6] = 0
+	out1[7] = 0
+}
+
+fe_one :: proc "contextless" (out1: ^Tight_Field_Element) { // Sets out1 to 1: limb 0 is 1, the other seven limbs are 0.
+	out1[0] = 1
+	out1[1] = 0
+	out1[2] = 0
+	out1[3] = 0
+	out1[4] = 0
+	out1[5] = 0
+	out1[6] = 0
+	out1[7] = 0
+}
+
+fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) { // Copies all eight limbs of arg1 into out1.
+	x1 := arg1[0] // Every limb is read into a local before any limb of out1 is written.
+	x2 := arg1[1]
+	x3 := arg1[2]
+	x4 := arg1[3]
+	x5 := arg1[4]
+	x6 := arg1[5]
+	x7 := arg1[6]
+	x8 := arg1[7]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+	out1[5] = x6
+	out1[6] = x7
+	out1[7] = x8
+}
+
+@(optimization_mode = "none") // NOTE(review): presumably keeps the optimizer from turning the masked swap into a branch - confirm.
+fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
+	mask := (u64(arg1) * 0xffffffffffffffff) // arg1 == 1 -> all-ones mask (swap); arg1 == 0 -> zero mask (no change). Assumes arg1 is 0 or 1.
+	x := (out1[0] ~ out2[0]) & mask // x = XOR difference of the two limbs when swapping, 0 otherwise.
+	x1, y1 := out1[0] ~ x, out2[0] ~ x // XOR-ing x into both limbs exchanges them (or leaves them as-is when x == 0).
+	x = (out1[1] ~ out2[1]) & mask
+	x2, y2 := out1[1] ~ x, out2[1] ~ x
+	x = (out1[2] ~ out2[2]) & mask
+	x3, y3 := out1[2] ~ x, out2[2] ~ x
+	x = (out1[3] ~ out2[3]) & mask
+	x4, y4 := out1[3] ~ x, out2[3] ~ x
+	x = (out1[4] ~ out2[4]) & mask
+	x5, y5 := out1[4] ~ x, out2[4] ~ x
+	x = (out1[5] ~ out2[5]) & mask
+	x6, y6 := out1[5] ~ x, out2[5] ~ x
+	x = (out1[6] ~ out2[6]) & mask
+	x7, y7 := out1[6] ~ x, out2[6] ~ x
+	x = (out1[7] ~ out2[7]) & mask
+	x8, y8 := out1[7] ~ x, out2[7] ~ x
+	out1[0], out2[0] = x1, y1 // Results are written back only after all eight limb pairs have been computed.
+	out1[1], out2[1] = x2, y2
+	out1[2], out2[2] = x3, y3
+	out1[3], out2[3] = x4, y4
+	out1[4], out2[4] = x5, y5
+	out1[5], out2[5] = x6, y6
+	out1[6], out2[6] = x7, y7
+	out1[7], out2[7] = x8, y8
+}
--- a/core/crypto/_fiat/field_curve448/field51.odin
+++ b/core/crypto/_fiat/field_curve448/field51.odin
--- a/core/crypto/_fiat/field_poly1305/field.odin
+++ b/core/crypto/_fiat/field_poly1305/field.odin
@@ -1,6 +1,5 @@
 package field_poly1305

-import "base:intrinsics"
 import "core:encoding/endian"
 import "core:mem"

@@ -29,9 +28,7 @@ fe_from_bytes :: #force_inline proc "contextless" (
 	// makes implementing the actual MAC block processing considerably
 	// neater.

-	if len(arg1) != 16 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(arg1) == 16, "poly1305: invalid field element size")

 	// While it may be unwise to do deserialization here on our
 	// own when fiat-crypto provides equivalent functionality,
--- a/core/crypto/_fiat/field_scalar25519/field.odin
+++ b/core/crypto/_fiat/field_scalar25519/field.odin
@@ -1,18 +1,17 @@
 package field_scalar25519

-import "base:intrinsics"
 import "core:encoding/endian"
 import "core:math/bits"
 import "core:mem"

-@(private)
+@(private, rodata)
 _TWO_168 := Montgomery_Domain_Field_Element {
 	0x5b8ab432eac74798,
 	0x38afddd6de59d5d7,
 	0xa2c131b399411b7c,
 	0x6329a7ed9ce5a30,
 }
-@(private)
+@(private, rodata)
 _TWO_336 := Montgomery_Domain_Field_Element {
 	0xbd3d108e2b35ecc5,
 	0x5c3a3718bdf9c90b,
@@ -95,9 +94,8 @@ fe_from_bytes_wide :: proc "contextless" (
@(private)
 _fe_from_bytes_short :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element, arg1: []byte) {
 	// INVARIANT: len(arg1) < 32.
-	if len(arg1) >= 32 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(arg1) < 32, "edwards25519: oversized short scalar")
+
 	tmp: [32]byte
 	copy(tmp[:], arg1)

@@ -106,9 +104,7 @@ _fe_from_bytes_short :: proc "contextless" (out1: ^Montgomery_Domain_Field_Eleme
 }

 fe_to_bytes :: proc "contextless" (out1: []byte, arg1: ^Montgomery_Domain_Field_Element) {
-	if len(out1) != 32 {
-		intrinsics.trap()
-	}
+	ensure_contextless(len(out1) == 32, "edwards25519: invalid scalar output buffer size")

 	tmp: Non_Montgomery_Domain_Field_Element
 	fe_from_montgomery(&tmp, arg1)
--- a/core/crypto/_sha3/sha3.odin
+++ b/core/crypto/_sha3/sha3.odin
@@ -44,7 +44,7 @@ Context :: struct {
 	is_finalized:   bool, // For SHAKE (unlimited squeeze is allowed)
 }

-@(private)
+@(private, rodata)
 keccakf_rndc := [?]u64 {
 	0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
 	0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
@@ -56,13 +56,13 @@ keccakf_rndc := [?]u64 {
 	0x8000000000008080, 0x0000000080000001, 0x8000000080008008,
 }

-@(private)
+@(private, rodata)
 keccakf_rotc := [?]int {
 	1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
 	27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
 }

-@(private)
+@(private, rodata)
 keccakf_piln := [?]i32 {
 	10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
 	15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
@@ -122,7 +122,7 @@ keccakf :: proc "contextless" (st: ^[25]u64) {
 	}
 }

-init :: proc(ctx: ^Context) {
+init :: proc "contextless" (ctx: ^Context) {
 	for i := 0; i < 25; i += 1 {
 		ctx.st.q[i] = 0
 	}
@@ -133,9 +133,9 @@ init :: proc(ctx: ^Context) {
 	ctx.is_finalized = false
 }

-update :: proc(ctx: ^Context, data: []byte) {
-	assert(ctx.is_initialized)
-	assert(!ctx.is_finalized)
+update :: proc "contextless" (ctx: ^Context, data: []byte) {
+	ensure_contextless(ctx.is_initialized)
+	ensure_contextless(!ctx.is_finalized)

 	j := ctx.pt
 	for i := 0; i < len(data); i += 1 {
@@ -149,12 +149,9 @@ update :: proc(ctx: ^Context, data: []byte) {
 	ctx.pt = j
 }

-final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
-	assert(ctx.is_initialized)
-
-	if len(hash) < ctx.mdlen {
-		panic("crypto/sha3: invalid destination digest size")
-	}
+final :: proc "contextless" (ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
+	ensure_contextless(ctx.is_initialized)
+	ensure_contextless(len(hash) >= ctx.mdlen, "crypto/sha3: invalid destination digest size")

 	ctx := ctx
 	if finalize_clone {
@@ -173,11 +170,11 @@ final :: proc(ctx: ^Context, hash: []byte, finalize_clone: bool = false) {
 	}
 }

-clone :: proc(ctx, other: ^Context) {
+clone :: proc "contextless" (ctx, other: ^Context) {
 	ctx^ = other^
 }

-reset :: proc(ctx: ^Context) {
+reset :: proc "contextless" (ctx: ^Context) {
 	if !ctx.is_initialized {
 		return
 	}
@@ -185,9 +182,9 @@ reset :: proc(ctx: ^Context) {
 	mem.zero_explicit(ctx, size_of(ctx^))
 }

-shake_xof :: proc(ctx: ^Context) {
-	assert(ctx.is_initialized)
-	assert(!ctx.is_finalized)
+shake_xof :: proc "contextless" (ctx: ^Context) {
+	ensure_contextless(ctx.is_initialized)
+	ensure_contextless(!ctx.is_finalized)

 	ctx.st.b[ctx.pt] ~= ctx.dsbyte
 	ctx.st.b[ctx.rsiz - 1] ~= 0x80
@@ -197,9 +194,9 @@ shake_xof :: proc(ctx: ^Context) {
 	ctx.is_finalized = true // No more absorb, unlimited squeeze.
 }

-shake_out :: proc(ctx: ^Context, hash: []byte) {
-	assert(ctx.is_initialized)
-	assert(ctx.is_finalized)
+shake_out :: proc "contextless" (ctx: ^Context, hash: []byte) {
+	ensure_contextless(ctx.is_initialized)
+	ensure_contextless(ctx.is_finalized)

 	j := ctx.pt
 	for i := 0; i < len(hash); i += 1 {
--- a/core/crypto/_sha3/sp800_185.odin
+++ b/core/crypto/_sha3/sp800_185.odin
@@ -3,7 +3,7 @@ package _sha3
 import "core:encoding/endian"
 import "core:math/bits"

-init_cshake :: proc(ctx: ^Context, n, s: []byte, sec_strength: int) {
+init_cshake :: proc "contextless" (ctx: ^Context, n, s: []byte, sec_strength: int) {
 	ctx.mdlen = sec_strength / 8

 	// No domain separator is equivalent to vanilla SHAKE.
@@ -18,7 +18,7 @@ init_cshake :: proc(ctx: ^Context, n, s: []byte, sec_strength: int) {
 	bytepad(ctx, [][]byte{n, s}, rate_cshake(sec_strength))
 }

-final_cshake :: proc(ctx: ^Context, dst: []byte, finalize_clone: bool = false) {
+final_cshake :: proc "contextless" (ctx: ^Context, dst: []byte, finalize_clone: bool = false) {
 	ctx := ctx
 	if finalize_clone {
 		tmp_ctx: Context
@@ -32,7 +32,7 @@ final_cshake :: proc(ctx: ^Context, dst: []byte, finalize_clone: bool = false) {
 	shake_out(ctx, dst)
 }

-rate_cshake :: #force_inline proc(sec_strength: int) -> int {
+rate_cshake :: #force_inline proc "contextless" (sec_strength: int) -> int {
 	switch sec_strength {
 	case 128:
 		return RATE_128
@@ -40,7 +40,7 @@ rate_cshake :: #force_inline proc(sec_strength: int) -> int {
 		return RATE_256
 	}

-	panic("crypto/sha3: invalid security strength")
+	panic_contextless("crypto/sha3: invalid security strength")
 }

 // right_encode and left_encode are defined to support 0 <= x < 2^2040
@@ -52,10 +52,10 @@ rate_cshake :: #force_inline proc(sec_strength: int) -> int {
 //
 // Thus we support 0 <= x < 2^128.

-@(private)
+@(private, rodata)
 _PAD: [RATE_128]byte // Biggest possible value of w per spec.

-bytepad :: proc(ctx: ^Context, x_strings: [][]byte, w: int) {
+bytepad :: proc "contextless" (ctx: ^Context, x_strings: [][]byte, w: int) {
 	// 1. z = left_encode(w) || X.
 	z_hi: u64
 	z_lo := left_right_encode(ctx, 0, u64(w), true)
@@ -70,9 +70,7 @@ bytepad :: proc(ctx: ^Context, x_strings: [][]byte, w: int) {

 		// This isn't actually possible, at least with the currently
 		// defined SP 800-185 routines.
-		if carry != 0 {
-			panic("crypto/sha3: bytepad input length overflow")
-		}
+		ensure_contextless(carry == 0, "crypto/sha3: bytepad input length overflow")
 	}

 	// We skip this step as we are doing a byte-oriented implementation
@@ -95,7 +93,7 @@ bytepad :: proc(ctx: ^Context, x_strings: [][]byte, w: int) {
 	}
 }

-encode_string :: #force_inline proc(ctx: ^Context, s: []byte) -> (u64, u64) {
+encode_string :: #force_inline proc "contextless" (ctx: ^Context, s: []byte) -> (u64, u64) {
 	l := encode_byte_len(ctx, len(s), true) // left_encode
 	update(ctx, s)

@@ -104,13 +102,13 @@ encode_string :: #force_inline proc(ctx: ^Context, s: []byte) -> (u64, u64) {
 	return hi, lo
 }

-encode_byte_len :: #force_inline proc(ctx: ^Context, l: int, is_left: bool) -> u64 {
+encode_byte_len :: #force_inline proc "contextless" (ctx: ^Context, l: int, is_left: bool) -> u64 {
 	hi, lo := bits.mul_u64(u64(l), 8)
 	return left_right_encode(ctx, hi, lo, is_left)
 }

@(private)
-left_right_encode :: proc(ctx: ^Context, hi, lo: u64, is_left: bool) -> u64 {
+left_right_encode :: proc "contextless" (ctx: ^Context, hi, lo: u64, is_left: bool) -> u64 {
 	HI_OFFSET :: 1
 	LO_OFFSET :: HI_OFFSET + 8
 	RIGHT_OFFSET :: LO_OFFSET + 8
--- a/core/crypto/aead/aead.odin
+++ b/core/crypto/aead/aead.odin
@@ -16,7 +16,7 @@ seal_oneshot :: proc(algo: Algorithm, dst, tag, key, iv, aad, plaintext: []byte,
 // returning true iff the authentication was successful.  If authentication
 // fails, the destination buffer will be zeroed.
 //
-// dst and plaintext MUST alias exactly or not at all.
+// dst and ciphertext MUST alias exactly or not at all.
@(require_results)
 open_oneshot :: proc(algo: Algorithm, dst, key, iv, aad, ciphertext, tag: []byte, impl: Implementation = nil) -> bool {
 	ctx: Context
--- a/core/crypto/aead/low_level.odin
+++ b/core/crypto/aead/low_level.odin
@@ -1,8 +1,10 @@
 package aead

+import "core:crypto/aegis"
 import "core:crypto/aes"
 import "core:crypto/chacha20"
 import "core:crypto/chacha20poly1305"
+import "core:crypto/deoxysii"
 import "core:reflect"

 // Implementation is an AEAD implementation.  Most callers will not need
@@ -15,7 +17,7 @@ Implementation :: union {

 // MAX_TAG_SIZE is the maximum size tag that can be returned by any of the
 // Algorithms supported via this package.
-MAX_TAG_SIZE :: 16
+MAX_TAG_SIZE :: 32

 // Algorithm is the algorithm identifier associated with a given Context.
 Algorithm :: enum {
@@ -25,9 +27,14 @@ Algorithm :: enum {
 	AES_GCM_256,
 	CHACHA20POLY1305,
 	XCHACHA20POLY1305,
+	AEGIS_128L,
+	AEGIS_128L_256, // AEGIS-128L (256-bit tag)
+	AEGIS_256,
+	AEGIS_256_256, // AEGIS-256 (256-bit tag)
+	DEOXYS_II_256,
 }

-// ALGORITM_NAMES is the Agorithm to algorithm name string.
+// ALGORITHM_NAMES is the Algorithm to algorithm name string.
 ALGORITHM_NAMES := [Algorithm]string {
 	.Invalid           = "Invalid",
 	.AES_GCM_128       = "AES-GCM-128",
@@ -35,6 +42,11 @@ ALGORITHM_NAMES := [Algorithm]string {
 	.AES_GCM_256       = "AES-GCM-256",
 	.CHACHA20POLY1305  = "chacha20poly1305",
 	.XCHACHA20POLY1305 = "xchacha20poly1305",
+	.AEGIS_128L        = "AEGIS-128L",
+	.AEGIS_128L_256    = "AEGIS-128L-256",
+	.AEGIS_256         = "AEGIS-256",
+	.AEGIS_256_256     = "AEGIS-256-256",
+	.DEOXYS_II_256     = "Deoxys-II-256",
 }

 // TAG_SIZES is the Algorithm to tag size in bytes.
@@ -45,6 +57,11 @@ TAG_SIZES := [Algorithm]int {
 	.AES_GCM_256       = aes.GCM_TAG_SIZE,
 	.CHACHA20POLY1305  = chacha20poly1305.TAG_SIZE,
 	.XCHACHA20POLY1305 = chacha20poly1305.TAG_SIZE,
+	.AEGIS_128L        = aegis.TAG_SIZE_128,
+	.AEGIS_128L_256    = aegis.TAG_SIZE_256,
+	.AEGIS_256         = aegis.TAG_SIZE_128,
+	.AEGIS_256_256     = aegis.TAG_SIZE_256,
+	.DEOXYS_II_256     = deoxysii.TAG_SIZE,
 }

 // KEY_SIZES is the Algorithm to key size in bytes.
@@ -55,6 +72,11 @@ KEY_SIZES := [Algorithm]int {
 	.AES_GCM_256       = aes.KEY_SIZE_256,
 	.CHACHA20POLY1305  = chacha20poly1305.KEY_SIZE,
 	.XCHACHA20POLY1305 = chacha20poly1305.KEY_SIZE,
+	.AEGIS_128L        = aegis.KEY_SIZE_128L,
+	.AEGIS_128L_256    = aegis.KEY_SIZE_128L,
+	.AEGIS_256         = aegis.KEY_SIZE_256,
+	.AEGIS_256_256     = aegis.KEY_SIZE_256,
+	.DEOXYS_II_256     = deoxysii.KEY_SIZE,
 }

 // IV_SIZES is the Algorithm to initialization vector size in bytes.
@@ -67,6 +89,11 @@ IV_SIZES := [Algorithm]int {
 	.AES_GCM_256       = aes.GCM_IV_SIZE,
 	.CHACHA20POLY1305  = chacha20poly1305.IV_SIZE,
 	.XCHACHA20POLY1305 = chacha20poly1305.XIV_SIZE,
+	.AEGIS_128L        = aegis.IV_SIZE_128L,
+	.AEGIS_128L_256    = aegis.IV_SIZE_128L,
+	.AEGIS_256         = aegis.IV_SIZE_256,
+	.AEGIS_256_256     = aegis.IV_SIZE_256,
+	.DEOXYS_II_256     = deoxysii.IV_SIZE,
 }

 // Context is a concrete instantiation of a specific AEAD algorithm.
@@ -75,6 +102,8 @@ Context :: struct {
 	_impl: union {
 		aes.Context_GCM,
 		chacha20poly1305.Context,
+		aegis.Context,
+		deoxysii.Context,
 	},
 }

@@ -86,6 +115,11 @@ _IMPL_IDS := [Algorithm]typeid {
 	.AES_GCM_256       = typeid_of(aes.Context_GCM),
 	.CHACHA20POLY1305  = typeid_of(chacha20poly1305.Context),
 	.XCHACHA20POLY1305 = typeid_of(chacha20poly1305.Context),
+	.AEGIS_128L        = typeid_of(aegis.Context),
+	.AEGIS_128L_256    = typeid_of(aegis.Context),
+	.AEGIS_256         = typeid_of(aegis.Context),
+	.AEGIS_256_256     = typeid_of(aegis.Context),
+	.DEOXYS_II_256     = typeid_of(deoxysii.Context),
 }

 // init initializes a Context with a specific AEAD Algorithm.
@@ -94,9 +128,7 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat
 		reset(ctx)
 	}

-	if len(key) != KEY_SIZES[algorithm] {
-		panic("crypto/aead: invalid key size")
-	}
+	ensure(len(key) == KEY_SIZES[algorithm], "crypto/aead: invalid key size")

 	// Directly specialize the union by setting the type ID (save a copy).
 	reflect.set_union_variant_typeid(
@@ -113,6 +145,12 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat
 	case .XCHACHA20POLY1305:
 		impl_ := impl != nil ? impl.(chacha20.Implementation) : chacha20.DEFAULT_IMPLEMENTATION
 		chacha20poly1305.init_xchacha(&ctx._impl.(chacha20poly1305.Context), key, impl_)
+	case .AEGIS_128L, .AEGIS_128L_256, .AEGIS_256, .AEGIS_256_256:
+		impl_ := impl != nil ? impl.(aes.Implementation) : aes.DEFAULT_IMPLEMENTATION
+		aegis.init(&ctx._impl.(aegis.Context), key, impl_)
+	case .DEOXYS_II_256:
+		impl_ := impl != nil ? impl.(aes.Implementation) : aes.DEFAULT_IMPLEMENTATION
+		deoxysii.init(&ctx._impl.(deoxysii.Context), key, impl_)
 	case .Invalid:
 		panic("crypto/aead: uninitialized algorithm")
 	case:
@@ -127,11 +165,17 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat
 //
 // dst and plaintext MUST alias exactly or not at all.
 seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
+	ensure(len(tag) == TAG_SIZES[ctx._algo], "crypto/aead: invalid tag size")
+
 	switch &impl in ctx._impl {
 	case aes.Context_GCM:
 		aes.seal_gcm(&impl, dst, tag, iv, aad, plaintext)
 	case chacha20poly1305.Context:
 		chacha20poly1305.seal(&impl, dst, tag, iv, aad, plaintext)
+	case aegis.Context:
+		aegis.seal(&impl, dst, tag, iv, aad, plaintext)
+	case deoxysii.Context:
+		deoxysii.seal(&impl, dst, tag, iv, aad, plaintext)
 	case:
 		panic("crypto/aead: uninitialized algorithm")
 	}
@@ -145,11 +189,17 @@ seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
 // dst and plaintext MUST alias exactly or not at all.
@(require_results)
 open_ctx :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
+	ensure(len(tag) == TAG_SIZES[ctx._algo], "crypto/aead: invalid tag size")
+
 	switch &impl in ctx._impl {
 	case aes.Context_GCM:
 		return aes.open_gcm(&impl, dst, iv, aad, ciphertext, tag)
 	case chacha20poly1305.Context:
 		return chacha20poly1305.open(&impl, dst, iv, aad, ciphertext, tag)
+	case aegis.Context:
+		return aegis.open(&impl, dst, iv, aad, ciphertext, tag)
+	case deoxysii.Context:
+		return deoxysii.open(&impl, dst, iv, aad, ciphertext, tag)
 	case:
 		panic("crypto/aead: uninitialized algorithm")
 	}
@@ -163,6 +213,10 @@ reset :: proc(ctx: ^Context) {
 		aes.reset_gcm(&impl)
 	case chacha20poly1305.Context:
 		chacha20poly1305.reset(&impl)
+	case aegis.Context:
+		aegis.reset(&impl)
+	case deoxysii.Context:
+		deoxysii.reset(&impl)
 	case:
 		// Calling reset repeatedly is fine.
 	}
--- a/core/crypto/aegis/aegis.odin
+++ b/core/crypto/aegis/aegis.odin
@@ -0,0 +1,213 @@
+/*
+package aegis implements the AEGIS-128L and AEGIS-256 Authenticated
+Encryption with Additional Data algorithms.
+
+See:
+- [[ https://www.ietf.org/archive/id/draft-irtf-cfrg-aegis-aead-12.txt ]]
+*/
+package aegis
+
+import "core:bytes"
+import "core:crypto"
+import "core:crypto/aes"
+import "core:mem"
+
+// KEY_SIZE_128L is the AEGIS-128L key size in bytes.
+KEY_SIZE_128L :: 16
+// KEY_SIZE_256 is the AEGIS-256 key size in bytes.
+KEY_SIZE_256 :: 32
+// IV_SIZE_128L is the AEGIS-128L IV size in bytes.
+IV_SIZE_128L :: 16
+// IV_SIZE_256 is the AEGIS-256 IV size in bytes.
+IV_SIZE_256 :: 32
+// TAG_SIZE_128 is the AEGIS-128L or AEGIS-256 128-bit tag size in bytes.
+TAG_SIZE_128 :: 16
+// TAG_SIZE_256 is the AEGIS-128L or AEGIS-256 256-bit tag size in bytes.
+TAG_SIZE_256 :: 32
+
+@(private)
+_RATE_128L :: 32
+@(private)
+_RATE_256 :: 16
+@(private)
+_RATE_MAX :: _RATE_128L
+
+@(private, rodata)
+_C0 := [16]byte{
+	0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
+	0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62,
+}
+
+@(private, rodata)
+_C1 := [16]byte {
+	0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
+	0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd,
+}
+
+// Context is a keyed AEGIS-128L or AEGIS-256 instance.
+Context :: struct {
+	_key:            [KEY_SIZE_256]byte, // Sized for the larger key; AEGIS-128L reads only the first 16 bytes.
+	_key_len:        int,                // len(key) as passed to init; selects the variant.
+	_impl:           aes.Implementation, // Backend selected by init.
+	_is_initialized: bool,               // Set by init, cleared by reset.
+}
+
+@(private)
+_validate_common_slice_sizes :: proc (ctx: ^Context, tag, iv, aad, text: []byte) {
+	switch len(tag) {
+	case TAG_SIZE_128, TAG_SIZE_256:
+	case:
+		panic("crypto/aegis: invalid tag size")
+	}
+
+	iv_ok: bool
+	switch ctx._key_len {
+	case KEY_SIZE_128L:
+		iv_ok = len(iv) == IV_SIZE_128L
+	case KEY_SIZE_256:
+		iv_ok = len(iv) == IV_SIZE_256
+	}
+	ensure(iv_ok,"crypto/aegis: invalid IV size")
+
+	#assert(size_of(int) == 8 || size_of(int) <= 4)
+	// As A_MAX and P_MAX are both defined to be 2^61 - 1 bytes, and
+	// the maximum length of a slice is bound by `size_of(int)`, where
+	// `int` is register sized, there is no need to check AAD/text
+	// lengths.
+}
+
+// init initializes a Context with the provided key, for AEGIS-128L or AEGIS-256.
+init :: proc(ctx: ^Context, key: []byte, impl := aes.DEFAULT_IMPLEMENTATION) {
+	switch len(key) {
+	case KEY_SIZE_128L, KEY_SIZE_256: // The key length is what selects the variant.
+	case:
+		panic("crypto/aegis: invalid key size")
+	}
+
+	copy(ctx._key[:], key)
+	ctx._key_len = len(key)
+	ctx._impl = impl
+	if ctx._impl == .Hardware && !is_hardware_accelerated() {
+		ctx._impl = .Portable // Fall back instead of failing when unsupported.
+	}
+	ctx._is_initialized = true
+}
+
+// seal encrypts the plaintext and authenticates the aad and ciphertext,
+// with the provided Context and iv, stores the output in dst and tag.
+//
+// dst and plaintext MUST alias exactly or not at all.
+seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
+	ensure(ctx._is_initialized)
+
+	_validate_common_slice_sizes(ctx, tag, iv, aad, plaintext)
+	ensure(len(dst) == len(plaintext), "crypto/aegis: invalid destination ciphertext size")
+	ensure(!bytes.alias_inexactly(dst, plaintext), "crypto/aegis: dst and plaintext alias inexactly")
+
+	switch ctx._impl {
+	case .Hardware:
+		st: State_HW
+		defer reset_state_hw(&st)
+
+		init_hw(ctx, &st, iv)
+
+		aad_len, pt_len := len(aad), len(plaintext)
+		if aad_len > 0 {
+			absorb_hw(&st, aad)
+		}
+
+		if pt_len > 0 {
+			enc_hw(&st, dst, plaintext)
+		}
+
+		finalize_hw(&st, tag, aad_len, pt_len)
+	case .Portable:
+		st: State_SW
+		defer reset_state_sw(&st)
+
+		init_sw(ctx, &st, iv)
+
+		aad_len, pt_len := len(aad), len(plaintext)
+		if aad_len > 0 {
+			absorb_sw(&st, aad)
+		}
+
+		if pt_len > 0 {
+			enc_sw(&st, dst, plaintext)
+		}
+
+		finalize_sw(&st, tag, aad_len, pt_len)
+	case:
+		panic("core/crypto/aegis: not implemented")
+	}
+}
+
+// open authenticates the aad and ciphertext, and decrypts the ciphertext,
+// with the provided Context, iv, and tag, and stores the output in dst,
+// returning true iff the authentication was successful.  If authentication
+// fails, the destination buffer will be zeroed.
+//
+// dst and ciphertext MUST alias exactly or not at all.
+@(require_results)
+open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
+	ensure(ctx._is_initialized)
+
+	_validate_common_slice_sizes(ctx, tag, iv, aad, ciphertext)
+	ensure(len(dst) == len(ciphertext), "crypto/aegis: invalid destination plaintext size")
+	ensure(!bytes.alias_inexactly(dst, ciphertext), "crypto/aegis: dst and ciphertext alias inexactly")
+
+	tmp: [TAG_SIZE_256]byte
+	derived_tag := tmp[:len(tag)]
+	aad_len, ct_len := len(aad), len(ciphertext)
+
+	switch ctx._impl {
+	case .Hardware:
+		st: State_HW
+		defer reset_state_hw(&st)
+
+		init_hw(ctx, &st, iv)
+
+		if aad_len > 0 {
+			absorb_hw(&st, aad)
+		}
+
+		if ct_len > 0 {
+			dec_hw(&st, dst, ciphertext)
+		}
+
+		finalize_hw(&st, derived_tag, aad_len, ct_len)
+	case .Portable:
+		st: State_SW
+		defer reset_state_sw(&st)
+
+		init_sw(ctx, &st, iv)
+
+		if aad_len > 0 {
+			absorb_sw(&st, aad)
+		}
+
+		if ct_len > 0 {
+			dec_sw(&st, dst, ciphertext)
+		}
+
+		finalize_sw(&st, derived_tag, aad_len, ct_len)
+	case:
+		panic("core/crypto/aegis: not implemented")
+	}
+
+	if crypto.compare_constant_time(tag, derived_tag) != 1 {
+		mem.zero_explicit(raw_data(derived_tag), len(derived_tag))
+		mem.zero_explicit(raw_data(dst), ct_len)
+		return false
+	}
+
+	return true
+}
+
+// reset sanitizes the Context.  The Context must be
+// re-initialized to be used again.
+reset :: proc "contextless" (ctx: ^Context) {
+	mem.zero_explicit(&ctx._key, len(ctx._key))
+	ctx._key_len = 0
+	ctx._is_initialized = false
+}
--- a/core/crypto/aegis/aegis_impl_ct64.odin
+++ b/core/crypto/aegis/aegis_impl_ct64.odin
@@ -0,0 +1,452 @@
+package aegis
+
+import aes "core:crypto/_aes/ct64"
+import "core:encoding/endian"
+import "core:mem"
+
+// This uses the bitsliced 64-bit general purpose register SWAR AES
+// round function.  The intermediate state is stored in interleaved
+// but NOT orthogonalized form, as leaving things in the orthogonalized
+// format would overly complicate the update implementation.
+//
+// Note/perf: Per Frank Denis and a review of the specification, it is
+// possible to gain slightly more performance by leaving the state in
+// orthogonalized form while doing initialization, finalization, and
+// absorbing AAD.  This implementation opts out of those optimizations
+// for the sake of simplicity.
+//
+// The update function leverages the parallelism (4 blocks at once).
+
+@(private)
+State_SW :: struct {
+	s0_0, s0_1: u64, // Each 128-bit state word is held as an interleaved u64 pair.
+	s1_0, s1_1: u64,
+	s2_0, s2_1: u64,
+	s3_0, s3_1: u64,
+	s4_0, s4_1: u64,
+	s5_0, s5_1: u64,
+	s6_0, s6_1: u64, // s6 and s7 are only used at the AEGIS-128L rate.
+	s7_0, s7_1: u64,
+	q_k, q_b:   [8]u64, // Scratch for the round keys (q_k) and blocks (q_b) of an update.
+	rate:       int, // _RATE_128L or _RATE_256, set by init_sw.
+}
+
+@(private)
+init_sw :: proc "contextless" (ctx: ^Context, st: ^State_SW, iv: []byte) {
+	switch ctx._key_len {
+	case KEY_SIZE_128L:
+		key_0, key_1 := aes.load_interleaved(ctx._key[:16])
+		iv_0, iv_1 := aes.load_interleaved(iv)
+
+		st.s0_0, st.s0_1 = aes.xor_interleaved(key_0, key_1, iv_0, iv_1)
+		st.s1_0, st.s1_1 = aes.load_interleaved(_C1[:])
+		st.s2_0, st.s2_1 = aes.load_interleaved(_C0[:])
+		st.s3_0, st.s3_1 = st.s1_0, st.s1_1
+		st.s4_0, st.s4_1 = st.s0_0, st.s0_1
+		st.s5_0, st.s5_1 = aes.xor_interleaved(key_0, key_1, st.s2_0, st.s2_1)
+		st.s6_0, st.s6_1 = aes.xor_interleaved(key_0, key_1, st.s1_0, st.s1_1)
+		st.s7_0, st.s7_1 = st.s5_0, st.s5_1
+		st.rate = _RATE_128L
+
+		for _ in 0 ..< 10 {
+			update_sw_128l(st, iv_0, iv_1, key_0, key_1)
+		}
+	case KEY_SIZE_256:
+		k0_0, k0_1 := aes.load_interleaved(ctx._key[:16])
+		k1_0, k1_1 := aes.load_interleaved(ctx._key[16:])
+		n0_0, n0_1 := aes.load_interleaved(iv[:16])
+		n1_0, n1_1 := aes.load_interleaved(iv[16:])
+
+		st.s0_0, st.s0_1 = aes.xor_interleaved(k0_0, k0_1, n0_0, n0_1)
+		st.s1_0, st.s1_1 = aes.xor_interleaved(k1_0, k1_1, n1_0, n1_1)
+		st.s2_0, st.s2_1 = aes.load_interleaved(_C1[:])
+		st.s3_0, st.s3_1 = aes.load_interleaved(_C0[:])
+		st.s4_0, st.s4_1 = aes.xor_interleaved(k0_0, k0_1, st.s3_0, st.s3_1)
+		st.s5_0, st.s5_1 = aes.xor_interleaved(k1_0, k1_1, st.s2_0, st.s2_1)
+		st.rate = _RATE_256
+
+		u0_0, u0_1, u1_0, u1_1 := st.s0_0, st.s0_1, st.s1_0, st.s1_1
+		for _ in 0 ..< 4 {
+			update_sw_256(st, k0_0, k0_1)
+			update_sw_256(st, k1_0, k1_1)
+			update_sw_256(st, u0_0, u0_1)
+			update_sw_256(st, u1_0, u1_1)
+		}
+	}
+}
+
+@(private = "file")
+update_sw_128l :: proc "contextless" (st: ^State_SW, m0_0, m0_1, m1_0, m1_1: u64) {
+	st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s0_0, st.s0_1, m0_0, m0_1)
+	st.q_k[1], st.q_k[5] = st.s1_0, st.s1_1
+	st.q_k[2], st.q_k[6] = st.s2_0, st.s2_1
+	st.q_k[3], st.q_k[7] = st.s3_0, st.s3_1
+	aes.orthogonalize(&st.q_k)
+
+	st.q_b[0], st.q_b[4] = st.s7_0, st.s7_1
+	st.q_b[1], st.q_b[5] = st.s0_0, st.s0_1
+	st.q_b[2], st.q_b[6] = st.s1_0, st.s1_1
+	st.q_b[3], st.q_b[7] = st.s2_0, st.s2_1
+	aes.orthogonalize(&st.q_b)
+
+	aes.sub_bytes(&st.q_b)
+	aes.shift_rows(&st.q_b)
+	aes.mix_columns(&st.q_b)
+	aes.add_round_key(&st.q_b, st.q_k[:])
+	aes.orthogonalize(&st.q_b)
+
+	st.s0_0, st.s0_1 = st.q_b[0], st.q_b[4]
+	st.s1_0, st.s1_1 = st.q_b[1], st.q_b[5]
+	st.s2_0, st.s2_1 = st.q_b[2], st.q_b[6]
+	s3_0, s3_1 := st.q_b[3], st.q_b[7]
+
+	st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s4_0, st.s4_1, m1_0, m1_1)
+	st.q_k[1], st.q_k[5] = st.s5_0, st.s5_1
+	st.q_k[2], st.q_k[6] = st.s6_0, st.s6_1
+	st.q_k[3], st.q_k[7] = st.s7_0, st.s7_1
+	aes.orthogonalize(&st.q_k)
+
+	st.q_b[0], st.q_b[4] = st.s3_0, st.s3_1
+	st.q_b[1], st.q_b[5] = st.s4_0, st.s4_1
+	st.q_b[2], st.q_b[6] = st.s5_0, st.s5_1
+	st.q_b[3], st.q_b[7] = st.s6_0, st.s6_1
+	aes.orthogonalize(&st.q_b)
+
+	aes.sub_bytes(&st.q_b)
+	aes.shift_rows(&st.q_b)
+	aes.mix_columns(&st.q_b)
+	aes.add_round_key(&st.q_b, st.q_k[:])
+	aes.orthogonalize(&st.q_b)
+
+	st.s3_0, st.s3_1 = s3_0, s3_1
+	st.s4_0, st.s4_1 = st.q_b[0], st.q_b[4]
+	st.s5_0, st.s5_1 = st.q_b[1], st.q_b[5]
+	st.s6_0, st.s6_1 = st.q_b[2], st.q_b[6]
+	st.s7_0, st.s7_1 = st.q_b[3], st.q_b[7]
+}
+
+@(private = "file")
+update_sw_256 :: proc "contextless" (st: ^State_SW, m_0, m_1: u64) {
+	st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s0_0, st.s0_1, m_0, m_1)
+	st.q_k[1], st.q_k[5] = st.s1_0, st.s1_1
+	st.q_k[2], st.q_k[6] = st.s2_0, st.s2_1
+	st.q_k[3], st.q_k[7] = st.s3_0, st.s3_1
+	aes.orthogonalize(&st.q_k)
+
+	st.q_b[0], st.q_b[4] = st.s5_0, st.s5_1
+	st.q_b[1], st.q_b[5] = st.s0_0, st.s0_1
+	st.q_b[2], st.q_b[6] = st.s1_0, st.s1_1
+	st.q_b[3], st.q_b[7] = st.s2_0, st.s2_1
+	aes.orthogonalize(&st.q_b)
+
+	aes.sub_bytes(&st.q_b)
+	aes.shift_rows(&st.q_b)
+	aes.mix_columns(&st.q_b)
+	aes.add_round_key(&st.q_b, st.q_k[:])
+	aes.orthogonalize(&st.q_b)
+
+	st.s0_0, st.s0_1 = st.q_b[0], st.q_b[4]
+	st.s1_0, st.s1_1 = st.q_b[1], st.q_b[5]
+	st.s2_0, st.s2_1 = st.q_b[2], st.q_b[6]
+	s3_0, s3_1 := st.q_b[3], st.q_b[7]
+
+	st.q_k[0], st.q_k[4] = st.s4_0, st.s4_1
+	st.q_k[1], st.q_k[5] = st.s5_0, st.s5_1
+	aes.orthogonalize(&st.q_k)
+
+	st.q_b[0], st.q_b[4] = st.s3_0, st.s3_1
+	st.q_b[1], st.q_b[5] = st.s4_0, st.s4_1
+	aes.orthogonalize(&st.q_b)
+
+	aes.sub_bytes(&st.q_b)
+	aes.shift_rows(&st.q_b)
+	aes.mix_columns(&st.q_b)
+	aes.add_round_key(&st.q_b, st.q_k[:])
+	aes.orthogonalize(&st.q_b)
+
+	st.s3_0, st.s3_1 = s3_0, s3_1
+	st.s4_0, st.s4_1 = st.q_b[0], st.q_b[4]
+	st.s5_0, st.s5_1 = st.q_b[1], st.q_b[5]
+}
+
+@(private = "file")
+absorb_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, ai: []byte) #no_bounds_check {
+	t0_0, t0_1 := aes.load_interleaved(ai[:16])
+	t1_0, t1_1 := aes.load_interleaved(ai[16:])
+	update_sw_128l(st, t0_0, t0_1, t1_0, t1_1)
+}
+
+@(private = "file")
+absorb_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, ai: []byte) {
+	m_0, m_1 := aes.load_interleaved(ai)
+	update_sw_256(st, m_0, m_1)
+}
+
+@(private)
+absorb_sw :: proc "contextless" (st: ^State_SW, aad: []byte) #no_bounds_check {
+	ai, l := aad, len(aad)
+
+	switch st.rate {
+	case _RATE_128L:
+		for l >= _RATE_128L {
+			absorb_sw_128l(st, ai)
+			ai = ai[_RATE_128L:]
+			l -= _RATE_128L
+		}
+	case _RATE_256:
+		for l >= _RATE_256 {
+			absorb_sw_256(st, ai)
+
+			ai = ai[_RATE_256:]
+			l -= _RATE_256
+		}
+	}
+
+	// Pad out the remainder with `0`s till it is rate sized.
+	if l > 0 {
+		tmp: [_RATE_MAX]byte // AAD is not confidential.
+		copy(tmp[:], ai)
+		switch st.rate {
+		case _RATE_128L:
+			absorb_sw_128l(st, tmp[:])
+		case _RATE_256:
+			absorb_sw_256(st, tmp[:])
+		}
+	}
+}
+
+@(private = "file", require_results)
+z_sw_128l :: proc "contextless" (st: ^State_SW) -> (u64, u64, u64, u64) {
+	z0_0, z0_1 := aes.and_interleaved(st.s2_0, st.s2_1, st.s3_0, st.s3_1)
+	z0_0, z0_1 = aes.xor_interleaved(st.s1_0, st.s1_1, z0_0, z0_1)
+	z0_0, z0_1 = aes.xor_interleaved(st.s6_0, st.s6_1, z0_0, z0_1)
+
+	z1_0, z1_1 := aes.and_interleaved(st.s6_0, st.s6_1, st.s7_0, st.s7_1)
+	z1_0, z1_1 = aes.xor_interleaved(st.s5_0, st.s5_1, z1_0, z1_1)
+	z1_0, z1_1 = aes.xor_interleaved(st.s2_0, st.s2_1, z1_0, z1_1)
+
+	return z0_0, z0_1, z1_0, z1_1
+}
+
+@(private = "file", require_results)
+z_sw_256 :: proc "contextless" (st: ^State_SW) -> (u64, u64) {
+	z_0, z_1 := aes.and_interleaved(st.s2_0, st.s2_1, st.s3_0, st.s3_1)
+	z_0, z_1 = aes.xor_interleaved(st.s5_0, st.s5_1, z_0, z_1)
+	z_0, z_1 = aes.xor_interleaved(st.s4_0, st.s4_1, z_0, z_1)
+	return aes.xor_interleaved(st.s1_0, st.s1_1, z_0, z_1)
+}
+
+@(private = "file")
+enc_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, ci, xi: []byte) #no_bounds_check {
+	z0_0, z0_1, z1_0, z1_1 := z_sw_128l(st)
+
+	t0_0, t0_1 := aes.load_interleaved(xi[:16])
+	t1_0, t1_1 := aes.load_interleaved(xi[16:])
+	update_sw_128l(st, t0_0, t0_1, t1_0, t1_1)
+
+	out0_0, out0_1 := aes.xor_interleaved(t0_0, t0_1, z0_0, z0_1)
+	out1_0, out1_1 := aes.xor_interleaved(t1_0, t1_1, z1_0, z1_1)
+	aes.store_interleaved(ci[:16], out0_0, out0_1)
+	aes.store_interleaved(ci[16:], out1_0, out1_1)
+}
+
+@(private = "file")
+enc_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, ci, xi: []byte) #no_bounds_check {
+	z_0, z_1 := z_sw_256(st)
+
+	xi_0, xi_1 := aes.load_interleaved(xi)
+	update_sw_256(st, xi_0, xi_1)
+
+	ci_0, ci_1 := aes.xor_interleaved(xi_0, xi_1, z_0, z_1)
+	aes.store_interleaved(ci, ci_0, ci_1)
+}
+
+@(private)
+enc_sw :: proc "contextless" (st: ^State_SW, dst, src: []byte) #no_bounds_check {
+	ci, xi, l := dst, src, len(src)
+
+	switch st.rate {
+	case _RATE_128L:
+		for l >= _RATE_128L {
+			enc_sw_128l(st, ci, xi)
+			ci = ci[_RATE_128L:]
+			xi = xi[_RATE_128L:]
+			l -= _RATE_128L
+		}
+	case _RATE_256:
+		for l >= _RATE_256 {
+			enc_sw_256(st, ci, xi)
+			ci = ci[_RATE_256:]
+			xi = xi[_RATE_256:]
+			l -= _RATE_256
+		}
+	}
+
+	// Pad out the remainder with `0`s till it is rate sized.
+	if l > 0 {
+		tmp: [_RATE_MAX]byte // Ciphertext is not confidential.
+		copy(tmp[:], xi)
+		switch st.rate {
+		case _RATE_128L:
+			enc_sw_128l(st, tmp[:], tmp[:])
+		case _RATE_256:
+			enc_sw_256(st, tmp[:], tmp[:])
+		}
+		copy(ci, tmp[:l])
+	}
+}
+
+@(private = "file")
+dec_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, xi, ci: []byte) #no_bounds_check {
+	z0_0, z0_1, z1_0, z1_1 := z_sw_128l(st)
+
+	t0_0, t0_1 := aes.load_interleaved(ci[:16])
+	t1_0, t1_1 := aes.load_interleaved(ci[16:])
+	out0_0, out0_1 := aes.xor_interleaved(t0_0, t0_1, z0_0, z0_1)
+	out1_0, out1_1 := aes.xor_interleaved(t1_0, t1_1, z1_0, z1_1)
+
+	update_sw_128l(st, out0_0, out0_1, out1_0, out1_1)
+	aes.store_interleaved(xi[:16], out0_0, out0_1)
+	aes.store_interleaved(xi[16:], out1_0, out1_1)
+}
+
+@(private = "file")
+dec_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, xi, ci: []byte) #no_bounds_check {
+	z_0, z_1 := z_sw_256(st)
+
+	ci_0, ci_1 := aes.load_interleaved(ci)
+	xi_0, xi_1 := aes.xor_interleaved(ci_0, ci_1, z_0, z_1)
+
+	update_sw_256(st, xi_0, xi_1)
+	aes.store_interleaved(xi, xi_0, xi_1)
+}
+
+@(private = "file")
+dec_partial_sw_128l :: proc "contextless" (st: ^State_SW, xn, cn: []byte) #no_bounds_check {
+	tmp: [_RATE_128L]byte
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	z0_0, z0_1, z1_0, z1_1 := z_sw_128l(st)
+	copy(tmp[:], cn)
+
+	t0_0, t0_1 := aes.load_interleaved(tmp[:16])
+	t1_0, t1_1 := aes.load_interleaved(tmp[16:])
+	out0_0, out0_1 := aes.xor_interleaved(t0_0, t0_1, z0_0, z0_1)
+	out1_0, out1_1 := aes.xor_interleaved(t1_0, t1_1, z1_0, z1_1)
+
+	aes.store_interleaved(tmp[:16], out0_0, out0_1)
+	aes.store_interleaved(tmp[16:], out1_0, out1_1)
+	copy(xn, tmp[:])
+
+	for off := len(xn); off < _RATE_128L; off += 1 {
+		tmp[off] = 0
+	}
+	out0_0, out0_1 = aes.load_interleaved(tmp[:16])
+	out1_0, out1_1 = aes.load_interleaved(tmp[16:])
+	update_sw_128l(st, out0_0, out0_1, out1_0, out1_1)
+}
+
+@(private = "file")
+dec_partial_sw_256 :: proc "contextless" (st: ^State_SW, xn, cn: []byte) #no_bounds_check {
+	tmp: [_RATE_256]byte
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	z_0, z_1 := z_sw_256(st)
+	copy(tmp[:], cn)
+
+	cn_0, cn_1 := aes.load_interleaved(tmp[:])
+	xn_0, xn_1 := aes.xor_interleaved(cn_0, cn_1, z_0, z_1)
+
+	aes.store_interleaved(tmp[:], xn_0, xn_1)
+	copy(xn, tmp[:])
+
+	for off := len(xn); off < _RATE_256; off += 1 {
+		tmp[off] = 0
+	}
+	xn_0, xn_1 = aes.load_interleaved(tmp[:])
+	update_sw_256(st, xn_0, xn_1)
+}
+
+@(private)
+dec_sw :: proc "contextless" (st: ^State_SW, dst, src: []byte) #no_bounds_check {
+	xi, ci, l := dst, src, len(src)
+
+	switch st.rate {
+	case _RATE_128L:
+		for l >= _RATE_128L {
+			dec_sw_128l(st, xi, ci)
+			xi = xi[_RATE_128L:]
+			ci = ci[_RATE_128L:]
+			l -= _RATE_128L
+		}
+	case _RATE_256:
+		for l >= _RATE_256 {
+			dec_sw_256(st, xi, ci)
+			xi = xi[_RATE_256:]
+			ci = ci[_RATE_256:]
+			l -= _RATE_256
+		}
+	}
+
+	// Process the remainder.
+	if l > 0 {
+		switch st.rate {
+		case _RATE_128L:
+			dec_partial_sw_128l(st, xi, ci)
+		case _RATE_256:
+			dec_partial_sw_256(st, xi, ci)
+		}
+	}
+}
+
+@(private)
+finalize_sw :: proc "contextless" (st: ^State_SW, tag: []byte, ad_len, msg_len: int) {
+	tmp: [16]byte
+	endian.unchecked_put_u64le(tmp[0:], u64(ad_len) * 8)
+	endian.unchecked_put_u64le(tmp[8:], u64(msg_len) * 8)
+
+	t_0, t_1 := aes.load_interleaved(tmp[:])
+
+	t0_0, t0_1, t1_0, t1_1: u64 = ---, ---, ---, ---
+	switch st.rate {
+	case _RATE_128L:
+		t_0, t_1 = aes.xor_interleaved(st.s2_0, st.s2_1, t_0, t_1)
+		for _ in 0 ..< 7 {
+			update_sw_128l(st, t_0, t_1, t_0, t_1)
+		}
+
+		t0_0, t0_1 = aes.xor_interleaved(st.s0_0, st.s0_1, st.s1_0, st.s1_1)
+		t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, st.s2_0, st.s2_1)
+		t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, st.s3_0, st.s3_1)
+
+		t1_0, t1_1 = aes.xor_interleaved(st.s4_0, st.s4_1, st.s5_0, st.s5_1)
+		t1_0, t1_1 = aes.xor_interleaved(t1_0, t1_1, st.s6_0, st.s6_1)
+		if len(tag) == TAG_SIZE_256 {
+			t1_0, t1_1 = aes.xor_interleaved(t1_0, t1_1, st.s7_0, st.s7_1)
+		}
+	case _RATE_256:
+		t_0, t_1 = aes.xor_interleaved(st.s3_0, st.s3_1, t_0, t_1)
+		for _ in 0 ..< 7 {
+			update_sw_256(st, t_0, t_1)
+		}
+
+		t0_0, t0_1 = aes.xor_interleaved(st.s0_0, st.s0_1, st.s1_0, st.s1_1)
+		t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, st.s2_0, st.s2_1)
+
+		t1_0, t1_1 = aes.xor_interleaved(st.s3_0, st.s3_1, st.s4_0, st.s4_1)
+		t1_0, t1_1 = aes.xor_interleaved(t1_0, t1_1, st.s5_0, st.s5_1)
+	}
+	switch len(tag) {
+	case TAG_SIZE_128:
+		t0_0, t0_1 = aes.xor_interleaved(t0_0, t0_1, t1_0, t1_1)
+		aes.store_interleaved(tag, t0_0, t0_1)
+	case TAG_SIZE_256:
+		aes.store_interleaved(tag[:16], t0_0, t0_1)
+		aes.store_interleaved(tag[16:], t1_0, t1_1)
+	}
+}
+
+@(private)
+reset_state_sw :: proc "contextless" (st: ^State_SW) {
+	mem.zero_explicit(st, size_of(st^))
+}
--- a/core/crypto/aegis/aegis_impl_hw_gen.odin
+++ b/core/crypto/aegis/aegis_impl_hw_gen.odin
@@ -0,0 +1,44 @@
+#+build !amd64
+package aegis
+
+@(private = "file")
+ERR_HW_NOT_SUPPORTED :: "crypto/aegis: hardware implementation unsupported"
+
+@(private)
+State_HW :: struct {}
+
+// is_hardware_accelerated returns true iff hardware accelerated AEGIS
+// is supported.
+is_hardware_accelerated :: proc "contextless" () -> bool {
+	return false
+}
+
+@(private)
+init_hw :: proc "contextless" (ctx: ^Context, st: ^State_HW, iv: []byte) {
+	panic_contextless(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private)
+absorb_hw :: proc "contextless" (st: ^State_HW, aad: []byte) {
+	panic_contextless(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private)
+enc_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) {
+	panic_contextless(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private)
+dec_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) {
+	panic_contextless(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private)
+finalize_hw :: proc "contextless" (st: ^State_HW, tag: []byte, ad_len, msg_len: int) {
+	panic_contextless(ERR_HW_NOT_SUPPORTED)
+}
+
+@(private)
+reset_state_hw :: proc "contextless" (st: ^State_HW) {
+	panic_contextless(ERR_HW_NOT_SUPPORTED)
+}
--- a/core/crypto/aegis/aegis_impl_hw_intel.odin
+++ b/core/crypto/aegis/aegis_impl_hw_intel.odin
@@ -0,0 +1,389 @@
+#+build amd64
+package aegis
+
+import "base:intrinsics"
+import "core:crypto/aes"
+import "core:encoding/endian"
+import "core:mem"
+import "core:simd/x86"
+
+@(private)
+State_HW :: struct {
+	s0:   x86.__m128i,
+	s1:   x86.__m128i,
+	s2:   x86.__m128i,
+	s3:   x86.__m128i,
+	s4:   x86.__m128i,
+	s5:   x86.__m128i,
+	s6:   x86.__m128i, // s6 and s7 are only used at the AEGIS-128L rate.
+	s7:   x86.__m128i,
+	rate: int,         // _RATE_128L or _RATE_256, set by init_hw.
+}
+
+// is_hardware_accelerated returns true iff hardware accelerated AEGIS
+// is supported.
+is_hardware_accelerated :: proc "contextless" () -> bool {
+	return aes.is_hardware_accelerated()
+}
+
+@(private, enable_target_feature = "sse2,aes")
+init_hw :: proc "contextless" (ctx: ^Context, st: ^State_HW, iv: []byte) {
+	switch ctx._key_len {
+	case KEY_SIZE_128L:
+		key := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[0]))
+		iv := intrinsics.unaligned_load((^x86.__m128i)(raw_data(iv)))
+
+		st.s0 = x86._mm_xor_si128(key, iv)
+		st.s1 = intrinsics.unaligned_load((^x86.__m128i)(&_C1[0]))
+		st.s2 = intrinsics.unaligned_load((^x86.__m128i)(&_C0[0]))
+		st.s3 = st.s1
+		st.s4 = st.s0
+		st.s5 = x86._mm_xor_si128(key, st.s2) // key ^ C0
+		st.s6 = x86._mm_xor_si128(key, st.s1) // key ^ C1
+		st.s7 = st.s5
+		st.rate = _RATE_128L
+
+		for _ in 0 ..< 10 {
+			update_hw_128l(st, iv, key)
+		}
+	case KEY_SIZE_256:
+		k0 := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[0]))
+		k1 := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[16]))
+		n0 := intrinsics.unaligned_load((^x86.__m128i)(&iv[0]))
+		n1 := intrinsics.unaligned_load((^x86.__m128i)(&iv[16]))
+
+		st.s0 = x86._mm_xor_si128(k0, n0)
+		st.s1 = x86._mm_xor_si128(k1, n1)
+		st.s2 = intrinsics.unaligned_load((^x86.__m128i)(&_C1[0]))
+		st.s3 = intrinsics.unaligned_load((^x86.__m128i)(&_C0[0]))
+		st.s4 = x86._mm_xor_si128(k0, st.s3) // k0 ^ C0
+		st.s5 = x86._mm_xor_si128(k1, st.s2) // k1 ^ C1
+		st.rate = _RATE_256
+
+		u0, u1 := st.s0, st.s1
+		for _ in 0 ..< 4 {
+			update_hw_256(st, k0)
+			update_hw_256(st, k1)
+			update_hw_256(st, u0)
+			update_hw_256(st, u1)
+		}
+	}
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+update_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, m0, m1: x86.__m128i) {
+	s0_ := x86._mm_aesenc_si128(st.s7, x86._mm_xor_si128(st.s0, m0)) // m0 is folded into S0.
+	s1_ := x86._mm_aesenc_si128(st.s0, st.s1)
+	s2_ := x86._mm_aesenc_si128(st.s1, st.s2)
+	s3_ := x86._mm_aesenc_si128(st.s2, st.s3)
+	s4_ := x86._mm_aesenc_si128(st.s3, x86._mm_xor_si128(st.s4, m1)) // m1 is folded into S4.
+	s5_ := x86._mm_aesenc_si128(st.s4, st.s5)
+	s6_ := x86._mm_aesenc_si128(st.s5, st.s6)
+	s7_ := x86._mm_aesenc_si128(st.s6, st.s7)
+	st.s0, st.s1, st.s2, st.s3, st.s4, st.s5, st.s6, st.s7 = s0_, s1_, s2_, s3_, s4_, s5_, s6_, s7_ // Commit only after every new word was derived from the old state.
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+update_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, m: x86.__m128i) {
+	s0_ := x86._mm_aesenc_si128(st.s5, x86._mm_xor_si128(st.s0, m)) // m is folded into S0 only.
+	s1_ := x86._mm_aesenc_si128(st.s0, st.s1)
+	s2_ := x86._mm_aesenc_si128(st.s1, st.s2)
+	s3_ := x86._mm_aesenc_si128(st.s2, st.s3)
+	s4_ := x86._mm_aesenc_si128(st.s3, st.s4)
+	s5_ := x86._mm_aesenc_si128(st.s4, st.s5)
+	st.s0, st.s1, st.s2, st.s3, st.s4, st.s5 = s0_, s1_, s2_, s3_, s4_, s5_ // Commit only after every new word was derived from the old state.
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+absorb_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, ai: []byte) {
+	t0 := intrinsics.unaligned_load((^x86.__m128i)(&ai[0]))
+	t1 := intrinsics.unaligned_load((^x86.__m128i)(&ai[16]))
+	update_hw_128l(st, t0, t1)
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+absorb_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, ai: []byte) {
+	m := intrinsics.unaligned_load((^x86.__m128i)(&ai[0]))
+	update_hw_256(st, m)
+}
+
+@(private, enable_target_feature = "sse2,aes")
+absorb_hw :: proc "contextless" (st: ^State_HW, aad: []byte) #no_bounds_check {
+	ai, l := aad, len(aad)
+
+	switch st.rate {
+	case _RATE_128L:
+		for l >= _RATE_128L {
+			absorb_hw_128l(st, ai)
+			ai = ai[_RATE_128L:]
+			l -= _RATE_128L
+		}
+	case _RATE_256:
+		for l >= _RATE_256 {
+			absorb_hw_256(st, ai)
+
+			ai = ai[_RATE_256:]
+			l -= _RATE_256
+		}
+	}
+
+	// Pad out the remainder with `0`s till it is rate sized.
+	if l > 0 {
+		tmp: [_RATE_MAX]byte // AAD is not confidential.
+		copy(tmp[:], ai)
+		switch st.rate {
+		case _RATE_128L:
+			absorb_hw_128l(st, tmp[:])
+		case _RATE_256:
+			absorb_hw_256(st, tmp[:])
+		}
+	}
+}
+
+@(private = "file", enable_target_feature = "sse2", require_results)
+z_hw_128l :: #force_inline proc "contextless" (st: ^State_HW) -> (x86.__m128i, x86.__m128i) {
+	// Derives the two 128-bit words that enc/dec xor with the data,
+	// building each innermost-first with the operands in this order:
+	//
+	//   z0 = S6 ^ (S1 ^ (S2 & S3))
+	//   z1 = S2 ^ (S5 ^ (S6 & S7))
+	//
+	z0 := x86._mm_and_si128(st.s2, st.s3)
+	z0 = x86._mm_xor_si128(st.s1, z0)
+	z0 = x86._mm_xor_si128(st.s6, z0)
+
+	z1 := x86._mm_and_si128(st.s6, st.s7)
+	z1 = x86._mm_xor_si128(st.s5, z1)
+	z1 = x86._mm_xor_si128(st.s2, z1)
+
+	return z0, z1
+}
+
+@(private = "file", enable_target_feature = "sse2", require_results)
+z_hw_256 :: #force_inline proc "contextless" (st: ^State_HW) -> x86.__m128i {
+	// Derives the 128-bit word that enc/dec xor with the data, building
+	// it innermost-first with the operands in this order:
+	//
+	//   z = S1 ^ (S4 ^ (S5 ^ (S2 & S3)))
+	//
+	z := x86._mm_and_si128(st.s2, st.s3)
+	z = x86._mm_xor_si128(st.s5, z)
+	z = x86._mm_xor_si128(st.s4, z)
+	z = x86._mm_xor_si128(st.s1, z)
+	return z
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+enc_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, ci, xi: []byte) #no_bounds_check {
+	z0, z1 := z_hw_128l(st)
+
+	t0 := intrinsics.unaligned_load((^x86.__m128i)(&xi[0]))
+	t1 := intrinsics.unaligned_load((^x86.__m128i)(&xi[16]))
+	update_hw_128l(st, t0, t1)
+
+	out0 := x86._mm_xor_si128(t0, z0)
+	out1 := x86._mm_xor_si128(t1, z1)
+	intrinsics.unaligned_store((^x86.__m128i)(&ci[0]), out0)
+	intrinsics.unaligned_store((^x86.__m128i)(&ci[16]), out1)
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+enc_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, ci, xi: []byte) #no_bounds_check {
+	z := z_hw_256(st)
+
+	xi_ := intrinsics.unaligned_load((^x86.__m128i)(raw_data(xi)))
+	update_hw_256(st, xi_)
+
+	ci_ := x86._mm_xor_si128(xi_, z)
+	intrinsics.unaligned_store((^x86.__m128i)(raw_data(ci)), ci_)
+}
+
+@(private, enable_target_feature = "sse2,aes")
+enc_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) #no_bounds_check {
+	ci, xi, l := dst, src, len(src)
+
+	switch st.rate {
+	case _RATE_128L:
+		for l >= _RATE_128L {
+			enc_hw_128l(st, ci, xi)
+			ci = ci[_RATE_128L:]
+			xi = xi[_RATE_128L:]
+			l -= _RATE_128L
+		}
+	case _RATE_256:
+		for l >= _RATE_256 {
+			enc_hw_256(st, ci, xi)
+			ci = ci[_RATE_256:]
+			xi = xi[_RATE_256:]
+			l -= _RATE_256
+		}
+	}
+
+	// Pad out the remainder with `0`s till it is rate sized.
+	if l > 0 {
+		tmp: [_RATE_MAX]byte // Ciphertext is not confidential.
+		copy(tmp[:], xi)
+		switch st.rate {
+		case _RATE_128L:
+			enc_hw_128l(st, tmp[:], tmp[:])
+		case _RATE_256:
+			enc_hw_256(st, tmp[:], tmp[:])
+		}
+		copy(ci, tmp[:l])
+	}
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+dec_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, xi, ci: []byte) #no_bounds_check {
+	z0, z1 := z_hw_128l(st)
+
+	t0 := intrinsics.unaligned_load((^x86.__m128i)(&ci[0]))
+	t1 := intrinsics.unaligned_load((^x86.__m128i)(&ci[16]))
+	out0 := x86._mm_xor_si128(t0, z0)
+	out1 := x86._mm_xor_si128(t1, z1)
+
+	update_hw_128l(st, out0, out1)
+	intrinsics.unaligned_store((^x86.__m128i)(&xi[0]), out0)
+	intrinsics.unaligned_store((^x86.__m128i)(&xi[16]), out1)
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+dec_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, xi, ci: []byte) #no_bounds_check {
+	z := z_hw_256(st)
+
+	ci_ := intrinsics.unaligned_load((^x86.__m128i)(raw_data(ci)))
+	xi_ := x86._mm_xor_si128(ci_, z)
+
+	update_hw_256(st, xi_)
+	intrinsics.unaligned_store((^x86.__m128i)(raw_data(xi)), xi_)
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+dec_partial_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, xn, cn: []byte) #no_bounds_check {
+	tmp: [_RATE_128L]byte
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	z0, z1 := z_hw_128l(st)
+	copy(tmp[:], cn)
+
+	t0 := intrinsics.unaligned_load((^x86.__m128i)(&tmp[0]))
+	t1 := intrinsics.unaligned_load((^x86.__m128i)(&tmp[16]))
+	out0 := x86._mm_xor_si128(t0, z0)
+	out1 := x86._mm_xor_si128(t1, z1)
+
+	intrinsics.unaligned_store((^x86.__m128i)(&tmp[0]), out0)
+	intrinsics.unaligned_store((^x86.__m128i)(&tmp[16]), out1)
+	copy(xn, tmp[:])
+
+	for off := len(xn); off < _RATE_128L; off += 1 {
+		tmp[off] = 0
+	}
+	out0 = intrinsics.unaligned_load((^x86.__m128i)(&tmp[0])) // v0
+	out1 = intrinsics.unaligned_load((^x86.__m128i)(&tmp[16])) // v1
+	update_hw_128l(st, out0, out1)
+}
+
+@(private = "file", enable_target_feature = "sse2,aes")
+dec_partial_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, xn, cn: []byte) #no_bounds_check {
+	tmp: [_RATE_256]byte
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	z := z_hw_256(st)
+	copy(tmp[:], cn)
+
+	cn_ := intrinsics.unaligned_load((^x86.__m128i)(&tmp[0]))
+	xn_ := x86._mm_xor_si128(cn_, z)
+
+	intrinsics.unaligned_store((^x86.__m128i)(&tmp[0]), xn_)
+	copy(xn, tmp[:])
+
+	for off := len(xn); off < _RATE_256; off += 1 {
+		tmp[off] = 0
+	}
+	xn_ = intrinsics.unaligned_load((^x86.__m128i)(&tmp[0]))
+	update_hw_256(st, xn_)
+}
+
+@(private, enable_target_feature = "sse2,aes")
+dec_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) #no_bounds_check {
+	xi, ci, l := dst, src, len(src)
+
+	switch st.rate {
+	case _RATE_128L:
+		for l >= _RATE_128L {
+			dec_hw_128l(st, xi, ci)
+			xi = xi[_RATE_128L:]
+			ci = ci[_RATE_128L:]
+			l -= _RATE_128L
+		}
+	case _RATE_256:
+		for l >= _RATE_256 {
+			dec_hw_256(st, xi, ci)
+			xi = xi[_RATE_256:]
+			ci = ci[_RATE_256:]
+			l -= _RATE_256
+		}
+	}
+
+	// Process the remainder.
+	if l > 0 {
+		switch st.rate {
+		case _RATE_128L:
+			dec_partial_hw_128l(st, xi, ci)
+		case _RATE_256:
+			dec_partial_hw_256(st, xi, ci)
+		}
+	}
+}
+
+@(private, enable_target_feature = "sse2,aes")
+finalize_hw :: proc "contextless" (st: ^State_HW, tag: []byte, ad_len, msg_len: int) {
+	tmp: [16]byte
+	endian.unchecked_put_u64le(tmp[0:], u64(ad_len) * 8)
+	endian.unchecked_put_u64le(tmp[8:], u64(msg_len) * 8)
+
+	t := intrinsics.unaligned_load((^x86.__m128i)(&tmp[0]))
+
+	t0, t1: x86.__m128i = ---, ---
+	switch st.rate {
+	case _RATE_128L:
+		t = x86._mm_xor_si128(st.s2, t)
+		for _ in 0 ..< 7 {
+			update_hw_128l(st, t, t)
+		}
+
+		t0 = x86._mm_xor_si128(st.s0, st.s1)
+		t0 = x86._mm_xor_si128(t0, st.s2)
+		t0 = x86._mm_xor_si128(t0, st.s3)
+
+		t1 = x86._mm_xor_si128(st.s4, st.s5)
+		t1 = x86._mm_xor_si128(t1, st.s6)
+		if len(tag) == TAG_SIZE_256 {
+			t1 = x86._mm_xor_si128(t1, st.s7)
+		}
+	case _RATE_256:
+		t = x86._mm_xor_si128(st.s3, t)
+		for _ in 0 ..< 7 {
+			update_hw_256(st, t)
+		}
+
+		t0 = x86._mm_xor_si128(st.s0, st.s1)
+		t0 = x86._mm_xor_si128(t0, st.s2)
+
+		t1 = x86._mm_xor_si128(st.s3, st.s4)
+		t1 = x86._mm_xor_si128(t1, st.s5)
+	}
+	switch len(tag) {
+	case TAG_SIZE_128:
+		t0 = x86._mm_xor_si128(t0, t1)
+		intrinsics.unaligned_store((^x86.__m128i)(&tag[0]), t0)
+	case TAG_SIZE_256:
+		intrinsics.unaligned_store((^x86.__m128i)(&tag[0]), t0)
+		intrinsics.unaligned_store((^x86.__m128i)(&tag[16]), t1)
+	}
+}
+
+@(private)
+reset_state_hw :: proc "contextless" (st: ^State_HW) {
+	mem.zero_explicit(st, size_of(st^))
+}
--- a/core/crypto/aes/aes_ctr.odin
+++ b/core/crypto/aes/aes_ctr.odin
@@ -21,9 +21,7 @@ Context_CTR :: struct {

 // init_ctr initializes a Context_CTR with the provided key and IV.
 init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := DEFAULT_IMPLEMENTATION) {
-	if len(iv) != CTR_IV_SIZE {
-		panic("crypto/aes: invalid CTR IV size")
-	}
+	ensure(len(iv) == CTR_IV_SIZE, "crypto/aes: invalid CTR IV size")

 	init_impl(&ctx._impl, key, impl)
 	ctx._off = BLOCK_SIZE
@@ -36,16 +34,14 @@ init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := DEFAULT_IMPLEMENTAT
 // keystream, and writes the resulting output to dst.  dst and src MUST
 // alias exactly or not at all.
 xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) {
-	assert(ctx._is_initialized)
+	ensure(ctx._is_initialized)

 	src, dst := src, dst
 	if dst_len := len(dst); dst_len < len(src) {
 		src = src[:dst_len]
 	}

-	if bytes.alias_inexactly(dst, src) {
-		panic("crypto/aes: dst and src alias inexactly")
-	}
+	ensure(!bytes.alias_inexactly(dst, src), "crypto/aes: dst and src alias inexactly")

 	#no_bounds_check for remaining := len(src); remaining > 0; {
 		// Process multiple blocks at once
@@ -82,7 +78,7 @@ xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) {

 // keystream_bytes_ctr fills dst with the raw AES-CTR keystream output.
 keystream_bytes_ctr :: proc(ctx: ^Context_CTR, dst: []byte) {
-	assert(ctx._is_initialized)
+	ensure(ctx._is_initialized)

 	dst := dst
 	#no_bounds_check for remaining := len(dst); remaining > 0; {
--- a/core/crypto/aes/aes_ecb.odin
+++ b/core/crypto/aes/aes_ecb.odin
@@ -19,11 +19,9 @@ init_ecb :: proc(ctx: ^Context_ECB, key: []byte, impl := DEFAULT_IMPLEMENTATION)

 // encrypt_ecb encrypts the BLOCK_SIZE buffer src, and writes the result to dst.
 encrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
-	assert(ctx._is_initialized)
-
-	if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE {
-		panic("crypto/aes: invalid buffer size(s)")
-	}
+	ensure(ctx._is_initialized)
+	// dst and src must each be exactly BLOCK_SIZE bytes long.
+	ensure(len(dst) == BLOCK_SIZE, "crypto/aes: invalid dst size")
+	ensure(len(src) == BLOCK_SIZE, "crypto/aes: invalid src size")

 	switch &impl in ctx._impl {
 	case ct64.Context:
@@ -35,11 +33,9 @@ encrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {

 // decrypt_ecb decrypts the BLOCK_SIZE buffer src, and writes the result to dst.
 decrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
-	assert(ctx._is_initialized)
-
-	if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE {
-		panic("crypto/aes: invalid buffer size(s)")
-	}
+	ensure(ctx._is_initialized)
+	// dst and src must each be exactly BLOCK_SIZE bytes long.
+	ensure(len(dst) == BLOCK_SIZE, "crypto/aes: invalid dst size")
+	ensure(len(src) == BLOCK_SIZE, "crypto/aes: invalid src size")

 	switch &impl in ctx._impl {
 	case ct64.Context:
--- a/core/crypto/aes/aes_gcm.odin
+++ b/core/crypto/aes/aes_gcm.odin
@@ -36,15 +36,11 @@ init_gcm :: proc(ctx: ^Context_GCM, key: []byte, impl := DEFAULT_IMPLEMENTATION)
 //
 // dst and plaintext MUST alias exactly or not at all.
 seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, iv, aad, plaintext: []byte) {
-	assert(ctx._is_initialized)
+	ensure(ctx._is_initialized)

 	gcm_validate_common_slice_sizes(tag, iv, aad, plaintext)
-	if len(dst) != len(plaintext) {
-		panic("crypto/aes: invalid destination ciphertext size")
-	}
-	if bytes.alias_inexactly(dst, plaintext) {
-		panic("crypto/aes: dst and plaintext alias inexactly")
-	}
+	ensure(len(dst) == len(plaintext), "crypto/aes: invalid destination ciphertext size")
+	ensure(!bytes.alias_inexactly(dst, plaintext), "crypto/aes: dst and plaintext alias inexactly")

 	if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
 		gcm_seal_hw(&impl, dst, tag, iv, aad, plaintext)
@@ -76,15 +72,11 @@ seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, iv, aad, plaintext: []byte) {
 // dst and plaintext MUST alias exactly or not at all.
@(require_results)
 open_gcm :: proc(ctx: ^Context_GCM, dst, iv, aad, ciphertext, tag: []byte) -> bool {
-	assert(ctx._is_initialized)
+	ensure(ctx._is_initialized)

 	gcm_validate_common_slice_sizes(tag, iv, aad, ciphertext)
-	if len(dst) != len(ciphertext) {
-		panic("crypto/aes: invalid destination plaintext size")
-	}
-	if bytes.alias_inexactly(dst, ciphertext) {
-		panic("crypto/aes: dst and ciphertext alias inexactly")
-	}
+	ensure(len(dst) == len(ciphertext), "crypto/aes: invalid destination plaintext size")
+	ensure(!bytes.alias_inexactly(dst, ciphertext), "crypto/aes: dst and ciphertext alias inexactly")

 	if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
 		return gcm_open_hw(&impl, dst, iv, aad, ciphertext, tag)
@@ -122,21 +114,13 @@ reset_gcm :: proc "contextless" (ctx: ^Context_GCM) {

+// gcm_validate_common_slice_sizes checks the slice lengths of the tag,
+// IV, additional authenticated data, and plaintext/ciphertext, and
+// fails via ensure with a descriptive message when any of them is
+// out of range.
@(private = "file")
 gcm_validate_common_slice_sizes :: proc(tag, iv, aad, text: []byte) {
-	if len(tag) != GCM_TAG_SIZE {
-		panic("crypto/aes: invalid GCM tag size")
-	}
+	ensure(len(tag) == GCM_TAG_SIZE, "crypto/aes: invalid GCM tag size")

 	// The specification supports IVs in the range [1, 2^64) bits.
-	if l := len(iv); l == 0 || u64(l) >= GCM_IV_SIZE_MAX {
-		panic("crypto/aes: invalid GCM IV size")
-	}
+	// An empty IV must be rejected, so the IV has to be non-empty AND
+	// within the upper bound.
+	ensure(len(iv) > 0 && u64(len(iv)) <= GCM_IV_SIZE_MAX, "crypto/aes: invalid GCM IV size")

-	if aad_len := u64(len(aad)); aad_len > GCM_A_MAX {
-		panic("crypto/aes: oversized GCM aad")
-	}
-	if text_len := u64(len(text)); text_len > GCM_P_MAX {
-		panic("crypto/aes: oversized GCM src data")
-	}
+	ensure(u64(len(aad)) <= GCM_A_MAX, "crypto/aes: oversized GCM aad")
+	ensure(u64(len(text)) <= GCM_P_MAX, "crypto/aes: oversized GCM data")
 }

@(private = "file")
--- a/core/crypto/aes/aes_gcm_hw_intel.odin
+++ b/core/crypto/aes/aes_gcm_hw_intel.odin
@@ -235,7 +235,7 @@ gctr_hw :: proc(
 // BUG: Sticking this in gctr_hw (like the other implementations) crashes
 // the compiler.
 //
-// src/check_expr.cpp(7892): Assertion Failure: `c->curr_proc_decl->entity`
+// src/check_expr.cpp(8104): Assertion Failure: `c->curr_proc_decl->entity`
@(private = "file", enable_target_feature = "sse4.1")
 hw_inc_ctr32 :: #force_inline proc "contextless" (src: ^x86.__m128i, ctr: u32) -> (x86.__m128i, u32) {
 	ret := x86._mm_insert_epi32(src^, i32(intrinsics.byte_swap(ctr)), 3)
--- a/core/crypto/blake2b/blake2b.odin
+++ b/core/crypto/blake2b/blake2b.odin
@@ -18,7 +18,7 @@ package blake2b
 import "../_blake2"

 // DIGEST_SIZE is the BLAKE2b digest size in bytes.
-DIGEST_SIZE :: 64
+DIGEST_SIZE :: _blake2.BLAKE2B_SIZE

 // BLOCK_SIZE is the BLAKE2b block size in bytes.
 BLOCK_SIZE :: _blake2.BLAKE2B_BLOCK_SIZE
@@ -27,9 +27,11 @@ BLOCK_SIZE :: _blake2.BLAKE2B_BLOCK_SIZE
 Context :: _blake2.Blake2b_Context

 // init initializes a Context with the default BLAKE2b config.
-init :: proc(ctx: ^Context) {
+//
+// digest_size is the requested digest length in bytes.  It must be
+// positive and no larger than _blake2.MAX_SIZE; it defaults to
+// DIGEST_SIZE.
+init :: proc(ctx: ^Context, digest_size := DIGEST_SIZE) {
+	// The lower bound matters: without it zero and negative values pass
+	// the check, and negatives are silently truncated by the u8 cast.
+	ensure(digest_size > 0 && digest_size <= _blake2.MAX_SIZE, "crypto/blake2b: invalid digest size")
+
 	cfg: _blake2.Blake2_Config
-	cfg.size = _blake2.BLAKE2B_SIZE
+	cfg.size = u8(digest_size)
 	_blake2.init(ctx, &cfg)
 }

--- a/core/crypto/blake2s/blake2s.odin
+++ b/core/crypto/blake2s/blake2s.odin
@@ -18,7 +18,7 @@ package blake2s
 import "../_blake2"

 // DIGEST_SIZE is the BLAKE2s digest size in bytes.
-DIGEST_SIZE :: 32
+DIGEST_SIZE :: _blake2.BLAKE2S_SIZE

 // BLOCK_SIZE is the BLAKE2s block size in bytes.
 BLOCK_SIZE :: _blake2.BLAKE2S_BLOCK_SIZE
@@ -27,9 +27,11 @@ BLOCK_SIZE :: _blake2.BLAKE2S_BLOCK_SIZE
 Context :: _blake2.Blake2s_Context

 // init initializes a Context with the default BLAKE2s config.
-init :: proc(ctx: ^Context) {
+//
+// digest_size is the requested digest length in bytes.  It must be
+// positive and no larger than _blake2.MAX_SIZE; it defaults to
+// DIGEST_SIZE.
+init :: proc(ctx: ^Context, digest_size := DIGEST_SIZE) {
+	// The lower bound matters: without it zero and negative values pass
+	// the check, and negatives are silently truncated by the u8 cast.
+	ensure(digest_size > 0 && digest_size <= _blake2.MAX_SIZE, "crypto/blake2s: invalid digest size")
+
 	cfg: _blake2.Blake2_Config
-	cfg.size = _blake2.BLAKE2S_SIZE
+	cfg.size = u8(digest_size)
 	_blake2.init(ctx, &cfg)
 }

--- a/Show More
+++ b/Show More