core/crypto/aes: Add AES implementation

Yawning Angel
2024-05-25 20:24:39 +09:00
parent 1ade62b630
commit c751e4b2eb
11 changed files with 1142 additions and 1 deletion


@@ -10,7 +10,6 @@ KEY_SIZE_256 :: 32
// BLOCK_SIZE is the AES block size in bytes.
BLOCK_SIZE :: 16
// ROUNDS_128 is the number of rounds for AES-128.
ROUNDS_128 :: 10
// ROUNDS_192 is the number of rounds for AES-192.
@@ -22,6 +21,8 @@ ROUNDS_256 :: 14
GHASH_KEY_SIZE :: 16
// GHASH_BLOCK_SIZE is the GHASH block size in bytes.
GHASH_BLOCK_SIZE :: 16
// GHASH_TAG_SIZE is the GHASH tag size in bytes.
GHASH_TAG_SIZE :: 16
// RCON contains the AES key schedule round constants.
RCON := [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36}

core/crypto/aes/aes.odin

@@ -0,0 +1,22 @@
/*
package aes implements the AES block cipher and some common modes.
See:
- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.197-upd1.pdf
- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf
- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf
*/
package aes
import "core:crypto/_aes"
// KEY_SIZE_128 is the AES-128 key size in bytes.
KEY_SIZE_128 :: _aes.KEY_SIZE_128
// KEY_SIZE_192 is the AES-192 key size in bytes.
KEY_SIZE_192 :: _aes.KEY_SIZE_192
// KEY_SIZE_256 is the AES-256 key size in bytes.
KEY_SIZE_256 :: _aes.KEY_SIZE_256
// BLOCK_SIZE is the AES block size in bytes.
BLOCK_SIZE :: _aes.BLOCK_SIZE
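// Usage sketch (not part of this commit): the re-exported constants
// are intended for sizing key and block buffers, e.g.
//
//	key: [KEY_SIZE_256]byte
//	block: [BLOCK_SIZE]byte
//
// with each mode's init_* proc deriving the round keys from the key.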


@@ -0,0 +1,199 @@
package aes
import "core:crypto/_aes/ct64"
import "core:encoding/endian"
import "core:math/bits"
import "core:mem"
// CTR_IV_SIZE is the size of the CTR mode IV in bytes.
CTR_IV_SIZE :: 16
// Context_CTR is a keyed AES-CTR instance.
Context_CTR :: struct {
_impl: Context_Impl,
_buffer: [BLOCK_SIZE]byte,
_off: int,
_ctr_hi: u64,
_ctr_lo: u64,
_is_initialized: bool,
}
// init_ctr initializes a Context_CTR with the provided key and IV.
init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := Implementation.Hardware) {
if len(iv) != CTR_IV_SIZE {
panic("crypto/aes: invalid CTR IV size")
}
init_impl(&ctx._impl, key, impl)
ctx._off = BLOCK_SIZE
ctx._ctr_hi = endian.unchecked_get_u64be(iv[0:])
ctx._ctr_lo = endian.unchecked_get_u64be(iv[8:])
ctx._is_initialized = true
}
// xor_bytes_ctr XORs each byte in src with bytes taken from the AES-CTR
// keystream, and writes the resulting output to dst. dst and src MUST
// alias exactly or not at all.
xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) {
assert(ctx._is_initialized)
// TODO: Enforcing that dst and src alias exactly or not at all
// is a good idea, though odd aliasing should be extremely uncommon.
src, dst := src, dst
if dst_len := len(dst); dst_len < len(src) {
src = src[:dst_len]
}
for remaining := len(src); remaining > 0; {
// Process multiple blocks at once
if ctx._off == BLOCK_SIZE {
if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 {
direct_bytes := nr_blocks * BLOCK_SIZE
ctr_blocks(ctx, dst, src, nr_blocks)
remaining -= direct_bytes
if remaining == 0 {
return
}
dst = dst[direct_bytes:]
src = src[direct_bytes:]
}
// If there is a partial block, generate and buffer 1 block
// worth of keystream.
ctr_blocks(ctx, ctx._buffer[:], nil, 1)
ctx._off = 0
}
// Process partial blocks from the buffered keystream.
to_xor := min(BLOCK_SIZE - ctx._off, remaining)
buffered_keystream := ctx._buffer[ctx._off:]
for i in 0 ..< to_xor {
dst[i] = buffered_keystream[i] ~ src[i]
}
ctx._off += to_xor
dst = dst[to_xor:]
src = src[to_xor:]
remaining -= to_xor
}
}
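// Usage sketch (not part of this commit; the all-zero key/IV and the
// message are illustrative assumptions only): a CTR round trip. CTR
// mode is an involution, so decryption is the same XOR, here performed
// with a freshly initialized context.
@(private = "file")
_example_ctr_round_trip :: proc() {
	key: [KEY_SIZE_128]byte
	iv: [CTR_IV_SIZE]byte

	ctx: Context_CTR
	init_ctr(&ctx, key[:], iv[:])
	defer reset_ctr(&ctx)

	plaintext := transmute([]byte)string("hello, AES-CTR")
	ciphertext := make([]byte, len(plaintext))
	defer delete(ciphertext)
	xor_bytes_ctr(&ctx, ciphertext, plaintext)

	// Decrypting re-runs the same keystream from the same key/IV.
	ctx2: Context_CTR
	init_ctr(&ctx2, key[:], iv[:])
	defer reset_ctr(&ctx2)
	recovered := make([]byte, len(ciphertext))
	defer delete(recovered)
	xor_bytes_ctr(&ctx2, recovered, ciphertext)
}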
// keystream_bytes_ctr fills dst with the raw AES-CTR keystream output.
keystream_bytes_ctr :: proc(ctx: ^Context_CTR, dst: []byte) {
assert(ctx._is_initialized)
dst := dst
for remaining := len(dst); remaining > 0; {
// Process multiple blocks at once
if ctx._off == BLOCK_SIZE {
if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 {
direct_bytes := nr_blocks * BLOCK_SIZE
ctr_blocks(ctx, dst, nil, nr_blocks)
remaining -= direct_bytes
if remaining == 0 {
return
}
dst = dst[direct_bytes:]
}
// If there is a partial block, generate and buffer 1 block
// worth of keystream.
ctr_blocks(ctx, ctx._buffer[:], nil, 1)
ctx._off = 0
}
// Process partial blocks from the buffered keystream.
to_copy := min(BLOCK_SIZE - ctx._off, remaining)
buffered_keystream := ctx._buffer[ctx._off:]
copy(dst[:to_copy], buffered_keystream[:to_copy])
ctx._off += to_copy
dst = dst[to_copy:]
remaining -= to_copy
}
}
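// Usage sketch (not part of this commit; the all-zero key/IV are
// illustrative assumptions only): drawing raw keystream, e.g. to use
// AES-CTR as a deterministic byte stream expander.
@(private = "file")
_example_ctr_keystream :: proc() {
	key: [KEY_SIZE_256]byte
	iv: [CTR_IV_SIZE]byte

	ctx: Context_CTR
	init_ctr(&ctx, key[:], iv[:])
	defer reset_ctr(&ctx)

	stream: [64]byte
	keystream_bytes_ctr(&ctx, stream[:])
}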
// reset_ctr sanitizes the Context_CTR. The Context_CTR must be
// re-initialized to be used again.
reset_ctr :: proc "contextless" (ctx: ^Context_CTR) {
reset_impl(&ctx._impl)
ctx._off = 0
ctx._ctr_hi = 0
ctx._ctr_lo = 0
mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
ctx._is_initialized = false
}
@(private)
ctr_blocks :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) {
// Use the optimized hardware implementation if available.
if _, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
ctr_blocks_hw(ctx, dst, src, nr_blocks)
return
}
// Portable implementation.
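// ct64_inc_ctr serializes the current 128-bit big-endian counter
// into dst, then returns the counter incremented by one, carrying
// from the low limb into the high limb on overflow.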
ct64_inc_ctr := #force_inline proc "contextless" (dst: []byte, hi, lo: u64) -> (u64, u64) {
endian.unchecked_put_u64be(dst[0:], hi)
endian.unchecked_put_u64be(dst[8:], lo)
hi, lo := hi, lo
carry: u64
lo, carry = bits.add_u64(lo, 1, 0)
hi, _ = bits.add_u64(hi, 0, carry)
return hi, lo
}
impl := &ctx._impl.(ct64.Context)
src, dst := src, dst
nr_blocks := nr_blocks
ctr_hi, ctr_lo := ctx._ctr_hi, ctx._ctr_lo
tmp: [ct64.STRIDE][BLOCK_SIZE]byte = ---
ctrs: [ct64.STRIDE][]byte = ---
for i in 0 ..< ct64.STRIDE {
ctrs[i] = tmp[i][:]
}
for nr_blocks > 0 {
n := min(ct64.STRIDE, nr_blocks)
blocks := ctrs[:n]
for i in 0 ..< n {
ctr_hi, ctr_lo = ct64_inc_ctr(blocks[i], ctr_hi, ctr_lo)
}
ct64.encrypt_blocks(impl, blocks, blocks)
xor_blocks(dst, src, blocks)
if src != nil {
src = src[n * BLOCK_SIZE:]
}
dst = dst[n * BLOCK_SIZE:]
nr_blocks -= n
}
// Write back the counter.
ctx._ctr_hi, ctx._ctr_lo = ctr_hi, ctr_lo
mem.zero_explicit(&tmp, size_of(tmp))
}
@(private)
xor_blocks :: #force_inline proc "contextless" (dst, src: []byte, blocks: [][]byte) {
// Note: This would be faster if `core:simd` were used, however if
// the performance of this implementation matters to the point where
// that optimization would be worth it, use chacha20poly1305, or a
// CPU that isn't e-waste.
if src != nil {
#no_bounds_check {
for i in 0 ..< len(blocks) {
off := i * BLOCK_SIZE
for j in 0 ..< BLOCK_SIZE {
blocks[i][j] ~= src[off + j]
}
}
}
}
for i in 0 ..< len(blocks) {
copy(dst[i * BLOCK_SIZE:], blocks[i])
}
}


@@ -0,0 +1,57 @@
package aes
import "core:crypto/_aes/ct64"
// Context_ECB is a keyed AES-ECB instance.
//
// WARNING: Using ECB mode is strongly discouraged unless it is being
// used to implement higher level constructs.
Context_ECB :: struct {
_impl: Context_Impl,
_is_initialized: bool,
}
// init_ecb initializes a Context_ECB with the provided key.
init_ecb :: proc(ctx: ^Context_ECB, key: []byte, impl := Implementation.Hardware) {
init_impl(&ctx._impl, key, impl)
ctx._is_initialized = true
}
// encrypt_ecb encrypts the BLOCK_SIZE buffer src, and writes the result to dst.
encrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
assert(ctx._is_initialized)
if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE {
panic("crypto/aes: invalid buffer size(s)")
}
switch &impl in ctx._impl {
case ct64.Context:
ct64.encrypt_block(&impl, dst, src)
case Context_Impl_Hardware:
encrypt_block_hw(&impl, dst, src)
}
}
// decrypt_ecb decrypts the BLOCK_SIZE buffer src, and writes the result to dst.
decrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) {
assert(ctx._is_initialized)
if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE {
panic("crypto/aes: invalid buffer size(s)")
}
switch &impl in ctx._impl {
case ct64.Context:
ct64.decrypt_block(&impl, dst, src)
case Context_Impl_Hardware:
decrypt_block_hw(&impl, dst, src)
}
}
// reset_ecb sanitizes the Context_ECB. The Context_ECB must be
// re-initialized to be used again.
reset_ecb :: proc "contextless" (ctx: ^Context_ECB) {
reset_impl(&ctx._impl)
ctx._is_initialized = false
}
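// Usage sketch (not part of this commit; the all-zero key and block
// are illustrative assumptions only): a single-block ECB round trip.
@(private = "file")
_example_ecb_round_trip :: proc() {
	key: [KEY_SIZE_128]byte

	ctx: Context_ECB
	init_ecb(&ctx, key[:])
	defer reset_ecb(&ctx)

	src, ct, pt: [BLOCK_SIZE]byte
	encrypt_ecb(&ctx, ct[:], src[:])
	decrypt_ecb(&ctx, pt[:], ct[:])
	assert(pt == src)
}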


@@ -0,0 +1,253 @@
package aes
import "core:crypto"
import "core:crypto/_aes"
import "core:crypto/_aes/ct64"
import "core:encoding/endian"
import "core:mem"
// GCM_NONCE_SIZE is the size of the GCM nonce in bytes.
GCM_NONCE_SIZE :: 12
// GCM_TAG_SIZE is the size of a GCM tag in bytes.
GCM_TAG_SIZE :: _aes.GHASH_TAG_SIZE
@(private)
GCM_A_MAX :: max(u64) / 8 // 2^64 - 1 bits -> bytes
@(private)
GCM_P_MAX :: 0xfffffffe0 // 2^39 - 256 bits -> 2^36 - 32 bytes
// Context_GCM is a keyed AES-GCM instance.
Context_GCM :: struct {
_impl: Context_Impl,
_is_initialized: bool,
}
// init_gcm initializes a Context_GCM with the provided key.
init_gcm :: proc(ctx: ^Context_GCM, key: []byte, impl := Implementation.Hardware) {
init_impl(&ctx._impl, key, impl)
ctx._is_initialized = true
}
// seal_gcm encrypts the plaintext and authenticates the aad and ciphertext
// with the provided Context_GCM and nonce, and stores the output in dst and
// tag.
//
// dst and plaintext MUST alias exactly or not at all.
seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, nonce, aad, plaintext: []byte) {
assert(ctx._is_initialized)
gcm_validate_common_slice_sizes(tag, nonce, aad, plaintext)
if len(dst) != len(plaintext) {
panic("crypto/aes: invalid destination ciphertext size")
}
if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
gcm_seal_hw(&impl, dst, tag, nonce, aad, plaintext)
return
}
h: [_aes.GHASH_KEY_SIZE]byte
j0: [_aes.GHASH_BLOCK_SIZE]byte
s: [_aes.GHASH_TAG_SIZE]byte
init_ghash_ct64(ctx, &h, &j0, nonce)
// Note: Our GHASH implementation handles appending padding.
ct64.ghash(s[:], h[:], aad)
gctr_ct64(ctx, dst, &s, plaintext, &h, nonce, true)
final_ghash_ct64(&s, &h, &j0, len(aad), len(plaintext))
copy(tag, s[:])
mem.zero_explicit(&h, len(h))
mem.zero_explicit(&j0, len(j0))
}
// open_gcm authenticates the aad and ciphertext with the provided
// Context_GCM, nonce, and tag, then decrypts the ciphertext into dst,
// returning true iff the authentication was successful. If authentication
// fails, the destination buffer will be zeroed.
//
// dst and ciphertext MUST alias exactly or not at all.
open_gcm :: proc(ctx: ^Context_GCM, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
assert(ctx._is_initialized)
gcm_validate_common_slice_sizes(tag, nonce, aad, ciphertext)
if len(dst) != len(ciphertext) {
panic("crypto/aes: invalid destination plaintext size")
}
if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw {
return gcm_open_hw(&impl, dst, nonce, aad, ciphertext, tag)
}
h: [_aes.GHASH_KEY_SIZE]byte
j0: [_aes.GHASH_BLOCK_SIZE]byte
s: [_aes.GHASH_TAG_SIZE]byte
init_ghash_ct64(ctx, &h, &j0, nonce)
ct64.ghash(s[:], h[:], aad)
gctr_ct64(ctx, dst, &s, ciphertext, &h, nonce, false)
final_ghash_ct64(&s, &h, &j0, len(aad), len(ciphertext))
ok := crypto.compare_constant_time(s[:], tag) == 1
if !ok {
mem.zero_explicit(raw_data(dst), len(dst))
}
mem.zero_explicit(&h, len(h))
mem.zero_explicit(&j0, len(j0))
mem.zero_explicit(&s, len(s))
return ok
}
// reset_gcm sanitizes the Context_GCM. The Context_GCM must be
// re-initialized to be used again.
reset_gcm :: proc "contextless" (ctx: ^Context_GCM) {
reset_impl(&ctx._impl)
ctx._is_initialized = false
}
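// Usage sketch (not part of this commit; the key, nonce, aad, and
// message are illustrative assumptions only): an authenticated round
// trip. Never reuse a (key, nonce) pair with GCM in actual use.
@(private = "file")
_example_gcm_round_trip :: proc() {
	key: [KEY_SIZE_256]byte
	nonce: [GCM_NONCE_SIZE]byte

	ctx: Context_GCM
	init_gcm(&ctx, key[:])
	defer reset_gcm(&ctx)

	aad := transmute([]byte)string("additional data")
	plaintext := transmute([]byte)string("attack at dawn")
	ciphertext := make([]byte, len(plaintext))
	defer delete(ciphertext)
	tag: [GCM_TAG_SIZE]byte
	seal_gcm(&ctx, ciphertext, tag[:], nonce[:], aad, plaintext)

	recovered := make([]byte, len(ciphertext))
	defer delete(recovered)
	ok := open_gcm(&ctx, recovered, nonce[:], aad, ciphertext, tag[:])
	assert(ok) // On failure, recovered would have been zeroed.
}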
@(private)
gcm_validate_common_slice_sizes :: proc(tag, nonce, aad, text: []byte) {
if len(tag) != GCM_TAG_SIZE {
panic("crypto/aes: invalid GCM tag size")
}
// The specification supports nonces in the range [1, 2^64) bits
// however per NIST SP 800-38D 5.2.1.1:
//
// > For IVs, it is recommended that implementations restrict support
// > to the length of 96 bits, to promote interoperability, efficiency,
// > and simplicity of design.
if len(nonce) != GCM_NONCE_SIZE {
panic("crypto/aes: invalid GCM nonce size")
}
if aad_len := u64(len(aad)); aad_len > GCM_A_MAX {
panic("crypto/aes: oversized GCM aad")
}
if text_len := u64(len(text)); text_len > GCM_P_MAX {
panic("crypto/aes: oversized GCM src data")
}
}
@(private = "file")
init_ghash_ct64 :: proc(
ctx: ^Context_GCM,
h: ^[_aes.GHASH_KEY_SIZE]byte,
j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
nonce: []byte,
) {
impl := &ctx._impl.(ct64.Context)
// 1. Let H = CIPH(k, 0^128)
ct64.encrypt_block(impl, h[:], h[:])
// ECB encrypt j0, so that we can just XOR with the tag. In theory
// this could be processed along with the final GCTR block, to
// potentially save a call to AES-ECB, but... just use AES-NI.
copy(j0[:], nonce)
j0[_aes.GHASH_BLOCK_SIZE - 1] = 1
ct64.encrypt_block(impl, j0[:], j0[:])
}
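// In the notation of NIST SP 800-38D, the tag produced below is
//
//	T = GHASH_H(A || pad || C || pad || [len(A)]64 || [len(C)]64) ⊕ E(K, J_0)
//
// where final_ghash_ct64 folds in the 128-bit length block and XORs
// in the pre-encrypted J_0 computed by init_ghash_ct64. As only full
// 16-byte tags are produced, the spec's MSB_t truncation is a no-op.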
@(private = "file")
final_ghash_ct64 :: proc(
s: ^[_aes.GHASH_BLOCK_SIZE]byte,
h: ^[_aes.GHASH_KEY_SIZE]byte,
j0: ^[_aes.GHASH_BLOCK_SIZE]byte,
a_len: int,
t_len: int,
) {
blk: [_aes.GHASH_BLOCK_SIZE]byte
endian.unchecked_put_u64be(blk[0:], u64(a_len) * 8)
endian.unchecked_put_u64be(blk[8:], u64(t_len) * 8)
ct64.ghash(s[:], h[:], blk[:])
for i in 0 ..< len(s) {
s[i] ~= j0[i]
}
}
@(private = "file")
gctr_ct64 :: proc(
ctx: ^Context_GCM,
dst: []byte,
s: ^[_aes.GHASH_BLOCK_SIZE]byte,
src: []byte,
h: ^[_aes.GHASH_KEY_SIZE]byte,
nonce: []byte,
is_seal: bool,
) {
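	// ct64_inc_ctr32 writes the current 32-bit big-endian block counter
	// into the last four bytes of dst, then returns ctr + 1; the leading
	// 96 bits of dst (the IV) are left untouched.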
ct64_inc_ctr32 := #force_inline proc "contextless" (dst: []byte, ctr: u32) -> u32 {
endian.unchecked_put_u32be(dst[12:], ctr)
return ctr + 1
}
// 2. Define a block J_0 as follows:
// if len(IV) = 96, then let J0 = IV || 0^31 || 1
//
// Note: We only support 96 bit IVs.
tmp, tmp2: [ct64.STRIDE][BLOCK_SIZE]byte = ---, ---
ctrs, blks: [ct64.STRIDE][]byte = ---, ---
ctr: u32 = 2
for i in 0 ..< ct64.STRIDE {
// Setup scratch space for the keystream.
blks[i] = tmp2[i][:]
// Pre-copy the IV to all the counter blocks.
ctrs[i] = tmp[i][:]
copy(ctrs[i], nonce)
}
// We stitch the GCTR and GHASH operations together, so that only
// one pass over the ciphertext is required.
impl := &ctx._impl.(ct64.Context)
src, dst := src, dst
nr_blocks := len(src) / BLOCK_SIZE
for nr_blocks > 0 {
n := min(ct64.STRIDE, nr_blocks)
l := n * BLOCK_SIZE
if !is_seal {
ct64.ghash(s[:], h[:], src[:l])
}
// The keystream is written to a separate buffer, as we will
// reuse the first 96 bits of each counter.
for i in 0 ..< n {
ctr = ct64_inc_ctr32(ctrs[i], ctr)
}
ct64.encrypt_blocks(impl, blks[:n], ctrs[:n])
xor_blocks(dst, src, blks[:n])
if is_seal {
ct64.ghash(s[:], h[:], dst[:l])
}
src = src[l:]
dst = dst[l:]
nr_blocks -= n
}
if l := len(src); l > 0 {
if !is_seal {
ct64.ghash(s[:], h[:], src[:l])
}
ct64_inc_ctr32(ctrs[0], ctr)
ct64.encrypt_block(impl, ctrs[0], ctrs[0])
for i in 0 ..< l {
dst[i] = src[i] ~ ctrs[0][i]
}
if is_seal {
ct64.ghash(s[:], h[:], dst[:l])
}
}
mem.zero_explicit(&tmp, size_of(tmp))
mem.zero_explicit(&tmp2, size_of(tmp2))
}


@@ -0,0 +1,41 @@
package aes
import "core:crypto/_aes/ct64"
import "core:mem"
import "core:reflect"
@(private)
Context_Impl :: union {
ct64.Context,
Context_Impl_Hardware,
}
// Implementation is an AES implementation. Most callers will not need
// to use this, as the package will automatically select the most performant
// implementation available (see `is_hardware_accelerated()`).
Implementation :: enum {
Portable,
Hardware,
}
@(private)
init_impl :: proc(ctx: ^Context_Impl, key: []byte, impl: Implementation) {
impl := impl
if !is_hardware_accelerated() {
impl = .Portable
}
switch impl {
case .Portable:
reflect.set_union_variant_typeid(ctx^, typeid_of(ct64.Context))
ct64.init(&ctx.(ct64.Context), key)
case .Hardware:
reflect.set_union_variant_typeid(ctx^, typeid_of(Context_Impl_Hardware))
init_impl_hw(&ctx.(Context_Impl_Hardware), key)
}
}
@(private)
reset_impl :: proc "contextless" (ctx: ^Context_Impl) {
mem.zero_explicit(ctx, size_of(Context_Impl))
}
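// Usage sketch (not part of this commit): callers may force the
// portable implementation, e.g. for testing. Requesting .Hardware
// when unsupported silently falls back to .Portable in init_impl.
@(private = "file")
_example_select_impl :: proc() {
	key: [KEY_SIZE_128]byte

	ctx: Context_ECB
	init_ecb(&ctx, key[:], .Portable)
	defer reset_ecb(&ctx)
}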


@@ -0,0 +1,43 @@
package aes
@(private = "file")
ERR_HW_NOT_SUPPORTED :: "crypto/aes: hardware implementation unsupported"
// is_hardware_accelerated returns true iff hardware accelerated AES
// is supported.
is_hardware_accelerated :: proc "contextless" () -> bool {
return false
}
@(private)
Context_Impl_Hardware :: struct {}
@(private)
init_impl_hw :: proc(ctx: ^Context_Impl_Hardware, key: []byte) {
panic(ERR_HW_NOT_SUPPORTED)
}
@(private)
encrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
panic(ERR_HW_NOT_SUPPORTED)
}
@(private)
decrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) {
panic(ERR_HW_NOT_SUPPORTED)
}
@(private)
ctr_blocks_hw :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) {
panic(ERR_HW_NOT_SUPPORTED)
}
@(private)
gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, nonce, aad, plaintext: []byte) {
panic(ERR_HW_NOT_SUPPORTED)
}
@(private)
gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, nonce, aad, ciphertext, tag: []byte) -> bool {
panic(ERR_HW_NOT_SUPPORTED)
}