diff --git a/core/crypto/_aes/aes.odin b/core/crypto/_aes/aes.odin new file mode 100644 index 000000000..4f52485d2 --- /dev/null +++ b/core/crypto/_aes/aes.odin @@ -0,0 +1,28 @@ +package _aes + +// KEY_SIZE_128 is the AES-128 key size in bytes. +KEY_SIZE_128 :: 16 +// KEY_SIZE_192 is the AES-192 key size in bytes. +KEY_SIZE_192 :: 24 +// KEY_SIZE_256 is the AES-256 key size in bytes. +KEY_SIZE_256 :: 32 + +// BLOCK_SIZE is the AES block size in bytes. +BLOCK_SIZE :: 16 + +// ROUNDS_128 is the number of rounds for AES-128. +ROUNDS_128 :: 10 +// ROUNDS_192 is the number of rounds for AES-192. +ROUNDS_192 :: 12 +// ROUNDS_256 is the number of rounds for AES-256. +ROUNDS_256 :: 14 + +// GHASH_KEY_SIZE is the GHASH key size in bytes. +GHASH_KEY_SIZE :: 16 +// GHASH_BLOCK_SIZE is the GHASH block size in bytes. +GHASH_BLOCK_SIZE :: 16 +// GHASH_TAG_SIZE is the GHASH tag size in bytes. +GHASH_TAG_SIZE :: 16 + +// RCON is the AES keyschedule round constants. +RCON := [10]byte{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36} diff --git a/core/crypto/_aes/ct64/api.odin b/core/crypto/_aes/ct64/api.odin new file mode 100644 index 000000000..ae624971c --- /dev/null +++ b/core/crypto/_aes/ct64/api.odin @@ -0,0 +1,96 @@ +package aes_ct64 + +import "base:intrinsics" +import "core:mem" + +STRIDE :: 4 + +// Context is a keyed AES (ECB) instance. +Context :: struct { + _sk_exp: [120]u64, + _num_rounds: int, + _is_initialized: bool, +} + +// init initializes a context for AES with the provided key. +init :: proc(ctx: ^Context, key: []byte) { + skey: [30]u64 = --- + + ctx._num_rounds = keysched(skey[:], key) + skey_expand(ctx._sk_exp[:], skey[:], ctx._num_rounds) + ctx._is_initialized = true +} + +// encrypt_block sets `dst` to `AES-ECB-Encrypt(src)`. +encrypt_block :: proc(ctx: ^Context, dst, src: []byte) { + assert(ctx._is_initialized) + + q: [8]u64 + load_blockx1(&q, src) + _encrypt(&q, ctx._sk_exp[:], ctx._num_rounds) + store_blockx1(dst, &q) +} + +// encrypt_block sets `dst` to `AES-ECB-Decrypt(src)`. +decrypt_block :: proc(ctx: ^Context, dst, src: []byte) { + assert(ctx._is_initialized) + + q: [8]u64 + load_blockx1(&q, src) + _decrypt(&q, ctx._sk_exp[:], ctx._num_rounds) + store_blockx1(dst, &q) +} + +// encrypt_blocks sets `dst` to `AES-ECB-Encrypt(src[0], .. src[n])`. +encrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) { + assert(ctx._is_initialized) + + q: [8]u64 = --- + src, dst := src, dst + + n := len(src) + for n > 4 { + load_blocks(&q, src[0:4]) + _encrypt(&q, ctx._sk_exp[:], ctx._num_rounds) + store_blocks(dst[0:4], &q) + + src = src[4:] + dst = dst[4:] + n -= 4 + } + if n > 0 { + load_blocks(&q, src) + _encrypt(&q, ctx._sk_exp[:], ctx._num_rounds) + store_blocks(dst, &q) + } +} + +// decrypt_blocks sets dst to `AES-ECB-Decrypt(src[0], .. src[n])`. +decrypt_blocks :: proc(ctx: ^Context, dst, src: [][]byte) { + assert(ctx._is_initialized) + + q: [8]u64 = --- + src, dst := src, dst + + n := len(src) + for n > 4 { + load_blocks(&q, src[0:4]) + _decrypt(&q, ctx._sk_exp[:], ctx._num_rounds) + store_blocks(dst[0:4], &q) + + src = src[4:] + dst = dst[4:] + n -= 4 + } + if n > 0 { + load_blocks(&q, src) + _decrypt(&q, ctx._sk_exp[:], ctx._num_rounds) + store_blocks(dst, &q) + } +} + +// reset sanitizes the Context. The Context must be re-initialized to +// be used again. 
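Before the `reset` procedure that follows, a minimal illustrative sketch of how this keyed-ECB surface is driven. The `example` package and `encrypt_one_block` name are hypothetical, and normal callers should go through the public `core:crypto/aes` package added later in this diff rather than this internal building block:

package example

import ct64 "core:crypto/_aes/ct64"

// Illustrative only: encrypt a single 16-byte block with an all-zero
// AES-128 key. init key-schedules into the Context, and encrypt_block
// traps/asserts on bad sizes or an uninitialized Context.
encrypt_one_block :: proc() {
	key: [16]byte // AES-128 key (all zeros here, purely illustrative)
	src: [16]byte // one plaintext block
	dst: [16]byte // one ciphertext block

	ctx: ct64.Context
	ct64.init(&ctx, key[:])
	ct64.encrypt_block(&ctx, dst[:], src[:])
	ct64.reset(&ctx) // sanitize the expanded key schedule when done
}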
+reset :: proc(ctx: ^Context) { + mem.zero_explicit(ctx, size_of(ctx)) +} diff --git a/core/crypto/_aes/ct64/ct64.odin b/core/crypto/_aes/ct64/ct64.odin new file mode 100644 index 000000000..f198cab81 --- /dev/null +++ b/core/crypto/_aes/ct64/ct64.odin @@ -0,0 +1,265 @@ +// Copyright (c) 2016 Thomas Pornin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package aes_ct64 + +import "base:intrinsics" + +// Bitsliced AES for 64-bit general purpose (integer) registers. Each +// invocation will process up to 4 blocks at a time. This implementation +// is derived from the BearSSL ct64 code, and distributed under a 1-clause +// BSD license with permission from the original author. +// +// WARNING: "hic sunt dracones" +// +// This package also deliberately exposes enough internals to be able to +// function as a replacement for `AESENC` and `AESDEC` from AES-NI, to +// allow the implementation of non-AES primitives that use the AES round +// function such as AEGIS and Deoxys-II. This should ONLY be done when +// implementing something other than AES itself. + +sub_bytes :: proc "contextless" (q: ^[8]u64) { + // This S-box implementation is a straightforward translation of + // the circuit described by Boyar and Peralta in "A new + // combinational logic minimization technique with applications + // to cryptology" (https://eprint.iacr.org/2009/191.pdf). + // + // Note that variables x* (input) and s* (output) are numbered + // in "reverse" order (x0 is the high bit, x7 is the low bit). + + x0 := q[7] + x1 := q[6] + x2 := q[5] + x3 := q[4] + x4 := q[3] + x5 := q[2] + x6 := q[1] + x7 := q[0] + + // Top linear transformation. + y14 := x3 ~ x5 + y13 := x0 ~ x6 + y9 := x0 ~ x3 + y8 := x0 ~ x5 + t0 := x1 ~ x2 + y1 := t0 ~ x7 + y4 := y1 ~ x3 + y12 := y13 ~ y14 + y2 := y1 ~ x0 + y5 := y1 ~ x6 + y3 := y5 ~ y8 + t1 := x4 ~ y12 + y15 := t1 ~ x5 + y20 := t1 ~ x1 + y6 := y15 ~ x7 + y10 := y15 ~ t0 + y11 := y20 ~ y9 + y7 := x7 ~ y11 + y17 := y10 ~ y11 + y19 := y10 ~ y8 + y16 := t0 ~ y11 + y21 := y13 ~ y16 + y18 := x0 ~ y16 + + // Non-linear section. 
+ t2 := y12 & y15 + t3 := y3 & y6 + t4 := t3 ~ t2 + t5 := y4 & x7 + t6 := t5 ~ t2 + t7 := y13 & y16 + t8 := y5 & y1 + t9 := t8 ~ t7 + t10 := y2 & y7 + t11 := t10 ~ t7 + t12 := y9 & y11 + t13 := y14 & y17 + t14 := t13 ~ t12 + t15 := y8 & y10 + t16 := t15 ~ t12 + t17 := t4 ~ t14 + t18 := t6 ~ t16 + t19 := t9 ~ t14 + t20 := t11 ~ t16 + t21 := t17 ~ y20 + t22 := t18 ~ y19 + t23 := t19 ~ y21 + t24 := t20 ~ y18 + + t25 := t21 ~ t22 + t26 := t21 & t23 + t27 := t24 ~ t26 + t28 := t25 & t27 + t29 := t28 ~ t22 + t30 := t23 ~ t24 + t31 := t22 ~ t26 + t32 := t31 & t30 + t33 := t32 ~ t24 + t34 := t23 ~ t33 + t35 := t27 ~ t33 + t36 := t24 & t35 + t37 := t36 ~ t34 + t38 := t27 ~ t36 + t39 := t29 & t38 + t40 := t25 ~ t39 + + t41 := t40 ~ t37 + t42 := t29 ~ t33 + t43 := t29 ~ t40 + t44 := t33 ~ t37 + t45 := t42 ~ t41 + z0 := t44 & y15 + z1 := t37 & y6 + z2 := t33 & x7 + z3 := t43 & y16 + z4 := t40 & y1 + z5 := t29 & y7 + z6 := t42 & y11 + z7 := t45 & y17 + z8 := t41 & y10 + z9 := t44 & y12 + z10 := t37 & y3 + z11 := t33 & y4 + z12 := t43 & y13 + z13 := t40 & y5 + z14 := t29 & y2 + z15 := t42 & y9 + z16 := t45 & y14 + z17 := t41 & y8 + + // Bottom linear transformation. + t46 := z15 ~ z16 + t47 := z10 ~ z11 + t48 := z5 ~ z13 + t49 := z9 ~ z10 + t50 := z2 ~ z12 + t51 := z2 ~ z5 + t52 := z7 ~ z8 + t53 := z0 ~ z3 + t54 := z6 ~ z7 + t55 := z16 ~ z17 + t56 := z12 ~ t48 + t57 := t50 ~ t53 + t58 := z4 ~ t46 + t59 := z3 ~ t54 + t60 := t46 ~ t57 + t61 := z14 ~ t57 + t62 := t52 ~ t58 + t63 := t49 ~ t58 + t64 := z4 ~ t59 + t65 := t61 ~ t62 + t66 := z1 ~ t63 + s0 := t59 ~ t63 + s6 := t56 ~ ~t62 + s7 := t48 ~ ~t60 + t67 := t64 ~ t65 + s3 := t53 ~ t66 + s4 := t51 ~ t66 + s5 := t47 ~ t65 + s1 := t64 ~ ~s3 + s2 := t55 ~ ~t67 + + q[7] = s0 + q[6] = s1 + q[5] = s2 + q[4] = s3 + q[3] = s4 + q[2] = s5 + q[1] = s6 + q[0] = s7 +} + +orthogonalize :: proc "contextless" (q: ^[8]u64) { + CL2 :: 0x5555555555555555 + CH2 :: 0xAAAAAAAAAAAAAAAA + q[0], q[1] = (q[0] & CL2) | ((q[1] & CL2) << 1), ((q[0] & CH2) >> 1) | (q[1] & CH2) + q[2], q[3] = (q[2] & CL2) | ((q[3] & CL2) << 1), ((q[2] & CH2) >> 1) | (q[3] & CH2) + q[4], q[5] = (q[4] & CL2) | ((q[5] & CL2) << 1), ((q[4] & CH2) >> 1) | (q[5] & CH2) + q[6], q[7] = (q[6] & CL2) | ((q[7] & CL2) << 1), ((q[6] & CH2) >> 1) | (q[7] & CH2) + + CL4 :: 0x3333333333333333 + CH4 :: 0xCCCCCCCCCCCCCCCC + q[0], q[2] = (q[0] & CL4) | ((q[2] & CL4) << 2), ((q[0] & CH4) >> 2) | (q[2] & CH4) + q[1], q[3] = (q[1] & CL4) | ((q[3] & CL4) << 2), ((q[1] & CH4) >> 2) | (q[3] & CH4) + q[4], q[6] = (q[4] & CL4) | ((q[6] & CL4) << 2), ((q[4] & CH4) >> 2) | (q[6] & CH4) + q[5], q[7] = (q[5] & CL4) | ((q[7] & CL4) << 2), ((q[5] & CH4) >> 2) | (q[7] & CH4) + + CL8 :: 0x0F0F0F0F0F0F0F0F + CH8 :: 0xF0F0F0F0F0F0F0F0 + q[0], q[4] = (q[0] & CL8) | ((q[4] & CL8) << 4), ((q[0] & CH8) >> 4) | (q[4] & CH8) + q[1], q[5] = (q[1] & CL8) | ((q[5] & CL8) << 4), ((q[1] & CH8) >> 4) | (q[5] & CH8) + q[2], q[6] = (q[2] & CL8) | ((q[6] & CL8) << 4), ((q[2] & CH8) >> 4) | (q[6] & CH8) + q[3], q[7] = (q[3] & CL8) | ((q[7] & CL8) << 4), ((q[3] & CH8) >> 4) | (q[7] & CH8) +} + +@(require_results) +interleave_in :: proc "contextless" (w: []u32) -> (q0, q1: u64) #no_bounds_check { + if len(w) < 4 { + intrinsics.trap() + } + x0, x1, x2, x3 := u64(w[0]), u64(w[1]), u64(w[2]), u64(w[3]) + x0 |= (x0 << 16) + x1 |= (x1 << 16) + x2 |= (x2 << 16) + x3 |= (x3 << 16) + x0 &= 0x0000FFFF0000FFFF + x1 &= 0x0000FFFF0000FFFF + x2 &= 0x0000FFFF0000FFFF + x3 &= 0x0000FFFF0000FFFF + x0 |= (x0 << 8) + x1 |= (x1 << 8) + x2 |= (x2 << 8) + x3 |= (x3 << 8) + x0 
&= 0x00FF00FF00FF00FF + x1 &= 0x00FF00FF00FF00FF + x2 &= 0x00FF00FF00FF00FF + x3 &= 0x00FF00FF00FF00FF + q0 = x0 | (x2 << 8) + q1 = x1 | (x3 << 8) + return +} + +@(require_results) +interleave_out :: proc "contextless" (q0, q1: u64) -> (w0, w1, w2, w3: u32) { + x0 := q0 & 0x00FF00FF00FF00FF + x1 := q1 & 0x00FF00FF00FF00FF + x2 := (q0 >> 8) & 0x00FF00FF00FF00FF + x3 := (q1 >> 8) & 0x00FF00FF00FF00FF + x0 |= (x0 >> 8) + x1 |= (x1 >> 8) + x2 |= (x2 >> 8) + x3 |= (x3 >> 8) + x0 &= 0x0000FFFF0000FFFF + x1 &= 0x0000FFFF0000FFFF + x2 &= 0x0000FFFF0000FFFF + x3 &= 0x0000FFFF0000FFFF + w0 = u32(x0) | u32(x0 >> 16) + w1 = u32(x1) | u32(x1 >> 16) + w2 = u32(x2) | u32(x2 >> 16) + w3 = u32(x3) | u32(x3 >> 16) + return +} + +@(private) +rotr32 :: #force_inline proc "contextless" (x: u64) -> u64 { + return (x << 32) | (x >> 32) +} diff --git a/core/crypto/_aes/ct64/ct64_dec.odin b/core/crypto/_aes/ct64/ct64_dec.odin new file mode 100644 index 000000000..408ee6002 --- /dev/null +++ b/core/crypto/_aes/ct64/ct64_dec.odin @@ -0,0 +1,135 @@ +// Copyright (c) 2016 Thomas Pornin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package aes_ct64 + +import "base:intrinsics" + +inv_sub_bytes :: proc "contextless" (q: ^[8]u64) { + // AES S-box is: + // S(x) = A(I(x)) ^ 0x63 + // where I() is inversion in GF(256), and A() is a linear + // transform (0 is formally defined to be its own inverse). + // Since inversion is an involution, the inverse S-box can be + // computed from the S-box as: + // iS(x) = B(S(B(x ^ 0x63)) ^ 0x63) + // where B() is the inverse of A(). Indeed, for any y in GF(256): + // iS(S(y)) = B(A(I(B(A(I(y)) ^ 0x63 ^ 0x63))) ^ 0x63 ^ 0x63) = y + // + // Note: we reuse the implementation of the forward S-box, + // instead of duplicating it here, so that total code size is + // lower. By merging the B() transforms into the S-box circuit + // we could make faster CBC decryption, but CBC decryption is + // already quite faster than CBC encryption because we can + // process four blocks in parallel. 
+ + q0 := ~q[0] + q1 := ~q[1] + q2 := q[2] + q3 := q[3] + q4 := q[4] + q5 := ~q[5] + q6 := ~q[6] + q7 := q[7] + q[7] = q1 ~ q4 ~ q6 + q[6] = q0 ~ q3 ~ q5 + q[5] = q7 ~ q2 ~ q4 + q[4] = q6 ~ q1 ~ q3 + q[3] = q5 ~ q0 ~ q2 + q[2] = q4 ~ q7 ~ q1 + q[1] = q3 ~ q6 ~ q0 + q[0] = q2 ~ q5 ~ q7 + + sub_bytes(q) + + q0 = ~q[0] + q1 = ~q[1] + q2 = q[2] + q3 = q[3] + q4 = q[4] + q5 = ~q[5] + q6 = ~q[6] + q7 = q[7] + q[7] = q1 ~ q4 ~ q6 + q[6] = q0 ~ q3 ~ q5 + q[5] = q7 ~ q2 ~ q4 + q[4] = q6 ~ q1 ~ q3 + q[3] = q5 ~ q0 ~ q2 + q[2] = q4 ~ q7 ~ q1 + q[1] = q3 ~ q6 ~ q0 + q[0] = q2 ~ q5 ~ q7 +} + +inv_shift_rows :: proc "contextless" (q: ^[8]u64) { + for x, i in q { + q[i] = + (x & 0x000000000000FFFF) | + ((x & 0x000000000FFF0000) << 4) | + ((x & 0x00000000F0000000) >> 12) | + ((x & 0x000000FF00000000) << 8) | + ((x & 0x0000FF0000000000) >> 8) | + ((x & 0x000F000000000000) << 12) | + ((x & 0xFFF0000000000000) >> 4) + } +} + +inv_mix_columns :: proc "contextless" (q: ^[8]u64) { + q0 := q[0] + q1 := q[1] + q2 := q[2] + q3 := q[3] + q4 := q[4] + q5 := q[5] + q6 := q[6] + q7 := q[7] + r0 := (q0 >> 16) | (q0 << 48) + r1 := (q1 >> 16) | (q1 << 48) + r2 := (q2 >> 16) | (q2 << 48) + r3 := (q3 >> 16) | (q3 << 48) + r4 := (q4 >> 16) | (q4 << 48) + r5 := (q5 >> 16) | (q5 << 48) + r6 := (q6 >> 16) | (q6 << 48) + r7 := (q7 >> 16) | (q7 << 48) + + q[0] = q5 ~ q6 ~ q7 ~ r0 ~ r5 ~ r7 ~ rotr32(q0 ~ q5 ~ q6 ~ r0 ~ r5) + q[1] = q0 ~ q5 ~ r0 ~ r1 ~ r5 ~ r6 ~ r7 ~ rotr32(q1 ~ q5 ~ q7 ~ r1 ~ r5 ~ r6) + q[2] = q0 ~ q1 ~ q6 ~ r1 ~ r2 ~ r6 ~ r7 ~ rotr32(q0 ~ q2 ~ q6 ~ r2 ~ r6 ~ r7) + q[3] = q0 ~ q1 ~ q2 ~ q5 ~ q6 ~ r0 ~ r2 ~ r3 ~ r5 ~ rotr32(q0 ~ q1 ~ q3 ~ q5 ~ q6 ~ q7 ~ r0 ~ r3 ~ r5 ~ r7) + q[4] = q1 ~ q2 ~ q3 ~ q5 ~ r1 ~ r3 ~ r4 ~ r5 ~ r6 ~ r7 ~ rotr32(q1 ~ q2 ~ q4 ~ q5 ~ q7 ~ r1 ~ r4 ~ r5 ~ r6) + q[5] = q2 ~ q3 ~ q4 ~ q6 ~ r2 ~ r4 ~ r5 ~ r6 ~ r7 ~ rotr32(q2 ~ q3 ~ q5 ~ q6 ~ r2 ~ r5 ~ r6 ~ r7) + q[6] = q3 ~ q4 ~ q5 ~ q7 ~ r3 ~ r5 ~ r6 ~ r7 ~ rotr32(q3 ~ q4 ~ q6 ~ q7 ~ r3 ~ r6 ~ r7) + q[7] = q4 ~ q5 ~ q6 ~ r4 ~ r6 ~ r7 ~ rotr32(q4 ~ q5 ~ q7 ~ r4 ~ r7) +} + +@(private) +_decrypt :: proc "contextless" (q: ^[8]u64, skey: []u64, num_rounds: int) { + add_round_key(q, skey[num_rounds << 3:]) + for u := num_rounds - 1; u > 0; u -= 1 { + inv_shift_rows(q) + inv_sub_bytes(q) + add_round_key(q, skey[u << 3:]) + inv_mix_columns(q) + } + inv_shift_rows(q) + inv_sub_bytes(q) + add_round_key(q, skey) +} diff --git a/core/crypto/_aes/ct64/ct64_enc.odin b/core/crypto/_aes/ct64/ct64_enc.odin new file mode 100644 index 000000000..36d4aebc8 --- /dev/null +++ b/core/crypto/_aes/ct64/ct64_enc.odin @@ -0,0 +1,95 @@ +// Copyright (c) 2016 Thomas Pornin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package aes_ct64 + +import "base:intrinsics" + +add_round_key :: proc "contextless" (q: ^[8]u64, sk: []u64) #no_bounds_check { + if len(sk) < 8 { + intrinsics.trap() + } + + q[0] ~= sk[0] + q[1] ~= sk[1] + q[2] ~= sk[2] + q[3] ~= sk[3] + q[4] ~= sk[4] + q[5] ~= sk[5] + q[6] ~= sk[6] + q[7] ~= sk[7] +} + +shift_rows :: proc "contextless" (q: ^[8]u64) { + for x, i in q { + q[i] = + (x & 0x000000000000FFFF) | + ((x & 0x00000000FFF00000) >> 4) | + ((x & 0x00000000000F0000) << 12) | + ((x & 0x0000FF0000000000) >> 8) | + ((x & 0x000000FF00000000) << 8) | + ((x & 0xF000000000000000) >> 12) | + ((x & 0x0FFF000000000000) << 4) + } +} + +mix_columns :: proc "contextless" (q: ^[8]u64) { + q0 := q[0] + q1 := q[1] + q2 := q[2] + q3 := q[3] + q4 := q[4] + q5 := q[5] + q6 := q[6] + q7 := q[7] + r0 := (q0 >> 16) | (q0 << 48) + r1 := (q1 >> 16) | (q1 << 48) + r2 := (q2 >> 16) | (q2 << 48) + r3 := (q3 >> 16) | (q3 << 48) + r4 := (q4 >> 16) | (q4 << 48) + r5 := (q5 >> 16) | (q5 << 48) + r6 := (q6 >> 16) | (q6 << 48) + r7 := (q7 >> 16) | (q7 << 48) + + q[0] = q7 ~ r7 ~ r0 ~ rotr32(q0 ~ r0) + q[1] = q0 ~ r0 ~ q7 ~ r7 ~ r1 ~ rotr32(q1 ~ r1) + q[2] = q1 ~ r1 ~ r2 ~ rotr32(q2 ~ r2) + q[3] = q2 ~ r2 ~ q7 ~ r7 ~ r3 ~ rotr32(q3 ~ r3) + q[4] = q3 ~ r3 ~ q7 ~ r7 ~ r4 ~ rotr32(q4 ~ r4) + q[5] = q4 ~ r4 ~ r5 ~ rotr32(q5 ~ r5) + q[6] = q5 ~ r5 ~ r6 ~ rotr32(q6 ~ r6) + q[7] = q6 ~ r6 ~ r7 ~ rotr32(q7 ~ r7) +} + +@(private) +_encrypt :: proc "contextless" (q: ^[8]u64, skey: []u64, num_rounds: int) { + add_round_key(q, skey) + for u in 1 ..< num_rounds { + sub_bytes(q) + shift_rows(q) + mix_columns(q) + add_round_key(q, skey[u << 3:]) + } + sub_bytes(q) + shift_rows(q) + add_round_key(q, skey[num_rounds << 3:]) +} diff --git a/core/crypto/_aes/ct64/ct64_keysched.odin b/core/crypto/_aes/ct64/ct64_keysched.odin new file mode 100644 index 000000000..060a2c03e --- /dev/null +++ b/core/crypto/_aes/ct64/ct64_keysched.odin @@ -0,0 +1,179 @@ +// Copyright (c) 2016 Thomas Pornin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package aes_ct64 + +import "base:intrinsics" +import "core:crypto/_aes" +import "core:encoding/endian" +import "core:mem" + +@(private, require_results) +sub_word :: proc "contextless" (x: u32) -> u32 { + q := [8]u64{u64(x), 0, 0, 0, 0, 0, 0, 0} + + orthogonalize(&q) + sub_bytes(&q) + orthogonalize(&q) + ret := u32(q[0]) + + mem.zero_explicit(&q[0], size_of(u64)) + + return ret +} + +@(private, require_results) +keysched :: proc(comp_skey: []u64, key: []byte) -> int { + num_rounds, key_len := 0, len(key) + switch key_len { + case _aes.KEY_SIZE_128: + num_rounds = _aes.ROUNDS_128 + case _aes.KEY_SIZE_192: + num_rounds = _aes.ROUNDS_192 + case _aes.KEY_SIZE_256: + num_rounds = _aes.ROUNDS_256 + case: + panic("crypto/aes: invalid AES key size") + } + + skey: [60]u32 = --- + nk, nkf := key_len >> 2, (num_rounds + 1) << 2 + for i in 0 ..< nk { + skey[i] = endian.unchecked_get_u32le(key[i << 2:]) + } + tmp := skey[(key_len >> 2) - 1] + for i, j, k := nk, 0, 0; i < nkf; i += 1 { + if j == 0 { + tmp = (tmp << 24) | (tmp >> 8) + tmp = sub_word(tmp) ~ u32(_aes.RCON[k]) + } else if nk > 6 && j == 4 { + tmp = sub_word(tmp) + } + tmp ~= skey[i - nk] + skey[i] = tmp + if j += 1; j == nk { + j = 0 + k += 1 + } + } + + q: [8]u64 = --- + for i, j := 0, 0; i < nkf; i, j = i + 4, j + 2 { + q[0], q[4] = interleave_in(skey[i:]) + q[1] = q[0] + q[2] = q[0] + q[3] = q[0] + q[5] = q[4] + q[6] = q[4] + q[7] = q[4] + orthogonalize(&q) + comp_skey[j + 0] = + (q[0] & 0x1111111111111111) | + (q[1] & 0x2222222222222222) | + (q[2] & 0x4444444444444444) | + (q[3] & 0x8888888888888888) + comp_skey[j + 1] = + (q[4] & 0x1111111111111111) | + (q[5] & 0x2222222222222222) | + (q[6] & 0x4444444444444444) | + (q[7] & 0x8888888888888888) + } + + mem.zero_explicit(&skey, size_of(skey)) + mem.zero_explicit(&q, size_of(q)) + + return num_rounds +} + +@(private) +skey_expand :: proc "contextless" (skey, comp_skey: []u64, num_rounds: int) { + n := (num_rounds + 1) << 1 + for u, v := 0, 0; u < n; u, v = u + 1, v + 4 { + x0 := comp_skey[u] + x1, x2, x3 := x0, x0, x0 + x0 &= 0x1111111111111111 + x1 &= 0x2222222222222222 + x2 &= 0x4444444444444444 + x3 &= 0x8888888888888888 + x1 >>= 1 + x2 >>= 2 + x3 >>= 3 + skey[v + 0] = (x0 << 4) - x0 + skey[v + 1] = (x1 << 4) - x1 + skey[v + 2] = (x2 << 4) - x2 + skey[v + 3] = (x3 << 4) - x3 + } +} + +orthogonalize_roundkey :: proc "contextless" (qq: []u64, key: []byte) { + if len(qq) < 8 || len(key) != 16 { + intrinsics.trap() + } + + skey: [4]u32 = --- + skey[0] = endian.unchecked_get_u32le(key[0:]) + skey[1] = endian.unchecked_get_u32le(key[4:]) + skey[2] = endian.unchecked_get_u32le(key[8:]) + skey[3] = endian.unchecked_get_u32le(key[12:]) + + q: [8]u64 = --- + q[0], q[4] = interleave_in(skey[:]) + q[1] = q[0] + q[2] = q[0] + q[3] = q[0] + q[5] = q[4] + q[6] = q[4] + q[7] = q[4] + orthogonalize(&q) + + comp_skey: [2]u64 = --- + comp_skey[0] = + (q[0] & 0x1111111111111111) | + (q[1] & 0x2222222222222222) | + (q[2] & 0x4444444444444444) | + (q[3] & 0x8888888888888888) + comp_skey[1] = + 
(q[4] & 0x1111111111111111) | + (q[5] & 0x2222222222222222) | + (q[6] & 0x4444444444444444) | + (q[7] & 0x8888888888888888) + + for x, u in comp_skey { + x0 := x + x1, x2, x3 := x0, x0, x0 + x0 &= 0x1111111111111111 + x1 &= 0x2222222222222222 + x2 &= 0x4444444444444444 + x3 &= 0x8888888888888888 + x1 >>= 1 + x2 >>= 2 + x3 >>= 3 + qq[u * 4 + 0] = (x0 << 4) - x0 + qq[u * 4 + 1] = (x1 << 4) - x1 + qq[u * 4 + 2] = (x2 << 4) - x2 + qq[u * 4 + 3] = (x3 << 4) - x3 + } + + mem.zero_explicit(&skey, size_of(skey)) + mem.zero_explicit(&q, size_of(q)) + mem.zero_explicit(&comp_skey, size_of(comp_skey)) +} diff --git a/core/crypto/_aes/ct64/ghash.odin b/core/crypto/_aes/ct64/ghash.odin new file mode 100644 index 000000000..21ac2ca97 --- /dev/null +++ b/core/crypto/_aes/ct64/ghash.odin @@ -0,0 +1,136 @@ +// Copyright (c) 2016 Thomas Pornin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHORS “AS IS” AND ANY EXPRESS OR +// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY +// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package aes_ct64 + +import "base:intrinsics" +import "core:crypto/_aes" +import "core:encoding/endian" + +@(private = "file") +bmul64 :: proc "contextless" (x, y: u64) -> u64 { + x0 := x & 0x1111111111111111 + x1 := x & 0x2222222222222222 + x2 := x & 0x4444444444444444 + x3 := x & 0x8888888888888888 + y0 := y & 0x1111111111111111 + y1 := y & 0x2222222222222222 + y2 := y & 0x4444444444444444 + y3 := y & 0x8888888888888888 + z0 := (x0 * y0) ~ (x1 * y3) ~ (x2 * y2) ~ (x3 * y1) + z1 := (x0 * y1) ~ (x1 * y0) ~ (x2 * y3) ~ (x3 * y2) + z2 := (x0 * y2) ~ (x1 * y1) ~ (x2 * y0) ~ (x3 * y3) + z3 := (x0 * y3) ~ (x1 * y2) ~ (x2 * y1) ~ (x3 * y0) + z0 &= 0x1111111111111111 + z1 &= 0x2222222222222222 + z2 &= 0x4444444444444444 + z3 &= 0x8888888888888888 + return z0 | z1 | z2 | z3 +} + +@(private = "file") +rev64 :: proc "contextless" (x: u64) -> u64 { + x := x + x = ((x & 0x5555555555555555) << 1) | ((x >> 1) & 0x5555555555555555) + x = ((x & 0x3333333333333333) << 2) | ((x >> 2) & 0x3333333333333333) + x = ((x & 0x0F0F0F0F0F0F0F0F) << 4) | ((x >> 4) & 0x0F0F0F0F0F0F0F0F) + x = ((x & 0x00FF00FF00FF00FF) << 8) | ((x >> 8) & 0x00FF00FF00FF00FF) + x = ((x & 0x0000FFFF0000FFFF) << 16) | ((x >> 16) & 0x0000FFFF0000FFFF) + return (x << 32) | (x >> 32) +} + +// ghash calculates the GHASH of data, with the key `key`, and input `dst` +// and `data`, and stores the resulting digest in `dst`. +// +// Note: `dst` is both an input and an output, to support easy implementation +// of GCM. 
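To make the in/out behaviour of `dst` concrete, here is a hedged sketch of chaining several GHASH passes over one running state, which is how the GCM code later in this diff uses it. The `ghash_chain` helper and `example` package are hypothetical:

package example

import ct64 "core:crypto/_aes/ct64"

// Hypothetical helper: `s` is the running GHASH state (read and written
// back on every call) and `h` is the hash key H = AES-Encrypt(key, 0^128).
// Hashing "aad, then ciphertext" is therefore just two chained calls;
// each call zero-pads its final partial block internally.
ghash_chain :: proc "contextless" (s, h: ^[16]byte, aad, ciphertext: []byte) {
	ct64.ghash(s[:], h[:], aad)        // folds in the aad
	ct64.ghash(s[:], h[:], ciphertext) // folds in the ciphertext, continuing the state
}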
+ghash :: proc "contextless" (dst, key, data: []byte) { + if len(dst) != _aes.GHASH_BLOCK_SIZE || len(key) != _aes.GHASH_BLOCK_SIZE { + intrinsics.trap() + } + + buf := data + l := len(buf) + + y1 := endian.unchecked_get_u64be(dst[0:]) + y0 := endian.unchecked_get_u64be(dst[8:]) + h1 := endian.unchecked_get_u64be(key[0:]) + h0 := endian.unchecked_get_u64be(key[8:]) + h0r := rev64(h0) + h1r := rev64(h1) + h2 := h0 ~ h1 + h2r := h0r ~ h1r + + src: []byte + for l > 0 { + if l >= _aes.GHASH_BLOCK_SIZE { + src = buf + buf = buf[_aes.GHASH_BLOCK_SIZE:] + l -= _aes.GHASH_BLOCK_SIZE + } else { + tmp: [_aes.GHASH_BLOCK_SIZE]byte + copy(tmp[:], buf) + src = tmp[:] + l = 0 + } + y1 ~= endian.unchecked_get_u64be(src) + y0 ~= endian.unchecked_get_u64be(src[8:]) + + y0r := rev64(y0) + y1r := rev64(y1) + y2 := y0 ~ y1 + y2r := y0r ~ y1r + + z0 := bmul64(y0, h0) + z1 := bmul64(y1, h1) + z2 := bmul64(y2, h2) + z0h := bmul64(y0r, h0r) + z1h := bmul64(y1r, h1r) + z2h := bmul64(y2r, h2r) + z2 ~= z0 ~ z1 + z2h ~= z0h ~ z1h + z0h = rev64(z0h) >> 1 + z1h = rev64(z1h) >> 1 + z2h = rev64(z2h) >> 1 + + v0 := z0 + v1 := z0h ~ z2 + v2 := z1 ~ z2h + v3 := z1h + + v3 = (v3 << 1) | (v2 >> 63) + v2 = (v2 << 1) | (v1 >> 63) + v1 = (v1 << 1) | (v0 >> 63) + v0 = (v0 << 1) + + v2 ~= v0 ~ (v0 >> 1) ~ (v0 >> 2) ~ (v0 >> 7) + v1 ~= (v0 << 63) ~ (v0 << 62) ~ (v0 << 57) + v3 ~= v1 ~ (v1 >> 1) ~ (v1 >> 2) ~ (v1 >> 7) + v2 ~= (v1 << 63) ~ (v1 << 62) ~ (v1 << 57) + + y0 = v2 + y1 = v3 + } + + endian.unchecked_put_u64be(dst[0:], y1) + endian.unchecked_put_u64be(dst[8:], y0) +} diff --git a/core/crypto/_aes/ct64/helpers.odin b/core/crypto/_aes/ct64/helpers.odin new file mode 100644 index 000000000..169271f6d --- /dev/null +++ b/core/crypto/_aes/ct64/helpers.odin @@ -0,0 +1,75 @@ +package aes_ct64 + +import "base:intrinsics" +import "core:crypto/_aes" +import "core:encoding/endian" + +load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) { + if len(src) != _aes.BLOCK_SIZE { + intrinsics.trap() + } + + w: [4]u32 = --- + w[0] = endian.unchecked_get_u32le(src[0:]) + w[1] = endian.unchecked_get_u32le(src[4:]) + w[2] = endian.unchecked_get_u32le(src[8:]) + w[3] = endian.unchecked_get_u32le(src[12:]) + q[0], q[4] = interleave_in(w[:]) + orthogonalize(q) +} + +store_blockx1 :: proc "contextless" (dst: []byte, q: ^[8]u64) { + if len(dst) != _aes.BLOCK_SIZE { + intrinsics.trap() + } + + orthogonalize(q) + w0, w1, w2, w3 := interleave_out(q[0], q[4]) + endian.unchecked_put_u32le(dst[0:], w0) + endian.unchecked_put_u32le(dst[4:], w1) + endian.unchecked_put_u32le(dst[8:], w2) + endian.unchecked_put_u32le(dst[12:], w3) +} + +load_blocks :: proc "contextless" (q: ^[8]u64, src: [][]byte) { + if n := len(src); n > STRIDE || n == 0 { + intrinsics.trap() + } + + w: [4]u32 = --- + for s, i in src { + if len(s) != _aes.BLOCK_SIZE { + intrinsics.trap() + } + + w[0] = endian.unchecked_get_u32le(s[0:]) + w[1] = endian.unchecked_get_u32le(s[4:]) + w[2] = endian.unchecked_get_u32le(s[8:]) + w[3] = endian.unchecked_get_u32le(s[12:]) + q[i], q[i + 4] = interleave_in(w[:]) + } + orthogonalize(q) +} + +store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) { + if n := len(dst); n > STRIDE || n == 0 { + intrinsics.trap() + } + + orthogonalize(q) + for d, i in dst { + // Allow storing [0,4] blocks. 
+ if d == nil { + break + } + if len(d) != _aes.BLOCK_SIZE { + intrinsics.trap() + } + + w0, w1, w2, w3 := interleave_out(q[i], q[i + 4]) + endian.unchecked_put_u32le(d[0:], w0) + endian.unchecked_put_u32le(d[4:], w1) + endian.unchecked_put_u32le(d[8:], w2) + endian.unchecked_put_u32le(d[12:], w3) + } +} diff --git a/core/crypto/aes/aes.odin b/core/crypto/aes/aes.odin new file mode 100644 index 000000000..e895c5fe0 --- /dev/null +++ b/core/crypto/aes/aes.odin @@ -0,0 +1,22 @@ +/* +package aes implements the AES block cipher and some common modes. + +See: +- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.197-upd1.pdf +- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf +- https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf +*/ + +package aes + +import "core:crypto/_aes" + +// KEY_SIZE_128 is the AES-128 key size in bytes. +KEY_SIZE_128 :: _aes.KEY_SIZE_128 +// KEY_SIZE_192 is the AES-192 key size in bytes. +KEY_SIZE_192 :: _aes.KEY_SIZE_192 +// KEY_SIZE_256 is the AES-256 key size in bytes. +KEY_SIZE_256 :: _aes.KEY_SIZE_256 + +// BLOCK_SIZE is the AES block size in bytes. +BLOCK_SIZE :: _aes.BLOCK_SIZE diff --git a/core/crypto/aes/aes_ctr.odin b/core/crypto/aes/aes_ctr.odin new file mode 100644 index 000000000..1821a7bdf --- /dev/null +++ b/core/crypto/aes/aes_ctr.odin @@ -0,0 +1,199 @@ +package aes + +import "core:crypto/_aes/ct64" +import "core:encoding/endian" +import "core:math/bits" +import "core:mem" + +// CTR_IV_SIZE is the size of the CTR mode IV in bytes. +CTR_IV_SIZE :: 16 + +// Context_CTR is a keyed AES-CTR instance. +Context_CTR :: struct { + _impl: Context_Impl, + _buffer: [BLOCK_SIZE]byte, + _off: int, + _ctr_hi: u64, + _ctr_lo: u64, + _is_initialized: bool, +} + +// init_ctr initializes a Context_CTR with the provided key and IV. +init_ctr :: proc(ctx: ^Context_CTR, key, iv: []byte, impl := Implementation.Hardware) { + if len(iv) != CTR_IV_SIZE { + panic("crypto/aes: invalid CTR IV size") + } + + init_impl(&ctx._impl, key, impl) + ctx._off = BLOCK_SIZE + ctx._ctr_hi = endian.unchecked_get_u64be(iv[0:]) + ctx._ctr_lo = endian.unchecked_get_u64be(iv[8:]) + ctx._is_initialized = true +} + +// xor_bytes_ctr XORs each byte in src with bytes taken from the AES-CTR +// keystream, and writes the resulting output to dst. dst and src MUST +// alias exactly or not at all. +xor_bytes_ctr :: proc(ctx: ^Context_CTR, dst, src: []byte) { + assert(ctx._is_initialized) + + // TODO: Enforcing that dst and src alias exactly or not at all + // is a good idea, though odd aliasing should be extremely uncommon. + + src, dst := src, dst + if dst_len := len(dst); dst_len < len(src) { + src = src[:dst_len] + } + + for remaining := len(src); remaining > 0; { + // Process multiple blocks at once + if ctx._off == BLOCK_SIZE { + if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 { + direct_bytes := nr_blocks * BLOCK_SIZE + ctr_blocks(ctx, dst, src, nr_blocks) + remaining -= direct_bytes + if remaining == 0 { + return + } + dst = dst[direct_bytes:] + src = src[direct_bytes:] + } + + // If there is a partial block, generate and buffer 1 block + // worth of keystream. + ctr_blocks(ctx, ctx._buffer[:], nil, 1) + ctx._off = 0 + } + + // Process partial blocks from the buffered keystream. 
+ to_xor := min(BLOCK_SIZE - ctx._off, remaining) + buffered_keystream := ctx._buffer[ctx._off:] + for i := 0; i < to_xor; i = i + 1 { + dst[i] = buffered_keystream[i] ~ src[i] + } + ctx._off += to_xor + dst = dst[to_xor:] + src = src[to_xor:] + remaining -= to_xor + } +} + +// keystream_bytes_ctr fills dst with the raw AES-CTR keystream output. +keystream_bytes_ctr :: proc(ctx: ^Context_CTR, dst: []byte) { + assert(ctx._is_initialized) + + dst := dst + for remaining := len(dst); remaining > 0; { + // Process multiple blocks at once + if ctx._off == BLOCK_SIZE { + if nr_blocks := remaining / BLOCK_SIZE; nr_blocks > 0 { + direct_bytes := nr_blocks * BLOCK_SIZE + ctr_blocks(ctx, dst, nil, nr_blocks) + remaining -= direct_bytes + if remaining == 0 { + return + } + dst = dst[direct_bytes:] + } + + // If there is a partial block, generate and buffer 1 block + // worth of keystream. + ctr_blocks(ctx, ctx._buffer[:], nil, 1) + ctx._off = 0 + } + + // Process partial blocks from the buffered keystream. + to_copy := min(BLOCK_SIZE - ctx._off, remaining) + buffered_keystream := ctx._buffer[ctx._off:] + copy(dst[:to_copy], buffered_keystream[:to_copy]) + ctx._off += to_copy + dst = dst[to_copy:] + remaining -= to_copy + } +} + +// reset_ctr sanitizes the Context_CTR. The Context_CTR must be +// re-initialized to be used again. +reset_ctr :: proc "contextless" (ctx: ^Context_CTR) { + reset_impl(&ctx._impl) + ctx._off = 0 + ctx._ctr_hi = 0 + ctx._ctr_lo = 0 + mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer)) + ctx._is_initialized = false +} + +@(private) +ctr_blocks :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) { + // Use the optimized hardware implementation if available. + if _, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw { + ctr_blocks_hw(ctx, dst, src, nr_blocks) + return + } + + // Portable implementation. + ct64_inc_ctr := #force_inline proc "contextless" (dst: []byte, hi, lo: u64) -> (u64, u64) { + endian.unchecked_put_u64be(dst[0:], hi) + endian.unchecked_put_u64be(dst[8:], lo) + + hi, lo := hi, lo + carry: u64 + lo, carry = bits.add_u64(lo, 1, 0) + hi, _ = bits.add_u64(hi, 0, carry) + return hi, lo + } + + impl := &ctx._impl.(ct64.Context) + src, dst := src, dst + nr_blocks := nr_blocks + ctr_hi, ctr_lo := ctx._ctr_hi, ctx._ctr_lo + + tmp: [ct64.STRIDE][BLOCK_SIZE]byte = --- + ctrs: [ct64.STRIDE][]byte = --- + for i in 0 ..< ct64.STRIDE { + ctrs[i] = tmp[i][:] + } + for nr_blocks > 0 { + n := min(ct64.STRIDE, nr_blocks) + blocks := ctrs[:n] + + for i in 0 ..< n { + ctr_hi, ctr_lo = ct64_inc_ctr(blocks[i], ctr_hi, ctr_lo) + } + ct64.encrypt_blocks(impl, blocks, blocks) + + xor_blocks(dst, src, blocks) + + if src != nil { + src = src[n * BLOCK_SIZE:] + } + dst = dst[n * BLOCK_SIZE:] + nr_blocks -= n + } + + // Write back the counter. + ctx._ctr_hi, ctx._ctr_lo = ctr_hi, ctr_lo + + mem.zero_explicit(&tmp, size_of(tmp)) +} + +@(private) +xor_blocks :: #force_inline proc "contextless" (dst, src: []byte, blocks: [][]byte) { + // Note: This would be faster `core:simd` was used, however if + // performance of this implementation matters to where that + // optimization would be worth it, use chacha20poly1305, or a + // CPU that isn't e-waste. 
+ if src != nil { + #no_bounds_check { + for i in 0 ..< len(blocks) { + off := i * BLOCK_SIZE + for j in 0 ..< BLOCK_SIZE { + blocks[i][j] ~= src[off + j] + } + } + } + } + for i in 0 ..< len(blocks) { + copy(dst[i * BLOCK_SIZE:], blocks[i]) + } +} diff --git a/core/crypto/aes/aes_ecb.odin b/core/crypto/aes/aes_ecb.odin new file mode 100644 index 000000000..498429e29 --- /dev/null +++ b/core/crypto/aes/aes_ecb.odin @@ -0,0 +1,57 @@ +package aes + +import "core:crypto/_aes/ct64" + +// Context_ECB is a keyed AES-ECB instance. +// +// WARNING: Using ECB mode is strongly discouraged unless it is being +// used to implement higher level constructs. +Context_ECB :: struct { + _impl: Context_Impl, + _is_initialized: bool, +} + +// init_ecb initializes a Context_ECB with the provided key. +init_ecb :: proc(ctx: ^Context_ECB, key: []byte, impl := Implementation.Hardware) { + init_impl(&ctx._impl, key, impl) + ctx._is_initialized = true +} + +// encrypt_ecb encrypts the BLOCK_SIZE buffer src, and writes the result to dst. +encrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) { + assert(ctx._is_initialized) + + if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE { + panic("crypto/aes: invalid buffer size(s)") + } + + switch &impl in ctx._impl { + case ct64.Context: + ct64.encrypt_block(&impl, dst, src) + case Context_Impl_Hardware: + encrypt_block_hw(&impl, dst, src) + } +} + +// decrypt_ecb decrypts the BLOCK_SIZE buffer src, and writes the result to dst. +decrypt_ecb :: proc(ctx: ^Context_ECB, dst, src: []byte) { + assert(ctx._is_initialized) + + if len(dst) != BLOCK_SIZE || len(src) != BLOCK_SIZE { + panic("crypto/aes: invalid buffer size(s)") + } + + switch &impl in ctx._impl { + case ct64.Context: + ct64.decrypt_block(&impl, dst, src) + case Context_Impl_Hardware: + decrypt_block_hw(&impl, dst, src) + } +} + +// reset_ecb sanitizes the Context_ECB. The Context_ECB must be +// re-initialized to be used again. +reset_ecb :: proc "contextless" (ctx: ^Context_ECB) { + reset_impl(&ctx._impl) + ctx._is_initialized = false +} diff --git a/core/crypto/aes/aes_gcm.odin b/core/crypto/aes/aes_gcm.odin new file mode 100644 index 000000000..66ef48db2 --- /dev/null +++ b/core/crypto/aes/aes_gcm.odin @@ -0,0 +1,253 @@ +package aes + +import "core:crypto" +import "core:crypto/_aes" +import "core:crypto/_aes/ct64" +import "core:encoding/endian" +import "core:mem" + +// GCM_NONCE_SIZE is the size of the GCM nonce in bytes. +GCM_NONCE_SIZE :: 12 +// GCM_TAG_SIZE is the size of a GCM tag in bytes. +GCM_TAG_SIZE :: _aes.GHASH_TAG_SIZE + +@(private) +GCM_A_MAX :: max(u64) / 8 // 2^64 - 1 bits -> bytes +@(private) +GCM_P_MAX :: 0xfffffffe0 // 2^39 - 256 bits -> bytes + +// Context_GCM is a keyed AES-GCM instance. +Context_GCM :: struct { + _impl: Context_Impl, + _is_initialized: bool, +} + +// init_gcm initializes a Context_GCM with the provided key. +init_gcm :: proc(ctx: ^Context_GCM, key: []byte, impl := Implementation.Hardware) { + init_impl(&ctx._impl, key, impl) + ctx._is_initialized = true +} + +// seal_gcm encrypts the plaintext and authenticates the aad and ciphertext, +// with the provided Context_GCM and nonce, stores the output in dst and tag. +// +// dst and plaintext MUST alias exactly or not at all. 
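As a usage reference for the `seal_gcm`/`open_gcm` procedures that follow, a hedged round-trip sketch of the package-level GCM API; the `gcm_round_trip_example` name and the zero-filled buffers are purely illustrative:

package example

import "core:crypto/aes"

// Seal then open a short message, returning whether authentication
// succeeded. dst buffers are distinct from the inputs, satisfying the
// "alias exactly or not at all" requirement.
gcm_round_trip_example :: proc() -> bool {
	key:   [aes.KEY_SIZE_256]byte   // all-zero key, illustrative only
	nonce: [aes.GCM_NONCE_SIZE]byte // must never repeat per key in real use
	tag:   [aes.GCM_TAG_SIZE]byte

	plaintext: [32]byte // message bytes (zero here, illustrative)
	aad:       [8]byte  // additional authenticated data
	ciphertext, recovered: [32]byte

	ctx: aes.Context_GCM
	aes.init_gcm(&ctx, key[:]) // backend auto-selected, see aes_impl.odin below
	defer aes.reset_gcm(&ctx)

	aes.seal_gcm(&ctx, ciphertext[:], tag[:], nonce[:], aad[:], plaintext[:])
	return aes.open_gcm(&ctx, recovered[:], nonce[:], aad[:], ciphertext[:], tag[:])
}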
+seal_gcm :: proc(ctx: ^Context_GCM, dst, tag, nonce, aad, plaintext: []byte) { + assert(ctx._is_initialized) + + gcm_validate_common_slice_sizes(tag, nonce, aad, plaintext) + if len(dst) != len(plaintext) { + panic("crypto/aes: invalid destination ciphertext size") + } + + if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw { + gcm_seal_hw(&impl, dst, tag, nonce, aad, plaintext) + return + } + + h: [_aes.GHASH_KEY_SIZE]byte + j0: [_aes.GHASH_BLOCK_SIZE]byte + s: [_aes.GHASH_TAG_SIZE]byte + init_ghash_ct64(ctx, &h, &j0, nonce) + + // Note: Our GHASH implementation handles appending padding. + ct64.ghash(s[:], h[:], aad) + gctr_ct64(ctx, dst, &s, plaintext, &h, nonce, true) + final_ghash_ct64(&s, &h, &j0, len(aad), len(plaintext)) + copy(tag, s[:]) + + mem.zero_explicit(&h, len(h)) + mem.zero_explicit(&j0, len(j0)) +} + +// open_gcm authenticates the aad and ciphertext, and decrypts the ciphertext, +// with the provided Context_GCM, nonce, and tag, and stores the output in dst, +// returning true iff the authentication was successful. If authentication +// fails, the destination buffer will be zeroed. +// +// dst and plaintext MUST alias exactly or not at all. +open_gcm :: proc(ctx: ^Context_GCM, dst, nonce, aad, ciphertext, tag: []byte) -> bool { + assert(ctx._is_initialized) + + gcm_validate_common_slice_sizes(tag, nonce, aad, ciphertext) + if len(dst) != len(ciphertext) { + panic("crypto/aes: invalid destination plaintext size") + } + + if impl, is_hw := ctx._impl.(Context_Impl_Hardware); is_hw { + return gcm_open_hw(&impl, dst, nonce, aad, ciphertext, tag) + } + + h: [_aes.GHASH_KEY_SIZE]byte + j0: [_aes.GHASH_BLOCK_SIZE]byte + s: [_aes.GHASH_TAG_SIZE]byte + init_ghash_ct64(ctx, &h, &j0, nonce) + + ct64.ghash(s[:], h[:], aad) + gctr_ct64(ctx, dst, &s, ciphertext, &h, nonce, false) + final_ghash_ct64(&s, &h, &j0, len(aad), len(ciphertext)) + + ok := crypto.compare_constant_time(s[:], tag) == 1 + if !ok { + mem.zero_explicit(raw_data(dst), len(dst)) + } + + mem.zero_explicit(&h, len(h)) + mem.zero_explicit(&j0, len(j0)) + mem.zero_explicit(&s, len(s)) + + return ok +} + +// reset_ctr sanitizes the Context_GCM. The Context_GCM must be +// re-initialized to be used again. +reset_gcm :: proc "contextless" (ctx: ^Context_GCM) { + reset_impl(&ctx._impl) + ctx._is_initialized = false +} + +@(private) +gcm_validate_common_slice_sizes :: proc(tag, nonce, aad, text: []byte) { + if len(tag) != GCM_TAG_SIZE { + panic("crypto/aes: invalid GCM tag size") + } + + // The specification supports nonces in the range [1, 2^64) bits + // however per NIST SP 800-38D 5.2.1.1: + // + // > For IVs, it is recommended that implementations restrict support + // > to the length of 96 bits, to promote interoperability, efficiency, + // > and simplicity of design. + if len(nonce) != GCM_NONCE_SIZE { + panic("crypto/aes: invalid GCM nonce size") + } + + if aad_len := u64(len(aad)); aad_len > GCM_A_MAX { + panic("crypto/aes: oversized GCM aad") + } + if text_len := u64(len(text)); text_len > GCM_P_MAX { + panic("crypto/aes: oversized GCM src data") + } +} + +@(private = "file") +init_ghash_ct64 :: proc( + ctx: ^Context_GCM, + h: ^[_aes.GHASH_KEY_SIZE]byte, + j0: ^[_aes.GHASH_BLOCK_SIZE]byte, + nonce: []byte, +) { + impl := &ctx._impl.(ct64.Context) + + // 1. Let H = CIPH(k, 0^128) + ct64.encrypt_block(impl, h[:], h[:]) + + // ECB encrypt j0, so that we can just XOR with the tag. In theory + // this could be processed along with the final GCTR block, to + // potentially save a call to AES-ECB, but... 
just use AES-NI. + copy(j0[:], nonce) + j0[_aes.GHASH_BLOCK_SIZE - 1] = 1 + ct64.encrypt_block(impl, j0[:], j0[:]) +} + +@(private = "file") +final_ghash_ct64 :: proc( + s: ^[_aes.GHASH_BLOCK_SIZE]byte, + h: ^[_aes.GHASH_KEY_SIZE]byte, + j0: ^[_aes.GHASH_BLOCK_SIZE]byte, + a_len: int, + t_len: int, +) { + blk: [_aes.GHASH_BLOCK_SIZE]byte + endian.unchecked_put_u64be(blk[0:], u64(a_len) * 8) + endian.unchecked_put_u64be(blk[8:], u64(t_len) * 8) + + ct64.ghash(s[:], h[:], blk[:]) + for i in 0 ..< len(s) { + s[i] ~= j0[i] + } +} + +@(private = "file") +gctr_ct64 :: proc( + ctx: ^Context_GCM, + dst: []byte, + s: ^[_aes.GHASH_BLOCK_SIZE]byte, + src: []byte, + h: ^[_aes.GHASH_KEY_SIZE]byte, + nonce: []byte, + is_seal: bool, +) { + ct64_inc_ctr32 := #force_inline proc "contextless" (dst: []byte, ctr: u32) -> u32 { + endian.unchecked_put_u32be(dst[12:], ctr) + return ctr + 1 + } + + // 2. Define a block J_0 as follows: + // if len(IV) = 96, then let J0 = IV || 0^31 || 1 + // + // Note: We only support 96 bit IVs. + tmp, tmp2: [ct64.STRIDE][BLOCK_SIZE]byte = ---, --- + ctrs, blks: [ct64.STRIDE][]byte = ---, --- + ctr: u32 = 2 + for i in 0 ..< ct64.STRIDE { + // Setup scratch space for the keystream. + blks[i] = tmp2[i][:] + + // Pre-copy the IV to all the counter blocks. + ctrs[i] = tmp[i][:] + copy(ctrs[i], nonce) + } + + // We stitch the GCTR and GHASH operations together, so that only + // one pass over the ciphertext is required. + + impl := &ctx._impl.(ct64.Context) + src, dst := src, dst + + nr_blocks := len(src) / BLOCK_SIZE + for nr_blocks > 0 { + n := min(ct64.STRIDE, nr_blocks) + l := n * BLOCK_SIZE + + if !is_seal { + ct64.ghash(s[:], h[:], src[:l]) + } + + // The keystream is written to a separate buffer, as we will + // reuse the first 96-bits of each counter. + for i in 0 ..< n { + ctr = ct64_inc_ctr32(ctrs[i], ctr) + } + ct64.encrypt_blocks(impl, blks[:n], ctrs[:n]) + + xor_blocks(dst, src, blks[:n]) + + if is_seal { + ct64.ghash(s[:], h[:], dst[:l]) + } + + src = src[l:] + dst = dst[l:] + nr_blocks -= n + } + if l := len(src); l > 0 { + if !is_seal { + ct64.ghash(s[:], h[:], src[:l]) + } + + ct64_inc_ctr32(ctrs[0], ctr) + ct64.encrypt_block(impl, ctrs[0], ctrs[0]) + + for i in 0 ..< l { + dst[i] = src[i] ~ ctrs[0][i] + } + + if is_seal { + ct64.ghash(s[:], h[:], dst[:l]) + } + } + + mem.zero_explicit(&tmp, size_of(tmp)) + mem.zero_explicit(&tmp2, size_of(tmp2)) +} diff --git a/core/crypto/aes/aes_impl.odin b/core/crypto/aes/aes_impl.odin new file mode 100644 index 000000000..03747f1fb --- /dev/null +++ b/core/crypto/aes/aes_impl.odin @@ -0,0 +1,41 @@ +package aes + +import "core:crypto/_aes/ct64" +import "core:mem" +import "core:reflect" + +@(private) +Context_Impl :: union { + ct64.Context, + Context_Impl_Hardware, +} + +// Implementation is an AES implementation. Most callers will not need +// to use this as the package will automatically select the most performant +// implementation available (See `is_hardware_accelerated()`). 
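The enum that follows can be passed to any of the `init_*` procedures to pin a specific backend, for example to exercise the portable bitsliced path on machines that do have AES hardware, as the test suite later in this diff does. A small hedged sketch (the `force_portable_example` name is hypothetical):

package example

import "core:crypto/aes"

// Request the portable (bitsliced ct64) backend explicitly. Note that
// init_impl falls back to .Portable automatically whenever hardware
// support is unavailable, so .Hardware is always a safe default.
force_portable_example :: proc() {
	key: [aes.KEY_SIZE_128]byte // all-zero key, illustrative only

	ctx: aes.Context_ECB
	aes.init_ecb(&ctx, key[:], aes.Implementation.Portable)
	defer aes.reset_ecb(&ctx)

	src, dst: [aes.BLOCK_SIZE]byte
	aes.encrypt_ecb(&ctx, dst[:], src[:])
}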
+Implementation :: enum { + Portable, + Hardware, +} + +@(private) +init_impl :: proc(ctx: ^Context_Impl, key: []byte, impl: Implementation) { + impl := impl + if !is_hardware_accelerated() { + impl = .Portable + } + + switch impl { + case .Portable: + reflect.set_union_variant_typeid(ctx^, typeid_of(ct64.Context)) + ct64.init(&ctx.(ct64.Context), key) + case .Hardware: + reflect.set_union_variant_typeid(ctx^, typeid_of(Context_Impl_Hardware)) + init_impl_hw(&ctx.(Context_Impl_Hardware), key) + } +} + +@(private) +reset_impl :: proc "contextless" (ctx: ^Context_Impl) { + mem.zero_explicit(ctx, size_of(Context_Impl)) +} diff --git a/core/crypto/aes/aes_impl_hw_gen.odin b/core/crypto/aes/aes_impl_hw_gen.odin new file mode 100644 index 000000000..94815f61c --- /dev/null +++ b/core/crypto/aes/aes_impl_hw_gen.odin @@ -0,0 +1,43 @@ +package aes + +@(private = "file") +ERR_HW_NOT_SUPPORTED :: "crypto/aes: hardware implementation unsupported" + +// is_hardware_accelerated returns true iff hardware accelerated AES +// is supported. +is_hardware_accelerated :: proc "contextless" () -> bool { + return false +} + +@(private) +Context_Impl_Hardware :: struct {} + +@(private) +init_impl_hw :: proc(ctx: ^Context_Impl_Hardware, key: []byte) { + panic(ERR_HW_NOT_SUPPORTED) +} + +@(private) +encrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) { + panic(ERR_HW_NOT_SUPPORTED) +} + +@(private) +decrypt_block_hw :: proc(ctx: ^Context_Impl_Hardware, dst, src: []byte) { + panic(ERR_HW_NOT_SUPPORTED) +} + +@(private) +ctr_blocks_hw :: proc(ctx: ^Context_CTR, dst, src: []byte, nr_blocks: int) { + panic(ERR_HW_NOT_SUPPORTED) +} + +@(private) +gcm_seal_hw :: proc(ctx: ^Context_Impl_Hardware, dst, tag, nonce, aad, plaintext: []byte) { + panic(ERR_HW_NOT_SUPPORTED) +} + +@(private) +gcm_open_hw :: proc(ctx: ^Context_Impl_Hardware, dst, nonce, aad, ciphertext, tag: []byte) -> bool { + panic(ERR_HW_NOT_SUPPORTED) +} diff --git a/core/simd/x86/aes.odin b/core/simd/x86/aes.odin new file mode 100644 index 000000000..3a32de0d6 --- /dev/null +++ b/core/simd/x86/aes.odin @@ -0,0 +1,49 @@ +//+build i386, amd64 +package simd_x86 + +@(require_results, enable_target_feature = "aes") +_mm_aesdec :: #force_inline proc "c" (a, b: __m128i) -> __m128i { + return aesdec(a, b) +} + +@(require_results, enable_target_feature = "aes") +_mm_aesdeclast :: #force_inline proc "c" (a, b: __m128i) -> __m128i { + return aesdeclast(a, b) +} + +@(require_results, enable_target_feature = "aes") +_mm_aesenc :: #force_inline proc "c" (a, b: __m128i) -> __m128i { + return aesenc(a, b) +} + +@(require_results, enable_target_feature = "aes") +_mm_aesenclast :: #force_inline proc "c" (a, b: __m128i) -> __m128i { + return aesenclast(a, b) +} + +@(require_results, enable_target_feature = "aes") +_mm_aesimc :: #force_inline proc "c" (a: __m128i) -> __m128i { + return aesimc(a) +} + +@(require_results, enable_target_feature = "aes") +_mm_aeskeygenassist :: #force_inline proc "c" (a: __m128i, $IMM8: u8) -> __m128i { + return aeskeygenassist(a, u8(IMM8)) +} + + +@(private, default_calling_convention = "none") +foreign _ { + @(link_name = "llvm.x86.aesni.aesdec") + aesdec :: proc(a, b: __m128i) -> __m128i --- + @(link_name = "llvm.x86.aesni.aesdeclast") + aesdeclast :: proc(a, b: __m128i) -> __m128i --- + @(link_name = "llvm.x86.aesni.aesenc") + aesenc :: proc(a, b: __m128i) -> __m128i --- + @(link_name = "llvm.x86.aesni.aesenclast") + aesenclast :: proc(a, b: __m128i) -> __m128i --- + @(link_name = "llvm.x86.aesni.aesimc") + aesimc :: proc(a: 
__m128i) -> __m128i --- + @(link_name = "llvm.x86.aesni.aeskeygenassist") + aeskeygenassist :: proc(a: __m128i, imm8: u8) -> __m128i --- +} diff --git a/examples/all/all_main.odin b/examples/all/all_main.odin index 1077df1ae..6c3972987 100644 --- a/examples/all/all_main.odin +++ b/examples/all/all_main.odin @@ -25,6 +25,7 @@ import rbtree "core:container/rbtree" import topological_sort "core:container/topological_sort" import crypto "core:crypto" +import aes "core:crypto/aes" import blake2b "core:crypto/blake2b" import blake2s "core:crypto/blake2s" import chacha20 "core:crypto/chacha20" @@ -150,6 +151,7 @@ _ :: rbtree _ :: topological_sort _ :: crypto _ :: crypto_hash +_ :: aes _ :: blake2b _ :: blake2s _ :: chacha20 diff --git a/tests/core/crypto/test_core_crypto.odin b/tests/core/crypto/test_core_crypto.odin index 95db3f292..1f7bcece3 100644 --- a/tests/core/crypto/test_core_crypto.odin +++ b/tests/core/crypto/test_core_crypto.odin @@ -33,6 +33,7 @@ main :: proc() { test_kdf(&t) // After hash/mac tests because those should pass first. test_ecc25519(&t) + test_aes(&t) test_chacha20(&t) test_chacha20poly1305(&t) test_sha3_variants(&t) diff --git a/tests/core/crypto/test_core_crypto_aes.odin b/tests/core/crypto/test_core_crypto_aes.odin new file mode 100644 index 000000000..f33292a53 --- /dev/null +++ b/tests/core/crypto/test_core_crypto_aes.odin @@ -0,0 +1,462 @@ +package test_core_crypto + +import "base:runtime" +import "core:encoding/hex" +import "core:fmt" +import "core:testing" + +import "core:crypto/aes" +import "core:crypto/sha2" + +import tc "tests:common" + +@(test) +test_aes :: proc(t: ^testing.T) { + runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD() + + tc.log(t, "Testing AES") + + impls := make([dynamic]aes.Implementation, 0, 2) + append(&impls, aes.Implementation.Portable) + if aes.is_hardware_accelerated() { + append(&impls, aes.Implementation.Hardware) + } + + for impl in impls { + test_aes_ecb(t, impl) + test_aes_ctr(t, impl) + test_aes_gcm(t, impl) + } +} + +@(test) +test_aes_ecb :: proc(t: ^testing.T, impl: aes.Implementation) { + tc.log(t, fmt.tprintf("Testing AES-ECB/%v", impl)) + + test_vectors := []struct { + key: string, + plaintext: string, + ciphertext: string, + } { + // http://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf + { + "2b7e151628aed2a6abf7158809cf4f3c", + "6bc1bee22e409f96e93d7e117393172a", + "3ad77bb40d7a3660a89ecaf32466ef97", + }, + { + "2b7e151628aed2a6abf7158809cf4f3c", + "ae2d8a571e03ac9c9eb76fac45af8e51", + "f5d3d58503b9699de785895a96fdbaaf", + }, + { + "2b7e151628aed2a6abf7158809cf4f3c", + "30c81c46a35ce411e5fbc1191a0a52ef", + "43b1cd7f598ece23881b00e3ed030688", + }, + { + "2b7e151628aed2a6abf7158809cf4f3c", + "f69f2445df4f9b17ad2b417be66c3710", + "7b0c785e27e8ad3f8223207104725dd4", + }, + { + "8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b", + "6bc1bee22e409f96e93d7e117393172a", + "bd334f1d6e45f25ff712a214571fa5cc", + }, + { + "8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b", + "ae2d8a571e03ac9c9eb76fac45af8e51", + "974104846d0ad3ad7734ecb3ecee4eef", + }, + { + "8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b", + "30c81c46a35ce411e5fbc1191a0a52ef", + "ef7afd2270e2e60adce0ba2face6444e", + }, + { + "8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b", + "f69f2445df4f9b17ad2b417be66c3710", + "9a4b41ba738d6c72fb16691603c18e0e", + }, + { + "603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4", + "6bc1bee22e409f96e93d7e117393172a", + "f3eed1bdb5d2a03c064b5a7e3db181f8", + }, + { + 
"603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4", + "ae2d8a571e03ac9c9eb76fac45af8e51", + "591ccb10d410ed26dc5ba74a31362870", + }, + { + "603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4", + "30c81c46a35ce411e5fbc1191a0a52ef", + "b6ed21b99ca6f4f9f153e7b1beafed1d", + }, + { + "603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4", + "f69f2445df4f9b17ad2b417be66c3710", + "23304b7a39f9f3ff067d8d8f9e24ecc7", + }, + } + for v, _ in test_vectors { + key, _ := hex.decode(transmute([]byte)(v.key), context.temp_allocator) + plaintext, _ := hex.decode(transmute([]byte)(v.plaintext), context.temp_allocator) + ciphertext, _ := hex.decode(transmute([]byte)(v.ciphertext), context.temp_allocator) + + ctx: aes.Context_ECB + dst: [aes.BLOCK_SIZE]byte + aes.init_ecb(&ctx, key, impl) + + aes.encrypt_ecb(&ctx, dst[:], plaintext) + dst_str := string(hex.encode(dst[:], context.temp_allocator)) + tc.expect( + t, + dst_str == v.ciphertext, + fmt.tprintf( + "AES-ECB/%v: Expected: %s for encrypt(%s, %s), but got %s instead", + impl, + v.ciphertext, + v.key, + v.plaintext, + dst_str, + ), + ) + + aes.decrypt_ecb(&ctx, dst[:], ciphertext) + dst_str = string(hex.encode(dst[:], context.temp_allocator)) + tc.expect( + t, + dst_str == v.plaintext, + fmt.tprintf( + "AES-ECB/%v: Expected: %s for decrypt(%s, %s), but got %s instead", + impl, + v.plaintext, + v.key, + v.ciphertext, + dst_str, + ), + ) + } +} + +@(test) +test_aes_ctr :: proc(t: ^testing.T, impl: aes.Implementation) { + tc.log(t, fmt.tprintf("Testing AES-CTR/%v", impl)) + + test_vectors := []struct { + key: string, + iv: string, + plaintext: string, + ciphertext: string, + } { + // http://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf + { + "2b7e151628aed2a6abf7158809cf4f3c", + "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff", + "6bc1bee22e409f96e93d7e117393172aae2d8a571e03ac9c9eb76fac45af8e5130c81c46a35ce411e5fbc1191a0a52eff69f2445df4f9b17ad2b417be66c3710", + "874d6191b620e3261bef6864990db6ce9806f66b7970fdff8617187bb9fffdff5ae4df3edbd5d35e5b4f09020db03eab1e031dda2fbe03d1792170a0f3009cee", + }, + { + "8e73b0f7da0e6452c810f32b809079e562f8ead2522c6b7b", + "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff", + "6bc1bee22e409f96e93d7e117393172aae2d8a571e03ac9c9eb76fac45af8e5130c81c46a35ce411e5fbc1191a0a52eff69f2445df4f9b17ad2b417be66c3710", + "1abc932417521ca24f2b0459fe7e6e0b090339ec0aa6faefd5ccc2c6f4ce8e941e36b26bd1ebc670d1bd1d665620abf74f78a7f6d29809585a97daec58c6b050", + }, + { + "603deb1015ca71be2b73aef0857d77811f352c073b6108d72d9810a30914dff4", + "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff", + "6bc1bee22e409f96e93d7e117393172aae2d8a571e03ac9c9eb76fac45af8e5130c81c46a35ce411e5fbc1191a0a52eff69f2445df4f9b17ad2b417be66c3710", + "601ec313775789a5b7a7f504bbf3d228f443e3ca4d62b59aca84e990cacaf5c52b0930daa23de94ce87017ba2d84988ddfc9c58db67aada613c2dd08457941a6", + }, + } + for v, _ in test_vectors { + key, _ := hex.decode(transmute([]byte)(v.key), context.temp_allocator) + iv, _ := hex.decode(transmute([]byte)(v.iv), context.temp_allocator) + plaintext, _ := hex.decode(transmute([]byte)(v.plaintext), context.temp_allocator) + ciphertext, _ := hex.decode(transmute([]byte)(v.ciphertext), context.temp_allocator) + + dst := make([]byte, len(ciphertext), context.temp_allocator) + + ctx: aes.Context_CTR + aes.init_ctr(&ctx, key, iv, impl) + + aes.xor_bytes_ctr(&ctx, dst, plaintext) + + dst_str := string(hex.encode(dst[:], context.temp_allocator)) + tc.expect( + t, + dst_str == v.ciphertext, + fmt.tprintf( + "AES-CTR/%v: Expected: %s for 
encrypt(%s, %s, %s), but got %s instead", + impl, + v.ciphertext, + v.key, + v.iv, + v.plaintext, + dst_str, + ), + ) + } + + // Incrementally read 1, 2, 3, ..., 2048 bytes of keystream, and + // compare the SHA-512/256 digest with a known value. Results + // and testcase taken from a known good implementation. + + tmp := make([]byte, 2048, context.temp_allocator) + + ctx: aes.Context_CTR + key: [aes.KEY_SIZE_256]byte + nonce: [aes.CTR_IV_SIZE]byte + aes.init_ctr(&ctx, key[:], nonce[:]) + + h_ctx: sha2.Context_512 + sha2.init_512_256(&h_ctx) + + for i := 1; i < 2048; i = i + 1 { + aes.keystream_bytes_ctr(&ctx, tmp[:i]) + sha2.update(&h_ctx, tmp[:i]) + } + + digest: [32]byte + sha2.final(&h_ctx, digest[:]) + digest_str := string(hex.encode(digest[:], context.temp_allocator)) + + expected_digest_str := "d4445343afeb9d1237f95b10d00358aed4c1d7d57c9fe480cd0afb5e2ffd448c" + tc.expect( + t, + expected_digest_str == digest_str, + fmt.tprintf( + "AES-CTR/%v: Expected %s for keystream digest, but got %s instead", + impl, + expected_digest_str, + digest_str, + ), + ) +} + +@(test) +test_aes_gcm :: proc(t: ^testing.T, impl: aes.Implementation) { + tc.log(t, fmt.tprintf("Testing AES-GCM/%v", impl)) + + // NIST did a reorg of their site, so the source of the test vectors + // is only available from an archive. The commented out tests are + // for non-96-bit IVs which our implementation does not support. + // + // https://csrc.nist.rip/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-revised-spec.pdf + test_vectors := []struct { + key: string, + iv: string, + aad: string, + plaintext: string, + ciphertext: string, + tag: string, + } { + { + "00000000000000000000000000000000", + "000000000000000000000000", + "", + "", + "", + "58e2fccefa7e3061367f1d57a4e7455a", + }, + { + "00000000000000000000000000000000", + "000000000000000000000000", + "", + "00000000000000000000000000000000", + "0388dace60b6a392f328c2b971b2fe78", + "ab6e47d42cec13bdf53a67b21257bddf", + }, + { + "feffe9928665731c6d6a8f9467308308", + "cafebabefacedbaddecaf888", + "", + "d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255", + "42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091473f5985", + "4d5c2af327cd64a62cf35abd2ba6fab4", + }, + { + "feffe9928665731c6d6a8f9467308308", + "cafebabefacedbaddecaf888", + "feedfacedeadbeeffeedfacedeadbeefabaddad2", + "d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39", + "42831ec2217774244b7221b784d0d49ce3aa212f2c02a4e035c17e2329aca12e21d514b25466931c7d8f6a5aac84aa051ba30b396a0aac973d58e091", + "5bc94fbc3221a5db94fae95ae7121a47", + }, + /* + { + "feffe9928665731c6d6a8f9467308308", + "cafebabefacedbad", + "feedfacedeadbeeffeedfacedeadbeefabaddad2", + "d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39", + "61353b4c2806934a777ff51fa22a4755699b2a714fcdc6f83766e5f97b6c742373806900e49f24b22b097544d4896b424989b5e1ebac0f07c23f4598", + "3612d2e79e3b0785561be14aaca2fccb", + }, + { + "feffe9928665731c6d6a8f9467308308", + "9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b", + "feedfacedeadbeeffeedfacedeadbeefabaddad2", + "d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39", + 
"8ce24998625615b603a033aca13fb894be9112a5c3a211a8ba262a3cca7e2ca701e4a9a4fba43c90ccdcb281d48c7c6fd62875d2aca417034c34aee5", + "619cc5aefffe0bfa462af43c1699d050", + }, + */ + { + "000000000000000000000000000000000000000000000000", + "000000000000000000000000", + "", + "", + "", + "cd33b28ac773f74ba00ed1f312572435", + }, + { + "000000000000000000000000000000000000000000000000", + "000000000000000000000000", + "", + "00000000000000000000000000000000", + "98e7247c07f0fe411c267e4384b0f600", + "2ff58d80033927ab8ef4d4587514f0fb", + }, + { + "feffe9928665731c6d6a8f9467308308feffe9928665731c", + "cafebabefacedbaddecaf888", + "", + "d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255", + "3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710acade256", + "9924a7c8587336bfb118024db8674a14", + }, + { + "feffe9928665731c6d6a8f9467308308feffe9928665731c", + "cafebabefacedbaddecaf888", + "feedfacedeadbeeffeedfacedeadbeefabaddad2", + "d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39", + "3980ca0b3c00e841eb06fac4872a2757859e1ceaa6efd984628593b40ca1e19c7d773d00c144c525ac619d18c84a3f4718e2448b2fe324d9ccda2710", + "2519498e80f1478f37ba55bd6d27618c", + }, + /* + { + "feffe9928665731c6d6a8f9467308308feffe9928665731c", + "cafebabefacedbad", + "feedfacedeadbeeffeedfacedeadbeefabaddad2", + "d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39", + "0f10f599ae14a154ed24b36e25324db8c566632ef2bbb34f8347280fc4507057fddc29df9a471f75c66541d4d4dad1c9e93a19a58e8b473fa0f062f7", + "65dcc57fcf623a24094fcca40d3533f8", + }, + { + "feffe9928665731c6d6a8f9467308308feffe9928665731c", + "9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b", + "feedfacedeadbeeffeedfacedeadbeefabaddad2", + "d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39", + "d27e88681ce3243c4830165a8fdcf9ff1de9a1d8e6b447ef6ef7b79828666e4581e79012af34ddd9e2f037589b292db3e67c036745fa22e7e9b7373b", + "dcf566ff291c25bbb8568fc3d376a6d9", + }, + */ + { + "0000000000000000000000000000000000000000000000000000000000000000", + "000000000000000000000000", + "", + "", + "", + "530f8afbc74536b9a963b4f1c4cb738b", + }, + { + "0000000000000000000000000000000000000000000000000000000000000000", + "000000000000000000000000", + "", + "00000000000000000000000000000000", + "cea7403d4d606b6e074ec5d3baf39d18", + "d0d1c8a799996bf0265b98b5d48ab919", + }, + { + "feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308", + "cafebabefacedbaddecaf888", + "", + "d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b391aafd255", + "522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662898015ad", + "b094dac5d93471bdec1a502270e3cc6c", + }, + { + "feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308", + "cafebabefacedbaddecaf888", + "feedfacedeadbeeffeedfacedeadbeefabaddad2", + "d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39", + "522dc1f099567d07f47f37a32a84427d643a8cdcbfe5c0c97598a2bd2555d1aa8cb08e48590dbb3da7b08b1056828838c5f61e6393ba7a0abcc9f662", + 
"76fc6ece0f4e1768cddf8853bb2d551b", + }, + /* + { + "feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308", + "cafebabefacedbad", + "feedfacedeadbeeffeedfacedeadbeefabaddad2", + "d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39", + "c3762df1ca787d32ae47c13bf19844cbaf1ae14d0b976afac52ff7d79bba9de0feb582d33934a4f0954cc2363bc73f7862ac430e64abe499f47c9b1f", + "3a337dbf46a792c45e454913fe2ea8f2", + }, + { + "feffe9928665731c6d6a8f9467308308feffe9928665731c6d6a8f9467308308", + "9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b", + "feedfacedeadbeeffeedfacedeadbeefabaddad2", + "d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39", + "5a8def2f0c9e53f1f75d7853659e2a20eeb2b22aafde6419a058ab4f6f746bf40fc0c3b780f244452da3ebf1c5d82cdea2418997200ef82e44ae7e3f", + "a44a8266ee1c8eb0c8b5d4cf5ae9f19a", + }, + */ + } + for v, _ in test_vectors { + key, _ := hex.decode(transmute([]byte)(v.key), context.temp_allocator) + iv, _ := hex.decode(transmute([]byte)(v.iv), context.temp_allocator) + aad, _ := hex.decode(transmute([]byte)(v.aad), context.temp_allocator) + plaintext, _ := hex.decode(transmute([]byte)(v.plaintext), context.temp_allocator) + ciphertext, _ := hex.decode(transmute([]byte)(v.ciphertext), context.temp_allocator) + tag, _ := hex.decode(transmute([]byte)(v.tag), context.temp_allocator) + + tag_ := make([]byte, len(tag), context.temp_allocator) + dst := make([]byte, len(ciphertext), context.temp_allocator) + + ctx: aes.Context_GCM + aes.init_gcm(&ctx, key, impl) + + aes.seal_gcm(&ctx, dst, tag_, iv, aad, plaintext) + dst_str := string(hex.encode(dst[:], context.temp_allocator)) + tag_str := string(hex.encode(tag_[:], context.temp_allocator)) + + tc.expect( + t, + dst_str == v.ciphertext && tag_str == v.tag, + fmt.tprintf( + "AES-GCM/%v: Expected: (%s, %s) for seal(%s, %s, %s, %s), but got (%s, %s) instead", + impl, + v.ciphertext, + v.tag, + v.key, + v.iv, + v.aad, + v.plaintext, + dst_str, + tag_str, + ), + ) + + ok := aes.open_gcm(&ctx, dst, iv, aad, ciphertext, tag) + dst_str = string(hex.encode(dst[:], context.temp_allocator)) + + tc.expect( + t, + ok && dst_str == v.plaintext, + fmt.tprintf( + "AES-GCM/%v: Expected: (%s, true) for open(%s, %s, %s, %s, %s), but got (%s, %s) instead", + impl, + v.plaintext, + v.key, + v.iv, + v.aad, + v.ciphertext, + v.tag, + dst_str, + ok, + ), + ) + } +} diff --git a/tests/core/crypto/test_crypto_benchmark.odin b/tests/core/crypto/test_crypto_benchmark.odin index cc69cb16d..600dc9ade 100644 --- a/tests/core/crypto/test_crypto_benchmark.odin +++ b/tests/core/crypto/test_crypto_benchmark.odin @@ -6,6 +6,7 @@ import "core:fmt" import "core:testing" import "core:time" +import "core:crypto/aes" import "core:crypto/chacha20" import "core:crypto/chacha20poly1305" import "core:crypto/ed25519" @@ -25,6 +26,7 @@ bench_crypto :: proc(t: ^testing.T) { bench_chacha20(t) bench_poly1305(t) bench_chacha20poly1305(t) + bench_aes256_gcm(t) bench_ed25519(t) bench_x25519(t) } @@ -134,6 +136,26 @@ _benchmark_chacha20poly1305 :: proc( return nil } +_benchmark_aes256_gcm :: proc( + options: ^time.Benchmark_Options, + allocator := context.allocator, +) -> ( + err: time.Benchmark_Error, +) { + buf := options.input + nonce: [aes.GCM_NONCE_SIZE]byte + tag: [aes.GCM_TAG_SIZE]byte = --- + + ctx := transmute(^aes.Context_GCM)context.user_ptr + + for _ in 0 ..= 
options.rounds { + aes.seal_gcm(ctx, buf, tag[:], nonce[:], nil, buf) + } + options.count = options.rounds + options.processed = options.rounds * options.bytes + return nil +} + benchmark_print :: proc(name: string, options: ^time.Benchmark_Options) { fmt.printf( "\t[%v] %v rounds, %v bytes processed in %v ns\n\t\t%5.3f rounds/s, %5.3f MiB/s\n", @@ -221,6 +243,44 @@ bench_chacha20poly1305 :: proc(t: ^testing.T) { benchmark_print(name, options) } +bench_aes256_gcm :: proc(t: ^testing.T) { + name := "AES256-GCM 64 bytes" + options := &time.Benchmark_Options { + rounds = 1_000, + bytes = 64, + setup = _setup_sized_buf, + bench = _benchmark_aes256_gcm, + teardown = _teardown_sized_buf, + } + + key := [aes.KEY_SIZE_256]byte { + 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, + 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, + 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, + 0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef, + } + ctx: aes.Context_GCM + aes.init_gcm(&ctx, key[:]) + + context.user_ptr = &ctx + + err := time.benchmark(options, context.allocator) + tc.expect(t, err == nil, name) + benchmark_print(name, options) + + name = "AES256-GCM 1024 bytes" + options.bytes = 1024 + err = time.benchmark(options, context.allocator) + tc.expect(t, err == nil, name) + benchmark_print(name, options) + + name = "AES256-GCM 65536 bytes" + options.bytes = 65536 + err = time.benchmark(options, context.allocator) + tc.expect(t, err == nil, name) + benchmark_print(name, options) +} + bench_ed25519 :: proc(t: ^testing.T) { iters :: 10000