mirror of
https://github.com/odin-lang/Odin.git
synced 2026-06-05 18:24:06 +00:00
core/crypto/aegis: Initial import
This commit is contained in:
@@ -210,11 +210,8 @@ orthogonalize :: proc "contextless" (q: ^[8]u64) {
|
||||
}
|
||||
|
||||
@(require_results)
|
||||
interleave_in :: proc "contextless" (w: []u32) -> (q0, q1: u64) #no_bounds_check {
|
||||
if len(w) < 4 {
|
||||
panic_contextless("aes/ct64: invalid input size")
|
||||
}
|
||||
x0, x1, x2, x3 := u64(w[0]), u64(w[1]), u64(w[2]), u64(w[3])
|
||||
interleave_in :: proc "contextless" (w0, w1, w2, w3: u32) -> (q0, q1: u64) #no_bounds_check {
|
||||
x0, x1, x2, x3 := u64(w0), u64(w1), u64(w2), u64(w3)
|
||||
x0 |= (x0 << 16)
|
||||
x1 |= (x1 << 16)
|
||||
x2 |= (x2 << 16)
|
||||
|
||||
@@ -77,7 +77,7 @@ keysched :: proc(comp_skey: []u64, key: []byte) -> int {
|
||||
|
||||
q: [8]u64 = ---
|
||||
for i, j := 0, 0; i < nkf; i, j = i + 4, j + 2 {
|
||||
q[0], q[4] = interleave_in(skey[i:])
|
||||
q[0], q[4] = interleave_in(skey[i], skey[i+1], skey[i+2], skey[i+3])
|
||||
q[1] = q[0]
|
||||
q[2] = q[0]
|
||||
q[3] = q[0]
|
||||
@@ -122,57 +122,3 @@ skey_expand :: proc "contextless" (skey, comp_skey: []u64, num_rounds: int) {
|
||||
skey[v + 3] = (x3 << 4) - x3
|
||||
}
|
||||
}
|
||||
|
||||
orthogonalize_roundkey :: proc "contextless" (qq: []u64, key: []byte) {
|
||||
if len(qq) < 8 || len(key) != 16 {
|
||||
panic_contextless("aes/ct64: invalid round key size")
|
||||
}
|
||||
|
||||
skey: [4]u32 = ---
|
||||
skey[0] = endian.unchecked_get_u32le(key[0:])
|
||||
skey[1] = endian.unchecked_get_u32le(key[4:])
|
||||
skey[2] = endian.unchecked_get_u32le(key[8:])
|
||||
skey[3] = endian.unchecked_get_u32le(key[12:])
|
||||
|
||||
q: [8]u64 = ---
|
||||
q[0], q[4] = interleave_in(skey[:])
|
||||
q[1] = q[0]
|
||||
q[2] = q[0]
|
||||
q[3] = q[0]
|
||||
q[5] = q[4]
|
||||
q[6] = q[4]
|
||||
q[7] = q[4]
|
||||
orthogonalize(&q)
|
||||
|
||||
comp_skey: [2]u64 = ---
|
||||
comp_skey[0] =
|
||||
(q[0] & 0x1111111111111111) |
|
||||
(q[1] & 0x2222222222222222) |
|
||||
(q[2] & 0x4444444444444444) |
|
||||
(q[3] & 0x8888888888888888)
|
||||
comp_skey[1] =
|
||||
(q[4] & 0x1111111111111111) |
|
||||
(q[5] & 0x2222222222222222) |
|
||||
(q[6] & 0x4444444444444444) |
|
||||
(q[7] & 0x8888888888888888)
|
||||
|
||||
for x, u in comp_skey {
|
||||
x0 := x
|
||||
x1, x2, x3 := x0, x0, x0
|
||||
x0 &= 0x1111111111111111
|
||||
x1 &= 0x2222222222222222
|
||||
x2 &= 0x4444444444444444
|
||||
x3 &= 0x8888888888888888
|
||||
x1 >>= 1
|
||||
x2 >>= 2
|
||||
x3 >>= 3
|
||||
qq[u * 4 + 0] = (x0 << 4) - x0
|
||||
qq[u * 4 + 1] = (x1 << 4) - x1
|
||||
qq[u * 4 + 2] = (x2 << 4) - x2
|
||||
qq[u * 4 + 3] = (x3 << 4) - x3
|
||||
}
|
||||
|
||||
mem.zero_explicit(&skey, size_of(skey))
|
||||
mem.zero_explicit(&q, size_of(q))
|
||||
mem.zero_explicit(&comp_skey, size_of(comp_skey))
|
||||
}
|
||||
|
||||
@@ -3,17 +3,39 @@ package aes_ct64
|
||||
import "core:crypto/_aes"
|
||||
import "core:encoding/endian"
|
||||
|
||||
// load_interleaved reads four little-endian 32-bit words from src at byte
// offsets 0, 4, 8 and 12, and returns the pair of 64-bit values produced
// by interleave_in.
//
// The loads use the `unchecked_` endian helpers and no length check is
// done here, so the caller is expected to pass at least 16 bytes.
@(require_results)
load_interleaved :: proc "contextless" (src: []byte) -> (u64, u64) {
	return interleave_in(
		endian.unchecked_get_u32le(src[0:]),
		endian.unchecked_get_u32le(src[4:]),
		endian.unchecked_get_u32le(src[8:]),
		endian.unchecked_get_u32le(src[12:]),
	)
}
|
||||
|
||||
// store_interleaved is the inverse of load_interleaved: it passes
// (a0, a1) through interleave_out and writes the four resulting 32-bit
// words to dst at byte offsets 0, 4, 8 and 12 in little-endian order.
//
// The stores use the `unchecked_` endian helpers and no length check is
// done here, so the caller is expected to pass at least 16 bytes.
store_interleaved :: proc "contextless" (dst: []byte, a0, a1: u64) {
	word0, word1, word2, word3 := interleave_out(a0, a1)

	endian.unchecked_put_u32le(dst[0:], word0)
	endian.unchecked_put_u32le(dst[4:], word1)
	endian.unchecked_put_u32le(dst[8:], word2)
	endian.unchecked_put_u32le(dst[12:], word3)
}
|
||||
|
||||
// xor_interleaved returns the pairwise XOR of the two-word values
// (a0, a1) and (b0, b1): (a0 ~ b0, a1 ~ b1).
@(require_results)
xor_interleaved :: #force_inline proc "contextless" (a0, a1, b0, b1: u64) -> (u64, u64) {
	r0 := a0 ~ b0
	r1 := a1 ~ b1
	return r0, r1
}
|
||||
|
||||
// and_interleaved returns the pairwise bitwise AND of the two-word values
// (a0, a1) and (b0, b1): (a0 & b0, a1 & b1).
@(require_results)
and_interleaved :: #force_inline proc "contextless" (a0, a1, b0, b1: u64) -> (u64, u64) {
	r0 := a0 & b0
	r1 := a1 & b1
	return r0, r1
}
|
||||
|
||||
load_blockx1 :: proc "contextless" (q: ^[8]u64, src: []byte) {
|
||||
if len(src) != _aes.BLOCK_SIZE {
|
||||
panic_contextless("aes/ct64: invalid block size")
|
||||
}
|
||||
|
||||
w: [4]u32 = ---
|
||||
w[0] = endian.unchecked_get_u32le(src[0:])
|
||||
w[1] = endian.unchecked_get_u32le(src[4:])
|
||||
w[2] = endian.unchecked_get_u32le(src[8:])
|
||||
w[3] = endian.unchecked_get_u32le(src[12:])
|
||||
q[0], q[4] = interleave_in(w[:])
|
||||
q[0], q[4] = #force_inline load_interleaved(src)
|
||||
orthogonalize(q)
|
||||
}
|
||||
|
||||
@@ -23,11 +45,7 @@ store_blockx1 :: proc "contextless" (dst: []byte, q: ^[8]u64) {
|
||||
}
|
||||
|
||||
orthogonalize(q)
|
||||
w0, w1, w2, w3 := interleave_out(q[0], q[4])
|
||||
endian.unchecked_put_u32le(dst[0:], w0)
|
||||
endian.unchecked_put_u32le(dst[4:], w1)
|
||||
endian.unchecked_put_u32le(dst[8:], w2)
|
||||
endian.unchecked_put_u32le(dst[12:], w3)
|
||||
#force_inline store_interleaved(dst, q[0], q[4])
|
||||
}
|
||||
|
||||
load_blocks :: proc "contextless" (q: ^[8]u64, src: [][]byte) {
|
||||
@@ -35,17 +53,11 @@ load_blocks :: proc "contextless" (q: ^[8]u64, src: [][]byte) {
|
||||
panic_contextless("aes/ct64: invalid block(s) size")
|
||||
}
|
||||
|
||||
w: [4]u32 = ---
|
||||
for s, i in src {
|
||||
if len(s) != _aes.BLOCK_SIZE {
|
||||
panic_contextless("aes/ct64: invalid block size")
|
||||
}
|
||||
|
||||
w[0] = endian.unchecked_get_u32le(s[0:])
|
||||
w[1] = endian.unchecked_get_u32le(s[4:])
|
||||
w[2] = endian.unchecked_get_u32le(s[8:])
|
||||
w[3] = endian.unchecked_get_u32le(s[12:])
|
||||
q[i], q[i + 4] = interleave_in(w[:])
|
||||
q[i], q[i + 4] = #force_inline load_interleaved(s)
|
||||
}
|
||||
orthogonalize(q)
|
||||
}
|
||||
@@ -64,11 +76,6 @@ store_blocks :: proc "contextless" (dst: [][]byte, q: ^[8]u64) {
|
||||
if len(d) != _aes.BLOCK_SIZE {
|
||||
panic_contextless("aes/ct64: invalid block size")
|
||||
}
|
||||
|
||||
w0, w1, w2, w3 := interleave_out(q[i], q[i + 4])
|
||||
endian.unchecked_put_u32le(d[0:], w0)
|
||||
endian.unchecked_put_u32le(d[4:], w1)
|
||||
endian.unchecked_put_u32le(d[8:], w2)
|
||||
endian.unchecked_put_u32le(d[12:], w3)
|
||||
#force_inline store_interleaved(d, q[i], q[i + 4])
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package aead
|
||||
|
||||
import "core:crypto/aegis"
|
||||
import "core:crypto/aes"
|
||||
import "core:crypto/chacha20"
|
||||
import "core:crypto/chacha20poly1305"
|
||||
@@ -15,7 +16,7 @@ Implementation :: union {
|
||||
|
||||
// MAX_TAG_SIZE is the maximum size tag that can be returned by any of the
|
||||
// Algorithms supported via this package.
|
||||
MAX_TAG_SIZE :: 16
|
||||
MAX_TAG_SIZE :: 32
|
||||
|
||||
// Algorithm is the algorithm identifier associated with a given Context.
|
||||
Algorithm :: enum {
|
||||
@@ -25,9 +26,13 @@ Algorithm :: enum {
|
||||
AES_GCM_256,
|
||||
CHACHA20POLY1305,
|
||||
XCHACHA20POLY1305,
|
||||
AEGIS_128L,
|
||||
AEGIS_128L_256, // AEGIS-128L (256-bit tag)
|
||||
AEGIS_256,
|
||||
AEGIS_256_256, // AEGIS-256 (256-bit tag)
|
||||
}
|
||||
|
||||
// ALGORITM_NAMES is the Agorithm to algorithm name string.
|
||||
// ALGORITHM_NAMES maps each Algorithm to its algorithm name string.
|
||||
ALGORITHM_NAMES := [Algorithm]string {
|
||||
.Invalid = "Invalid",
|
||||
.AES_GCM_128 = "AES-GCM-128",
|
||||
@@ -35,6 +40,10 @@ ALGORITHM_NAMES := [Algorithm]string {
|
||||
.AES_GCM_256 = "AES-GCM-256",
|
||||
.CHACHA20POLY1305 = "chacha20poly1305",
|
||||
.XCHACHA20POLY1305 = "xchacha20poly1305",
|
||||
.AEGIS_128L = "AEGIS-128L",
|
||||
.AEGIS_128L_256 = "AEGIS-128L-256",
|
||||
.AEGIS_256 = "AEGIS-256",
|
||||
.AEGIS_256_256 = "AEGIS-256-256",
|
||||
}
|
||||
|
||||
// TAG_SIZES is the Algorithm to tag size in bytes.
|
||||
@@ -45,6 +54,10 @@ TAG_SIZES := [Algorithm]int {
|
||||
.AES_GCM_256 = aes.GCM_TAG_SIZE,
|
||||
.CHACHA20POLY1305 = chacha20poly1305.TAG_SIZE,
|
||||
.XCHACHA20POLY1305 = chacha20poly1305.TAG_SIZE,
|
||||
.AEGIS_128L = aegis.TAG_SIZE_128,
|
||||
.AEGIS_128L_256 = aegis.TAG_SIZE_256,
|
||||
.AEGIS_256 = aegis.TAG_SIZE_128,
|
||||
.AEGIS_256_256 = aegis.TAG_SIZE_256,
|
||||
}
|
||||
|
||||
// KEY_SIZES is the Algorithm to key size in bytes.
|
||||
@@ -55,6 +68,10 @@ KEY_SIZES := [Algorithm]int {
|
||||
.AES_GCM_256 = aes.KEY_SIZE_256,
|
||||
.CHACHA20POLY1305 = chacha20poly1305.KEY_SIZE,
|
||||
.XCHACHA20POLY1305 = chacha20poly1305.KEY_SIZE,
|
||||
.AEGIS_128L = aegis.KEY_SIZE_128L,
|
||||
.AEGIS_128L_256 = aegis.KEY_SIZE_128L,
|
||||
.AEGIS_256 = aegis.KEY_SIZE_256,
|
||||
.AEGIS_256_256 = aegis.KEY_SIZE_256,
|
||||
}
|
||||
|
||||
// IV_SIZES is the Algorithm to initialization vector size in bytes.
|
||||
@@ -67,6 +84,10 @@ IV_SIZES := [Algorithm]int {
|
||||
.AES_GCM_256 = aes.GCM_IV_SIZE,
|
||||
.CHACHA20POLY1305 = chacha20poly1305.IV_SIZE,
|
||||
.XCHACHA20POLY1305 = chacha20poly1305.XIV_SIZE,
|
||||
.AEGIS_128L = aegis.IV_SIZE_128L,
|
||||
.AEGIS_128L_256 = aegis.IV_SIZE_128L,
|
||||
.AEGIS_256 = aegis.IV_SIZE_256,
|
||||
.AEGIS_256_256 = aegis.IV_SIZE_256,
|
||||
}
|
||||
|
||||
// Context is a concrete instantiation of a specific AEAD algorithm.
|
||||
@@ -75,6 +96,7 @@ Context :: struct {
|
||||
_impl: union {
|
||||
aes.Context_GCM,
|
||||
chacha20poly1305.Context,
|
||||
aegis.Context,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -86,6 +108,10 @@ _IMPL_IDS := [Algorithm]typeid {
|
||||
.AES_GCM_256 = typeid_of(aes.Context_GCM),
|
||||
.CHACHA20POLY1305 = typeid_of(chacha20poly1305.Context),
|
||||
.XCHACHA20POLY1305 = typeid_of(chacha20poly1305.Context),
|
||||
.AEGIS_128L = typeid_of(aegis.Context),
|
||||
.AEGIS_128L_256 = typeid_of(aegis.Context),
|
||||
.AEGIS_256 = typeid_of(aegis.Context),
|
||||
.AEGIS_256_256 = typeid_of(aegis.Context),
|
||||
}
|
||||
|
||||
// init initializes a Context with a specific AEAD Algorithm.
|
||||
@@ -113,6 +139,9 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat
|
||||
case .XCHACHA20POLY1305:
|
||||
impl_ := impl != nil ? impl.(chacha20.Implementation) : chacha20.DEFAULT_IMPLEMENTATION
|
||||
chacha20poly1305.init_xchacha(&ctx._impl.(chacha20poly1305.Context), key, impl_)
|
||||
case .AEGIS_128L, .AEGIS_128L_256, .AEGIS_256, .AEGIS_256_256:
|
||||
impl_ := impl != nil ? impl.(aes.Implementation) : aes.DEFAULT_IMPLEMENTATION
|
||||
aegis.init(&ctx._impl.(aegis.Context), key, impl_)
|
||||
case .Invalid:
|
||||
panic("crypto/aead: uninitialized algorithm")
|
||||
case:
|
||||
@@ -127,11 +156,17 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat
|
||||
//
|
||||
// dst and plaintext MUST alias exactly or not at all.
|
||||
seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
|
||||
if len(tag) != TAG_SIZES[ctx._algo] {
|
||||
panic("crypto/aead: invalid tag size")
|
||||
}
|
||||
|
||||
switch &impl in ctx._impl {
|
||||
case aes.Context_GCM:
|
||||
aes.seal_gcm(&impl, dst, tag, iv, aad, plaintext)
|
||||
case chacha20poly1305.Context:
|
||||
chacha20poly1305.seal(&impl, dst, tag, iv, aad, plaintext)
|
||||
case aegis.Context:
|
||||
aegis.seal(&impl, dst, tag, iv, aad, plaintext)
|
||||
case:
|
||||
panic("crypto/aead: uninitialized algorithm")
|
||||
}
|
||||
@@ -145,11 +180,17 @@ seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
|
||||
// dst and ciphertext MUST alias exactly or not at all.
|
||||
@(require_results)
|
||||
open_ctx :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
|
||||
if len(tag) != TAG_SIZES[ctx._algo] {
|
||||
panic("crypto/aead: invalid tag size")
|
||||
}
|
||||
|
||||
switch &impl in ctx._impl {
|
||||
case aes.Context_GCM:
|
||||
return aes.open_gcm(&impl, dst, iv, aad, ciphertext, tag)
|
||||
case chacha20poly1305.Context:
|
||||
return chacha20poly1305.open(&impl, dst, iv, aad, ciphertext, tag)
|
||||
case aegis.Context:
|
||||
return aegis.open(&impl, dst, iv, aad, ciphertext, tag)
|
||||
case:
|
||||
panic("crypto/aead: uninitialized algorithm")
|
||||
}
|
||||
@@ -163,6 +204,8 @@ reset :: proc(ctx: ^Context) {
|
||||
aes.reset_gcm(&impl)
|
||||
case chacha20poly1305.Context:
|
||||
chacha20poly1305.reset(&impl)
|
||||
case aegis.Context:
|
||||
aegis.reset(&impl)
|
||||
case:
|
||||
// Calling reset repeatedly is fine.
|
||||
}
|
||||
|
||||
213
core/crypto/aegis/aegis.odin
Normal file
213
core/crypto/aegis/aegis.odin
Normal file
@@ -0,0 +1,213 @@
|
||||
/*
|
||||
package aegis implements the AEGIS-128L and AEGIS-256 Authenticated
|
||||
Encryption with Additional Data algorithms.
|
||||
|
||||
See:
|
||||
- [[ https://www.ietf.org/archive/id/draft-irtf-cfrg-aegis-aead-12.txt ]]
|
||||
*/
|
||||
package aegis
|
||||
|
||||
import "core:bytes"
|
||||
import "core:crypto"
|
||||
import "core:crypto/aes"
|
||||
import "core:mem"
|
||||
|
||||
// KEY_SIZE_128L is the AEGIS-128L key size in bytes.
|
||||
KEY_SIZE_128L :: 16
|
||||
// KEY_SIZE_256 is the AEGIS-256 key size in bytes.
|
||||
KEY_SIZE_256 :: 32
|
||||
// IV_SIZE_128L is the AEGIS-128L IV size in bytes.
|
||||
IV_SIZE_128L :: 16
|
||||
// IV_SIZE_256 is the AEGIS-256 IV size in bytes.
|
||||
IV_SIZE_256 :: 32
|
||||
// TAG_SIZE_128 is the AEGIS-128L or AEGIS-256 128-bit tag size in bytes.
|
||||
TAG_SIZE_128 :: 16
|
||||
// TAG_SIZE_256 is the AEGIS-128L or AEGIS-256 256-bit tag size in bytes.
|
||||
TAG_SIZE_256 :: 32
|
||||
|
||||
@(private)
|
||||
_RATE_128L :: 32
|
||||
@(private)
|
||||
_RATE_256 :: 16
|
||||
@(private)
|
||||
_RATE_MAX :: _RATE_128L
|
||||
|
||||
@(private, rodata)
|
||||
_C0 := [16]byte{
|
||||
0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d,
|
||||
0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62,
|
||||
}
|
||||
|
||||
@(private, rodata)
|
||||
_C1 := [16]byte {
|
||||
0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1,
|
||||
0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd,
|
||||
}
|
||||
|
||||
// Context is a keyed AEGIS-128L or AEGIS-256 instance.
//
// All fields are populated by `init` and cleared by `reset`.
Context :: struct {
	// Key storage sized for the larger (AEGIS-256) key; only the first
	// `_key_len` bytes are set by `init`.
	_key:            [KEY_SIZE_256]byte,
	// Length of the key passed to `init` (KEY_SIZE_128L or KEY_SIZE_256);
	// the rest of the package uses it to select the variant.
	_key_len:        int,
	// Implementation to use. `init` downgrades `.Hardware` to `.Portable`
	// when `is_hardware_accelerated()` reports false.
	_impl:           aes.Implementation,
	// Set by `init`, cleared by `reset`; checked by `seal` and `open`.
	_is_initialized: bool,
}
|
||||
|
||||
// _validate_common_slice_sizes panics unless the tag and IV lengths are
// valid for the variant selected by the Context's key length.
//
// `aad` and `text` are accepted for symmetry with the callers but are not
// inspected (see the note at the end of the body).
@(private)
_validate_common_slice_sizes :: proc (ctx: ^Context, tag, iv, aad, text: []byte) {
	// Only the 128-bit and 256-bit tag sizes are accepted.
	if n := len(tag); n != TAG_SIZE_128 && n != TAG_SIZE_256 {
		panic("crypto/aegis: invalid tag size")
	}

	// The required IV length follows from the key length.  Any other key
	// length leaves the sentinel in place, which no slice length can match.
	want_iv_len := -1
	switch ctx._key_len {
	case KEY_SIZE_128L:
		want_iv_len = IV_SIZE_128L
	case KEY_SIZE_256:
		want_iv_len = IV_SIZE_256
	}
	ensure(len(iv) == want_iv_len, "crypto/aegis: invalid IV size")

	#assert(size_of(int) == 8 || size_of(int) <= 4)
	// As A_MAX and P_MAX are both defined to be 2^61 - 1 bytes, and
	// the maximum length of a slice is bound by `size_of(int)`, where
	// `int` is register sized, there is no need to check AAD/text
	// lengths.
}
|
||||
|
||||
// init initializes a Context with the provided key, for AEGIS-128L or
// AEGIS-256.  The variant is selected by the key length, which must be
// KEY_SIZE_128L or KEY_SIZE_256; any other length panics.
//
// If `.Hardware` is requested but `is_hardware_accelerated()` reports
// false, the Context silently uses `.Portable` instead.
init :: proc(ctx: ^Context, key: []byte, impl := aes.DEFAULT_IMPLEMENTATION) {
	key_len := len(key)
	if key_len != KEY_SIZE_128L && key_len != KEY_SIZE_256 {
		panic("crypto/aegis: invalid key size")
	}

	copy(ctx._key[:], key)
	ctx._key_len = key_len

	if impl == .Hardware && !is_hardware_accelerated() {
		ctx._impl = .Portable
	} else {
		ctx._impl = impl
	}

	ctx._is_initialized = true
}
|
||||
|
||||
// seal encrypts the plaintext and authenticates the aad and ciphertext,
// with the provided Context and iv, stores the output in dst and tag.
//
// The Context must be initialized, tag and iv must have sizes valid for
// the Context, and dst must be exactly as long as plaintext; violations
// panic.
//
// dst and plaintext MUST alias exactly or not at all.
seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
	ensure(ctx._is_initialized)

	_validate_common_slice_sizes(ctx, tag, iv, aad, plaintext)
	ensure(len(dst) == len(plaintext), "crypto/aegis: invalid destination ciphertext size")
	ensure(!bytes.alias_inexactly(dst, plaintext), "crypto/aegis: dst and plaintext alias inexactly")

	aad_len, pt_len := len(aad), len(plaintext)

	switch ctx._impl {
	case .Hardware:
		st: State_HW
		defer reset_state_hw(&st) // Always scrub the working state.

		init_hw(ctx, &st, iv)
		if aad_len > 0 {
			absorb_hw(&st, aad)
		}
		if pt_len > 0 {
			enc_hw(&st, dst, plaintext)
		}
		finalize_hw(&st, tag, aad_len, pt_len)
	case .Portable:
		st: State_SW
		defer reset_state_sw(&st) // Always scrub the working state.

		init_sw(ctx, &st, iv)
		if aad_len > 0 {
			absorb_sw(&st, aad)
		}
		if pt_len > 0 {
			enc_sw(&st, dst, plaintext)
		}
		finalize_sw(&st, tag, aad_len, pt_len)
	case:
		panic("core/crypto/aegis: not implemented")
	}
}
|
||||
|
||||
// open authenticates the aad and ciphertext, and decrypts the ciphertext,
|
||||
// with the provided Context, iv, and tag, and stores the output in dst,
|
||||
// returning true iff the authentication was successful. If authentication
|
||||
// fails, the destination buffer will be zeroed.
|
||||
//
|
||||
// dst and ciphertext MUST alias exactly or not at all.
|
||||
@(require_results)
open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
	ensure(ctx._is_initialized)

	_validate_common_slice_sizes(ctx, tag, iv, aad, ciphertext)
	ensure(len(dst) == len(ciphertext), "crypto/aegis: invalid destination plaintext size")
	ensure(!bytes.alias_inexactly(dst, ciphertext), "crypto/aegis: dst and ciphertext alias inexactly")

	aad_len, ct_len := len(aad), len(ciphertext)

	// Scratch space sized for the larger tag; only the first len(tag)
	// bytes are used for the recomputed tag.
	tag_buf: [TAG_SIZE_256]byte
	derived_tag := tag_buf[:len(tag)]

	switch ctx._impl {
	case .Hardware:
		st: State_HW
		defer reset_state_hw(&st) // Always scrub the working state.

		init_hw(ctx, &st, iv)
		if aad_len > 0 {
			absorb_hw(&st, aad)
		}
		if ct_len > 0 {
			dec_hw(&st, dst, ciphertext)
		}
		finalize_hw(&st, derived_tag, aad_len, ct_len)
	case .Portable:
		st: State_SW
		defer reset_state_sw(&st) // Always scrub the working state.

		init_sw(ctx, &st, iv)
		if aad_len > 0 {
			absorb_sw(&st, aad)
		}
		if ct_len > 0 {
			dec_sw(&st, dst, ciphertext)
		}
		finalize_sw(&st, derived_tag, aad_len, ct_len)
	case:
		panic("core/crypto/aegis: not implemented")
	}

	if crypto.compare_constant_time(tag, derived_tag) == 1 {
		return true
	}

	// Authentication failed: wipe the recomputed tag and the
	// already-written (unauthenticated) plaintext before reporting failure.
	mem.zero_explicit(raw_data(derived_tag), len(derived_tag))
	mem.zero_explicit(raw_data(dst), ct_len)
	return false
}
|
||||
|
||||
// reset sanitizes the Context by zeroing the stored key, clearing the
// recorded key length, and marking it uninitialized.  The Context must be
// re-initialized to be used again.
reset :: proc "contextless" (ctx: ^Context) {
	// `_key` is a byte array, so its size in bytes equals its length.
	mem.zero_explicit(&ctx._key, size_of(ctx._key))
	ctx._is_initialized = false
	ctx._key_len = 0
}
|
||||
452
core/crypto/aegis/aegis_impl_ct64.odin
Normal file
452
core/crypto/aegis/aegis_impl_ct64.odin
Normal file
@@ -0,0 +1,452 @@
|
||||
package aegis
|
||||
|
||||
import aes "core:crypto/_aes/ct64"
|
||||
import "core:encoding/endian"
|
||||
import "core:mem"
|
||||
|
||||
// This uses the bitsliced 64-bit general purpose register SWAR AES
|
||||
// round function. The intermediate state is stored in interleaved
|
||||
// but NOT orthogonalized form, as leaving things in the orthogonalized
|
||||
// format would overly complicate the update implementation.
|
||||
//
|
||||
// Note/perf: Per Frank Denis and a review of the specification, it is
|
||||
// possible to gain slightly more performance by leaving the state in
|
||||
// orthogonalized form while doing initialization, finalization, and
|
||||
// absorbing AAD. This implementation opts out of those optimizations
|
||||
// for the sake of simplicity.
|
||||
//
|
||||
// The update function leverages the parallelism by processing 4 blocks at once.
|
||||
|
||||
// State_SW is the working state of the portable (ct64) implementation.
@(private)
State_SW :: struct {
	// State blocks, each held as a pair of u64 words in the form produced
	// by aes.load_interleaved.  The AEGIS-256 code paths in this file only
	// touch s0..s5.
	s0_0, s0_1: u64,
	s1_0, s1_1: u64,
	s2_0, s2_1: u64,
	s3_0, s3_1: u64,
	s4_0, s4_1: u64,
	s5_0, s5_1: u64,
	s6_0, s6_1: u64,
	s7_0, s7_1: u64,
	// Scratch registers for the update functions: q_b is run through the
	// AES round, q_k is what add_round_key mixes into it.
	q_k, q_b:   [8]u64,
	// Bytes processed per update (_RATE_128L or _RATE_256), set by
	// init_sw; also used to dispatch between the two variants.
	rate:       int,
}
|
||||
|
||||
// init_sw loads the key from ctx and the iv into st and runs the
// initialization updates for the variant selected by ctx._key_len.
//
// A key length other than KEY_SIZE_128L / KEY_SIZE_256 leaves st
// untouched.  iv is read without length checks; the caller is expected to
// have validated it.
@(private)
init_sw :: proc "contextless" (ctx: ^Context, st: ^State_SW, iv: []byte) {
	switch ctx._key_len {
	case KEY_SIZE_128L:
		k_0, k_1 := aes.load_interleaved(ctx._key[:16])
		n_0, n_1 := aes.load_interleaved(iv)
		c0_0, c0_1 := aes.load_interleaved(_C0[:])
		c1_0, c1_1 := aes.load_interleaved(_C1[:])

		st.s0_0, st.s0_1 = aes.xor_interleaved(k_0, k_1, n_0, n_1) // key ^ iv
		st.s1_0, st.s1_1 = c1_0, c1_1
		st.s2_0, st.s2_1 = c0_0, c0_1
		st.s3_0, st.s3_1 = c1_0, c1_1
		st.s4_0, st.s4_1 = st.s0_0, st.s0_1 // key ^ iv
		st.s5_0, st.s5_1 = aes.xor_interleaved(k_0, k_1, c0_0, c0_1) // key ^ C0
		st.s6_0, st.s6_1 = aes.xor_interleaved(k_0, k_1, c1_0, c1_1) // key ^ C1
		st.s7_0, st.s7_1 = st.s5_0, st.s5_1 // key ^ C0
		st.rate = _RATE_128L

		for _ in 0 ..< 10 {
			update_sw_128l(st, n_0, n_1, k_0, k_1)
		}
	case KEY_SIZE_256:
		k0_0, k0_1 := aes.load_interleaved(ctx._key[:16])
		k1_0, k1_1 := aes.load_interleaved(ctx._key[16:])
		n0_0, n0_1 := aes.load_interleaved(iv[:16])
		n1_0, n1_1 := aes.load_interleaved(iv[16:])
		c0_0, c0_1 := aes.load_interleaved(_C0[:])
		c1_0, c1_1 := aes.load_interleaved(_C1[:])

		// Key halves XORed with the matching IV halves; these seed s0/s1
		// and are also fed back in as update inputs below.
		kn0_0, kn0_1 := aes.xor_interleaved(k0_0, k0_1, n0_0, n0_1)
		kn1_0, kn1_1 := aes.xor_interleaved(k1_0, k1_1, n1_0, n1_1)

		st.s0_0, st.s0_1 = kn0_0, kn0_1
		st.s1_0, st.s1_1 = kn1_0, kn1_1
		st.s2_0, st.s2_1 = c1_0, c1_1
		st.s3_0, st.s3_1 = c0_0, c0_1
		st.s4_0, st.s4_1 = aes.xor_interleaved(k0_0, k0_1, c0_0, c0_1) // k0 ^ C0
		st.s5_0, st.s5_1 = aes.xor_interleaved(k1_0, k1_1, c1_0, c1_1) // k1 ^ C1
		st.rate = _RATE_256

		for _ in 0 ..< 4 {
			update_sw_256(st, k0_0, k0_1)
			update_sw_256(st, k1_0, k1_1)
			update_sw_256(st, kn0_0, kn0_1)
			update_sw_256(st, kn1_0, kn1_1)
		}
	}
}
|
||||
|
||||
// update_sw_128l performs one state update of the 8-block (AEGIS-128L)
// state with the message blocks (m0_0, m0_1) and (m1_0, m1_1), given in
// interleaved form.
//
// The eight blocks are processed as two batches of four.  For each batch
// st.q_b holds the AES round inputs and st.q_k the values mixed in by
// add_round_key.  Every new block i is computed from the OLD values of
// blocks i-1 and i (with m0 folded into block 0 and m1 into block 4), so
// the write-back of block 3 is deferred until the second batch has read
// the old s3.
@(private = "file")
update_sw_128l :: proc "contextless" (st: ^State_SW, m0_0, m0_1, m1_0, m1_1: u64) {
	// One AES round (without a key schedule) over the batch in s.q_b,
	// then orthogonalize again so the lanes can be read back out.
	round_x4 :: #force_inline proc "contextless" (s: ^State_SW) {
		aes.sub_bytes(&s.q_b)
		aes.shift_rows(&s.q_b)
		aes.mix_columns(&s.q_b)
		aes.add_round_key(&s.q_b, s.q_k[:])
		aes.orthogonalize(&s.q_b)
	}

	// Batch 1: new s0..s3 from (s7, s0, s1, s2) and (s0 ^ m0, s1, s2, s3).
	st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s0_0, st.s0_1, m0_0, m0_1)
	st.q_k[1], st.q_k[5] = st.s1_0, st.s1_1
	st.q_k[2], st.q_k[6] = st.s2_0, st.s2_1
	st.q_k[3], st.q_k[7] = st.s3_0, st.s3_1
	aes.orthogonalize(&st.q_k)

	st.q_b[0], st.q_b[4] = st.s7_0, st.s7_1
	st.q_b[1], st.q_b[5] = st.s0_0, st.s0_1
	st.q_b[2], st.q_b[6] = st.s1_0, st.s1_1
	st.q_b[3], st.q_b[7] = st.s2_0, st.s2_1
	aes.orthogonalize(&st.q_b)

	round_x4(st)

	st.s0_0, st.s0_1 = st.q_b[0], st.q_b[4]
	st.s1_0, st.s1_1 = st.q_b[1], st.q_b[5]
	st.s2_0, st.s2_1 = st.q_b[2], st.q_b[6]
	new_s3_0, new_s3_1 := st.q_b[3], st.q_b[7] // Held back: batch 2 needs the old s3.

	// Batch 2: new s4..s7 from (s3, s4, s5, s6) and (s4 ^ m1, s5, s6, s7).
	st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s4_0, st.s4_1, m1_0, m1_1)
	st.q_k[1], st.q_k[5] = st.s5_0, st.s5_1
	st.q_k[2], st.q_k[6] = st.s6_0, st.s6_1
	st.q_k[3], st.q_k[7] = st.s7_0, st.s7_1
	aes.orthogonalize(&st.q_k)

	st.q_b[0], st.q_b[4] = st.s3_0, st.s3_1
	st.q_b[1], st.q_b[5] = st.s4_0, st.s4_1
	st.q_b[2], st.q_b[6] = st.s5_0, st.s5_1
	st.q_b[3], st.q_b[7] = st.s6_0, st.s6_1
	aes.orthogonalize(&st.q_b)

	round_x4(st)

	st.s3_0, st.s3_1 = new_s3_0, new_s3_1
	st.s4_0, st.s4_1 = st.q_b[0], st.q_b[4]
	st.s5_0, st.s5_1 = st.q_b[1], st.q_b[5]
	st.s6_0, st.s6_1 = st.q_b[2], st.q_b[6]
	st.s7_0, st.s7_1 = st.q_b[3], st.q_b[7]
}
|
||||
|
||||
// update_sw_256 performs one state update of the 6-block (AEGIS-256)
// state with the message block (m_0, m_1), given in interleaved form.
//
// The six blocks are processed as a batch of four followed by a batch of
// two.  Every new block i is computed from the OLD values of blocks i-1
// and i (with the message folded into block 0), so the write-back of
// block 3 is deferred until the second batch has read the old s3.
@(private = "file")
update_sw_256 :: proc "contextless" (st: ^State_SW, m_0, m_1: u64) {
	// One AES round (without a key schedule) over the batch in s.q_b,
	// then orthogonalize again so the lanes can be read back out.
	round_x4 :: #force_inline proc "contextless" (s: ^State_SW) {
		aes.sub_bytes(&s.q_b)
		aes.shift_rows(&s.q_b)
		aes.mix_columns(&s.q_b)
		aes.add_round_key(&s.q_b, s.q_k[:])
		aes.orthogonalize(&s.q_b)
	}

	// Batch 1: new s0..s3 from (s5, s0, s1, s2) and (s0 ^ m, s1, s2, s3).
	st.q_k[0], st.q_k[4] = aes.xor_interleaved(st.s0_0, st.s0_1, m_0, m_1)
	st.q_k[1], st.q_k[5] = st.s1_0, st.s1_1
	st.q_k[2], st.q_k[6] = st.s2_0, st.s2_1
	st.q_k[3], st.q_k[7] = st.s3_0, st.s3_1
	aes.orthogonalize(&st.q_k)

	st.q_b[0], st.q_b[4] = st.s5_0, st.s5_1
	st.q_b[1], st.q_b[5] = st.s0_0, st.s0_1
	st.q_b[2], st.q_b[6] = st.s1_0, st.s1_1
	st.q_b[3], st.q_b[7] = st.s2_0, st.s2_1
	aes.orthogonalize(&st.q_b)

	round_x4(st)

	st.s0_0, st.s0_1 = st.q_b[0], st.q_b[4]
	st.s1_0, st.s1_1 = st.q_b[1], st.q_b[5]
	st.s2_0, st.s2_1 = st.q_b[2], st.q_b[6]
	new_s3_0, new_s3_1 := st.q_b[3], st.q_b[7] // Held back: batch 2 needs the old s3.

	// Batch 2: new s4, s5 from (s3, s4) and (s4, s5).  Only lanes 0 and 1
	// are loaded; the other two lanes keep whatever batch 1 left in them
	// and their outputs are never read.
	st.q_k[0], st.q_k[4] = st.s4_0, st.s4_1
	st.q_k[1], st.q_k[5] = st.s5_0, st.s5_1
	aes.orthogonalize(&st.q_k)

	st.q_b[0], st.q_b[4] = st.s3_0, st.s3_1
	st.q_b[1], st.q_b[5] = st.s4_0, st.s4_1
	aes.orthogonalize(&st.q_b)

	round_x4(st)

	st.s3_0, st.s3_1 = new_s3_0, new_s3_1
	st.s4_0, st.s4_1 = st.q_b[0], st.q_b[4]
	st.s5_0, st.s5_1 = st.q_b[1], st.q_b[5]
}
|
||||
|
||||
// absorb_sw_128l feeds one 32-byte block of associated data into the
// state: bytes 0..15 and 16..31 of ai become the two update inputs.
@(private = "file")
absorb_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, ai: []byte) #no_bounds_check {
	lo_0, lo_1 := aes.load_interleaved(ai[:16])
	hi_0, hi_1 := aes.load_interleaved(ai[16:])
	update_sw_128l(st, lo_0, lo_1, hi_0, hi_1)
}
|
||||
|
||||
// absorb_sw_256 feeds one 16-byte block of associated data (the first 16
// bytes of ai) into the state.
@(private = "file")
absorb_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, ai: []byte) {
	blk_0, blk_1 := aes.load_interleaved(ai)
	update_sw_256(st, blk_0, blk_1)
}
|
||||
|
||||
// absorb_sw feeds the associated data into the state one rate-sized block
// at a time, zero-padding a trailing partial block up to the rate.
@(private)
absorb_sw :: proc "contextless" (st: ^State_SW, aad: []byte) #no_bounds_check {
	rest := aad

	// Whole blocks.
	switch st.rate {
	case _RATE_128L:
		for len(rest) >= _RATE_128L {
			absorb_sw_128l(st, rest)
			rest = rest[_RATE_128L:]
		}
	case _RATE_256:
		for len(rest) >= _RATE_256 {
			absorb_sw_256(st, rest)
			rest = rest[_RATE_256:]
		}
	}

	if len(rest) == 0 {
		return
	}

	// Pad out the remainder with `0`s till it is rate sized.
	padded: [_RATE_MAX]byte // AAD is not confidential.
	copy(padded[:], rest)
	switch st.rate {
	case _RATE_128L:
		absorb_sw_128l(st, padded[:])
	case _RATE_256:
		absorb_sw_256(st, padded[:])
	}
}
|
||||
|
||||
// z_sw_128l derives the two keystream blocks from the current state, in
// interleaved form:
//
//   z0 = s6 ^ s1 ^ (s2 & s3)
//   z1 = s2 ^ s5 ^ (s6 & s7)
//
// Returns (z0_0, z0_1, z1_0, z1_1).
@(private = "file", require_results)
z_sw_128l :: proc "contextless" (st: ^State_SW) -> (u64, u64, u64, u64) {
	z0_0 := st.s6_0 ~ (st.s1_0 ~ (st.s2_0 & st.s3_0))
	z0_1 := st.s6_1 ~ (st.s1_1 ~ (st.s2_1 & st.s3_1))

	z1_0 := st.s2_0 ~ (st.s5_0 ~ (st.s6_0 & st.s7_0))
	z1_1 := st.s2_1 ~ (st.s5_1 ~ (st.s6_1 & st.s7_1))

	return z0_0, z0_1, z1_0, z1_1
}
|
||||
|
||||
// z_sw_256 derives the keystream block from the current state, in
// interleaved form:
//
//   z = s1 ^ s4 ^ s5 ^ (s2 & s3)
@(private = "file", require_results)
z_sw_256 :: proc "contextless" (st: ^State_SW) -> (u64, u64) {
	z_0 := st.s1_0 ~ (st.s4_0 ~ (st.s5_0 ~ (st.s2_0 & st.s3_0)))
	z_1 := st.s1_1 ~ (st.s4_1 ~ (st.s5_1 ~ (st.s2_1 & st.s3_1)))
	return z_0, z_1
}
|
||||
|
||||
// enc_sw_128l encrypts one 32-byte block xi into ci.
//
// The keystream is taken from the state BEFORE the state is updated with
// the plaintext.  Both input halves are loaded before anything is stored,
// so ci and xi may be the same slice.
@(private = "file")
enc_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, ci, xi: []byte) #no_bounds_check {
	ks0_0, ks0_1, ks1_0, ks1_1 := z_sw_128l(st)

	pt0_0, pt0_1 := aes.load_interleaved(xi[:16])
	pt1_0, pt1_1 := aes.load_interleaved(xi[16:])
	update_sw_128l(st, pt0_0, pt0_1, pt1_0, pt1_1)

	ct0_0, ct0_1 := aes.xor_interleaved(pt0_0, pt0_1, ks0_0, ks0_1)
	ct1_0, ct1_1 := aes.xor_interleaved(pt1_0, pt1_1, ks1_0, ks1_1)
	aes.store_interleaved(ci[:16], ct0_0, ct0_1)
	aes.store_interleaved(ci[16:], ct1_0, ct1_1)
}
|
||||
|
||||
// enc_sw_256 encrypts one 16-byte block xi into ci.
//
// The keystream is taken from the state BEFORE the state is updated with
// the plaintext.  The input is loaded before anything is stored, so ci
// and xi may be the same slice.
@(private = "file")
enc_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, ci, xi: []byte) #no_bounds_check {
	ks_0, ks_1 := z_sw_256(st)

	pt_0, pt_1 := aes.load_interleaved(xi)
	update_sw_256(st, pt_0, pt_1)

	ct_0, ct_1 := aes.xor_interleaved(pt_0, pt_1, ks_0, ks_1)
	aes.store_interleaved(ci, ct_0, ct_1)
}
|
||||
|
||||
// enc_sw encrypts src into dst one rate-sized block at a time.  A trailing
// partial block is zero-padded to the rate, encrypted in a scratch buffer,
// and only the leading bytes that correspond to real input are copied out.
@(private)
enc_sw :: proc "contextless" (st: ^State_SW, dst, src: []byte) #no_bounds_check {
	ct, pt := dst, src

	// Whole blocks.
	switch st.rate {
	case _RATE_128L:
		for len(pt) >= _RATE_128L {
			enc_sw_128l(st, ct, pt)
			ct, pt = ct[_RATE_128L:], pt[_RATE_128L:]
		}
	case _RATE_256:
		for len(pt) >= _RATE_256 {
			enc_sw_256(st, ct, pt)
			ct, pt = ct[_RATE_256:], pt[_RATE_256:]
		}
	}

	tail := len(pt)
	if tail == 0 {
		return
	}

	// Pad out the remainder with `0`s till it is rate sized.
	tmp: [_RATE_MAX]byte // Ciphertext is not confidential.
	copy(tmp[:], pt)
	switch st.rate {
	case _RATE_128L:
		enc_sw_128l(st, tmp[:], tmp[:])
	case _RATE_256:
		enc_sw_256(st, tmp[:], tmp[:])
	}
	copy(ct, tmp[:tail])
}
|
||||
|
||||
// dec_sw_128l decrypts one 32-byte block ci into xi.
//
// The keystream is taken from the state first, the recovered plaintext is
// then fed into the state update, and finally written out.  Both input
// halves are loaded before anything is stored, so xi and ci may be the
// same slice.
@(private = "file")
dec_sw_128l :: #force_inline proc "contextless" (st: ^State_SW, xi, ci: []byte) #no_bounds_check {
	ks0_0, ks0_1, ks1_0, ks1_1 := z_sw_128l(st)

	ct0_0, ct0_1 := aes.load_interleaved(ci[:16])
	ct1_0, ct1_1 := aes.load_interleaved(ci[16:])
	pt0_0, pt0_1 := aes.xor_interleaved(ct0_0, ct0_1, ks0_0, ks0_1)
	pt1_0, pt1_1 := aes.xor_interleaved(ct1_0, ct1_1, ks1_0, ks1_1)

	update_sw_128l(st, pt0_0, pt0_1, pt1_0, pt1_1)
	aes.store_interleaved(xi[:16], pt0_0, pt0_1)
	aes.store_interleaved(xi[16:], pt1_0, pt1_1)
}
|
||||
|
||||
// dec_sw_256 decrypts one 16-byte block ci into xi.
//
// The keystream is taken from the state first, the recovered plaintext is
// then fed into the state update, and finally written out.  The input is
// loaded before anything is stored, so xi and ci may be the same slice.
@(private = "file")
dec_sw_256 :: #force_inline proc "contextless" (st: ^State_SW, xi, ci: []byte) #no_bounds_check {
	ks_0, ks_1 := z_sw_256(st)

	ct_0, ct_1 := aes.load_interleaved(ci)
	pt_0, pt_1 := aes.xor_interleaved(ct_0, ct_1, ks_0, ks_1)

	update_sw_256(st, pt_0, pt_1)
	aes.store_interleaved(xi, pt_0, pt_1)
}
|
||||
|
||||
// dec_partial_sw_128l decrypts a final block cn that is shorter than the
// 32-byte rate into xn.
//
// The ciphertext is zero-padded to a full block and XORed with the
// keystream; the first len(xn) bytes of the result are the plaintext.
// The bytes past len(xn) are then cleared again so that the state is
// updated with the zero-padded plaintext rather than with keystream
// bytes.  The scratch buffer holds plaintext, so it is wiped on return.
@(private = "file")
dec_partial_sw_128l :: proc "contextless" (st: ^State_SW, xn, cn: []byte) #no_bounds_check {
	buf: [_RATE_128L]byte
	defer mem.zero_explicit(&buf, size_of(buf))

	ks0_0, ks0_1, ks1_0, ks1_1 := z_sw_128l(st)
	copy(buf[:], cn)

	// Decrypt the padded block in place and hand out the real bytes.
	ct0_0, ct0_1 := aes.load_interleaved(buf[:16])
	ct1_0, ct1_1 := aes.load_interleaved(buf[16:])
	pt0_0, pt0_1 := aes.xor_interleaved(ct0_0, ct0_1, ks0_0, ks0_1)
	pt1_0, pt1_1 := aes.xor_interleaved(ct1_0, ct1_1, ks1_0, ks1_1)

	aes.store_interleaved(buf[:16], pt0_0, pt0_1)
	aes.store_interleaved(buf[16:], pt1_0, pt1_1)
	copy(xn, buf[:])

	// Re-pad with zeros, then absorb the padded plaintext.
	for i in len(xn) ..< _RATE_128L {
		buf[i] = 0
	}
	pt0_0, pt0_1 = aes.load_interleaved(buf[:16])
	pt1_0, pt1_1 = aes.load_interleaved(buf[16:])
	update_sw_128l(st, pt0_0, pt0_1, pt1_0, pt1_1)
}
|
||||
|
||||
// dec_partial_sw_256 handles a trailing input `cn` that is shorter than
// _RATE_256 bytes, writing the result to `xn`.
//
// The input is staged in a zero-initialised scratch block, XORed with
// the z_sw_256 output, and copied out.  The scratch bytes at and beyond
// len(xn) are then cleared again before the block is fed to
// update_sw_256.  The scratch buffer is wiped on return.
@(private = "file")
dec_partial_sw_256 :: proc "contextless" (st: ^State_SW, xn, cn: []byte) #no_bounds_check {
	scratch: [_RATE_256]byte
	defer mem.zero_explicit(&scratch, size_of(scratch))

	ks_0, ks_1 := z_sw_256(st)
	copy(scratch[:], cn)

	in_0, in_1 := aes.load_interleaved(scratch[:])
	res_0, res_1 := aes.xor_interleaved(in_0, in_1, ks_0, ks_1)

	aes.store_interleaved(scratch[:], res_0, res_1)
	copy(xn, scratch[:])

	// Clear everything past the bytes that were actually output.
	for i in len(xn) ..< _RATE_256 {
		scratch[i] = 0
	}

	v_0, v_1 := aes.load_interleaved(scratch[:])
	update_sw_256(st, v_0, v_1)
}
|
||||
|
||||
// dec_sw processes `src` into `dst` using the rate recorded in `st`.
// Full rate-sized blocks are handled by dec_sw_128l / dec_sw_256, and
// any shorter tail by the matching dec_partial_sw_* routine.  If
// st.rate is neither known rate, nothing is done.
@(private)
dec_sw :: proc "contextless" (st: ^State_SW, dst, src: []byte) #no_bounds_check {
	xi, ci := dst, src

	switch st.rate {
	case _RATE_128L:
		for len(ci) >= _RATE_128L {
			dec_sw_128l(st, xi, ci)
			xi, ci = xi[_RATE_128L:], ci[_RATE_128L:]
		}
		// Process the remainder.
		if len(ci) > 0 {
			dec_partial_sw_128l(st, xi, ci)
		}
	case _RATE_256:
		for len(ci) >= _RATE_256 {
			dec_sw_256(st, xi, ci)
			xi, ci = xi[_RATE_256:], ci[_RATE_256:]
		}
		// Process the remainder.
		if len(ci) > 0 {
			dec_partial_sw_256(st, xi, ci)
		}
	}
}
|
||||
|
||||
// finalize_sw runs the final state updates and writes the tag.
//
// A 16-byte block holding `ad_len * 8` and `msg_len * 8` as little-endian
// u64s is XORed with a state word (s2 for the 128L rate, s3 for the 256
// rate) and fed to the update routine 7 times.  The tag is then formed
// by XORing state words together; `len(tag)` selects between the
// TAG_SIZE_128 and TAG_SIZE_256 layouts.  Nothing is written for any
// other tag length.
@(private)
finalize_sw :: proc "contextless" (st: ^State_SW, tag: []byte, ad_len, msg_len: int) {
	len_block: [16]byte
	endian.unchecked_put_u64le(len_block[0:], u64(ad_len) * 8)
	endian.unchecked_put_u64le(len_block[8:], u64(msg_len) * 8)

	m_0, m_1 := aes.load_interleaved(len_block[:])

	lo_0, lo_1, hi_0, hi_1: u64 = ---, ---, ---, ---
	switch st.rate {
	case _RATE_128L:
		m_0, m_1 = aes.xor_interleaved(st.s2_0, st.s2_1, m_0, m_1)
		for _ in 0 ..< 7 {
			update_sw_128l(st, m_0, m_1, m_0, m_1)
		}

		// lo = s0 ^ s1 ^ s2 ^ s3
		lo_0, lo_1 = aes.xor_interleaved(st.s0_0, st.s0_1, st.s1_0, st.s1_1)
		lo_0, lo_1 = aes.xor_interleaved(lo_0, lo_1, st.s2_0, st.s2_1)
		lo_0, lo_1 = aes.xor_interleaved(lo_0, lo_1, st.s3_0, st.s3_1)

		// hi = s4 ^ s5 ^ s6, plus s7 only for the 256-bit tag.
		hi_0, hi_1 = aes.xor_interleaved(st.s4_0, st.s4_1, st.s5_0, st.s5_1)
		hi_0, hi_1 = aes.xor_interleaved(hi_0, hi_1, st.s6_0, st.s6_1)
		if len(tag) == TAG_SIZE_256 {
			hi_0, hi_1 = aes.xor_interleaved(hi_0, hi_1, st.s7_0, st.s7_1)
		}
	case _RATE_256:
		m_0, m_1 = aes.xor_interleaved(st.s3_0, st.s3_1, m_0, m_1)
		for _ in 0 ..< 7 {
			update_sw_256(st, m_0, m_1)
		}

		// lo = s0 ^ s1 ^ s2
		lo_0, lo_1 = aes.xor_interleaved(st.s0_0, st.s0_1, st.s1_0, st.s1_1)
		lo_0, lo_1 = aes.xor_interleaved(lo_0, lo_1, st.s2_0, st.s2_1)

		// hi = s3 ^ s4 ^ s5
		hi_0, hi_1 = aes.xor_interleaved(st.s3_0, st.s3_1, st.s4_0, st.s4_1)
		hi_0, hi_1 = aes.xor_interleaved(hi_0, hi_1, st.s5_0, st.s5_1)
	}

	tag_len := len(tag)
	if tag_len == TAG_SIZE_128 {
		// Short tag: both halves folded together.
		lo_0, lo_1 = aes.xor_interleaved(lo_0, lo_1, hi_0, hi_1)
		aes.store_interleaved(tag, lo_0, lo_1)
	} else if tag_len == TAG_SIZE_256 {
		// Long tag: the two halves back to back.
		aes.store_interleaved(tag[:16], lo_0, lo_1)
		aes.store_interleaved(tag[16:], hi_0, hi_1)
	}
}
|
||||
|
||||
// reset_state_sw overwrites the whole State_SW with zeroes using
// mem.zero_explicit.
@(private)
reset_state_sw :: proc "contextless" (st: ^State_SW) {
	mem.zero_explicit(st, size_of(State_SW))
}
|
||||
44
core/crypto/aegis/aegis_impl_hw_gen.odin
Normal file
44
core/crypto/aegis/aegis_impl_hw_gen.odin
Normal file
@@ -0,0 +1,44 @@
|
||||
#+build !amd64
|
||||
package aegis
|
||||
|
||||
// Message passed to panic_contextless by the hardware stubs in this file.
@(private = "file")
|
||||
ERR_HW_NOT_SUPPORTED :: "crypto/aegis: hardware implementation unsupported"
|
||||
|
||||
// Empty placeholder for the hardware state; this file is built only when
// the target is not amd64 (see the `#+build !amd64` tag).
@(private)
|
||||
State_HW :: struct {}
|
||||
|
||||
// is_hardware_accelerated returns true iff hardware accelerated AEGIS
// is supported.  In this build it always reports false.
is_hardware_accelerated :: proc "contextless" () -> bool {
	return false
}
|
||||
|
||||
// init_hw is a stub: it unconditionally panics with
// ERR_HW_NOT_SUPPORTED and never touches its arguments.
@(private)
init_hw :: proc "contextless" (ctx: ^Context, st: ^State_HW, iv: []byte) {
	panic_contextless(ERR_HW_NOT_SUPPORTED)
}
|
||||
|
||||
// absorb_hw is a stub: it unconditionally panics with
// ERR_HW_NOT_SUPPORTED and never touches its arguments.
@(private)
absorb_hw :: proc "contextless" (st: ^State_HW, aad: []byte) {
	panic_contextless(ERR_HW_NOT_SUPPORTED)
}
|
||||
|
||||
// enc_hw is a stub: it unconditionally panics with
// ERR_HW_NOT_SUPPORTED and never touches its arguments.
@(private)
enc_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) {
	panic_contextless(ERR_HW_NOT_SUPPORTED)
}
|
||||
|
||||
// dec_hw is a stub: it unconditionally panics with
// ERR_HW_NOT_SUPPORTED and never touches its arguments.
@(private)
dec_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) {
	panic_contextless(ERR_HW_NOT_SUPPORTED)
}
|
||||
|
||||
// finalize_hw is a stub: it unconditionally panics with
// ERR_HW_NOT_SUPPORTED and never touches its arguments.
@(private)
finalize_hw :: proc "contextless" (st: ^State_HW, tag: []byte, ad_len, msg_len: int) {
	panic_contextless(ERR_HW_NOT_SUPPORTED)
}
|
||||
|
||||
// reset_state_hw is a stub: it unconditionally panics with
// ERR_HW_NOT_SUPPORTED and never touches its argument.
@(private)
reset_state_hw :: proc "contextless" (st: ^State_HW) {
	panic_contextless(ERR_HW_NOT_SUPPORTED)
}
|
||||
389
core/crypto/aegis/aegis_impl_hw_intel.odin
Normal file
389
core/crypto/aegis/aegis_impl_hw_intel.odin
Normal file
@@ -0,0 +1,389 @@
|
||||
#+build amd64
|
||||
package aegis
|
||||
|
||||
import "base:intrinsics"
|
||||
import "core:crypto/aes"
|
||||
import "core:encoding/endian"
|
||||
import "core:mem"
|
||||
import "core:simd/x86"
|
||||
|
||||
// State_HW holds eight 128-bit state words plus the rate in use.
// The update routine for the 256 rate reads and writes only s0..s5.
@(private)
State_HW :: struct {
	s0, s1, s2, s3, s4, s5, s6, s7: x86.__m128i,
	rate:                           int,
}
|
||||
|
||||
// is_hardware_accelerated returns true iff hardware accelerated AEGIS
// is supported.  The answer is whatever aes.is_hardware_accelerated
// reports.
is_hardware_accelerated :: proc "contextless" () -> bool {
	supported := aes.is_hardware_accelerated()
	return supported
}
|
||||
|
||||
// init_hw loads the initial state from the key stored in `ctx` and the
// supplied `iv`, then runs the initial update rounds.  The variant is
// selected by ctx._key_len; any other key length leaves `st` untouched.
//
//  - KEY_SIZE_128L: one 16-byte key word and one 16-byte iv word,
//    followed by 10 calls of update_hw_128l(st, iv, key).
//  - KEY_SIZE_256: two key words and two iv words, followed by 4 rounds
//    of update_hw_256 with k0, k1, k0^n0, k1^n1.
@(private, enable_target_feature = "sse2,aes")
init_hw :: proc "contextless" (ctx: ^Context, st: ^State_HW, iv: []byte) {
	switch ctx._key_len {
	case KEY_SIZE_128L:
		key := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[0]))
		nonce := intrinsics.unaligned_load((^x86.__m128i)(raw_data(iv)))
		c1 := intrinsics.unaligned_load((^x86.__m128i)(&_C1[0]))
		c0 := intrinsics.unaligned_load((^x86.__m128i)(&_C0[0]))

		key_nonce := x86._mm_xor_si128(key, nonce)
		key_c0 := x86._mm_xor_si128(key, c0)
		key_c1 := x86._mm_xor_si128(key, c1)

		st.s0 = key_nonce
		st.s1 = c1
		st.s2 = c0
		st.s3 = c1
		st.s4 = key_nonce
		st.s5 = key_c0
		st.s6 = key_c1
		st.s7 = key_c0
		st.rate = _RATE_128L

		for _ in 0 ..< 10 {
			update_hw_128l(st, nonce, key)
		}
	case KEY_SIZE_256:
		k0 := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[0]))
		k1 := intrinsics.unaligned_load((^x86.__m128i)(&ctx._key[16]))
		n0 := intrinsics.unaligned_load((^x86.__m128i)(&iv[0]))
		n1 := intrinsics.unaligned_load((^x86.__m128i)(&iv[16]))
		c1 := intrinsics.unaligned_load((^x86.__m128i)(&_C1[0]))
		c0 := intrinsics.unaligned_load((^x86.__m128i)(&_C0[0]))

		kn0 := x86._mm_xor_si128(k0, n0)
		kn1 := x86._mm_xor_si128(k1, n1)

		st.s0 = kn0
		st.s1 = kn1
		st.s2 = c1
		st.s3 = c0
		st.s4 = x86._mm_xor_si128(k0, c0)
		st.s5 = x86._mm_xor_si128(k1, c1)
		st.rate = _RATE_256

		for _ in 0 ..< 4 {
			update_hw_256(st, k0)
			update_hw_256(st, k1)
			update_hw_256(st, kn0)
			update_hw_256(st, kn1)
		}
	}
}
|
||||
|
||||
// update_hw_128l advances the eight-word state by one step with the
// message words m0 and m1.
//
// Every new word is `aesenc(previous word, this word)`, with m0 XORed
// into s0 and m1 into s4 beforehand; the word before s0 is s7.  The
// words are rewritten in place from s7 down to s0, so each assignment
// still sees the old value of its lower-numbered neighbour.  Only the
// old s7 has to be saved, because s0 is written last and needs it.
@(private = "file", enable_target_feature = "sse2,aes")
update_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, m0, m1: x86.__m128i) {
	prev_s7 := st.s7

	st.s7 = x86._mm_aesenc_si128(st.s6, prev_s7)
	st.s6 = x86._mm_aesenc_si128(st.s5, st.s6)
	st.s5 = x86._mm_aesenc_si128(st.s4, st.s5)
	st.s4 = x86._mm_aesenc_si128(st.s3, x86._mm_xor_si128(st.s4, m1))
	st.s3 = x86._mm_aesenc_si128(st.s2, st.s3)
	st.s2 = x86._mm_aesenc_si128(st.s1, st.s2)
	st.s1 = x86._mm_aesenc_si128(st.s0, st.s1)
	st.s0 = x86._mm_aesenc_si128(prev_s7, x86._mm_xor_si128(st.s0, m0))
}
|
||||
|
||||
// update_hw_256 advances the six-word state (s0..s5) by one step with
// the message word m.
//
// Every new word is `aesenc(previous word, this word)`, with m XORed
// into s0 beforehand; the word before s0 is s5.  The words are rewritten
// in place from s5 down to s0, so each assignment still sees the old
// value of its lower-numbered neighbour.  Only the old s5 has to be
// saved, because s0 is written last and needs it.
@(private = "file", enable_target_feature = "sse2,aes")
update_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, m: x86.__m128i) {
	prev_s5 := st.s5

	st.s5 = x86._mm_aesenc_si128(st.s4, prev_s5)
	st.s4 = x86._mm_aesenc_si128(st.s3, st.s4)
	st.s3 = x86._mm_aesenc_si128(st.s2, st.s3)
	st.s2 = x86._mm_aesenc_si128(st.s1, st.s2)
	st.s1 = x86._mm_aesenc_si128(st.s0, st.s1)
	st.s0 = x86._mm_aesenc_si128(prev_s5, x86._mm_xor_si128(st.s0, m))
}
|
||||
|
||||
// absorb_hw_128l feeds the 16-byte words at offsets 0 and 16 of `ai`
// into update_hw_128l.
@(private = "file", enable_target_feature = "sse2,aes")
absorb_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, ai: []byte) {
	update_hw_128l(
		st,
		intrinsics.unaligned_load((^x86.__m128i)(&ai[0])),
		intrinsics.unaligned_load((^x86.__m128i)(&ai[16])),
	)
}
|
||||
|
||||
// absorb_hw_256 feeds the 16-byte word at the start of `ai` into
// update_hw_256.
@(private = "file", enable_target_feature = "sse2,aes")
absorb_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, ai: []byte) {
	update_hw_256(st, intrinsics.unaligned_load((^x86.__m128i)(&ai[0])))
}
|
||||
|
||||
// absorb_hw feeds `aad` into the state one rate-sized block at a time.
// A trailing partial block is copied into a zeroed _RATE_MAX buffer and
// absorbed as a full block.  If st.rate is neither known rate, the state
// is left unchanged.
@(private, enable_target_feature = "sse2,aes")
absorb_hw :: proc "contextless" (st: ^State_HW, aad: []byte) #no_bounds_check {
	rest := aad
	rate := st.rate

	if rate == _RATE_128L {
		for len(rest) >= _RATE_128L {
			absorb_hw_128l(st, rest)
			rest = rest[_RATE_128L:]
		}
	} else if rate == _RATE_256 {
		for len(rest) >= _RATE_256 {
			absorb_hw_256(st, rest)
			rest = rest[_RATE_256:]
		}
	}

	if len(rest) == 0 {
		return
	}

	// Pad out the remainder with `0`s till it is rate sized.
	padded: [_RATE_MAX]byte // AAD is not confidential.
	copy(padded[:], rest)
	if rate == _RATE_128L {
		absorb_hw_128l(st, padded[:])
	} else if rate == _RATE_256 {
		absorb_hw_256(st, padded[:])
	}
}
|
||||
|
||||
// z_hw_128l returns two 128-bit words computed from the current state:
//
//   z0 = s6 ^ s1 ^ (s2 & s3)
//   z1 = s2 ^ s5 ^ (s6 & s7)
//
// The state is only read, never modified.
@(private = "file", enable_target_feature = "sse2", require_results)
z_hw_128l :: #force_inline proc "contextless" (st: ^State_HW) -> (x86.__m128i, x86.__m128i) {
	and_23 := x86._mm_and_si128(st.s2, st.s3)
	and_67 := x86._mm_and_si128(st.s6, st.s7)

	z0 := x86._mm_xor_si128(st.s6, x86._mm_xor_si128(st.s1, and_23))
	z1 := x86._mm_xor_si128(st.s2, x86._mm_xor_si128(st.s5, and_67))
	return z0, z1
}
|
||||
|
||||
// z_hw_256 returns one 128-bit word computed from the current state:
//
//   z = s1 ^ s4 ^ s5 ^ (s2 & s3)
//
// The state is only read, never modified.
@(private = "file", enable_target_feature = "sse2", require_results)
z_hw_256 :: #force_inline proc "contextless" (st: ^State_HW) -> x86.__m128i {
	acc := x86._mm_and_si128(st.s2, st.s3)
	acc = x86._mm_xor_si128(st.s5, acc)
	acc = x86._mm_xor_si128(st.s4, acc)
	return x86._mm_xor_si128(st.s1, acc)
}
|
||||
|
||||
// enc_hw_128l processes one full 32-byte block from `xi` into `ci`.
// The z_hw_128l output is taken before the state is updated with the
// two input words, and the stored output is the input XORed with it.
// Both input words are loaded before anything is stored, so `ci` and
// `xi` may refer to the same memory.
@(private = "file", enable_target_feature = "sse2,aes")
enc_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, ci, xi: []byte) #no_bounds_check {
	ks0, ks1 := z_hw_128l(st)

	in0 := intrinsics.unaligned_load((^x86.__m128i)(&xi[0]))
	in1 := intrinsics.unaligned_load((^x86.__m128i)(&xi[16]))
	update_hw_128l(st, in0, in1)

	intrinsics.unaligned_store((^x86.__m128i)(&ci[0]), x86._mm_xor_si128(in0, ks0))
	intrinsics.unaligned_store((^x86.__m128i)(&ci[16]), x86._mm_xor_si128(in1, ks1))
}
|
||||
|
||||
// enc_hw_256 processes one full 16-byte block from `xi` into `ci`.
// The z_hw_256 output is taken before the state is updated with the
// input word, and the stored output is the input XORed with it.
@(private = "file", enable_target_feature = "sse2,aes")
enc_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, ci, xi: []byte) #no_bounds_check {
	ks := z_hw_256(st)

	input := intrinsics.unaligned_load((^x86.__m128i)(raw_data(xi)))
	update_hw_256(st, input)

	intrinsics.unaligned_store((^x86.__m128i)(raw_data(ci)), x86._mm_xor_si128(input, ks))
}
|
||||
|
||||
// enc_hw processes `src` into `dst` using the rate recorded in `st`.
// Full rate-sized blocks go straight through enc_hw_128l / enc_hw_256.
// A trailing partial block is copied into a zeroed _RATE_MAX buffer,
// processed in place as a full block, and only as many bytes as were
// left over are copied out to `dst`.
@(private, enable_target_feature = "sse2,aes")
enc_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) #no_bounds_check {
	ci, xi := dst, src
	rate := st.rate

	if rate == _RATE_128L {
		for len(xi) >= _RATE_128L {
			enc_hw_128l(st, ci, xi)
			ci, xi = ci[_RATE_128L:], xi[_RATE_128L:]
		}
	} else if rate == _RATE_256 {
		for len(xi) >= _RATE_256 {
			enc_hw_256(st, ci, xi)
			ci, xi = ci[_RATE_256:], xi[_RATE_256:]
		}
	}

	tail := len(xi)
	if tail == 0 {
		return
	}

	// Pad out the remainder with `0`s till it is rate sized.
	padded: [_RATE_MAX]byte // Ciphertext is not confidential.
	copy(padded[:], xi)
	if rate == _RATE_128L {
		enc_hw_128l(st, padded[:], padded[:])
	} else if rate == _RATE_256 {
		enc_hw_256(st, padded[:], padded[:])
	}
	copy(ci, padded[:tail])
}
|
||||
|
||||
// dec_hw_128l processes one full 32-byte block from `ci` into `xi`.
// The input words are XORed with the z_hw_128l output; the result is
// fed to update_hw_128l and then stored.  Both loads precede both
// stores.
@(private = "file", enable_target_feature = "sse2,aes")
dec_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, xi, ci: []byte) #no_bounds_check {
	ks0, ks1 := z_hw_128l(st)

	res0 := x86._mm_xor_si128(intrinsics.unaligned_load((^x86.__m128i)(&ci[0])), ks0)
	res1 := x86._mm_xor_si128(intrinsics.unaligned_load((^x86.__m128i)(&ci[16])), ks1)

	update_hw_128l(st, res0, res1)

	intrinsics.unaligned_store((^x86.__m128i)(&xi[0]), res0)
	intrinsics.unaligned_store((^x86.__m128i)(&xi[16]), res1)
}
|
||||
|
||||
// dec_hw_256 processes one full 16-byte block from `ci` into `xi`.
// The input word is XORed with the z_hw_256 output; the result is fed
// to update_hw_256 and then stored.
@(private = "file", enable_target_feature = "sse2,aes")
dec_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, xi, ci: []byte) #no_bounds_check {
	ks := z_hw_256(st)

	res := x86._mm_xor_si128(intrinsics.unaligned_load((^x86.__m128i)(raw_data(ci))), ks)

	update_hw_256(st, res)
	intrinsics.unaligned_store((^x86.__m128i)(raw_data(xi)), res)
}
|
||||
|
||||
// dec_partial_hw_128l handles a trailing input `cn` that is shorter than
// _RATE_128L bytes, writing the result to `xn`.
//
// The input is staged in a zero-initialised scratch block, XORed with
// the z_hw_128l output, and copied out.  The scratch bytes at and beyond
// len(xn) are then cleared again before the block is fed to
// update_hw_128l.  The scratch buffer is wiped on return.
@(private = "file", enable_target_feature = "sse2,aes")
dec_partial_hw_128l :: #force_inline proc "contextless" (st: ^State_HW, xn, cn: []byte) #no_bounds_check {
	scratch: [_RATE_128L]byte
	defer mem.zero_explicit(&scratch, size_of(scratch))

	// Views of the two 16-byte halves of the scratch block.
	lo := (^x86.__m128i)(&scratch[0])
	hi := (^x86.__m128i)(&scratch[16])

	ks0, ks1 := z_hw_128l(st)
	copy(scratch[:], cn)

	res0 := x86._mm_xor_si128(intrinsics.unaligned_load(lo), ks0)
	res1 := x86._mm_xor_si128(intrinsics.unaligned_load(hi), ks1)

	intrinsics.unaligned_store(lo, res0)
	intrinsics.unaligned_store(hi, res1)
	copy(xn, scratch[:])

	// Clear everything past the bytes that were actually output.
	for i in len(xn) ..< _RATE_128L {
		scratch[i] = 0
	}

	v0 := intrinsics.unaligned_load(lo)
	v1 := intrinsics.unaligned_load(hi)
	update_hw_128l(st, v0, v1)
}
|
||||
|
||||
// dec_partial_hw_256 handles a trailing input `cn` that is shorter than
// _RATE_256 bytes, writing the result to `xn`.
//
// The input is staged in a zero-initialised scratch block, XORed with
// the z_hw_256 output, and copied out.  The scratch bytes at and beyond
// len(xn) are then cleared again before the block is fed to
// update_hw_256.  The scratch buffer is wiped on return.
@(private = "file", enable_target_feature = "sse2,aes")
dec_partial_hw_256 :: #force_inline proc "contextless" (st: ^State_HW, xn, cn: []byte) #no_bounds_check {
	scratch: [_RATE_256]byte
	defer mem.zero_explicit(&scratch, size_of(scratch))

	word := (^x86.__m128i)(&scratch[0])

	ks := z_hw_256(st)
	copy(scratch[:], cn)

	res := x86._mm_xor_si128(intrinsics.unaligned_load(word), ks)

	intrinsics.unaligned_store(word, res)
	copy(xn, scratch[:])

	// Clear everything past the bytes that were actually output.
	for i in len(xn) ..< _RATE_256 {
		scratch[i] = 0
	}

	update_hw_256(st, intrinsics.unaligned_load(word))
}
|
||||
|
||||
// dec_hw processes `src` into `dst` using the rate recorded in `st`.
// Full rate-sized blocks are handled by dec_hw_128l / dec_hw_256, and
// any shorter tail by the matching dec_partial_hw_* routine.  If
// st.rate is neither known rate, nothing is done.
@(private, enable_target_feature = "sse2,aes")
dec_hw :: proc "contextless" (st: ^State_HW, dst, src: []byte) #no_bounds_check {
	xi, ci := dst, src

	switch st.rate {
	case _RATE_128L:
		for len(ci) >= _RATE_128L {
			dec_hw_128l(st, xi, ci)
			xi, ci = xi[_RATE_128L:], ci[_RATE_128L:]
		}
		// Process the remainder.
		if len(ci) > 0 {
			dec_partial_hw_128l(st, xi, ci)
		}
	case _RATE_256:
		for len(ci) >= _RATE_256 {
			dec_hw_256(st, xi, ci)
			xi, ci = xi[_RATE_256:], ci[_RATE_256:]
		}
		// Process the remainder.
		if len(ci) > 0 {
			dec_partial_hw_256(st, xi, ci)
		}
	}
}
|
||||
|
||||
// finalize_hw runs the final state updates and writes the tag.
//
// A 16-byte block holding `ad_len * 8` and `msg_len * 8` as little-endian
// u64s is XORed with a state word (s2 for the 128L rate, s3 for the 256
// rate) and fed to the update routine 7 times.  The tag is then formed
// by XORing state words together; `len(tag)` selects between the
// TAG_SIZE_128 and TAG_SIZE_256 layouts.  Nothing is written for any
// other tag length.
@(private, enable_target_feature = "sse2,aes")
finalize_hw :: proc "contextless" (st: ^State_HW, tag: []byte, ad_len, msg_len: int) {
	len_block: [16]byte
	endian.unchecked_put_u64le(len_block[0:], u64(ad_len) * 8)
	endian.unchecked_put_u64le(len_block[8:], u64(msg_len) * 8)

	m := intrinsics.unaligned_load((^x86.__m128i)(&len_block[0]))

	lo, hi: x86.__m128i = ---, ---
	switch st.rate {
	case _RATE_128L:
		m = x86._mm_xor_si128(st.s2, m)
		for _ in 0 ..< 7 {
			update_hw_128l(st, m, m)
		}

		// lo = s0 ^ s1 ^ s2 ^ s3
		lo = x86._mm_xor_si128(st.s0, st.s1)
		lo = x86._mm_xor_si128(lo, st.s2)
		lo = x86._mm_xor_si128(lo, st.s3)

		// hi = s4 ^ s5 ^ s6, plus s7 only for the 256-bit tag.
		hi = x86._mm_xor_si128(st.s4, st.s5)
		hi = x86._mm_xor_si128(hi, st.s6)
		if len(tag) == TAG_SIZE_256 {
			hi = x86._mm_xor_si128(hi, st.s7)
		}
	case _RATE_256:
		m = x86._mm_xor_si128(st.s3, m)
		for _ in 0 ..< 7 {
			update_hw_256(st, m)
		}

		// lo = s0 ^ s1 ^ s2
		lo = x86._mm_xor_si128(st.s0, st.s1)
		lo = x86._mm_xor_si128(lo, st.s2)

		// hi = s3 ^ s4 ^ s5
		hi = x86._mm_xor_si128(st.s3, st.s4)
		hi = x86._mm_xor_si128(hi, st.s5)
	}

	tag_len := len(tag)
	if tag_len == TAG_SIZE_128 {
		// Short tag: both halves folded together.
		lo = x86._mm_xor_si128(lo, hi)
		intrinsics.unaligned_store((^x86.__m128i)(&tag[0]), lo)
	} else if tag_len == TAG_SIZE_256 {
		// Long tag: the two halves back to back.
		intrinsics.unaligned_store((^x86.__m128i)(&tag[0]), lo)
		intrinsics.unaligned_store((^x86.__m128i)(&tag[16]), hi)
	}
}
|
||||
|
||||
// reset_state_hw overwrites the whole State_HW with zeroes using
// mem.zero_explicit.
@(private)
reset_state_hw :: proc "contextless" (st: ^State_HW) {
	mem.zero_explicit(st, size_of(State_HW))
}
|
||||
@@ -26,6 +26,7 @@ import topological_sort "core:container/topological_sort"
|
||||
|
||||
import crypto "core:crypto"
|
||||
import aead "core:crypto/aead"
|
||||
import aegis "core:crypto/aegis"
|
||||
import aes "core:crypto/aes"
|
||||
import blake2b "core:crypto/blake2b"
|
||||
import blake2s "core:crypto/blake2s"
|
||||
@@ -170,6 +171,7 @@ _ :: topological_sort
|
||||
_ :: crypto
|
||||
_ :: crypto_hash
|
||||
_ :: aead
|
||||
_ :: aegis
|
||||
_ :: aes
|
||||
_ :: blake2b
|
||||
_ :: blake2s
|
||||
|
||||
@@ -8,6 +8,7 @@ import "core:strings"
|
||||
import "core:testing"
|
||||
import "core:time"
|
||||
|
||||
import "core:crypto/aegis"
|
||||
import "core:crypto/aes"
|
||||
import "core:crypto/chacha20"
|
||||
import "core:crypto/chacha20poly1305"
|
||||
@@ -164,6 +165,43 @@ benchmark_crypto :: proc(t: ^testing.T) {
|
||||
testing.expect(t, err == nil, name)
|
||||
benchmark_print(&str, name, options)
|
||||
}
|
||||
{
|
||||
name := "AEGIS-256 64 bytes"
|
||||
options := &time.Benchmark_Options {
|
||||
rounds = 1_000,
|
||||
bytes = 64,
|
||||
setup = _setup_sized_buf,
|
||||
bench = _benchmark_aegis_256,
|
||||
teardown = _teardown_sized_buf,
|
||||
}
|
||||
|
||||
key := [aegis.KEY_SIZE_256]byte {
|
||||
0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
|
||||
0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
|
||||
0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
|
||||
0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
|
||||
}
|
||||
ctx: aegis.Context
|
||||
aegis.init(&ctx, key[:])
|
||||
|
||||
context.user_ptr = &ctx
|
||||
|
||||
err := time.benchmark(options, context.allocator)
|
||||
testing.expect(t, err == nil, name)
|
||||
benchmark_print(&str, name, options)
|
||||
|
||||
name = "AEGIS-256 1024 bytes"
|
||||
options.bytes = 1024
|
||||
err = time.benchmark(options, context.allocator)
|
||||
testing.expect(t, err == nil, name)
|
||||
benchmark_print(&str, name, options)
|
||||
|
||||
name = "AEGIS-256 65536 bytes"
|
||||
options.bytes = 65536
|
||||
err = time.benchmark(options, context.allocator)
|
||||
testing.expect(t, err == nil, name)
|
||||
benchmark_print(&str, name, options)
|
||||
}
|
||||
{
|
||||
iters :: 10000
|
||||
|
||||
@@ -423,6 +461,26 @@ _benchmark_aes256_gcm :: proc(
|
||||
return nil
|
||||
}
|
||||
|
||||
// _benchmark_aegis_256 is the `bench` callback for the AEGIS-256
// benchmarks.  It seals `options.input` in place `options.rounds` times
// using the aegis.Context found in context.user_ptr, an all-zero IV and
// no additional data, then records the round count and the number of
// bytes processed in `options`.
//
// `allocator` is unused by the body.  Always returns nil.
_benchmark_aegis_256 :: proc(
	options: ^time.Benchmark_Options,
	allocator := context.allocator,
) -> (
	err: time.Benchmark_Error,
) {
	buf := options.input
	iv: [aegis.IV_SIZE_256]byte
	tag: [aegis.TAG_SIZE_128]byte = ---

	ctx := (^aegis.Context)(context.user_ptr)

	// Half-open range: exactly `rounds` iterations, matching the count
	// and byte totals reported below (an inclusive range would do one
	// extra, unreported seal).
	for _ in 0 ..< options.rounds {
		aegis.seal(ctx, buf, tag[:], iv[:], nil, buf)
	}
	options.count = options.rounds
	options.processed = options.rounds * options.bytes
	return nil
}
|
||||
|
||||
@(private)
|
||||
benchmark_print :: proc(str: ^strings.Builder, name: string, options: ^time.Benchmark_Options, loc := #caller_location) {
|
||||
fmt.sbprintfln(str, "[%v] %v rounds, %v bytes processed in %v ns\n\t\t%5.3f rounds/s, %5.3f MiB/s\n",
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
package test_core_crypto
|
||||
|
||||
import "base:runtime"
|
||||
import "core:crypto/aes"
|
||||
import "core:crypto/aegis"
|
||||
import "core:crypto/aead"
|
||||
import "core:encoding/hex"
|
||||
import "core:testing"
|
||||
@@ -17,6 +19,10 @@ test_aead :: proc(t: ^testing.T) {
|
||||
for impl in supported_chacha_impls() {
|
||||
append(&chacha_impls, impl)
|
||||
}
|
||||
aegis_impls := make([dynamic]aead.Implementation, context.temp_allocator)
|
||||
for impl in supported_aegis_impls() {
|
||||
append(&aegis_impls, impl)
|
||||
}
|
||||
impls := [aead.Algorithm][dynamic]aead.Implementation{
|
||||
.Invalid = nil,
|
||||
.AES_GCM_128 = aes_impls,
|
||||
@@ -24,6 +30,10 @@ test_aead :: proc(t: ^testing.T) {
|
||||
.AES_GCM_256 = aes_impls,
|
||||
.CHACHA20POLY1305 = chacha_impls,
|
||||
.XCHACHA20POLY1305 = chacha_impls,
|
||||
.AEGIS_128L = aegis_impls,
|
||||
.AEGIS_128L_256 = aegis_impls,
|
||||
.AEGIS_256 = aegis_impls,
|
||||
.AEGIS_256_256 = aegis_impls,
|
||||
}
|
||||
|
||||
test_vectors := []struct{
|
||||
@@ -224,6 +234,190 @@ test_aead :: proc(t: ^testing.T) {
|
||||
"bd6d179d3e83d43b9576579493c0e939572a1700252bfaccbed2902c21396cbb731c7f1b0b4aa6440bf3a82f4eda7e39ae64c6708c54c216cb96b72e1213b4522f8c9ba40db5d945b11b69b982c1bb9e3f3fac2bc369488f76b2383565d3fff921f9664c97637da9768812f615c68b13b52e",
|
||||
"c0875924c1c7987947deafd8780acf49",
|
||||
},
|
||||
// AEGIS-128L
|
||||
// https://www.ietf.org/archive/id/draft-irtf-cfrg-aegis-aead-11.txt
|
||||
{
|
||||
.AEGIS_128L,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"",
|
||||
"00000000000000000000000000000000",
|
||||
"c1c0e58bd913006feba00f4b3cc3594e",
|
||||
"abe0ece80c24868a226a35d16bdae37a",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L_256,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"",
|
||||
"00000000000000000000000000000000",
|
||||
"c1c0e58bd913006feba00f4b3cc3594e",
|
||||
"25835bfbb21632176cf03840687cb968cace4617af1bd0f7d064c639a5c79ee4",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"c2b879a67def9d74e6c14f708bbcc9b4",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L_256,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"1360dc9db8ae42455f6e5b6a9d488ea4f2184c4e12120249335c4ee84bafe25d",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f",
|
||||
"79d94593d8c2119d7e8fd9b8fc77845c5c077a05b2528b6ac54b563aed8efe84",
|
||||
"cc6f3372f6aa1bb82388d695c3962d9a",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L_256,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f",
|
||||
"79d94593d8c2119d7e8fd9b8fc77845c5c077a05b2528b6ac54b563aed8efe84",
|
||||
"022cb796fe7e0ae1197525ff67e309484cfbab6528ddef89f17d74ef8ecd82b3",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d",
|
||||
"79d94593d8c2119d7e8fd9b8fc77",
|
||||
"5c04b3dba849b2701effbe32c7f0fab7",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L_256,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d",
|
||||
"79d94593d8c2119d7e8fd9b8fc77",
|
||||
"86f1b80bfb463aba711d15405d094baf4a55a15dbfec81a76f35ed0b9c8b04ac",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829",
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637",
|
||||
"b31052ad1cca4e291abcf2df3502e6bdb1bfd6db36798be3607b1f94d34478aa7ede7f7a990fec10",
|
||||
"7542a745733014f9474417b337399507",
|
||||
},
|
||||
{
|
||||
.AEGIS_128L_256,
|
||||
"10010000000000000000000000000000",
|
||||
"10000200000000000000000000000000",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829",
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637",
|
||||
"b31052ad1cca4e291abcf2df3502e6bdb1bfd6db36798be3607b1f94d34478aa7ede7f7a990fec10",
|
||||
"b91e2947a33da8bee89b6794e647baf0fc835ff574aca3fc27c33be0db2aff98",
|
||||
},
|
||||
// AEGIS-256
|
||||
// https://www.ietf.org/archive/id/draft-irtf-cfrg-aegis-aead-11.txt
|
||||
{
|
||||
.AEGIS_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"",
|
||||
"00000000000000000000000000000000",
|
||||
"754fc3d8c973246dcc6d741412a4b236",
|
||||
"3fe91994768b332ed7f570a19ec5896e",
|
||||
},
|
||||
{
|
||||
.AEGIS_256_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"",
|
||||
"00000000000000000000000000000000",
|
||||
"754fc3d8c973246dcc6d741412a4b236",
|
||||
"1181a1d18091082bf0266f66297d167d2e68b845f61a3b0527d31fc7b7b89f13",
|
||||
},
|
||||
{
|
||||
.AEGIS_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"e3def978a0f054afd1e761d7553afba3",
|
||||
},
|
||||
{
|
||||
.AEGIS_256_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
"6a348c930adbd654896e1666aad67de989ea75ebaa2b82fb588977b1ffec864a",
|
||||
},
|
||||
{
|
||||
.AEGIS_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f",
|
||||
"f373079ed84b2709faee373584585d60accd191db310ef5d8b11833df9dec711",
|
||||
"8d86f91ee606e9ff26a01b64ccbdd91d",
|
||||
},
|
||||
{
|
||||
.AEGIS_256_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f",
|
||||
"f373079ed84b2709faee373584585d60accd191db310ef5d8b11833df9dec711",
|
||||
"b7d28d0c3c0ebd409fd22b44160503073a547412da0854bfb9723020dab8da1a",
|
||||
},
|
||||
{
|
||||
.AEGIS_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d",
|
||||
"f373079ed84b2709faee37358458",
|
||||
"c60b9c2d33ceb058f96e6dd03c215652",
|
||||
},
|
||||
{
|
||||
.AEGIS_256_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"0001020304050607",
|
||||
"000102030405060708090a0b0c0d",
|
||||
"f373079ed84b2709faee37358458",
|
||||
"8c1cc703c81281bee3f6d9966e14948b4a175b2efbdc31e61a98b4465235c2d9",
|
||||
},
|
||||
{
|
||||
.AEGIS_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829",
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637",
|
||||
"57754a7d09963e7c787583a2e7b859bb24fa1e04d49fd550b2511a358e3bca252a9b1b8b30cc4a67",
|
||||
"ab8a7d53fd0e98d727accca94925e128",
|
||||
},
|
||||
{
|
||||
.AEGIS_256_256,
|
||||
"1001000000000000000000000000000000000000000000000000000000000000",
|
||||
"1000020000000000000000000000000000000000000000000000000000000000",
|
||||
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f20212223242526272829",
|
||||
"101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637",
|
||||
"57754a7d09963e7c787583a2e7b859bb24fa1e04d49fd550b2511a358e3bca252a9b1b8b30cc4a67",
|
||||
"a3aca270c006094d71c20e6910b5161c0826df233d08919a566ec2c05990f734",
|
||||
},
|
||||
}
|
||||
for v, _ in test_vectors {
|
||||
algo_name := aead.ALGORITHM_NAMES[v.algo]
|
||||
@@ -337,3 +531,13 @@ test_aead :: proc(t: ^testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// supported_aegis_impls returns the implementations to exercise in the
// AEGIS tests: always `.Portable`, followed by `.Hardware` when
// aegis.is_hardware_accelerated() reports true.  The returned dynamic
// array is allocated from context.temp_allocator.
supported_aegis_impls :: proc() -> [dynamic]aes.Implementation {
	available := make([dynamic]aes.Implementation, 0, 2, context.temp_allocator)

	append(&available, aes.Implementation.Portable)
	if !aegis.is_hardware_accelerated() {
		return available
	}

	append(&available, aes.Implementation.Hardware)
	return available
}
|
||||
|
||||
Reference in New Issue
Block a user