Merge pull request #6362 from Yawning/feature/argon2id

core/crypto/argon2id: Initial import
This commit is contained in:
Jeroen van Rijn
2026-03-04 14:27:13 +01:00
committed by GitHub
8 changed files with 855 additions and 43 deletions

View File

@@ -19,17 +19,12 @@ BLAKE2S_SIZE :: 32
BLAKE2B_BLOCK_SIZE :: 128
BLAKE2B_SIZE :: 64
MAX_SIZE :: 255
Blake2s_Context :: struct {
h: [8]u32,
t: [2]u32,
f: [2]u32,
x: [BLAKE2S_BLOCK_SIZE]byte,
nx: int,
ih: [8]u32,
padded_key: [BLAKE2S_BLOCK_SIZE]byte,
is_keyed: bool,
size: byte,
is_last_node: bool,
@@ -42,9 +37,6 @@ Blake2b_Context :: struct {
f: [2]u64,
x: [BLAKE2B_BLOCK_SIZE]byte,
nx: int,
ih: [8]u64,
padded_key: [BLAKE2B_BLOCK_SIZE]byte,
is_keyed: bool,
size: byte,
is_last_node: bool,
@@ -87,11 +79,12 @@ BLAKE2B_IV := [8]u64 {
init :: proc "contextless" (ctx: ^$T, cfg: ^Blake2_Config) {
when T == Blake2s_Context {
max_size :: BLAKE2S_SIZE
MAX_SIZE :: BLAKE2S_SIZE
} else when T == Blake2b_Context {
max_size :: BLAKE2B_SIZE
MAX_SIZE :: BLAKE2B_SIZE
}
ensure_contextless(cfg.size <= max_size, "blake2: requested output size exceeeds algorithm max")
ensure_contextless(cfg.size <= MAX_SIZE, "blake2: requested output size exceeeds algorithm max")
ensure_contextless(len(cfg.key) <= MAX_SIZE, "blake2: requested key size exceeeds algorithm max")
// To save having to allocate a scratch buffer, use the internal
// data buffer (`ctx.x`), as it is exactly the correct size.
@@ -152,17 +145,11 @@ init :: proc "contextless" (ctx: ^$T, cfg: ^Blake2_Config) {
ctx.is_last_node = true
}
if len(cfg.key) > 0 {
copy(ctx.padded_key[:], cfg.key)
update(ctx, ctx.padded_key[:])
ctx.is_keyed = true
copy(ctx.x[:], cfg.key)
ctx.nx = len(ctx.x)
} else {
ctx.nx = 0
}
copy(ctx.ih[:], ctx.h[:])
copy(ctx.h[:], ctx.ih[:])
if ctx.is_keyed {
update(ctx, ctx.padded_key[:])
}
ctx.nx = 0
ctx.is_initialized = true
}
@@ -172,22 +159,22 @@ update :: proc "contextless" (ctx: ^$T, p: []byte) {
p := p
when T == Blake2s_Context {
block_size :: BLAKE2S_BLOCK_SIZE
BLOCK_SIZE :: BLAKE2S_BLOCK_SIZE
} else when T == Blake2b_Context {
block_size :: BLAKE2B_BLOCK_SIZE
BLOCK_SIZE :: BLAKE2B_BLOCK_SIZE
}
left := block_size - ctx.nx
left := BLOCK_SIZE - ctx.nx
if len(p) > left {
copy(ctx.x[ctx.nx:], p[:left])
p = p[left:]
blocks(ctx, ctx.x[:])
ctx.nx = 0
}
if len(p) > block_size {
n := len(p) &~ (block_size - 1)
if len(p) > BLOCK_SIZE {
n := len(p) &~ (BLOCK_SIZE - 1)
if n == len(p) {
n -= block_size
n -= BLOCK_SIZE
}
blocks(ctx, p[:n])
p = p[n:]
@@ -228,12 +215,6 @@ reset :: proc "contextless" (ctx: ^$T) {
@(private)
blake2s_final :: proc "contextless" (ctx: ^Blake2s_Context, hash: []byte) {
if ctx.is_keyed {
for i := 0; i < len(ctx.padded_key); i += 1 {
ctx.padded_key[i] = 0
}
}
dec := BLAKE2S_BLOCK_SIZE - u32(ctx.nx)
if ctx.t[0] < dec {
ctx.t[1] -= 1
@@ -254,17 +235,11 @@ blake2s_final :: proc "contextless" (ctx: ^Blake2s_Context, hash: []byte) {
for i := 0; i < BLAKE2S_SIZE / 4; i += 1 {
endian.unchecked_put_u32le(dst[i * 4:], ctx.h[i])
}
copy(hash, dst[:])
copy(hash, dst[:ctx.size])
}
@(private)
blake2b_final :: proc "contextless" (ctx: ^Blake2b_Context, hash: []byte) {
if ctx.is_keyed {
for i := 0; i < len(ctx.padded_key); i += 1 {
ctx.padded_key[i] = 0
}
}
dec := BLAKE2B_BLOCK_SIZE - u64(ctx.nx)
if ctx.t[0] < dec {
ctx.t[1] -= 1

View File

@@ -0,0 +1,622 @@
/*
package argon2id implements the Argon2id password hashing algorithm.
See: [[ https://datatracker.ietf.org/doc/rfc9106/ ]]
*/
package argon2id
import "core:crypto/blake2b"
import "core:encoding/endian"
import "core:math/bits"
import "core:mem"
// Implementation based on the RFC, Monocypher (CC0-1.0), and the reference
// code (CC0-1.0).
// MAX_INPUT_SIZE is the maximum size of the various inputs (password,
// salt, secret, ad) in bytes.
MAX_INPUT_SIZE :: (1 << 32) - 1
// MIN_PARALLELISM is the minimum allowed parallelism.
MIN_PARALLELISM :: 1
// MAX_PARALLELISM is the maximum allowed parallelism.
MAX_PARALLELISM :: (1 << 24) - 1
// MIN_TAG_SIZE is the minimum digest size in bytes.
MIN_TAG_SIZE :: 4
// MAX_TAG_SIZE is the maximum digest size in bytes.
MAX_TAG_SIZE :: (1 << 32) - 1
// RECOMMENDED_TAG_SIZE is the recommended tag size in bytes.
RECOMMENDED_TAG_SIZE :: 32 // 256-bits
// RECOMMENTED_TAG_SIZE is the original (misspelled) name of
// RECOMMENDED_TAG_SIZE. It is kept as an alias so that existing callers
// continue to compile; new code should use RECOMMENDED_TAG_SIZE.
RECOMMENTED_TAG_SIZE :: RECOMMENDED_TAG_SIZE
// RECOMMENDED_SALT_SIZE is the recommended salt size in bytes.
RECOMMENDED_SALT_SIZE :: 16 // 128-bits
// V_RFC9106 is the version number `v` that derive hashes into H_0.
@(private)
V_RFC9106 :: 0x13
// Y_ID is the Argon2 type `y` (2 for Argon2id). derive hashes it into
// H_0 and also places it in the Argon2i-style index block.
@(private)
Y_ID :: 0x02
// BLOCK_SIZE_BYTES is the size of one Argon2 memory block in bytes.
@(private)
BLOCK_SIZE_BYTES :: 1024
// BLOCK_SIZE_U64 is the size of one Argon2 memory block in 64-bit words
// (BLOCK_SIZE_BYTES / 8).
@(private)
BLOCK_SIZE_U64 :: 128
// PARAMS_RFC9106 is the first recommended "uniformly safe" parameter set
// per RFC 9106.
//
// Note: `memory_size` is expressed in KiB (see Parameters).
@(rodata)
PARAMS_RFC9106 := Parameters{
memory_size = 2 * 1024 * 1024, // 2 GiB
passes = 1,
parallelism = 4,
}
// PARAMS_RFC9106_SMALL is the second recommended "uniformly safe" parameter
// set per RFC 9106 tailored for memory constrained environments.
//
// Note: `memory_size` is expressed in KiB (see Parameters).
@(rodata)
PARAMS_RFC9106_SMALL := Parameters{
memory_size = 64 * 1024, // 64 MiB
passes = 3,
parallelism = 4,
}
// PARAMS_OWASP is one of the recommended parameter set from the OWASP
// Password Storage Cheat Sheet (as of 2026/02). The cheat sheet contains
// additional variations to this parameter set with various trade-offs
// between `memory_size` and `passes` that are intended to provide
// equivalent security.
//
// See: [[ https://cheatsheetseries.owasp.org/cheatsheets/Password_Storage_Cheat_Sheet.html ]]
@(rodata)
PARAMS_OWASP := Parameters{
memory_size = 19 * 1024, // 19 MiB
passes = 2,
parallelism = 1,
}
// PARAMS_OWASP_SMALL is equivalent in strength to PARAMS_OWASP, but
// trades off less memory use for more CPU usage (7 MiB and 5 passes
// instead of 19 MiB and 2 passes).
@(rodata)
PARAMS_OWASP_SMALL := Parameters{
memory_size = 7 * 1024, // 7 MiB
passes = 5,
parallelism = 1,
}
// Parameters is an Argon2id parameter set.
//
// derive panics if `parallelism` is outside
// [MIN_PARALLELISM, MAX_PARALLELISM], if `passes` is 0, or if
// `memory_size` is smaller than 8 * `parallelism`.
Parameters :: struct {
// Amount of working memory, in KiB (i.e. in 1024-byte blocks).
memory_size: u32, // m (KiB)
// Number of passes over the working memory.
passes: u32, // t
// Number of lanes.
parallelism: u32, // p
}
// Block is one 1024-byte Argon2 memory block, viewed as BLOCK_SIZE_U64
// 64-bit words.
@(private)
Block :: [BLOCK_SIZE_U64]u64
// derive invokes Argon2id with the specified parameter set and inputs,
// and outputs the derived key to dst.
//
// Inputs:
// - parameters: The memory size (KiB), pass count and lane count.
// - password: The password (P).
// - salt: The salt (S).
// - dst: The destination buffer; its length is the tag length (T).
// - secret: The optional secret value (K).
// - ad: The optional associated data (X).
// - sanitize: If true, the working memory is wiped before it is freed.
// - allocator: The allocator used for the working memory.
//
// Returns:
// - The allocator error if the working memory could not be allocated,
//   nil otherwise.
//
// This procedure panics if any input is larger than MAX_INPUT_SIZE, if
// len(dst) is outside [MIN_TAG_SIZE, MAX_TAG_SIZE], if the parallelism
// is outside [MIN_PARALLELISM, MAX_PARALLELISM], if passes is 0, if the
// memory size is smaller than 8 * parallelism, or if the memory size in
// bytes does not fit in an int.
//
// All lanes are processed serially on the calling thread.
@(require_results)
derive :: proc(
parameters: ^Parameters,
password: []byte, // P
salt: []byte, // S
dst: []byte,
secret: []byte = nil, // K (aka `pepper`)
ad: []byte = nil, // X
sanitize := true,
allocator := context.allocator, // Not temp as this can be large.
) -> mem.Allocator_Error #no_bounds_check {
// Validate the inputs. All lengths are hashed as 32-bit values, so
// anything that does not fit is rejected up front.
if u64(len(password)) > MAX_INPUT_SIZE {
panic("crypto/argon2id: invalid password size")
}
if u64(len(salt)) > MAX_INPUT_SIZE {
panic("crypto/argon2id: invalid salt size")
}
if u64(len(secret)) > MAX_INPUT_SIZE {
panic("crypto/argon2id: invalid secret size")
}
if u64(len(ad)) > MAX_INPUT_SIZE {
panic("crypto/argon2id: invalid ad size")
}
if l := u64(len(dst)); l > MAX_TAG_SIZE || l < MIN_TAG_SIZE {
panic("crypto/argon2id: invalid dst size")
}
p, t, m := parameters.parallelism, parameters.passes, u64(parameters.memory_size)
if p < MIN_PARALLELISM || p > MAX_PARALLELISM {
panic("crypto/argon2id: invalid parallelism")
}
if t < 1 {
panic("crypto/argon2id: invalid passes")
}
if m < 8 * u64(p) {
panic("crypto/argon2id: insufficient memory size")
}
// The allocation size below is computed as an int.
if m * BLOCK_SIZE_BYTES > u64(max(int)) {
panic("crypto/argon2id: excessive memory size")
}
// Allocate the memory as m' 1024-byte blocks, where m' is derived as:
// m' = 4 * p * floor (m / 4p)
//
// For p lanes, the memory is organized in a matrix B[i][j] of
// blocks with p rows (lanes) and q = m' / p columns.
m_ := 4 * u64(p) * (m / u64(4 * p))
b := mem.alloc_bytes_non_zeroed(
int(m_) * BLOCK_SIZE_BYTES,
alignment = mem.DEFAULT_PAGE_SIZE,
allocator = allocator,
) or_return
defer delete(b, allocator)
// block_buf holds the little-endian byte encoding of a single block.
block_buf: [BLOCK_SIZE_BYTES]byte = ---
// View the working memory as m' blocks, stored lane after lane.
blocks := ([^]Block)(raw_data(b))[:m_]
// Each lane (q blocks) is split into 4 equally sized segments.
segment_size := u32(m_ / u64(p) / 4)
lane_size := segment_size * 4
// Establish H_0 as the 64-byte value as shown below. If K, X, or S
// has zero length, it is just absent, but its length field remains.
//
// H_0 = H^(64)(LE32(p) || LE32(T) || LE32(m) || LE32(t) ||
// LE32(v) || LE32(y) || LE32(length(P)) || P ||
// LE32(length(S)) || S || LE32(length(K)) || K ||
// LE32(length(X)) || X)
{
ctx: blake2b.Context
blake2b.init(&ctx)
blake2b_update_u32le(&ctx, u32(p))
blake2b_update_u32le(&ctx, u32(len(dst)))
blake2b_update_u32le(&ctx, parameters.memory_size)
blake2b_update_u32le(&ctx, t)
blake2b_update_u32le(&ctx, V_RFC9106)
blake2b_update_u32le(&ctx, Y_ID)
blake2b_update_u32le(&ctx, u32(len(password)))
blake2b.update(&ctx, password)
blake2b_update_u32le(&ctx, u32(len(salt)))
blake2b.update(&ctx, salt)
blake2b_update_u32le(&ctx, u32(len(secret)))
blake2b.update(&ctx, secret)
blake2b_update_u32le(&ctx, u32(len(ad)))
blake2b.update(&ctx, ad)
// The 8 trailing bytes hold the LE32(column) || LE32(lane) suffix
// used when deriving the first two blocks of every lane.
h_0: [blake2b.DIGEST_SIZE+8]byte
blake2b.final(&ctx, h_0[:blake2b.DIGEST_SIZE])
// Compute B[i][0] for all i ranging from (and including) 0 to (not
// including) p.
//
// B[i][0] = H'^(1024)(H_0 || LE32(0) || LE32(i))
//
// Compute B[i][1] for all i ranging from (and including) 0 to (not
// including) p.
//
// B[i][1] = H'^(1024)(H_0 || LE32(1) || LE32(i))
for l in u32(0) ..< p {
for i in u32(0) ..< 2 {
endian.unchecked_put_u32le(h_0[blake2b.DIGEST_SIZE:], i) // LE32({0,1})
endian.unchecked_put_u32le(h_0[blake2b.DIGEST_SIZE+4:], l) // LE32(i)
h_prime(block_buf[:], h_0[:])
blk := &blocks[l * lane_size + i]
// Load the byte-oriented H' output as little-endian 64-bit words.
for j in 0 ..< BLOCK_SIZE_U64 {
blk[j] = endian.unchecked_get_u64le(block_buf[j*8:])
}
}
}
mem.zero_explicit(&h_0, size_of(h_0)) // No longer needed.
}
// Compute B[i][j] for all i ranging from (and including) 0 to (not
// including) p and for all j ranging from (and including) 2 to (not
// including) q. The computation MUST proceed slicewise
// (Section 3.4): first, blocks from slice 0 are computed for all
// lanes (in an arbitrary order of lanes), then blocks from slice 1
// are computed, etc. The block indices l and z are determined for
// each i, j differently for Argon2d, Argon2i, and Argon2id.
//
// B[i][j] = G(B[i][j-1], B[l][z])
//
// If the number of passes t is larger than 1, we repeat step 5. We
// compute B[i][0] and B[i][j] for all i ranging from (and including)
// 0 to (not including) p and for all j ranging from (and including)
// 1 to (not including) q. However, blocks are computed differently
// as the old value is XORed with the new one:
//
// B[i][0] = G(B[i][q-1], B[l][z]) XOR B[i][0];
// B[i][j] = G(B[i][j-1], B[l][z]) XOR B[i][j].
constant_time := true // Start with constant time indexing.
tmp, index_block: Block = ---, ---
for pass in u32(0) ..< t {
for slice in u32(0) ..< 4 {
// The first slice of the first pass has blocks 0 and 1
// pre-filled.
pass_offset: u32 = pass == 0 && slice == 0 ? 2 : 0
slice_offset := slice * segment_size
// 3.4.1.3. Argon2id
//
// If the pass number is 0 and the slice number is 0 or 1, then compute
// J_1 and J_2 as for Argon2i, else compute J_1 and J_2 as for Argon2d.
//
// constant_time is never set back to true, so every slice from
// slice 2 of pass 0 onwards uses the data-dependent indexing.
if slice == 2 {
constant_time = false
}
// Each segment can be processed in parallel, as long as
// each iteration of the loop completes before proceeding
// to the next. For simplicity we do this in serial
// instead of using threads.
for segment in u32(0) ..< u32(p) {
// Counter for the Argon2i index blocks; restarts for each segment.
index_ctr: u64 = 1
for block in pass_offset ..< segment_size {
// Current and previous blocks (indexes, not pointers)
lane_offset := segment * lane_size
segment_start := lane_offset + slice_offset
current := segment_start + block
// When segment_start is 0 this u32 subtraction wraps around;
// the addition in the switch below wraps it back in range.
previous := segment_start - 1
switch {
case block == 0 && slice_offset == 0:
// First block of the lane: the previous block is the last
// block of the same lane.
previous += lane_size
case:
previous += block
}
index_seed: u64
if constant_time {
// 3.4.1.2. Argon2i
//
// For each segment, we do the following. First, we compute the value Z
// as:
//
// Z= ( LE64(r) || LE64(l) || LE64(sl) || LE64(m') ||
// LE64(t) || LE64(y) )
//
// Figure 11: Input to Compute J1,J2 in Argon2i
//
// where
//
// r: the pass number
// l: the lane number
// sl: the slice number
// m': the total number of memory blocks
// t: the total number of passes
// y: the Argon2 type (0 for Argon2d, 1 for Argon2i, 2 for Argon2id)
//
// Then we compute:
//
// q/(128*SL) 1024-byte values
// G(ZERO(1024),G(ZERO(1024),
// Z || LE64(1) || ZERO(968) )),
// G(ZERO(1024),G(ZERO(1024),
// Z || LE64(2) || ZERO(968) )),... ,
// G(ZERO(1024),G(ZERO(1024),
// Z || LE64(q/(128*SL)) || ZERO(968) )),
//
// which are partitioned into q/(SL) 8-byte values X, which are viewed
// as X1||X2 and converted to J_1=int32(X1) and J_2=int32(X2).
//
// The values r, l, sl, m', t, y, and i are represented as 8 bytes in
// little endian.
//
// One index block yields 128 seeds, so it is (re)generated at the
// first block of the segment and then every 128 blocks.
if block == pass_offset || (block % 128) == 0 {
mem.zero(&index_block, size_of(index_block))
index_block[0] = u64(pass)
index_block[1] = u64(segment)
index_block[2] = u64(slice)
index_block[3] = u64(lane_size * p)
index_block[4] = u64(t) // passes
index_block[5] = Y_ID
index_block[6] = index_ctr
index_ctr += 1
// Apply G(ZERO(1024), .) twice. With an all-zero first
// operand, G reduces to g_rounds(R) XOR R.
copy(tmp[:], index_block[:])
g_rounds(&index_block)
xor_block(&index_block, &tmp)
copy(tmp[:], index_block[:])
g_rounds(&index_block)
xor_block(&index_block, &tmp)
}
index_seed = index_block[block % 128]
} else {
// 3.4.1.1. Argon2d
//
// J_1 is given by the first 32 bits of block B[i][j-1], while J_2 is
// given by the next 32 bits of block B[i][j-1]:
//
// J_1 = int32(extract(B[i][j-1], 0))
// J_2 = int32(extract(B[i][j-1], 1))
//
// Figure 10: Deriving J1,J2 in Argon2d
index_seed = blocks[previous][0]
}
// 3.4.2. Mapping J_1 and J_2 to Reference Block Index [l][z]
//
// The value of l = J_2 mod p gives the index of the lane from which the
// block will be taken. For the first pass (r=0) and the first slice
// (sl=0), the block is taken from the current lane.
//
// The set W contains the indices that are referenced according to the
// following rules:
//
// 1. If l is the current lane, then W includes the indices of all
// blocks in the last SL - 1 = 3 segments computed and finished, as
// well as the blocks computed in the current segment in the current
// pass excluding B[i][j-1].
//
// 2. If l is not the current lane, then W includes the indices of all
// blocks in the last SL - 1 = 3 segments computed and finished in
// lane l. If B[i][j] is the first block of a segment, then the
// very last index from W is excluded.
//
// Then take a block from W with a nonuniform distribution over [0, |W|)
// using the following mapping:
//
// J_1 -> |W|(1 - J_1^2 / 2^(64))
//
// Figure 12: Computing J1
//
// To avoid floating point computation, the following approximation is
// used:
//
// x = J_1^2 / 2^(32)
// y = (|W| * x) / 2^(32)
// zz = |W| - 1 - y
//
// Figure 13: Computing J1, Part 2
//
// Then take the zz-th index from W; it will be the z value for the
// reference block index [l][z].
next_slice: u32 = ((slice + 1) % 4) * segment_size
window_start, nb_segments: u32
// J_2 (upper 32 bits of the seed) selects the reference lane.
lane := u32(index_seed >> 32) % p
switch {
case pass == 0:
// Only the slices already completed in this pass are available.
nb_segments = slice
if slice == 0 {
lane = segment
}
case:
// The window starts right after the current slice and covers
// the 3 other segments of the lane.
window_start = next_slice
nb_segments = 3
}
window_size := nb_segments * segment_size
if lane == segment {
window_size += block - 1
} else if block == 0 {
// Adding 0xffffffff is a u32 subtraction of 1: the very last
// index of the window is excluded.
window_size += ~u32(0)
}
// J_1 (lower 32 bits of the seed) selects the block in the window.
j1 := index_seed & 0xffffffff
x := (j1 * j1) >> 32
y := (u64(window_size) * x) >> 32
z := (u64(window_size) - 1) - y
ref := u32((u64(window_start) + z) % u64(lane_size))
reference: u32 = lane * lane_size + ref
// Compute G(previous, reference) = g_rounds(R) XOR R, where
// R = previous XOR reference. On the first pass the result
// replaces the current block, on later passes it is XORed
// into the current block.
copy(tmp[:], blocks[previous][:])
xor_block(&tmp, &blocks[reference])
if pass == 0 {
copy(blocks[current][:], tmp[:])
} else {
xor_block(&blocks[current], &tmp)
}
g_rounds(&tmp)
xor_block(&blocks[current], &tmp)
}
}
}
}
mem.zero_explicit(&tmp, size_of(tmp))
mem.zero_explicit(&index_block, size_of(index_block))
// After t steps have been iterated, the final block C is computed
// as the XOR of the last column:
//
// C = B[0][q-1] XOR B[1][q-1] XOR ... XOR B[p-1][q-1]
//
// The XOR is accumulated in place, so once the loop is done
// last_block (the last block of the last lane) holds C.
idx := lane_size - 1
last_block := &blocks[idx]
for _ in 1 ..< p {
idx += lane_size
next_block := &blocks[idx]
xor_block(next_block, last_block)
last_block = next_block
}
// Serialize C as little-endian bytes for H'.
for v, i in last_block {
endian.unchecked_put_u64le(block_buf[i*8:], v)
}
// The output tag is computed as H'^T(C).
h_prime(dst, block_buf[:])
mem.zero_explicit(&block_buf, size_of(block_buf))
// Sanitize the working memory. While the RFC implies that this is
// optional ("enable the memory-wiping option in the library call"),
// the reference code defaults to enabling it.
//
// An opt-out is provided, as this can get somewhat expensive when
// m gets large.
if sanitize {
mem.zero_explicit(raw_data(b), len(b))
}
return nil
}
// xor_block XORs each 64-bit word of src into the corresponding word of
// dst. src is left unmodified.
@(private)
xor_block :: #force_inline proc(dst, src: ^Block) {
	for idx in 0 ..< len(src) {
		dst[idx] ~= src[idx]
	}
}
// blake2b_update_u32le feeds the 4-byte little-endian encoding of i into
// the BLAKE2b context.
@(private)
blake2b_update_u32le :: #force_inline proc(ctx: ^blake2b.Context, i: u32) {
	// Least significant byte first.
	le := [4]byte{byte(i), byte(i >> 8), byte(i >> 16), byte(i >> 24)}
	blake2b.update(ctx, le[:])
	// The encoded value can be derived from secret lengths; wipe it anyway.
	mem.zero_explicit(&le, size_of(le)) // Probably overkill.
}
// 3.3. Variable-Length Hash Function H'
//
// Let V_i be a 64-byte block and W_i be its first 32 bytes. Then we
// define function H' as follows:
//
// if T <= 64
// H'^T(A) = H^T(LE32(T)||A)
// else
// r = ceil(T/32)-2
// V_1 = H^(64)(LE32(T)||A)
// V_2 = H^(64)(V_1)
// ...
// V_r = H^(64)(V_{r-1})
// V_{r+1} = H^(T-32*r)(V_{r})
// H'^T(X) = W_1 || W_2 || ... || W_r || V_{r+1}
//
// Figure 8: Function H' for Tag and Initial Block Computations
// h_prime writes H'^T(src) to dst, where T = len(dst). For outputs longer
// than 64 bytes the chained digests are produced in place inside dst:
// each 64-byte digest overlaps the previous one by 32 bytes, so only the
// first half of every intermediate digest survives.
@(private)
h_prime :: proc(dst, src: []byte) {
	out_len := len(dst)

	// V_1 (or the entire output when T <= 64).
	ctx: blake2b.Context
	blake2b.init(&ctx, min(out_len, blake2b.DIGEST_SIZE))
	blake2b_update_u32le(&ctx, u32(out_len))
	blake2b.update(&ctx, src)
	blake2b.final(&ctx, dst)

	if out_len <= 64 {
		return
	}

	// r = ceil(T/32) - 2, with the sum done in 64 bits so that it can not
	// overflow for very large T.
	r := u32((u64(out_len) + 31) >> 5) - 2

	// V_2 .. V_r: full 64-byte digests of the previous digest.
	src_off, dst_off := 0, 32
	for _ in 1 ..< r {
		blake2b.init(&ctx, blake2b.DIGEST_SIZE)
		blake2b.update(&ctx, dst[src_off:src_off+64])
		blake2b.final(&ctx, dst[dst_off:])
		src_off += 32
		dst_off += 32
	}

	// V_{r+1}: the final, possibly shorter, digest fills the remainder.
	blake2b.init(&ctx, out_len - int(32 * r))
	blake2b.update(&ctx, dst[src_off:src_off+64])
	blake2b.final(&ctx, dst[dst_off:])
}
// GB(a, b, c, d) is defined as follows:
//
// a = (a + b + 2 * trunc(a) * trunc(b)) mod 2^(64)
// d = (d XOR a) >>> 32
// c = (c + d + 2 * trunc(c) * trunc(d)) mod 2^(64)
// b = (b XOR c) >>> 24
//
// a = (a + b + 2 * trunc(a) * trunc(b)) mod 2^(64)
// d = (d XOR a) >>> 16
// c = (c + d + 2 * trunc(c) * trunc(d)) mod 2^(64)
// b = (b XOR c) >>> 63
//
// Figure 19: Details of GB
//
// The modular additions in GB are combined with 64-bit multiplications.
// Multiplications are the only difference from the original BLAKE2b
// design. This choice is done to increase the circuit depth and thus
// the running time of ASIC implementations, while having roughly the
// same running time on CPUs thanks to parallelism and pipelining.
// gb is the Argon2 variant of the BLAKE2b quarter round. Each addition
// additionally mixes in twice the product of the low 32 bits of both
// operands; all arithmetic is modulo 2^64. The right rotations of the
// specification are expressed as the equivalent left rotations.
@(private,require_results)
gb :: #force_inline proc(a, b, c, d: u64) -> (u64, u64, u64, u64) {
	LOW_32 :: 0xffff_ffff

	wa, wb, wc, wd := a, b, c, d

	wa += wb + 2 * (wa & LOW_32) * (wb & LOW_32)
	wd = bits.rotate_left64(wd ~ wa, 32) // >>> 32
	wc += wd + 2 * (wc & LOW_32) * (wd & LOW_32)
	wb = bits.rotate_left64(wb ~ wc, 40) // >>> 24

	wa += wb + 2 * (wa & LOW_32) * (wb & LOW_32)
	wd = bits.rotate_left64(wd ~ wa, 48) // >>> 16
	wc += wd + 2 * (wc & LOW_32) * (wd & LOW_32)
	wb = bits.rotate_left64(wb ~ wc, 1) // >>> 63

	return wa, wb, wc, wd
}
// 3.6. Permutation P
//
// Permutation P is based on the round function of BLAKE2b. The eight
// 16-byte inputs S_0, S_1, ... , S_7 are viewed as a 4x4 matrix of
// 64-bit words, where S_i = (v_{2*i+1} || v_{2*i}):
//
// v_0 v_1 v_2 v_3
// v_4 v_5 v_6 v_7
// v_8 v_9 v_10 v_11
// v_12 v_13 v_14 v_15
//
// Figure 17: Matrix Element Labeling
//
// It works as follows:
//
// GB(v_0, v_4, v_8, v_12)
// GB(v_1, v_5, v_9, v_13)
// GB(v_2, v_6, v_10, v_14)
// GB(v_3, v_7, v_11, v_15)
//
// GB(v_0, v_5, v_10, v_15)
// GB(v_1, v_6, v_11, v_12)
// GB(v_2, v_7, v_8, v_13)
// GB(v_3, v_4, v_9, v_14)
//
// Figure 18: Feeding Matrix Elements to GB
// perm_p applies the permutation P to a 4x4 matrix of 64-bit words given
// in row-major order: gb is applied to the four columns, and then to the
// four diagonals.
@(private,require_results)
perm_p :: #force_inline proc(v_0, v_1, v_2, v_3, v_4, v_5, v_6, v_7, v_8, v_9, v_10, v_11, v_12, v_13, v_14, v_15: u64) -> (u64, u64, u64, u64, u64, u64, u64, u64, u64, u64, u64, u64, u64, u64, u64, u64) {
	s := [16]u64{v_0, v_1, v_2, v_3, v_4, v_5, v_6, v_7, v_8, v_9, v_10, v_11, v_12, v_13, v_14, v_15}

	// Columns: (0, 4, 8, 12), (1, 5, 9, 13), (2, 6, 10, 14), (3, 7, 11, 15).
	#unroll for col in 0 ..< 4 {
		s[col], s[col+4], s[col+8], s[col+12] = gb(s[col], s[col+4], s[col+8], s[col+12])
	}

	// Diagonals: (0, 5, 10, 15), (1, 6, 11, 12), (2, 7, 8, 13), (3, 4, 9, 14).
	#unroll for col in 0 ..< 4 {
		r1 := 4 + (col + 1) % 4
		r2 := 8 + (col + 2) % 4
		r3 := 12 + (col + 3) % 4
		s[col], s[r1], s[r2], s[r3] = gb(s[col], s[r1], s[r2], s[r3])
	}

	return s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[8], s[9], s[10], s[11], s[12], s[13], s[14], s[15]
}
// 3.5. Compression Function G
//
// The compression function G is built upon the BLAKE2b-based
// transformation P. P operates on the 128-byte input, which can be
// viewed as eight 16-byte registers:
//
// P(A_0, A_1, ... ,A_7) = (B_0, B_1, ... ,B_7)
//
// Figure 14: Blake Round Function P
//
// The compression function G(X, Y) operates on two 1024-byte blocks X
// and Y. It first computes R = X XOR Y. Then R is viewed as an 8x8
// matrix of 16-byte registers R_0, R_1, ... , R_63. Then P is first
// applied to each row, and then to each column to get Z:
//
// ( Q_0, Q_1, Q_2, ... , Q_7) <- P( R_0, R_1, R_2, ... , R_7)
// ( Q_8, Q_9, Q_10, ... , Q_15) <- P( R_8, R_9, R_10, ... , R_15)
// ...
// (Q_56, Q_57, Q_58, ... , Q_63) <- P(R_56, R_57, R_58, ... , R_63)
// ( Z_0, Z_8, Z_16, ... , Z_56) <- P( Q_0, Q_8, Q_16, ... , Q_56)
// ( Z_1, Z_9, Z_17, ... , Z_57) <- P( Q_1, Q_9, Q_17, ... , Q_57)
// ...
// ( Z_7, Z_15, Z_23, ... , Z_63) <- P( Q_7, Q_15, Q_23, ... , Q_63)
//
// Figure 15: Core of Compression Function G
// g_rounds applies the permutation P in place, first to each of the 8
// rows of the block (16 consecutive words each), and then to each of the
// 8 columns (one pair of adjacent words taken from every row).
//
// Note that this is only the permutation part of G; the XORs with the
// input are done by the caller.
@(private)
g_rounds :: proc(b: ^Block) {
	// Rows.
	for row in 0 ..< 8 {
		o := row * 16
		b[o], b[o+1], b[o+2], b[o+3], b[o+4], b[o+5], b[o+6], b[o+7], b[o+8], b[o+9], b[o+10], b[o+11], b[o+12], b[o+13], b[o+14], b[o+15] = perm_p(b[o], b[o+1], b[o+2], b[o+3], b[o+4], b[o+5], b[o+6], b[o+7], b[o+8], b[o+9], b[o+10], b[o+11], b[o+12], b[o+13], b[o+14], b[o+15])
	}

	// Columns.
	for col in 0 ..< 8 {
		o := col * 2
		b[o], b[o+1], b[o+16], b[o+17], b[o+32], b[o+33], b[o+48], b[o+49], b[o+64], b[o+65], b[o+80], b[o+81], b[o+96], b[o+97], b[o+112], b[o+113] = perm_p(b[o], b[o+1], b[o+16], b[o+17], b[o+32], b[o+33], b[o+48], b[o+49], b[o+64], b[o+65], b[o+80], b[o+81], b[o+96], b[o+97], b[o+112], b[o+113])
	}
}

View File

@@ -28,13 +28,24 @@ Context :: _blake2.Blake2b_Context
// init initializes a Context with the default BLAKE2b config.
//
// digest_size is the size of the digest in bytes, and must be between
// 1 and DIGEST_SIZE (inclusive). Any other value is a programmer error
// and will trigger the `ensure`.
init :: proc(ctx: ^Context, digest_size := DIGEST_SIZE) {
	// The lower bound matters: the size is narrowed to a u8 below, and a
	// zero or negative value would otherwise be silently truncated.
	ensure(digest_size >= 1 && digest_size <= DIGEST_SIZE, "crypto/blake2b: invalid digest size")
	cfg: _blake2.Blake2_Config
	cfg.size = u8(digest_size)
	_blake2.init(ctx, &cfg)
}
// init_mac initializes a Context with a user provided key.
//
// key must be at most DIGEST_SIZE bytes, and digest_size must be between
// 1 and DIGEST_SIZE (inclusive). Any other value is a programmer error
// and will trigger the `ensure`.
init_mac :: proc(ctx: ^Context, key: []byte, digest_size := DIGEST_SIZE) {
	// The lower bound matters: the size is narrowed to a u8 below, and a
	// zero or negative value would otherwise be silently truncated.
	ensure(digest_size >= 1 && digest_size <= DIGEST_SIZE, "crypto/blake2b: invalid digest size")
	ensure(len(key) <= DIGEST_SIZE, "crypto/blake2b: invalid key size")
	cfg: _blake2.Blake2_Config
	cfg.size = u8(digest_size)
	cfg.key = key
	_blake2.init(ctx, &cfg)
}
// update adds more data to the Context.
update :: proc(ctx: ^Context, data: []byte) {
_blake2.update(ctx, data)

View File

@@ -28,13 +28,24 @@ Context :: _blake2.Blake2s_Context
// init initializes a Context with the default BLAKE2s config.
//
// digest_size is the size of the digest in bytes, and must be between
// 1 and DIGEST_SIZE (inclusive). Any other value is a programmer error
// and will trigger the `ensure`.
init :: proc(ctx: ^Context, digest_size := DIGEST_SIZE) {
	// The lower bound matters: the size is narrowed to a u8 below, and a
	// zero or negative value would otherwise be silently truncated.
	ensure(digest_size >= 1 && digest_size <= DIGEST_SIZE, "crypto/blake2s: invalid digest size")
	cfg: _blake2.Blake2_Config
	cfg.size = u8(digest_size)
	_blake2.init(ctx, &cfg)
}
// init_mac initializes a Context with a user provided key.
//
// key must be at most DIGEST_SIZE bytes, and digest_size must be between
// 1 and DIGEST_SIZE (inclusive). Any other value is a programmer error
// and will trigger the `ensure`.
init_mac :: proc(ctx: ^Context, key: []byte, digest_size := DIGEST_SIZE) {
	// The lower bound matters: the size is narrowed to a u8 below, and a
	// zero or negative value would otherwise be silently truncated.
	ensure(digest_size >= 1 && digest_size <= DIGEST_SIZE, "crypto/blake2s: invalid digest size")
	ensure(len(key) <= DIGEST_SIZE, "crypto/blake2s: invalid key size")
	cfg: _blake2.Blake2_Config
	cfg.size = u8(digest_size)
	cfg.key = key
	_blake2.init(ctx, &cfg)
}
// update adds more data to the Context.
update :: proc(ctx: ^Context, data: []byte) {
_blake2.update(ctx, data)

View File

@@ -27,6 +27,7 @@ package all
@(require) import "core:crypto/aead"
@(require) import "core:crypto/aegis"
@(require) import "core:crypto/aes"
@(require) import "core:crypto/argon2id"
@(require) import "core:crypto/blake2b"
@(require) import "core:crypto/blake2s"
@(require) import "core:crypto/chacha20"

View File

@@ -32,6 +32,7 @@ package all
@(require) import "core:crypto/aead"
@(require) import "core:crypto/aegis"
@(require) import "core:crypto/aes"
@(require) import "core:crypto/argon2id"
@(require) import "core:crypto/blake2b"
@(require) import "core:crypto/blake2s"
@(require) import "core:crypto/chacha20"

View File

@@ -5,6 +5,9 @@ import "core:bytes"
import "core:encoding/hex"
import "core:strings"
import "core:testing"
import "core:crypto/blake2b"
import "core:crypto/blake2s"
import "core:crypto/hash"
@(test)
@@ -596,4 +599,139 @@ test_hash :: proc(t: ^testing.T) {
c_str,
)
}
}
}
// selftest_seq fills dst with a deterministic byte sequence derived from
// seed: a Fibonacci-style recurrence over u32 (wrapping on overflow), of
// which the most significant byte of each new term is emitted.
@(private="file")
selftest_seq :: proc(dst: []byte, seed: u32) {
	prev := 0xdead4bad * seed
	cur: u32 = 1
	for idx in 0 ..< len(dst) {
		next := prev + cur
		prev, cur = cur, next
		dst[idx] = byte(next >> 24)
	}
}
// test_blake2b_self hashes a series of deterministic inputs with several
// digest sizes, both unkeyed and keyed, feeds every resulting digest into
// an outer 32-byte BLAKE2b instance, and compares the outer digest
// against a known-good value.
@(test)
test_blake2b_self :: proc(t: ^testing.T) {
	// Expected digest of all of the individual digests.
	expected := []byte{
		0xC2, 0x3A, 0x78, 0x00, 0xD9, 0x81, 0x23, 0xBD,
		0x10, 0xF5, 0x06, 0xC6, 0x1E, 0x29, 0xDA, 0x56,
		0x03, 0xD7, 0x63, 0xB8, 0xBB, 0xAD, 0x2E, 0x73,
		0x7F, 0x5E, 0x76, 0x5A, 0x7B, 0xCC, 0xD4, 0x75,
	}

	md_lens := []int{20, 32, 48, 64}
	src_lens := []int{0, 3, 128, 129, 255, 1024}

	// One-shot unkeyed hash; the digest size is len(dst).
	hash_plain := proc(dst, src: []byte) {
		h: blake2b.Context
		blake2b.init(&h, len(dst))
		blake2b.update(&h, src)
		blake2b.final(&h, dst)
	}

	// One-shot keyed hash; the digest size is len(dst).
	hash_keyed := proc(dst, key, src: []byte) {
		h: blake2b.Context
		blake2b.init_mac(&h, key, len(dst))
		blake2b.update(&h, src)
		blake2b.final(&h, dst)
	}

	input: [1024]byte
	digest, key_buf: [64]byte

	outer: blake2b.Context
	blake2b.init(&outer, 32)

	for md_len in md_lens {
		out := digest[:md_len]
		for src_len in src_lens {
			msg := input[:src_len]
			selftest_seq(msg, u32(src_len))

			// Unkeyed.
			hash_plain(out, msg)
			blake2b.update(&outer, out)

			// Keyed, with a key as long as the digest.
			k := key_buf[:md_len]
			selftest_seq(k, u32(md_len))
			hash_keyed(out, k, msg)
			blake2b.update(&outer, out)
		}
	}

	result := digest[:32]
	blake2b.final(&outer, result)

	want := string(hex.encode(expected, context.temp_allocator))
	got := string(hex.encode(result, context.temp_allocator))
	testing.expectf(
		t,
		want == got,
		"blake2b/self-test: Expected: %s Got %s",
		want,
		got,
	)
}
// test_blake2s_self hashes a series of deterministic inputs with several
// digest sizes, both unkeyed and keyed, feeds every resulting digest into
// an outer BLAKE2s instance with the default digest size, and compares
// the outer digest against a known-good value.
@(test)
test_blake2s_self :: proc(t: ^testing.T) {
	// Expected digest of all of the individual digests.
	expected := []byte{
		0x6A, 0x41, 0x1F, 0x08, 0xCE, 0x25, 0xAD, 0xCD,
		0xFB, 0x02, 0xAB, 0xA6, 0x41, 0x45, 0x1C, 0xEC,
		0x53, 0xC5, 0x98, 0xB2, 0x4F, 0x4F, 0xC7, 0x87,
		0xFB, 0xDC, 0x88, 0x79, 0x7F, 0x4C, 0x1D, 0xFE,
	}

	md_lens := []int{16, 20, 28, 32}
	src_lens := []int{0, 3, 64, 65, 255, 1024}

	// One-shot unkeyed hash; the digest size is len(dst).
	hash_plain := proc(dst, src: []byte) {
		h: blake2s.Context
		blake2s.init(&h, len(dst))
		blake2s.update(&h, src)
		blake2s.final(&h, dst)
	}

	// One-shot keyed hash; the digest size is len(dst).
	hash_keyed := proc(dst, key, src: []byte) {
		h: blake2s.Context
		blake2s.init_mac(&h, key, len(dst))
		blake2s.update(&h, src)
		blake2s.final(&h, dst)
	}

	input: [1024]byte
	digest, key_buf: [32]byte

	outer: blake2s.Context
	blake2s.init(&outer)

	for md_len in md_lens {
		out := digest[:md_len]
		for src_len in src_lens {
			msg := input[:src_len]
			selftest_seq(msg, u32(src_len))

			// Unkeyed.
			hash_plain(out, msg)
			blake2s.update(&outer, out)

			// Keyed, with a key as long as the digest.
			k := key_buf[:md_len]
			selftest_seq(k, u32(md_len))
			hash_keyed(out, k, msg)
			blake2s.update(&outer, out)
		}
	}

	blake2s.final(&outer, digest[:])

	want := string(hex.encode(expected, context.temp_allocator))
	got := string(hex.encode(digest[:], context.temp_allocator))
	testing.expectf(
		t,
		want == got,
		"blake2s/self-test: Expected: %s Got %s",
		want,
		got,
	)
}

View File

@@ -3,10 +3,63 @@ package test_core_crypto
import "base:runtime"
import "core:encoding/hex"
import "core:testing"
import "core:crypto/argon2id"
import "core:crypto/hash"
import "core:crypto/hkdf"
import "core:crypto/pbkdf2"
// test_argon2id checks argon2id.derive against known-answer test vectors.
//
// All inputs and the expected tag are hex encoded. A vector whose inputs
// fail to decode, or for which derive fails to allocate its working
// memory, is reported as such instead of as a tag mismatch.
@(test)
test_argon2id :: proc(t: ^testing.T) {
	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()

	test_vectors := []struct {
		params: ^argon2id.Parameters,
		password: string,
		salt: string,
		secret: string,
		ad: string,
		tag: string,
	} {
		// RFC 9106 5.3.
		{
			&argon2id.Parameters{
				memory_size = 32,
				passes = 3,
				parallelism = 4,
			},
			"0101010101010101010101010101010101010101010101010101010101010101",
			"02020202020202020202020202020202",
			"0303030303030303",
			"040404040404040404040404",
			"0d640df58d78766c08c037a34a8b53c9d01ef0452d75b65eb52520e96b01e659",
		},
	}

	for v in test_vectors {
		tag := make([]byte, len(v.tag)/2, context.temp_allocator)

		password, password_ok := hex.decode(transmute([]byte)(v.password), context.temp_allocator)
		salt, salt_ok := hex.decode(transmute([]byte)(v.salt), context.temp_allocator)
		secret, secret_ok := hex.decode(transmute([]byte)(v.secret), context.temp_allocator)
		ad, ad_ok := hex.decode(transmute([]byte)(v.ad), context.temp_allocator)
		if !testing.expectf(
			t,
			password_ok && salt_ok && secret_ok && ad_ok,
			"argon2id: malformed hex in test vector (%s, %s, %s, %s)",
			v.password,
			v.salt,
			v.secret,
			v.ad,
		) {
			continue
		}

		// Without this check an allocation failure would leave `tag`
		// zeroed and surface as a confusing tag mismatch below.
		err := argon2id.derive(v.params, password, salt, tag, secret, ad)
		if !testing.expectf(
			t,
			err == .None,
			"argon2id: derive failed to allocate working memory: %v",
			err,
		) {
			continue
		}

		tag_str := string(hex.encode(tag, context.temp_allocator))
		testing.expectf(
			t,
			tag_str == v.tag,
			"argon2id: Expected: %s for input of (%s, %s, %s, %s), but got %s instead",
			v.tag,
			v.password,
			v.salt,
			v.secret,
			v.ad,
			tag_str,
		)
	}
}
@(test)
test_hkdf :: proc(t: ^testing.T) {
runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()