This commit is contained in:
gingerBill
2021-11-24 16:31:47 +00:00
22 changed files with 2994 additions and 54 deletions

View File

@@ -0,0 +1,35 @@
# fiat
This package contains low level arithmetic required to implement certain
cryptographic primitives, ported from the [fiat-crypto project][1]
along with some higher-level helpers.
## Notes
fiat-crypto gives the choice of 3 licenses for derived works. The 1-Clause
BSD license is chosen as it is compatible with Odin's existing licensing.
The routines are intended to be timing-safe, as long as the underlying
integer arithmetic is constant time. This is true on most systems commonly
used today, with the notable exception of WASM.
While fiat-crypto provides output targeting both 32-bit and 64-bit
architectures, only the 64-bit versions were used, as 32-bit architectures
are becoming increasingly uncommon and irrelevant.
With the current Odin syntax, the Go output is trivially ported in most
cases and was used as the basis of the port.
In the future, it would be better to auto-generate Odin either directly
by adding an appropriate code-gen backend written in Coq, or perhaps by
parsing the JSON output.
As this is a port rather than autogenerated output, none of fiat-crypto's
formal verification guarantees apply, unless it is possible to prove binary
equivalence.
For the most part, alterations to the base fiat-crypto generated code were
kept to a minimum, to aid auditability. This results in a somewhat
idiosyncratic style, and in some cases minor performance penalties.
[1]: https://github.com/mit-plv/fiat-crypto

View File

@@ -0,0 +1,24 @@
package fiat
// This package provides various helpers and types common to all of the
// fiat-crypto derived backends.
// This code only works on a two's complement system.
// The compile-time assertion below only holds when -1 is represented with
// all bits set, so that masking it with 3 yields 3.
#assert((-1 & 3) == 3)
// u1 is a distinct type backed by u8. The routines in this package use it
// for selector and carry/borrow style arguments (presumably always 0 or 1,
// mirroring fiat-crypto's one-bit unsigned type -- confirm against callers).
u1 :: distinct u8
// i1 is the signed counterpart of u1, backed by i8.
i1 :: distinct i8
// cmovznz_u64 is a branch-free conditional select over 64-bit words.
//
// Inputs:
//   arg1: selector, expected to be 0 or 1.
//   arg2: value returned when arg1 is 0.
//   arg3: value returned when arg1 is 1.
//
// The selector is stretched into an all-zeros or all-ones mask by
// multiplying it with the all-ones constant; the mask then picks the bits
// of arg3 and its complement picks the bits of arg2.
cmovznz_u64 :: #force_inline proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) {
	mask := u64(arg1) * 0xffffffffffffffff
	out1 = (mask & arg3) | ((~mask) & arg2)
	return
}
// cmovznz_u32 is a branch-free conditional select over 32-bit words.
//
// Inputs:
//   arg1: selector, expected to be 0 or 1.
//   arg2: value returned when arg1 is 0.
//   arg3: value returned when arg1 is 1.
//
// The selector is stretched into an all-zeros or all-ones mask by
// multiplying it with the all-ones constant; the mask then picks the bits
// of arg3 and its complement picks the bits of arg2.
cmovznz_u32 :: #force_inline proc "contextless" (arg1: u1, arg2, arg3: u32) -> (out1: u32) {
	mask := u32(arg1) * 0xffffffff
	out1 = (mask & arg3) | ((~mask) & arg2)
	return
}

View File

@@ -0,0 +1,138 @@
package field_curve25519
import "core:crypto"
import "core:mem"
// fe_relax_cast reinterprets a pointer to a Tight_Field_Element as a pointer
// to a Loose_Field_Element. No data is copied; the returned pointer aliases
// the same storage as arg1.
fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element {
	loose := transmute(^Loose_Field_Element)(arg1)
	return loose
}
// fe_tighten_cast reinterprets a pointer to a Loose_Field_Element as a
// pointer to a Tight_Field_Element. No data is copied and no carry
// propagation is performed; the returned pointer aliases the same storage
// as arg1.
fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element {
	tight := transmute(^Tight_Field_Element)(arg1)
	return tight
}
// fe_from_bytes deserializes a 32-byte encoding into out1, ignoring the
// most significant bit of the final byte.
//
// The caller's buffer is never modified: the input is copied into a local
// buffer, the unused top bit is cleared there, and the copy is wiped once
// deserialization is done.
fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) {
	masked := arg1^
	defer mem.zero_explicit(&masked, size_of(masked))

	// Drop bit 7 of the last byte prior to deserialization.
	masked[31] &= 127
	_fe_from_bytes(out1, &masked)
}
// fe_equal compares two field elements by serializing arg2 and delegating
// to fe_equal_bytes, whose result is returned unchanged.
//
// The temporary serialization of arg2 is wiped before returning.
fe_equal :: proc "contextless" (arg1, arg2: ^Tight_Field_Element) -> int {
	rhs_bytes: [32]byte = ---
	defer mem.zero_explicit(&rhs_bytes, size_of(rhs_bytes))

	fe_to_bytes(&rhs_bytes, arg2)
	return fe_equal_bytes(arg1, &rhs_bytes)
}
// fe_equal_bytes compares a field element against a 32-byte encoding.
//
// arg1 is serialized with fe_to_bytes and the two byte strings are compared
// with crypto.compare_constant_time, whose result is returned as-is.
// The temporary serialization of arg1 is wiped before returning.
fe_equal_bytes :: proc "contextless" (arg1: ^Tight_Field_Element, arg2: ^[32]byte) -> int {
	lhs_bytes: [32]byte = ---
	defer mem.zero_explicit(&lhs_bytes, size_of(lhs_bytes))

	fe_to_bytes(&lhs_bytes, arg1)
	return crypto.compare_constant_time(lhs_bytes[:], arg2[:])
}
// fe_carry_pow2k computes `out1 = arg1^(2^arg2)` by squaring arg1 a total of
// arg2 times, carrying after every squaring.
//
// Inputs:
//   out1: receives the (tight) result; may alias arg1.
//   arg1: the value to be repeatedly squared.
//   arg2: the number of squarings to perform.
fe_carry_pow2k :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element, arg2: uint) {
	// Special case: zero squarings, `arg1^(2^0) = arg1`, though this should
	// never happen. The result is still carried so that out1 is tight.
	if arg2 == 0 {
		fe_carry(out1, arg1)
		return
	}

	// The first squaring consumes the caller's (loose) input, every
	// subsequent one squares the running result in place.
	fe_carry_square(out1, arg1)
	for _ in 1..<arg2 {
		fe_carry_square(out1, fe_relax_cast(out1))
	}
}
// fe_carry_opp writes the carried negation of arg1 to out1.
//
// The negation (fe_opp) is written into out1's storage viewed as a loose
// element, which is then carried in place by fe_carry.
fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
	scratch := fe_relax_cast(out1)
	fe_opp(scratch, arg1)
	fe_carry(out1, scratch)
}
// fe_carry_invsqrt computes a candidate inverse square root of arg1 and
// writes it to out1.
//
// Returns `p1 | m1`, where p1 and m1 are the results of comparing
// arg1^((p-1)/4) against 1 and -1 respectively (via fe_equal_bytes), ie:
// the return value is set iff that quartic value is +1 or -1.
//
// out1 is only written after the last read of arg1, so out1 may alias arg1
// (fe_carry_inv relies on this).
//
// All temporaries are wiped before returning.
fe_carry_invsqrt :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) -> int {
// Inverse square root taken from Monocypher.
tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, ---
// t0 = x^((p-5)/8)
// Can be achieved with a simple double & add ladder,
// but it would be slower.
//
// The result of this addition chain is left in tmp1.
fe_carry_pow2k(&tmp1, arg1, 1)
fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 2)
fe_carry_mul(&tmp2, arg1, fe_relax_cast(&tmp2))
fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&tmp2))
fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 1)
fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 5)
fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 10)
fe_carry_mul(&tmp2, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
fe_carry_pow2k(&tmp3, fe_relax_cast(&tmp2), 20)
fe_carry_mul(&tmp2, fe_relax_cast(&tmp3), fe_relax_cast(&tmp2))
fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 10)
fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 50)
fe_carry_mul(&tmp2, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
fe_carry_pow2k(&tmp3, fe_relax_cast(&tmp2), 100)
fe_carry_mul(&tmp2, fe_relax_cast(&tmp3), fe_relax_cast(&tmp2))
fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 50)
fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 2)
fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1)
// quartic = x^((p-1)/4), computed as t0^2 * x. tmp2 is reused as storage.
quartic := &tmp2
fe_carry_square(quartic, fe_relax_cast(&tmp1))
fe_carry_mul(quartic, fe_relax_cast(quartic), arg1)
// Serialize quartic once to save on repeated serialization/sanitization.
quartic_buf: [32]byte = ---
fe_to_bytes(&quartic_buf, quartic)
// tmp3 is reused to hold each of the constants quartic is compared with:
// 1, then -1, then -sqrt(-1) (the negation of SQRT_M1).
check := &tmp3
fe_one(check)
p1 := fe_equal_bytes(check, &quartic_buf)
fe_carry_opp(check, check)
m1 := fe_equal_bytes(check, &quartic_buf)
fe_carry_opp(check, &SQRT_M1)
ms := fe_equal_bytes(check, &quartic_buf)
// if quartic == -1 or -sqrt(-1)
// then isr = x^((p-5)/8) * sqrt(-1)
// else isr = x^((p-5)/8)
//
// The product with sqrt(-1) is always computed, and then conditionally
// replaced by the unmultiplied t0 without branching.
// NOTE(review): `(m1|ms) ~ 1` assumes the comparison results are 0 or 1.
fe_carry_mul(out1, fe_relax_cast(&tmp1), fe_relax_cast(&SQRT_M1))
fe_cond_assign(out1, &tmp1, (m1|ms) ~ 1)
mem.zero_explicit(&tmp1, size_of(tmp1))
mem.zero_explicit(&tmp2, size_of(tmp2))
mem.zero_explicit(&tmp3, size_of(tmp3))
mem.zero_explicit(&quartic_buf, size_of(quartic_buf))
return p1 | m1
}
// fe_carry_inv writes the multiplicative inverse of arg1 to out1.
//
// The inverse is derived from the inverse square root routine:
// with s = arg1^2, (1/sqrt(s))^2 * arg1 = arg1 / arg1^2 = 1 / arg1.
// The status returned by fe_carry_invsqrt is deliberately ignored.
fe_carry_inv :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
	acc: Tight_Field_Element
	defer mem.zero_explicit(&acc, size_of(acc))

	acc_loose := fe_relax_cast(&acc)

	fe_carry_square(&acc, arg1)            // acc = arg1^2
	_ = fe_carry_invsqrt(&acc, acc_loose)  // acc = 1/sqrt(arg1^2)
	fe_carry_square(&acc, acc_loose)       // acc = 1/arg1^2
	fe_carry_mul(out1, acc_loose, arg1)    // out1 = 1/arg1
}

View File

@@ -0,0 +1,616 @@
// The BSD 1-Clause License (BSD-1-Clause)
//
// Copyright (c) 2015-2020 the fiat-crypto authors (see the AUTHORS file)
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design,
// Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package field_curve25519
// The file provides arithmetic on the field Z/(2^255-19) using
// unsaturated 64-bit integer arithmetic. It is derived primarily
// from the machine generated Golang output from the fiat-crypto project.
//
// While the base implementation is provably correct, this implementation
// makes no such claims as the port and optimizations were done by hand.
// At some point, it may be worth adding support to fiat-crypto for
// generating Odin output.
//
// TODO:
// * When fiat-crypto supports it, using saturated 64-bit limbs
// instead of 51-bit limbs will be faster, though the gains are
// minimal unless adcx/adox/mulx are used.
import fiat "core:crypto/_fiat"
import "core:math/bits"
// Loose_Field_Element is a field element stored as 5 u64 limbs. It is the
// output type of the non-carrying operations (fe_add, fe_sub, fe_opp) and
// the input type of the carrying ones (fe_carry, fe_carry_mul, ...).
Loose_Field_Element :: distinct [5]u64
// Tight_Field_Element is a field element stored as 5 u64 limbs, as produced
// by the carrying operations. It shares its layout with Loose_Field_Element,
// which is what allows fe_relax_cast/fe_tighten_cast to reinterpret pointers.
Tight_Field_Element :: distinct [5]u64
// SQRT_M1 is the constant used as sqrt(-1) by fe_carry_invsqrt, given as
// 5 limbs, least significant limb first.
// NOTE(review): this is a mutable package-level variable rather than a
// constant because the code takes its address; it must never be written to.
SQRT_M1 := Tight_Field_Element{
1718705420411056,
234908883556509,
2233514472574048,
2117202627021982,
765476049583133,
}
// _addcarryx_u51 adds two 51-bit limbs together with an incoming carry.
//
// Inputs:
//   arg1: carry in.
//   arg2, arg3: the limbs to add.
//
// Returns:
//   out1: the low 51 bits of `arg1 + arg2 + arg3`.
//   out2: the bits of the sum above bit 50, truncated to a fiat.u1.
_addcarryx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
	sum := u64(arg1) + arg2 + arg3
	out1 = sum & 0x7ffffffffffff
	out2 = fiat.u1(sum >> 51)
	return
}
// _subborrowx_u51 subtracts a 51-bit limb and an incoming borrow from
// another 51-bit limb.
//
// Inputs:
//   arg1: borrow in.
//   arg2: the minuend.
//   arg3: the subtrahend.
//
// Returns:
//   out1: the low 51 bits of `arg2 - arg1 - arg3`.
//   out2: the borrow out, derived by negating the sign-extended upper
//         bits of the signed difference.
_subborrowx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
	diff := i64(arg2) - i64(arg1) - i64(arg3)
	// The arithmetic shift leaves 0 or -1 in the low bits.
	sign := fiat.i1(diff >> 51)
	out1 = u64(diff) & 0x7ffffffffffff
	out2 = 0x0 - fiat.u1(sign)
	return
}
// fe_carry_mul multiplies arg1 by arg2 and writes the carried product to
// out1.
//
// Limbs are 51 bits wide (see the 0x7ffffffffffff masks). Partial products
// whose weight would land beyond the fifth limb are folded back into the low
// limbs by pre-multiplying one operand by 0x13 (19).
//
// All reads of arg1 and arg2 happen before the first write to out1, so
// out1 may alias either input.
//
// This is a hand port of fiat-crypto generated code; the statement order and
// naming is intentionally kept as-is to aid auditability.
fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
// 64x64 -> 128-bit partial products, each returned as a (high, low) pair.
// First the products that wrap around (scaled by 19), then the rest.
x2, x1 := bits.mul_u64(arg1[4], (arg2[4] * 0x13))
x4, x3 := bits.mul_u64(arg1[4], (arg2[3] * 0x13))
x6, x5 := bits.mul_u64(arg1[4], (arg2[2] * 0x13))
x8, x7 := bits.mul_u64(arg1[4], (arg2[1] * 0x13))
x10, x9 := bits.mul_u64(arg1[3], (arg2[4] * 0x13))
x12, x11 := bits.mul_u64(arg1[3], (arg2[3] * 0x13))
x14, x13 := bits.mul_u64(arg1[3], (arg2[2] * 0x13))
x16, x15 := bits.mul_u64(arg1[2], (arg2[4] * 0x13))
x18, x17 := bits.mul_u64(arg1[2], (arg2[3] * 0x13))
x20, x19 := bits.mul_u64(arg1[1], (arg2[4] * 0x13))
x22, x21 := bits.mul_u64(arg1[4], arg2[0])
x24, x23 := bits.mul_u64(arg1[3], arg2[1])
x26, x25 := bits.mul_u64(arg1[3], arg2[0])
x28, x27 := bits.mul_u64(arg1[2], arg2[2])
x30, x29 := bits.mul_u64(arg1[2], arg2[1])
x32, x31 := bits.mul_u64(arg1[2], arg2[0])
x34, x33 := bits.mul_u64(arg1[1], arg2[3])
x36, x35 := bits.mul_u64(arg1[1], arg2[2])
x38, x37 := bits.mul_u64(arg1[1], arg2[1])
x40, x39 := bits.mul_u64(arg1[1], arg2[0])
x42, x41 := bits.mul_u64(arg1[0], arg2[4])
x44, x43 := bits.mul_u64(arg1[0], arg2[3])
x46, x45 := bits.mul_u64(arg1[0], arg2[2])
x48, x47 := bits.mul_u64(arg1[0], arg2[1])
x50, x49 := bits.mul_u64(arg1[0], arg2[0])
// 128-bit accumulation for limb 0 (low word first, then the high word
// with the carry out of the low word addition).
x51, x52 := bits.add_u64(x13, x7, u64(0x0))
x53, _ := bits.add_u64(x14, x8, u64(fiat.u1(x52)))
x55, x56 := bits.add_u64(x17, x51, u64(0x0))
x57, _ := bits.add_u64(x18, x53, u64(fiat.u1(x56)))
x59, x60 := bits.add_u64(x19, x55, u64(0x0))
x61, _ := bits.add_u64(x20, x57, u64(fiat.u1(x60)))
x63, x64 := bits.add_u64(x49, x59, u64(0x0))
x65, _ := bits.add_u64(x50, x61, u64(fiat.u1(x64)))
// Split the limb 0 accumulator into its low 51 bits (x68) and the carry
// into limb 1 (x67).
x67 := ((x63 >> 51) | ((x65 << 13) & 0xffffffffffffffff))
x68 := (x63 & 0x7ffffffffffff)
// 128-bit accumulation for limb 4.
x69, x70 := bits.add_u64(x23, x21, u64(0x0))
x71, _ := bits.add_u64(x24, x22, u64(fiat.u1(x70)))
x73, x74 := bits.add_u64(x27, x69, u64(0x0))
x75, _ := bits.add_u64(x28, x71, u64(fiat.u1(x74)))
x77, x78 := bits.add_u64(x33, x73, u64(0x0))
x79, _ := bits.add_u64(x34, x75, u64(fiat.u1(x78)))
x81, x82 := bits.add_u64(x41, x77, u64(0x0))
x83, _ := bits.add_u64(x42, x79, u64(fiat.u1(x82)))
// 128-bit accumulation for limb 3.
x85, x86 := bits.add_u64(x25, x1, u64(0x0))
x87, _ := bits.add_u64(x26, x2, u64(fiat.u1(x86)))
x89, x90 := bits.add_u64(x29, x85, u64(0x0))
x91, _ := bits.add_u64(x30, x87, u64(fiat.u1(x90)))
x93, x94 := bits.add_u64(x35, x89, u64(0x0))
x95, _ := bits.add_u64(x36, x91, u64(fiat.u1(x94)))
x97, x98 := bits.add_u64(x43, x93, u64(0x0))
x99, _ := bits.add_u64(x44, x95, u64(fiat.u1(x98)))
// 128-bit accumulation for limb 2.
x101, x102 := bits.add_u64(x9, x3, u64(0x0))
x103, _ := bits.add_u64(x10, x4, u64(fiat.u1(x102)))
x105, x106 := bits.add_u64(x31, x101, u64(0x0))
x107, _ := bits.add_u64(x32, x103, u64(fiat.u1(x106)))
x109, x110 := bits.add_u64(x37, x105, u64(0x0))
x111, _ := bits.add_u64(x38, x107, u64(fiat.u1(x110)))
x113, x114 := bits.add_u64(x45, x109, u64(0x0))
x115, _ := bits.add_u64(x46, x111, u64(fiat.u1(x114)))
// 128-bit accumulation for limb 1.
x117, x118 := bits.add_u64(x11, x5, u64(0x0))
x119, _ := bits.add_u64(x12, x6, u64(fiat.u1(x118)))
x121, x122 := bits.add_u64(x15, x117, u64(0x0))
x123, _ := bits.add_u64(x16, x119, u64(fiat.u1(x122)))
x125, x126 := bits.add_u64(x39, x121, u64(0x0))
x127, _ := bits.add_u64(x40, x123, u64(fiat.u1(x126)))
x129, x130 := bits.add_u64(x47, x125, u64(0x0))
x131, _ := bits.add_u64(x48, x127, u64(fiat.u1(x130)))
// Carry chain: limb 0 -> limb 1 -> limb 2 -> limb 3 -> limb 4, keeping
// the low 51 bits of each accumulator.
x133, x134 := bits.add_u64(x67, x129, u64(0x0))
x135 := (u64(fiat.u1(x134)) + x131)
x136 := ((x133 >> 51) | ((x135 << 13) & 0xffffffffffffffff))
x137 := (x133 & 0x7ffffffffffff)
x138, x139 := bits.add_u64(x136, x113, u64(0x0))
x140 := (u64(fiat.u1(x139)) + x115)
x141 := ((x138 >> 51) | ((x140 << 13) & 0xffffffffffffffff))
x142 := (x138 & 0x7ffffffffffff)
x143, x144 := bits.add_u64(x141, x97, u64(0x0))
x145 := (u64(fiat.u1(x144)) + x99)
x146 := ((x143 >> 51) | ((x145 << 13) & 0xffffffffffffffff))
x147 := (x143 & 0x7ffffffffffff)
x148, x149 := bits.add_u64(x146, x81, u64(0x0))
x150 := (u64(fiat.u1(x149)) + x83)
x151 := ((x148 >> 51) | ((x150 << 13) & 0xffffffffffffffff))
x152 := (x148 & 0x7ffffffffffff)
// The carry out of limb 4 wraps around into limb 0 scaled by 19, followed
// by a short carry through limbs 0, 1 and 2.
x153 := (x151 * 0x13)
x154 := (x68 + x153)
x155 := (x154 >> 51)
x156 := (x154 & 0x7ffffffffffff)
x157 := (x155 + x137)
x158 := fiat.u1((x157 >> 51))
x159 := (x157 & 0x7ffffffffffff)
x160 := (u64(x158) + x142)
out1[0] = x156
out1[1] = x159
out1[2] = x160
out1[3] = x147
out1[4] = x152
}
// fe_carry_square squares arg1 and writes the carried result to out1.
//
// This is the specialization of fe_carry_mul for equal operands: symmetric
// cross terms are computed once with one operand doubled, and terms that
// wrap past the fifth limb have one operand scaled by 0x13 (19).
//
// All reads of arg1 happen before the first write to out1, so out1 may
// alias arg1 (fe_carry_pow2k relies on this).
//
// This is a hand port of fiat-crypto generated code; the statement order and
// naming is intentionally kept as-is to aid auditability.
fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
// Pre-scaled operands: x19, x38 and x2 multiples of the upper limbs.
x1 := (arg1[4] * 0x13)
x2 := (x1 * 0x2)
x3 := (arg1[4] * 0x2)
x4 := (arg1[3] * 0x13)
x5 := (x4 * 0x2)
x6 := (arg1[3] * 0x2)
x7 := (arg1[2] * 0x2)
x8 := (arg1[1] * 0x2)
// 64x64 -> 128-bit partial products, each returned as a (high, low) pair.
x10, x9 := bits.mul_u64(arg1[4], x1)
x12, x11 := bits.mul_u64(arg1[3], x2)
x14, x13 := bits.mul_u64(arg1[3], x4)
x16, x15 := bits.mul_u64(arg1[2], x2)
x18, x17 := bits.mul_u64(arg1[2], x5)
x20, x19 := bits.mul_u64(arg1[2], arg1[2])
x22, x21 := bits.mul_u64(arg1[1], x2)
x24, x23 := bits.mul_u64(arg1[1], x6)
x26, x25 := bits.mul_u64(arg1[1], x7)
x28, x27 := bits.mul_u64(arg1[1], arg1[1])
x30, x29 := bits.mul_u64(arg1[0], x3)
x32, x31 := bits.mul_u64(arg1[0], x6)
x34, x33 := bits.mul_u64(arg1[0], x7)
x36, x35 := bits.mul_u64(arg1[0], x8)
x38, x37 := bits.mul_u64(arg1[0], arg1[0])
// 128-bit accumulation for limb 0, then split into the low 51 bits (x48)
// and the carry into limb 1 (x47).
x39, x40 := bits.add_u64(x21, x17, u64(0x0))
x41, _ := bits.add_u64(x22, x18, u64(fiat.u1(x40)))
x43, x44 := bits.add_u64(x37, x39, u64(0x0))
x45, _ := bits.add_u64(x38, x41, u64(fiat.u1(x44)))
x47 := ((x43 >> 51) | ((x45 << 13) & 0xffffffffffffffff))
x48 := (x43 & 0x7ffffffffffff)
// 128-bit accumulation for limb 4.
x49, x50 := bits.add_u64(x23, x19, u64(0x0))
x51, _ := bits.add_u64(x24, x20, u64(fiat.u1(x50)))
x53, x54 := bits.add_u64(x29, x49, u64(0x0))
x55, _ := bits.add_u64(x30, x51, u64(fiat.u1(x54)))
// 128-bit accumulation for limb 3.
x57, x58 := bits.add_u64(x25, x9, u64(0x0))
x59, _ := bits.add_u64(x26, x10, u64(fiat.u1(x58)))
x61, x62 := bits.add_u64(x31, x57, u64(0x0))
x63, _ := bits.add_u64(x32, x59, u64(fiat.u1(x62)))
// 128-bit accumulation for limb 2.
x65, x66 := bits.add_u64(x27, x11, u64(0x0))
x67, _ := bits.add_u64(x28, x12, u64(fiat.u1(x66)))
x69, x70 := bits.add_u64(x33, x65, u64(0x0))
x71, _ := bits.add_u64(x34, x67, u64(fiat.u1(x70)))
// 128-bit accumulation for limb 1.
x73, x74 := bits.add_u64(x15, x13, u64(0x0))
x75, _ := bits.add_u64(x16, x14, u64(fiat.u1(x74)))
x77, x78 := bits.add_u64(x35, x73, u64(0x0))
x79, _ := bits.add_u64(x36, x75, u64(fiat.u1(x78)))
// Carry chain: limb 0 -> limb 1 -> limb 2 -> limb 3 -> limb 4, keeping
// the low 51 bits of each accumulator.
x81, x82 := bits.add_u64(x47, x77, u64(0x0))
x83 := (u64(fiat.u1(x82)) + x79)
x84 := ((x81 >> 51) | ((x83 << 13) & 0xffffffffffffffff))
x85 := (x81 & 0x7ffffffffffff)
x86, x87 := bits.add_u64(x84, x69, u64(0x0))
x88 := (u64(fiat.u1(x87)) + x71)
x89 := ((x86 >> 51) | ((x88 << 13) & 0xffffffffffffffff))
x90 := (x86 & 0x7ffffffffffff)
x91, x92 := bits.add_u64(x89, x61, u64(0x0))
x93 := (u64(fiat.u1(x92)) + x63)
x94 := ((x91 >> 51) | ((x93 << 13) & 0xffffffffffffffff))
x95 := (x91 & 0x7ffffffffffff)
x96, x97 := bits.add_u64(x94, x53, u64(0x0))
x98 := (u64(fiat.u1(x97)) + x55)
x99 := ((x96 >> 51) | ((x98 << 13) & 0xffffffffffffffff))
x100 := (x96 & 0x7ffffffffffff)
// The carry out of limb 4 wraps around into limb 0 scaled by 19, followed
// by a short carry through limbs 0, 1 and 2.
x101 := (x99 * 0x13)
x102 := (x48 + x101)
x103 := (x102 >> 51)
x104 := (x102 & 0x7ffffffffffff)
x105 := (x103 + x85)
x106 := fiat.u1((x105 >> 51))
x107 := (x105 & 0x7ffffffffffff)
x108 := (u64(x106) + x90)
out1[0] = x104
out1[1] = x107
out1[2] = x108
out1[3] = x95
out1[4] = x100
}
// fe_carry propagates carries through the limbs of arg1 and writes the
// resulting tight field element to out1.
//
// Every limb of arg1 is read before out1 is written, so out1 may alias
// arg1 (fe_carry_opp relies on this).
fe_carry :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
	LIMB_MASK :: 0x7ffffffffffff

	// First pass: ripple the bits above bit 50 of each limb into the next.
	l0 := arg1[0]
	l1 := (l0 >> 51) + arg1[1]
	l2 := (l1 >> 51) + arg1[2]
	l3 := (l2 >> 51) + arg1[3]
	l4 := (l3 >> 51) + arg1[4]

	// The carry out of the top limb wraps around into limb 0 scaled by 19,
	// followed by a short second pass through limbs 0, 1 and 2.
	r0 := (l0 & LIMB_MASK) + ((l4 >> 51) * 0x13)
	r1 := u64(fiat.u1(r0 >> 51)) + (l1 & LIMB_MASK)
	r2 := u64(fiat.u1(r1 >> 51)) + (l2 & LIMB_MASK)

	out1[0] = r0 & LIMB_MASK
	out1[1] = r1 & LIMB_MASK
	out1[2] = r2
	out1[3] = l3 & LIMB_MASK
	out1[4] = l4 & LIMB_MASK
}
// fe_add writes the limb-wise sum of arg1 and arg2 to out1.
//
// No carry propagation is performed, hence the loose result type.
// Each output limb depends only on the input limbs at the same index.
fe_add :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
	out1[0] = arg1[0] + arg2[0]
	out1[1] = arg1[1] + arg2[1]
	out1[2] = arg1[2] + arg2[2]
	out1[3] = arg1[3] + arg2[3]
	out1[4] = arg1[4] + arg2[4]
}
// fe_sub writes the limb-wise difference `arg1 - arg2` to out1.
//
// A per-limb constant is added to each limb of arg1 before subtracting.
// No carry propagation is performed, hence the loose result type.
// NOTE(review): the constants look like the limbs of twice the field
// modulus, used so the subtraction never goes negative -- confirm.
fe_sub :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
	out1[0] = (0xfffffffffffda + arg1[0]) - arg2[0]
	out1[1] = (0xffffffffffffe + arg1[1]) - arg2[1]
	out1[2] = (0xffffffffffffe + arg1[2]) - arg2[2]
	out1[3] = (0xffffffffffffe + arg1[3]) - arg2[3]
	out1[4] = (0xffffffffffffe + arg1[4]) - arg2[4]
}
// fe_opp writes the limb-wise negation of arg1 to out1.
//
// Each limb of arg1 is subtracted from a per-limb constant (the same
// constants fe_sub adds before subtracting). No carry propagation is
// performed, hence the loose result type.
fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
	out1[0] = 0xfffffffffffda - arg1[0]
	out1[1] = 0xffffffffffffe - arg1[1]
	out1[2] = 0xffffffffffffe - arg1[2]
	out1[3] = 0xffffffffffffe - arg1[3]
	out1[4] = 0xffffffffffffe - arg1[4]
}
// fe_cond_assign conditionally overwrites out1 with arg1 without branching
// on the condition.
//
// Inputs:
//   out1: the destination, left untouched when arg2 is 0.
//   arg1: the value assigned to out1 when arg2 is 1.
//   arg2: the condition, expected to be 0 or 1.
fe_cond_assign :: proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: int) {
	sel := fiat.u1(arg2)
	for i in 0..<5 {
		out1[i] = fiat.cmovznz_u64(sel, out1[i], arg1[i])
	}
}
// fe_to_bytes serializes arg1 into out1 as 32 bytes, least significant
// byte first.
//
// The value is first brought into canonical form without branching: the
// constants 0x7ffffffffffed, 0x7ffffffffffff, ... (the limbs of the
// modulus) are subtracted, and added back under a mask derived from the
// final borrow. The five 51-bit limbs are then repacked into bytes.
//
// This is a hand port of fiat-crypto generated code; the statement order and
// naming is intentionally kept as-is to aid auditability.
fe_to_bytes :: proc "contextless" (out1: ^[32]byte, arg1: ^Tight_Field_Element) {
// Subtract the modulus, limb by limb, tracking the borrow.
x1, x2 := _subborrowx_u51(0x0, arg1[0], 0x7ffffffffffed)
x3, x4 := _subborrowx_u51(x2, arg1[1], 0x7ffffffffffff)
x5, x6 := _subborrowx_u51(x4, arg1[2], 0x7ffffffffffff)
x7, x8 := _subborrowx_u51(x6, arg1[3], 0x7ffffffffffff)
x9, x10 := _subborrowx_u51(x8, arg1[4], 0x7ffffffffffff)
// x11 is all ones if the subtraction borrowed, all zeros otherwise.
x11 := fiat.cmovznz_u64(x10, u64(0x0), 0xffffffffffffffff)
// Add the modulus back iff the subtraction borrowed.
x12, x13 := _addcarryx_u51(0x0, x1, (x11 & 0x7ffffffffffed))
x14, x15 := _addcarryx_u51(x13, x3, (x11 & 0x7ffffffffffff))
x16, x17 := _addcarryx_u51(x15, x5, (x11 & 0x7ffffffffffff))
x18, x19 := _addcarryx_u51(x17, x7, (x11 & 0x7ffffffffffff))
x20, _ := _addcarryx_u51(x19, x9, (x11 & 0x7ffffffffffff))
// Align limbs 4, 3, 2 and 1 to the bit offset within the byte at which
// they start (204, 153, 102 and 51 bits, modulo 8).
x22 := (x20 << 4)
x23 := (x18 * u64(0x2))
x24 := (x16 << 6)
x25 := (x14 << 3)
// Limb 0: 6 whole bytes, with 3 bits left over (x37).
x26 := (u8(x12) & 0xff)
x27 := (x12 >> 8)
x28 := (u8(x27) & 0xff)
x29 := (x27 >> 8)
x30 := (u8(x29) & 0xff)
x31 := (x29 >> 8)
x32 := (u8(x31) & 0xff)
x33 := (x31 >> 8)
x34 := (u8(x33) & 0xff)
x35 := (x33 >> 8)
x36 := (u8(x35) & 0xff)
x37 := u8((x35 >> 8))
// Limb 1 plus the leftover bits of limb 0: 6 whole bytes, remainder x50.
x38 := (x25 + u64(x37))
x39 := (u8(x38) & 0xff)
x40 := (x38 >> 8)
x41 := (u8(x40) & 0xff)
x42 := (x40 >> 8)
x43 := (u8(x42) & 0xff)
x44 := (x42 >> 8)
x45 := (u8(x44) & 0xff)
x46 := (x44 >> 8)
x47 := (u8(x46) & 0xff)
x48 := (x46 >> 8)
x49 := (u8(x48) & 0xff)
x50 := u8((x48 >> 8))
// Limb 2 plus the leftover bits of limb 1: 7 whole bytes, remainder x65.
x51 := (x24 + u64(x50))
x52 := (u8(x51) & 0xff)
x53 := (x51 >> 8)
x54 := (u8(x53) & 0xff)
x55 := (x53 >> 8)
x56 := (u8(x55) & 0xff)
x57 := (x55 >> 8)
x58 := (u8(x57) & 0xff)
x59 := (x57 >> 8)
x60 := (u8(x59) & 0xff)
x61 := (x59 >> 8)
x62 := (u8(x61) & 0xff)
x63 := (x61 >> 8)
x64 := (u8(x63) & 0xff)
x65 := fiat.u1((x63 >> 8))
// Limb 3 plus the leftover bit of limb 2: 6 whole bytes, remainder x78.
x66 := (x23 + u64(x65))
x67 := (u8(x66) & 0xff)
x68 := (x66 >> 8)
x69 := (u8(x68) & 0xff)
x70 := (x68 >> 8)
x71 := (u8(x70) & 0xff)
x72 := (x70 >> 8)
x73 := (u8(x72) & 0xff)
x74 := (x72 >> 8)
x75 := (u8(x74) & 0xff)
x76 := (x74 >> 8)
x77 := (u8(x76) & 0xff)
x78 := u8((x76 >> 8))
// Limb 4 plus the leftover bits of limb 3: the final 7 bytes.
x79 := (x22 + u64(x78))
x80 := (u8(x79) & 0xff)
x81 := (x79 >> 8)
x82 := (u8(x81) & 0xff)
x83 := (x81 >> 8)
x84 := (u8(x83) & 0xff)
x85 := (x83 >> 8)
x86 := (u8(x85) & 0xff)
x87 := (x85 >> 8)
x88 := (u8(x87) & 0xff)
x89 := (x87 >> 8)
x90 := (u8(x89) & 0xff)
x91 := u8((x89 >> 8))
out1[0] = x26
out1[1] = x28
out1[2] = x30
out1[3] = x32
out1[4] = x34
out1[5] = x36
out1[6] = x39
out1[7] = x41
out1[8] = x43
out1[9] = x45
out1[10] = x47
out1[11] = x49
out1[12] = x52
out1[13] = x54
out1[14] = x56
out1[15] = x58
out1[16] = x60
out1[17] = x62
out1[18] = x64
out1[19] = x67
out1[20] = x69
out1[21] = x71
out1[22] = x73
out1[23] = x75
out1[24] = x77
out1[25] = x80
out1[26] = x82
out1[27] = x84
out1[28] = x86
out1[29] = x88
out1[30] = x90
out1[31] = x91
}
// _fe_from_bytes deserializes 32 bytes (least significant byte first) into
// five limbs, the first four of which are 51 bits wide.
//
// All 8 bits of arg1[31] end up in the top limb and no reduction is
// performed here; the fe_from_bytes wrapper clears the most significant
// bit of the input before calling this routine.
//
// This is a hand port of fiat-crypto generated code; the statement order and
// naming is intentionally kept as-is to aid auditability.
_fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) {
// Shift every byte to its bit position relative to the start of the limb
// it (mostly) belongs to. Bytes straddling a limb boundary are handled by
// the carries (x40, x48, x57, x65) further down.
// Bytes contributing to limb 4 (limb starts at bit 204).
x1 := (u64(arg1[31]) << 44)
x2 := (u64(arg1[30]) << 36)
x3 := (u64(arg1[29]) << 28)
x4 := (u64(arg1[28]) << 20)
x5 := (u64(arg1[27]) << 12)
x6 := (u64(arg1[26]) << 4)
// Bytes contributing to limb 3 (limb starts at bit 153).
x7 := (u64(arg1[25]) << 47)
x8 := (u64(arg1[24]) << 39)
x9 := (u64(arg1[23]) << 31)
x10 := (u64(arg1[22]) << 23)
x11 := (u64(arg1[21]) << 15)
x12 := (u64(arg1[20]) << 7)
// Bytes contributing to limb 2 (limb starts at bit 102).
x13 := (u64(arg1[19]) << 50)
x14 := (u64(arg1[18]) << 42)
x15 := (u64(arg1[17]) << 34)
x16 := (u64(arg1[16]) << 26)
x17 := (u64(arg1[15]) << 18)
x18 := (u64(arg1[14]) << 10)
x19 := (u64(arg1[13]) << 2)
// Bytes contributing to limb 1 (limb starts at bit 51).
x20 := (u64(arg1[12]) << 45)
x21 := (u64(arg1[11]) << 37)
x22 := (u64(arg1[10]) << 29)
x23 := (u64(arg1[9]) << 21)
x24 := (u64(arg1[8]) << 13)
x25 := (u64(arg1[7]) << 5)
// Bytes contributing to limb 0.
x26 := (u64(arg1[6]) << 48)
x27 := (u64(arg1[5]) << 40)
x28 := (u64(arg1[4]) << 32)
x29 := (u64(arg1[3]) << 24)
x30 := (u64(arg1[2]) << 16)
x31 := (u64(arg1[1]) << 8)
x32 := arg1[0]
// Limb 0: sum the shifted bytes, keep 51 bits, carry the rest (x40).
x33 := (x31 + u64(x32))
x34 := (x30 + x33)
x35 := (x29 + x34)
x36 := (x28 + x35)
x37 := (x27 + x36)
x38 := (x26 + x37)
x39 := (x38 & 0x7ffffffffffff)
x40 := u8((x38 >> 51))
// Limb 1.
x41 := (x25 + u64(x40))
x42 := (x24 + x41)
x43 := (x23 + x42)
x44 := (x22 + x43)
x45 := (x21 + x44)
x46 := (x20 + x45)
x47 := (x46 & 0x7ffffffffffff)
x48 := u8((x46 >> 51))
// Limb 2.
x49 := (x19 + u64(x48))
x50 := (x18 + x49)
x51 := (x17 + x50)
x52 := (x16 + x51)
x53 := (x15 + x52)
x54 := (x14 + x53)
x55 := (x13 + x54)
x56 := (x55 & 0x7ffffffffffff)
x57 := u8((x55 >> 51))
// Limb 3.
x58 := (x12 + u64(x57))
x59 := (x11 + x58)
x60 := (x10 + x59)
x61 := (x9 + x60)
x62 := (x8 + x61)
x63 := (x7 + x62)
x64 := (x63 & 0x7ffffffffffff)
x65 := u8((x63 >> 51))
// Limb 4 (not masked).
x66 := (x6 + u64(x65))
x67 := (x5 + x66)
x68 := (x4 + x67)
x69 := (x3 + x68)
x70 := (x2 + x69)
x71 := (x1 + x70)
out1[0] = x39
out1[1] = x47
out1[2] = x56
out1[3] = x64
out1[4] = x71
}
// fe_relax copies the limbs of the tight field element arg1 into the loose
// field element out1, unchanged.
fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
	for i in 0..<5 {
		out1[i] = arg1[i]
	}
}
// fe_carry_scmul_121666 multiplies arg1 by the constant 121666 (0x1db42)
// and writes the carried result to out1.
//
// All reads of arg1 happen before the first write to out1, so out1 may
// alias arg1.
//
// This is a hand port of fiat-crypto generated code; the statement order and
// naming is intentionally kept as-is to aid auditability.
fe_carry_scmul_121666 :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
// 64x64 -> 128-bit products of the constant with each limb, each returned
// as a (high, low) pair.
x2, x1 := bits.mul_u64(0x1db42, arg1[4])
x4, x3 := bits.mul_u64(0x1db42, arg1[3])
x6, x5 := bits.mul_u64(0x1db42, arg1[2])
x8, x7 := bits.mul_u64(0x1db42, arg1[1])
x10, x9 := bits.mul_u64(0x1db42, arg1[0])
// Carry chain: limb 0 -> limb 1 -> limb 2 -> limb 3 -> limb 4, keeping
// the low 51 bits of each product.
x11 := ((x9 >> 51) | ((x10 << 13) & 0xffffffffffffffff))
x12 := (x9 & 0x7ffffffffffff)
x13, x14 := bits.add_u64(x11, x7, u64(0x0))
x15 := (u64(fiat.u1(x14)) + x8)
x16 := ((x13 >> 51) | ((x15 << 13) & 0xffffffffffffffff))
x17 := (x13 & 0x7ffffffffffff)
x18, x19 := bits.add_u64(x16, x5, u64(0x0))
x20 := (u64(fiat.u1(x19)) + x6)
x21 := ((x18 >> 51) | ((x20 << 13) & 0xffffffffffffffff))
x22 := (x18 & 0x7ffffffffffff)
x23, x24 := bits.add_u64(x21, x3, u64(0x0))
x25 := (u64(fiat.u1(x24)) + x4)
x26 := ((x23 >> 51) | ((x25 << 13) & 0xffffffffffffffff))
x27 := (x23 & 0x7ffffffffffff)
x28, x29 := bits.add_u64(x26, x1, u64(0x0))
x30 := (u64(fiat.u1(x29)) + x2)
x31 := ((x28 >> 51) | ((x30 << 13) & 0xffffffffffffffff))
x32 := (x28 & 0x7ffffffffffff)
// The carry out of limb 4 wraps around into limb 0 scaled by 19, followed
// by a short carry through limbs 0, 1 and 2.
x33 := (x31 * 0x13)
x34 := (x12 + x33)
x35 := fiat.u1((x34 >> 51))
x36 := (x34 & 0x7ffffffffffff)
x37 := (u64(x35) + x17)
x38 := fiat.u1((x37 >> 51))
x39 := (x37 & 0x7ffffffffffff)
x40 := (u64(x38) + x22)
out1[0] = x36
out1[1] = x39
out1[2] = x40
out1[3] = x27
out1[4] = x32
}
// The following routines were added by hand, and do not come from fiat-crypto.
// fe_zero sets out1 to the field element 0 (all limbs cleared).
fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
	out1^ = Tight_Field_Element{0, 0, 0, 0, 0}
}
// fe_one sets out1 to the field element 1 (lowest limb 1, all others 0).
fe_one :: proc "contextless" (out1: ^Tight_Field_Element) {
	out1^ = Tight_Field_Element{1, 0, 0, 0, 0}
}
// fe_set copies the field element arg1 into out1.
fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
	out1^ = arg1^
}
// fe_cond_swap conditionally exchanges the contents of out1 and out2
// without branching on the condition.
//
// Inputs:
//   out1, out2: the field elements to exchange.
//   arg1: the condition, expected to be 0 (leave as-is) or 1 (swap).
fe_cond_swap :: proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
	// All zeros when arg1 is 0, all ones when arg1 is 1.
	mask := -u64(arg1)

	for i in 0..<5 {
		// The masked difference is either 0 (no-op) or the xor of both
		// limbs (which exchanges them when applied to each side).
		delta := (out1[i] ~ out2[i]) & mask
		out1[i] ~= delta
		out2[i] ~= delta
	}
}

View File

@@ -0,0 +1,66 @@
package field_poly1305
import "core:crypto/util"
import "core:mem"
// fe_relax_cast reinterprets a pointer to a Tight_Field_Element as a pointer
// to a Loose_Field_Element. No data is copied; the returned pointer aliases
// the same storage as arg1.
fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element {
	loose := transmute(^Loose_Field_Element)(arg1)
	return loose
}
// fe_tighten_cast reinterprets a pointer to a Loose_Field_Element as a
// pointer to a Tight_Field_Element. No data is copied and no carry
// propagation is performed; the returned pointer aliases the same storage
// as arg1.
fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element {
	tight := transmute(^Tight_Field_Element)(arg1)
	return tight
}
// fe_from_bytes deserializes a 16-byte block plus one extra high byte into
// a field element.
//
// Inputs:
//   out1: receives the deserialized value.
//   arg1: the low 128 bits, least significant byte first; the length must
//         be exactly 16 (asserted).
//   arg2: the value placed at bit 128 and up.
//   sanitize: when true, the temporary buffer used by the generic code
//         path is wiped before returning. The x86 fast path has no
//         temporary buffer, and ignores this flag.
fe_from_bytes :: #force_inline proc (out1: ^Tight_Field_Element, arg1: []byte, arg2: byte, sanitize: bool = true) {
	// fiat-crypto's deserialization routine effectively processes a
	// single byte at a time, and wants 256-bits of input for a value
	// that will be 128-bits or 129-bits.
	//
	// This is somewhat cumbersome to use, so at a minimum a wrapper
	// makes implementing the actual MAC block processing considerably
	// neater.
	assert(len(arg1) == 16)

	when ODIN_ARCH == "386" || ODIN_ARCH == "amd64" {
		// While it may be unwise to do deserialization here on our
		// own when fiat-crypto provides equivalent functionality,
		// doing it this way provides a little under 3x performance
		// improvement when optimization is enabled.
		words := transmute(^[2]u64)(&arg1[0])
		w_lo, w_hi := words[0], words[1]

		// This is inspired by poly1305-donna, though adjustments were
		// made since a Tight_Field_Element's limbs are 44-bits, 43-bits,
		// and 43-bits wide.
		//
		// Note: This could be transplanted into fe_from_u64s, but that
		// code is called once per MAC, and is not on the critical path.
		top_bits := u64(arg2) << 41 // arg2 << 128
		out1[0] = w_lo & 0xfffffffffff
		out1[1] = ((w_lo >> 44) | (w_hi << 20)) & 0x7ffffffffff
		out1[2] = ((w_hi >> 23) & 0x7ffffffffff) | top_bits
	} else {
		// Generic path: widen the input to the 32 bytes that the
		// fiat-crypto derived routine expects.
		buf: [32]byte
		copy_slice(buf[0:16], arg1[:])
		buf[16] = arg2

		_fe_from_bytes(out1, &buf)
		if sanitize {
			// This is used to deserialize `s` which is confidential.
			mem.zero_explicit(&buf, size_of(buf))
		}
	}
}
// fe_from_u64s deserializes a 128-bit value, given as two 64-bit halves,
// into a field element.
//
// Inputs:
//   out1: receives the deserialized value.
//   lo: the low 64 bits.
//   hi: the high 64 bits.
//
// The halves are written (little-endian) to the start of a zeroed 32-byte
// buffer, which is handed to _fe_from_bytes and wiped before returning.
fe_from_u64s :: proc "contextless" (out1: ^Tight_Field_Element, lo, hi: u64) {
	buf: [32]byte
	// This routine is only used to deserialize `r` which is confidential.
	defer mem.zero_explicit(&buf, size_of(buf))

	util.PUT_U64_LE(buf[0:8], lo)
	util.PUT_U64_LE(buf[8:16], hi)
	_fe_from_bytes(out1, &buf)
}

View File

@@ -0,0 +1,356 @@
// The BSD 1-Clause License (BSD-1-Clause)
//
// Copyright (c) 2015-2020 the fiat-crypto authors (see the AUTHORS file)
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design,
// Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package field_poly1305
// This file provides arithmetic on the field Z/(2^130 - 5) using
// unsaturated 64-bit integer arithmetic. It is derived primarily
// from the machine generated Golang output from the fiat-crypto project.
//
// While the base implementation is provably correct, this implementation
// makes no such claims as the port and optimizations were done by hand.
// At some point, it may be worth adding support to fiat-crypto for
// generating Odin output.
import fiat "core:crypto/_fiat"
import "core:math/bits"
Loose_Field_Element :: distinct [3]u64
Tight_Field_Element :: distinct [3]u64
// _addcarryx_u44 adds arg2, arg3 and the carry-in arg1, returning the low
// 44 bits of the sum in out1 and the bits above them (truncated to a
// fiat.u1) as the carry-out in out2.
_addcarryx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
	total := u64(arg1) + arg2 + arg3
	out1 = total & 0xfffffffffff
	out2 = fiat.u1(total >> 44)
	return
}
// _subborrowx_u44 computes arg2 - arg1 - arg3 using signed 64-bit
// arithmetic, returning the low 44 bits of the difference in out1 and the
// borrow-out in out2.
_subborrowx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
	diff := i64(arg2) - i64(arg1) - i64(arg3)
	// The arithmetic shift propagates the sign, so `sign` is -1 when the
	// subtraction went negative and 0 otherwise.
	sign := fiat.i1(diff >> 44)
	out1 = u64(diff) & 0xfffffffffff
	// Negate (wrapping) so that -1 becomes a borrow of 1.
	out2 = 0x0 - fiat.u1(sign)
	return
}
// _addcarryx_u43 adds arg2, arg3 and the carry-in arg1, returning the low
// 43 bits of the sum in out1 and the bits above them (truncated to a
// fiat.u1) as the carry-out in out2.
_addcarryx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
	total := u64(arg1) + arg2 + arg3
	out1 = total & 0x7ffffffffff
	out2 = fiat.u1(total >> 43)
	return
}
// _subborrowx_u43 computes arg2 - arg1 - arg3 using signed 64-bit
// arithmetic, returning the low 43 bits of the difference in out1 and the
// borrow-out in out2.
_subborrowx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
	diff := i64(arg2) - i64(arg1) - i64(arg3)
	// The arithmetic shift propagates the sign, so `sign` is -1 when the
	// subtraction went negative and 0 otherwise.
	sign := fiat.i1(diff >> 43)
	out1 = u64(diff) & 0x7ffffffffff
	// Negate (wrapping) so that -1 becomes a borrow of 1.
	out2 = 0x0 - fiat.u1(sign)
	return
}
// fe_carry_mul multiplies arg1 by arg2 in Z/(2^130 - 5) and stores the
// carried result in out1.
//
// Elements are held as three limbs; the masks used below show limb 0
// carrying 44 bits and limbs 1 and 2 carrying 43 bits each (weights
// 2^0, 2^44, 2^87).  The small constant factors applied to arg2 before
// multiplying fold in both the limb alignment and the reduction
// 2^130 = 5 (mod p):
//   0x2 - limb1*limb1 has weight 2^88 = 2 * 2^87
//   0x5 - limb2*limb2 has weight 2^174 = 2^130 * 2^44
//   0xa - limb1*limb2 has weight 2^131 = 2 * 2^130
//
// NOTE(review): the permitted input/output limb bounds (loose vs tight)
// come from fiat-crypto and are not restated in this file - confirm
// against the upstream generated source before relying on them.
fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
// 64x64 -> 128-bit partial products.  bits.mul_u64 yields (hi, lo), so the
// even-numbered names hold the high words and the odd ones the low words.
x2, x1 := bits.mul_u64(arg1[2], (arg2[2] * 0x5))
x4, x3 := bits.mul_u64(arg1[2], (arg2[1] * 0xa))
x6, x5 := bits.mul_u64(arg1[1], (arg2[2] * 0xa))
x8, x7 := bits.mul_u64(arg1[2], arg2[0])
x10, x9 := bits.mul_u64(arg1[1], (arg2[1] * 0x2))
x12, x11 := bits.mul_u64(arg1[1], arg2[0])
x14, x13 := bits.mul_u64(arg1[0], arg2[2])
x16, x15 := bits.mul_u64(arg1[0], arg2[1])
x18, x17 := bits.mul_u64(arg1[0], arg2[0])
// Column 0 (weight 2^0): 128-bit sum held in x25:x23.
x19, x20 := bits.add_u64(x5, x3, u64(0x0))
x21, _ := bits.add_u64(x6, x4, u64(fiat.u1(x20)))
x23, x24 := bits.add_u64(x17, x19, u64(0x0))
x25, _ := bits.add_u64(x18, x21, u64(fiat.u1(x24)))
// Split column 0 into its low 44 bits (x28) and the carry above them (x27).
x27 := ((x23 >> 44) | ((x25 << 20) & 0xffffffffffffffff))
x28 := (x23 & 0xfffffffffff)
// Column 2 (weight 2^87): 128-bit sum held in x35:x33.
x29, x30 := bits.add_u64(x9, x7, u64(0x0))
x31, _ := bits.add_u64(x10, x8, u64(fiat.u1(x30)))
x33, x34 := bits.add_u64(x13, x29, u64(0x0))
x35, _ := bits.add_u64(x14, x31, u64(fiat.u1(x34)))
// Column 1 (weight 2^44): 128-bit sum held in x43:x41.
x37, x38 := bits.add_u64(x11, x1, u64(0x0))
x39, _ := bits.add_u64(x12, x2, u64(fiat.u1(x38)))
x41, x42 := bits.add_u64(x15, x37, u64(0x0))
x43, _ := bits.add_u64(x16, x39, u64(fiat.u1(x42)))
// Propagate the column 0 carry into column 1, then split at 43 bits.
x45, x46 := bits.add_u64(x27, x41, u64(0x0))
x47 := (u64(fiat.u1(x46)) + x43)
x48 := ((x45 >> 43) | ((x47 << 21) & 0xffffffffffffffff))
x49 := (x45 & 0x7ffffffffff)
// Propagate the column 1 carry into column 2, then split at 43 bits.
x50, x51 := bits.add_u64(x48, x33, u64(0x0))
x52 := (u64(fiat.u1(x51)) + x35)
x53 := ((x50 >> 43) | ((x52 << 21) & 0xffffffffffffffff))
x54 := (x50 & 0x7ffffffffff)
// The carry out of column 2 wraps around to column 0 multiplied by 5.
x55 := (x53 * 0x5)
x56 := (x28 + x55)
// Final carry pass across the three limbs.
x57 := (x56 >> 44)
x58 := (x56 & 0xfffffffffff)
x59 := (x57 + x49)
x60 := fiat.u1((x59 >> 43))
x61 := (x59 & 0x7ffffffffff)
x62 := (u64(x60) + x54)
out1[0] = x58
out1[1] = x61
out1[2] = x62
}
// fe_carry_square squares arg1 in Z/(2^130 - 5) and stores the carried
// result in out1.
//
// This follows the same column layout as fe_carry_mul (limb weights
// 2^0, 2^44, 2^87), but exploits symmetry: each cross product is computed
// once with a doubled operand instead of twice.
//
// NOTE(review): the permitted input/output limb bounds come from
// fiat-crypto and are not restated in this file - confirm upstream.
fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
// Pre-scaled operands: x1 = 5*limb2, x2 = 10*limb2, x3 = 2*limb2,
// x4 = 2*limb1.
x1 := (arg1[2] * 0x5)
x2 := (x1 * 0x2)
x3 := (arg1[2] * 0x2)
x4 := (arg1[1] * 0x2)
// 64x64 -> 128-bit partial products; bits.mul_u64 yields (hi, lo).
x6, x5 := bits.mul_u64(arg1[2], x1)
x8, x7 := bits.mul_u64(arg1[1], (x2 * 0x2))
x10, x9 := bits.mul_u64(arg1[1], (arg1[1] * 0x2))
x12, x11 := bits.mul_u64(arg1[0], x3)
x14, x13 := bits.mul_u64(arg1[0], x4)
x16, x15 := bits.mul_u64(arg1[0], arg1[0])
// Column 0 (weight 2^0): 128-bit sum in x19:x17, split at 44 bits.
x17, x18 := bits.add_u64(x15, x7, u64(0x0))
x19, _ := bits.add_u64(x16, x8, u64(fiat.u1(x18)))
x21 := ((x17 >> 44) | ((x19 << 20) & 0xffffffffffffffff))
x22 := (x17 & 0xfffffffffff)
// Column 2 (weight 2^87): 128-bit sum in x25:x23.
x23, x24 := bits.add_u64(x11, x9, u64(0x0))
x25, _ := bits.add_u64(x12, x10, u64(fiat.u1(x24)))
// Column 1 (weight 2^44): 128-bit sum in x29:x27.
x27, x28 := bits.add_u64(x13, x5, u64(0x0))
x29, _ := bits.add_u64(x14, x6, u64(fiat.u1(x28)))
// Propagate the column 0 carry into column 1, then split at 43 bits.
x31, x32 := bits.add_u64(x21, x27, u64(0x0))
x33 := (u64(fiat.u1(x32)) + x29)
x34 := ((x31 >> 43) | ((x33 << 21) & 0xffffffffffffffff))
x35 := (x31 & 0x7ffffffffff)
// Propagate the column 1 carry into column 2, then split at 43 bits.
x36, x37 := bits.add_u64(x34, x23, u64(0x0))
x38 := (u64(fiat.u1(x37)) + x25)
x39 := ((x36 >> 43) | ((x38 << 21) & 0xffffffffffffffff))
x40 := (x36 & 0x7ffffffffff)
// The carry out of column 2 wraps around to column 0 multiplied by 5.
x41 := (x39 * 0x5)
x42 := (x22 + x41)
// Final carry pass across the three limbs.
x43 := (x42 >> 44)
x44 := (x42 & 0xfffffffffff)
x45 := (x43 + x35)
x46 := fiat.u1((x45 >> 43))
x47 := (x45 & 0x7ffffffffff)
x48 := (u64(x46) + x40)
out1[0] = x44
out1[1] = x47
out1[2] = x48
}
// fe_carry propagates carries across the limbs of arg1 and stores the
// result in out1.
//
// Every limb of arg1 is read before anything is written to out1, so the
// routine may be called with out1 and arg1 referring to the same storage.
fe_carry :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
	MASK_44 :: 0xfffffffffff
	MASK_43 :: 0x7ffffffffff

	// First pass: ripple the carries upward, limb 0 -> limb 1 -> limb 2.
	l0 := arg1[0]
	l1 := (l0 >> 44) + arg1[1]
	l2 := (l1 >> 43) + arg1[2]

	// The overflow out of limb 2 wraps around to limb 0 multiplied by 5,
	// since 2^130 = 5 (mod 2^130 - 5).
	t0 := (l0 & MASK_44) + ((l2 >> 43) * 0x5)
	// Second pass: at most a single-bit carry between adjacent limbs.
	t1 := u64(fiat.u1(t0 >> 44)) + (l1 & MASK_43)

	out1[0] = t0 & MASK_44
	out1[1] = t1 & MASK_43
	out1[2] = u64(fiat.u1(t1 >> 43)) + (l2 & MASK_43)
}
// fe_add stores the limb-wise sum of arg1 and arg2 in out1.  No carry
// propagation is performed.
//
// Each output limb depends only on the matching input limbs, so out1 may
// refer to the same storage as either argument.
fe_add :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
	for i in 0..<3 {
		out1[i] = arg1[i] + arg2[i]
	}
}
// fe_sub stores arg1 - arg2 in out1, limb by limb, without carrying.
//
// A per-limb constant is added before subtracting.  The three constants
// are exactly twice the limbs of 2^130 - 5 (0xffffffffffb, 0x7ffffffffff,
// 0x7ffffffffff), i.e. a multiple of the modulus.
fe_sub :: proc "contextless" (out1: ^Loose_Field_Element, arg1, arg2: ^Tight_Field_Element) {
	out1[0] = (0x1ffffffffff6 + arg1[0]) - arg2[0]
	out1[1] = (0xffffffffffe + arg1[1]) - arg2[1]
	out1[2] = (0xffffffffffe + arg1[2]) - arg2[2]
}
// fe_opp stores the negation of arg1 in out1, computed limb by limb as
// (2 * modulus limb) - arg1 limb, without carrying.
fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
	out1[0] = 0x1ffffffffff6 - arg1[0]
	out1[1] = 0xffffffffffe - arg1[1]
	out1[2] = 0xffffffffffe - arg1[2]
}
// fe_cond_assign overwrites out1 with arg1 when arg2 is true, and leaves
// out1 unchanged otherwise.  The selection is performed per limb via
// fiat.cmovznz_u64 rather than by branching on arg2.
fe_cond_assign :: proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: bool) {
	pick := fiat.u1(arg2)
	for i in 0..<3 {
		out1[i] = fiat.cmovznz_u64(pick, out1[i], arg1[i])
	}
}
// fe_to_bytes serializes arg1 to little-endian bytes in out1.
//
// The value is first canonicalized: the modulus 2^130 - 5 (limbs
// 0xffffffffffb, 0x7ffffffffff, 0x7ffffffffff) is subtracted, and added
// back under a mask if the subtraction borrowed.
//
// Only out1[0] through out1[16] are written; out1[17] through out1[31]
// are left untouched, so callers must not assume they are zero.
fe_to_bytes :: proc "contextless" (out1: ^[32]byte, arg1: ^Tight_Field_Element) {
// Trial subtraction of the modulus with a borrow chain across the limbs.
x1, x2 := _subborrowx_u44(0x0, arg1[0], 0xffffffffffb)
x3, x4 := _subborrowx_u43(x2, arg1[1], 0x7ffffffffff)
x5, x6 := _subborrowx_u43(x4, arg1[2], 0x7ffffffffff)
// x7 is all-ones iff the subtraction borrowed (arg1 was below the modulus).
x7 := fiat.cmovznz_u64(x6, u64(0x0), 0xffffffffffffffff)
// Conditionally add the modulus back using the mask.
x8, x9 := _addcarryx_u44(0x0, x1, (x7 & 0xffffffffffb))
x10, x11 := _addcarryx_u43(x9, x3, (x7 & 0x7ffffffffff))
x12, _ := _addcarryx_u43(x11, x5, (x7 & 0x7ffffffffff))
// Pre-shift limbs 2 and 1 so they line up with the bits left over in the
// final partial byte of the preceding limb (44 = 5*8 + 4, 87 = 10*8 + 7).
x14 := (x12 << 7)
x15 := (x10 << 4)
// Limb 0: bytes 0..4, with the top 4 bits carried into byte 5 via x25.
x16 := (u8(x8) & 0xff)
x17 := (x8 >> 8)
x18 := (u8(x17) & 0xff)
x19 := (x17 >> 8)
x20 := (u8(x19) & 0xff)
x21 := (x19 >> 8)
x22 := (u8(x21) & 0xff)
x23 := (x21 >> 8)
x24 := (u8(x23) & 0xff)
x25 := u8((x23 >> 8))
// Limb 1: bytes 5..9, with the remaining bits carried into byte 10 via x36.
x26 := (x15 + u64(x25))
x27 := (u8(x26) & 0xff)
x28 := (x26 >> 8)
x29 := (u8(x28) & 0xff)
x30 := (x28 >> 8)
x31 := (u8(x30) & 0xff)
x32 := (x30 >> 8)
x33 := (u8(x32) & 0xff)
x34 := (x32 >> 8)
x35 := (u8(x34) & 0xff)
x36 := u8((x34 >> 8))
// Limb 2: bytes 10..16.
x37 := (x14 + u64(x36))
x38 := (u8(x37) & 0xff)
x39 := (x37 >> 8)
x40 := (u8(x39) & 0xff)
x41 := (x39 >> 8)
x42 := (u8(x41) & 0xff)
x43 := (x41 >> 8)
x44 := (u8(x43) & 0xff)
x45 := (x43 >> 8)
x46 := (u8(x45) & 0xff)
x47 := (x45 >> 8)
x48 := (u8(x47) & 0xff)
x49 := u8((x47 >> 8))
out1[0] = x16
out1[1] = x18
out1[2] = x20
out1[3] = x22
out1[4] = x24
out1[5] = x27
out1[6] = x29
out1[7] = x31
out1[8] = x33
out1[9] = x35
out1[10] = x38
out1[11] = x40
out1[12] = x42
out1[13] = x44
out1[14] = x46
out1[15] = x48
out1[16] = x49
}
// _fe_from_bytes deserializes a little-endian byte string into the
// three-limb representation (44, 43 and remaining bits).
//
// Only arg1[0] through arg1[16] are read; arg1[17] through arg1[31] are
// ignored.  No reduction is performed on the decoded value.
_fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) {
// Position each input byte at its bit offset within the limb it feeds.
// Bytes 11..16 feed limb 2 (byte 11 is shifted by one bit because limb 2
// starts at bit 87 = 10*8 + 7, the top bit of byte 10).
x1 := (u64(arg1[16]) << 41)
x2 := (u64(arg1[15]) << 33)
x3 := (u64(arg1[14]) << 25)
x4 := (u64(arg1[13]) << 17)
x5 := (u64(arg1[12]) << 9)
x6 := (u64(arg1[11]) * u64(0x2))
// Bytes 6..10 feed limb 1 (which starts at bit 44 = 5*8 + 4).
x7 := (u64(arg1[10]) << 36)
x8 := (u64(arg1[9]) << 28)
x9 := (u64(arg1[8]) << 20)
x10 := (u64(arg1[7]) << 12)
x11 := (u64(arg1[6]) << 4)
// Bytes 0..5 feed limb 0.
x12 := (u64(arg1[5]) << 40)
x13 := (u64(arg1[4]) << 32)
x14 := (u64(arg1[3]) << 24)
x15 := (u64(arg1[2]) << 16)
x16 := (u64(arg1[1]) << 8)
x17 := arg1[0]
// Accumulate limb 0, keep its low 44 bits, and pass the overflow (the top
// 4 bits of byte 5) on to limb 1.
x18 := (x16 + u64(x17))
x19 := (x15 + x18)
x20 := (x14 + x19)
x21 := (x13 + x20)
x22 := (x12 + x21)
x23 := (x22 & 0xfffffffffff)
x24 := u8((x22 >> 44))
// Accumulate limb 1, keep its low 43 bits, and pass the single overflow
// bit (the top bit of byte 10) on to limb 2.
x25 := (x11 + u64(x24))
x26 := (x10 + x25)
x27 := (x9 + x26)
x28 := (x8 + x27)
x29 := (x7 + x28)
x30 := (x29 & 0x7ffffffffff)
x31 := fiat.u1((x29 >> 43))
// Accumulate limb 2; it is stored unmasked.
x32 := (x6 + u64(x31))
x33 := (x5 + x32)
x34 := (x4 + x33)
x35 := (x3 + x34)
x36 := (x2 + x35)
x37 := (x1 + x36)
out1[0] = x23
out1[1] = x30
out1[2] = x37
}
// fe_relax copies the limbs of a tight field element into a loose one
// without modifying their values.
fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Element) {
	l0, l1, l2 := arg1[0], arg1[1], arg1[2]
	out1[0], out1[1], out1[2] = l0, l1, l2
}
// The following routines were added by hand, and do not come from fiat-crypto.
// fe_zero sets every limb of out1 to zero.
fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
	out1[0], out1[1], out1[2] = 0, 0, 0
}
// fe_set copies the limbs of arg1 into out1.  All limbs are read before
// any are written.
fe_set :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
	l0, l1, l2 := arg1[0], arg1[1], arg1[2]
	out1[0], out1[1], out1[2] = l0, l1, l2
}
// fe_cond_swap exchanges the contents of out1 and out2 when arg1 is true
// and leaves both unchanged otherwise.  The swap is done with a mask and
// XOR per limb rather than by branching on arg1.
fe_cond_swap :: proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: bool) {
	// All-ones when arg1 is true, zero when it is false.
	mask := -u64(arg1)
	for i in 0..<3 {
		// `delta` is the XOR difference of the two limbs, or 0 when not
		// swapping; XOR-ing it into both limbs exchanges them.
		delta := (out1[i] ~ out2[i]) & mask
		a, b := out1[i] ~ delta, out2[i] ~ delta
		out1[i], out2[i] = a, b
	}
}

View File

@@ -0,0 +1,581 @@
package chacha20
import "core:crypto/util"
import "core:math/bits"
import "core:mem"
// KEY_SIZE is the required key length in bytes; init panics on any other
// length.
KEY_SIZE :: 32
// NONCE_SIZE is the nonce length in bytes that selects the IETF ChaCha20
// flavor in init.
NONCE_SIZE :: 12
// XNONCE_SIZE is the nonce length in bytes that selects XChaCha20 in init.
XNONCE_SIZE :: 24
// _MAX_CTR_IETF is the largest block number seek accepts for the IETF
// flavor (the block counter is a single 32-bit state word).
_MAX_CTR_IETF :: 0xffffffff
// _BLOCK_SIZE is the size in bytes of one keystream block.
_BLOCK_SIZE :: 64
// _STATE_SIZE_U32 is the number of 32-bit words in the cipher state.
_STATE_SIZE_U32 :: 16
// _ROUNDS is the total round count; the round loops step by two per
// iteration.
_ROUNDS :: 20
// _SIGMA_0 .. _SIGMA_3 are the constants loaded into state words 0..3.
// Read as little-endian bytes they spell "expand 32-byte k".
_SIGMA_0 : u32 : 0x61707865
_SIGMA_1 : u32 : 0x3320646e
_SIGMA_2 : u32 : 0x79622d32
_SIGMA_3 : u32 : 0x6b206574
// Context holds the state of a (X)ChaCha20 keystream instance.  It must be
// set up with init before use.
Context :: struct {
// Cipher state: words 0..3 constants, 4..11 key, 12 block counter,
// 13..15 nonce (or, for XChaCha20, 13 high counter word and 14..15 nonce).
_s: [_STATE_SIZE_U32]u32,
// One block of buffered keystream, used to serve partial-block requests.
// init also uses the first KEY_SIZE bytes as scratch for the XChaCha20
// sub-key.
_buffer: [_BLOCK_SIZE]byte,
// Number of bytes of _buffer already consumed; _BLOCK_SIZE means empty.
_off: int,
// True for a 12-byte nonce (32-bit counter), false for XChaCha20.
_is_ietf_flavor: bool,
// Set by init, cleared by reset; asserted by the public routines.
_is_initialized: bool,
}
// init prepares ctx for use with the given key and nonce.
//
// key must be KEY_SIZE bytes.  A NONCE_SIZE byte nonce selects the IETF
// ChaCha20 flavor, an XNONCE_SIZE byte nonce selects XChaCha20.  Any other
// length panics.  The block counter starts at 0 and no keystream is
// buffered.
init :: proc (ctx: ^Context, key, nonce: []byte) {
	if len(key) != KEY_SIZE {
		panic("crypto/chacha20: invalid ChaCha20 key size")
	}
	nonce_len := len(nonce)
	if nonce_len != NONCE_SIZE && nonce_len != XNONCE_SIZE {
		panic("crypto/chacha20: invalid (X)ChaCha20 nonce size")
	}
	is_xchacha := nonce_len == XNONCE_SIZE

	// For XChaCha20, HChaCha20 turns the key and the first 16 bytes of the
	// nonce into a sub-key; the final 8 nonce bytes become the sub-nonce.
	// The keystream buffer doubles as scratch space for the sub-key.
	round_key, iv := key, nonce
	if is_xchacha {
		derived := ctx._buffer[:KEY_SIZE]
		_hchacha20(derived, key, nonce)
		round_key = derived
		iv = nonce[16:24]
	}

	// Words 0..3: constants.  Words 4..11: key.  Word 12: block counter.
	ctx._s[0], ctx._s[1], ctx._s[2], ctx._s[3] = _SIGMA_0, _SIGMA_1, _SIGMA_2, _SIGMA_3
	for i in 0..<8 {
		ctx._s[4+i] = util.U32_LE(round_key[4*i:4*i+4])
	}
	ctx._s[12] = 0

	if is_xchacha {
		// Word 13 is the upper half of a 64-bit counter.
		ctx._s[13] = 0
		ctx._s[14] = util.U32_LE(iv[0:4])
		ctx._s[15] = util.U32_LE(iv[4:8])

		// The sub-key is stored in the keystream buffer.  While
		// this will be overwritten in most circumstances, explicitly
		// clear it out early.
		mem.zero_explicit(&ctx._buffer, KEY_SIZE)
	} else {
		ctx._s[13] = util.U32_LE(iv[0:4])
		ctx._s[14] = util.U32_LE(iv[4:8])
		ctx._s[15] = util.U32_LE(iv[8:12])
	}

	// Mark the keystream buffer as fully consumed.
	ctx._off = _BLOCK_SIZE
	ctx._is_ietf_flavor = !is_xchacha
	ctx._is_initialized = true
}
// seek sets the block counter to block_nr and discards any buffered
// keystream.
//
// For the IETF flavor the counter is 32 bits wide and seeking beyond
// _MAX_CTR_IETF panics.  For XChaCha20 the full 64-bit value is used.
seek :: proc (ctx: ^Context, block_nr: u64) {
	assert(ctx._is_initialized)

	if !ctx._is_ietf_flavor {
		// Upper half of the 64-bit counter.
		ctx._s[13] = u32(block_nr >> 32)
	} else if block_nr > _MAX_CTR_IETF {
		panic("crypto/chacha20: attempted to seek past maximum counter")
	}
	ctx._s[12] = u32(block_nr)

	// Force the next request to generate fresh keystream.
	ctx._off = _BLOCK_SIZE
}
// xor_bytes XORs src with the keystream and writes the result to dst.
// If dst is shorter than src, only len(dst) bytes are processed.
//
// Leftover keystream from a previous partial block is consumed first, so
// consecutive calls produce the same output as a single call over the
// concatenated input.
xor_bytes :: proc (ctx: ^Context, dst, src: []byte) {
	assert(ctx._is_initialized)

	// TODO: Enforcing that dst and src alias exactly or not at all
	// is a good idea, though odd aliasing should be extremely uncommon.

	out, input := dst, src
	if len(out) < len(input) {
		input = input[:len(out)]
	}

	left := len(input)
	for left > 0 {
		if ctx._off == _BLOCK_SIZE {
			// No buffered keystream: handle as many whole blocks as
			// possible directly, without going through the buffer.
			whole_blocks := left / _BLOCK_SIZE
			if whole_blocks > 0 {
				n_bytes := whole_blocks * _BLOCK_SIZE
				_do_blocks(ctx, out, input, whole_blocks)
				left -= n_bytes
				if left == 0 {
					return
				}
				out, input = out[n_bytes:], input[n_bytes:]
			}

			// A partial block remains; generate and buffer one block
			// worth of keystream to serve it.
			_do_blocks(ctx, ctx._buffer[:], nil, 1)
			ctx._off = 0
		}

		// Consume buffered keystream.
		n := min(_BLOCK_SIZE - ctx._off, left)
		keystream := ctx._buffer[ctx._off:]
		for i in 0..<n {
			out[i] = keystream[i] ~ input[i]
		}
		ctx._off += n
		out, input = out[n:], input[n:]
		left -= n
	}
}
// keystream_bytes fills dst with raw keystream.
//
// Leftover keystream from a previous partial block is consumed first, so
// consecutive calls produce the same output as a single call with a larger
// destination.
keystream_bytes :: proc (ctx: ^Context, dst: []byte) {
	assert(ctx._is_initialized)

	out := dst
	left := len(out)
	for left > 0 {
		if ctx._off == _BLOCK_SIZE {
			// No buffered keystream: write as many whole blocks as
			// possible straight into the destination.
			whole_blocks := left / _BLOCK_SIZE
			if whole_blocks > 0 {
				n_bytes := whole_blocks * _BLOCK_SIZE
				_do_blocks(ctx, out, nil, whole_blocks)
				left -= n_bytes
				if left == 0 {
					return
				}
				out = out[n_bytes:]
			}

			// A partial block remains; generate and buffer one block
			// worth of keystream to serve it.
			_do_blocks(ctx, ctx._buffer[:], nil, 1)
			ctx._off = 0
		}

		// Consume buffered keystream.
		n := min(_BLOCK_SIZE - ctx._off, left)
		keystream := ctx._buffer[ctx._off:]
		copy(out[:n], keystream[:n])
		ctx._off += n
		out = out[n:]
		left -= n
	}
}
// reset wipes the cipher state and the buffered keystream, and marks ctx
// as uninitialized.  init must be called again before further use.
reset :: proc (ctx: ^Context) {
	ctx._is_initialized = false
	mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
	mem.zero_explicit(&ctx._s, size_of(ctx._s))
}
// _do_blocks generates nr_blocks blocks of keystream, advancing the block
// counter in ctx._s after each one.
//
// When src is non-nil each block of keystream is XORed with the matching
// block of src and written to dst; when src is nil the raw keystream is
// written to dst.  Callers must ensure dst (and src, if given) hold at
// least nr_blocks * _BLOCK_SIZE bytes.
//
// Panics if the request would push the counter past its limit.  For the
// IETF flavor the check rejects any request where counter + nr_blocks
// exceeds 0xffffffff, so the block with counter value 0xffffffff is never
// produced.
_do_blocks :: proc (ctx: ^Context, dst, src: []byte, nr_blocks: int) {
// Enforce the maximum consumed keystream per nonce.
//
// While all modern "standard" definitions of ChaCha20 use
// the IETF 32-bit counter, for XChaCha20 most common
// implementations allow for a 64-bit counter.
//
// Honestly, the answer here is "use a MRAE primitive", but
// go with common practice in the case of XChaCha20.
if ctx._is_ietf_flavor {
if u64(ctx._s[12]) + u64(nr_blocks) > 0xffffffff {
panic("crypto/chacha20: maximum ChaCha20 keystream per nonce reached")
}
} else {
ctr := (u64(ctx._s[13]) << 32) | u64(ctx._s[12])
if _, carry := bits.add_u64(ctr, u64(nr_blocks), 0); carry != 0 {
panic("crypto/chacha20: maximum XChaCha20 keystream per nonce reached")
}
}
dst, src := dst, src
x := &ctx._s
for n := 0; n < nr_blocks; n = n + 1 {
// Load the working state.  Words 0..3 are always the constants.
x0, x1, x2, x3 := _SIGMA_0, _SIGMA_1, _SIGMA_2, _SIGMA_3
x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 := x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15]
// Each iteration is one double round: four column quarter rounds
// followed by four diagonal quarter rounds.
for i := _ROUNDS; i > 0; i = i - 2 {
// Even when forcing inlining manually inlining all of
// these is decently faster.
// quarterround(x, 0, 4, 8, 12)
x0 += x4
x12 ~= x0
x12 = util.ROTL32(x12, 16)
x8 += x12
x4 ~= x8
x4 = util.ROTL32(x4, 12)
x0 += x4
x12 ~= x0
x12 = util.ROTL32(x12, 8)
x8 += x12
x4 ~= x8
x4 = util.ROTL32(x4, 7)
// quarterround(x, 1, 5, 9, 13)
x1 += x5
x13 ~= x1
x13 = util.ROTL32(x13, 16)
x9 += x13
x5 ~= x9
x5 = util.ROTL32(x5, 12)
x1 += x5
x13 ~= x1
x13 = util.ROTL32(x13, 8)
x9 += x13
x5 ~= x9
x5 = util.ROTL32(x5, 7)
// quarterround(x, 2, 6, 10, 14)
x2 += x6
x14 ~= x2
x14 = util.ROTL32(x14, 16)
x10 += x14
x6 ~= x10
x6 = util.ROTL32(x6, 12)
x2 += x6
x14 ~= x2
x14 = util.ROTL32(x14, 8)
x10 += x14
x6 ~= x10
x6 = util.ROTL32(x6, 7)
// quarterround(x, 3, 7, 11, 15)
x3 += x7
x15 ~= x3
x15 = util.ROTL32(x15, 16)
x11 += x15
x7 ~= x11
x7 = util.ROTL32(x7, 12)
x3 += x7
x15 ~= x3
x15 = util.ROTL32(x15, 8)
x11 += x15
x7 ~= x11
x7 = util.ROTL32(x7, 7)
// quarterround(x, 0, 5, 10, 15)
x0 += x5
x15 ~= x0
x15 = util.ROTL32(x15, 16)
x10 += x15
x5 ~= x10
x5 = util.ROTL32(x5, 12)
x0 += x5
x15 ~= x0
x15 = util.ROTL32(x15, 8)
x10 += x15
x5 ~= x10
x5 = util.ROTL32(x5, 7)
// quarterround(x, 1, 6, 11, 12)
x1 += x6
x12 ~= x1
x12 = util.ROTL32(x12, 16)
x11 += x12
x6 ~= x11
x6 = util.ROTL32(x6, 12)
x1 += x6
x12 ~= x1
x12 = util.ROTL32(x12, 8)
x11 += x12
x6 ~= x11
x6 = util.ROTL32(x6, 7)
// quarterround(x, 2, 7, 8, 13)
x2 += x7
x13 ~= x2
x13 = util.ROTL32(x13, 16)
x8 += x13
x7 ~= x8
x7 = util.ROTL32(x7, 12)
x2 += x7
x13 ~= x2
x13 = util.ROTL32(x13, 8)
x8 += x13
x7 ~= x8
x7 = util.ROTL32(x7, 7)
// quarterround(x, 3, 4, 9, 14)
x3 += x4
x14 ~= x3
x14 = util.ROTL32(x14, 16)
x9 += x14
x4 ~= x9
x4 = util.ROTL32(x4, 12)
x3 += x4
x14 ~= x3
x14 = util.ROTL32(x14, 8)
x9 += x14
x4 ~= x9
x4 = util.ROTL32(x4, 7)
}
// Feed-forward: add the input state to the permuted state.
x0 += _SIGMA_0
x1 += _SIGMA_1
x2 += _SIGMA_2
x3 += _SIGMA_3
x4 += x[4]
x5 += x[5]
x6 += x[6]
x7 += x[7]
x8 += x[8]
x9 += x[9]
x10 += x[10]
x11 += x[11]
x12 += x[12]
x13 += x[13]
x14 += x[14]
x15 += x[15]
// While the "correct" answer to getting more performance out of
// this is "use vector operations", support for that is currently
// a work in progress/to be designed.
//
// Until dedicated assembly can be written leverage the fact that
// the callers of this routine ensure that src/dst are valid.
when ODIN_ARCH == "386" || ODIN_ARCH == "amd64" {
// util.PUT_U32_LE/util.U32_LE are not required on little-endian
// systems that also happen to not be strict about aligned
// memory access.
dst_p := transmute(^[16]u32)(&dst[0])
if src != nil {
src_p := transmute(^[16]u32)(&src[0])
dst_p[0] = src_p[0] ~ x0
dst_p[1] = src_p[1] ~ x1
dst_p[2] = src_p[2] ~ x2
dst_p[3] = src_p[3] ~ x3
dst_p[4] = src_p[4] ~ x4
dst_p[5] = src_p[5] ~ x5
dst_p[6] = src_p[6] ~ x6
dst_p[7] = src_p[7] ~ x7
dst_p[8] = src_p[8] ~ x8
dst_p[9] = src_p[9] ~ x9
dst_p[10] = src_p[10] ~ x10
dst_p[11] = src_p[11] ~ x11
dst_p[12] = src_p[12] ~ x12
dst_p[13] = src_p[13] ~ x13
dst_p[14] = src_p[14] ~ x14
dst_p[15] = src_p[15] ~ x15
src = src[_BLOCK_SIZE:]
} else {
dst_p[0] = x0
dst_p[1] = x1
dst_p[2] = x2
dst_p[3] = x3
dst_p[4] = x4
dst_p[5] = x5
dst_p[6] = x6
dst_p[7] = x7
dst_p[8] = x8
dst_p[9] = x9
dst_p[10] = x10
dst_p[11] = x11
dst_p[12] = x12
dst_p[13] = x13
dst_p[14] = x14
dst_p[15] = x15
}
dst = dst[_BLOCK_SIZE:]
} else {
// Portable path: explicit little-endian loads and stores.
#no_bounds_check {
if src != nil {
util.PUT_U32_LE(dst[0:4], util.U32_LE(src[0:4]) ~ x0)
util.PUT_U32_LE(dst[4:8], util.U32_LE(src[4:8]) ~ x1)
util.PUT_U32_LE(dst[8:12], util.U32_LE(src[8:12]) ~ x2)
util.PUT_U32_LE(dst[12:16], util.U32_LE(src[12:16]) ~ x3)
util.PUT_U32_LE(dst[16:20], util.U32_LE(src[16:20]) ~ x4)
util.PUT_U32_LE(dst[20:24], util.U32_LE(src[20:24]) ~ x5)
util.PUT_U32_LE(dst[24:28], util.U32_LE(src[24:28]) ~ x6)
util.PUT_U32_LE(dst[28:32], util.U32_LE(src[28:32]) ~ x7)
util.PUT_U32_LE(dst[32:36], util.U32_LE(src[32:36]) ~ x8)
util.PUT_U32_LE(dst[36:40], util.U32_LE(src[36:40]) ~ x9)
util.PUT_U32_LE(dst[40:44], util.U32_LE(src[40:44]) ~ x10)
util.PUT_U32_LE(dst[44:48], util.U32_LE(src[44:48]) ~ x11)
util.PUT_U32_LE(dst[48:52], util.U32_LE(src[48:52]) ~ x12)
util.PUT_U32_LE(dst[52:56], util.U32_LE(src[52:56]) ~ x13)
util.PUT_U32_LE(dst[56:60], util.U32_LE(src[56:60]) ~ x14)
util.PUT_U32_LE(dst[60:64], util.U32_LE(src[60:64]) ~ x15)
src = src[_BLOCK_SIZE:]
} else {
util.PUT_U32_LE(dst[0:4], x0)
util.PUT_U32_LE(dst[4:8], x1)
util.PUT_U32_LE(dst[8:12], x2)
util.PUT_U32_LE(dst[12:16], x3)
util.PUT_U32_LE(dst[16:20], x4)
util.PUT_U32_LE(dst[20:24], x5)
util.PUT_U32_LE(dst[24:28], x6)
util.PUT_U32_LE(dst[28:32], x7)
util.PUT_U32_LE(dst[32:36], x8)
util.PUT_U32_LE(dst[36:40], x9)
util.PUT_U32_LE(dst[40:44], x10)
util.PUT_U32_LE(dst[44:48], x11)
util.PUT_U32_LE(dst[48:52], x12)
util.PUT_U32_LE(dst[52:56], x13)
util.PUT_U32_LE(dst[56:60], x14)
util.PUT_U32_LE(dst[60:64], x15)
}
dst = dst[_BLOCK_SIZE:]
}
}
// Increment the counter.  Overflow checking is done upon
// entry into the routine, so a 64-bit increment safely
// covers both cases.
new_ctr := ((u64(ctx._s[13]) << 32) | u64(ctx._s[12])) + 1
x[12] = u32(new_ctr)
x[13] = u32(new_ctr >> 32)
}
}
// _hchacha20 derives a 32-byte value from key and the first 16 bytes of
// nonce and writes it to dst.  init uses it to derive the XChaCha20
// sub-key.
//
// The state is the constants, the 8 key words and 4 nonce words.  After
// the rounds, words 0..3 and 12..15 are written out directly - unlike
// _do_blocks there is no feed-forward addition of the input state.
//
// Reads key[0:32] and nonce[0:16]; writes dst[0:32].
_hchacha20 :: proc (dst, key, nonce: []byte) {
x0, x1, x2, x3 := _SIGMA_0, _SIGMA_1, _SIGMA_2, _SIGMA_3
x4 := util.U32_LE(key[0:4])
x5 := util.U32_LE(key[4:8])
x6 := util.U32_LE(key[8:12])
x7 := util.U32_LE(key[12:16])
x8 := util.U32_LE(key[16:20])
x9 := util.U32_LE(key[20:24])
x10 := util.U32_LE(key[24:28])
x11 := util.U32_LE(key[28:32])
x12 := util.U32_LE(nonce[0:4])
x13 := util.U32_LE(nonce[4:8])
x14 := util.U32_LE(nonce[8:12])
x15 := util.U32_LE(nonce[12:16])
// Each iteration is one double round: four column quarter rounds
// followed by four diagonal quarter rounds.
for i := _ROUNDS; i > 0; i = i - 2 {
// quarterround(x, 0, 4, 8, 12)
x0 += x4
x12 ~= x0
x12 = util.ROTL32(x12, 16)
x8 += x12
x4 ~= x8
x4 = util.ROTL32(x4, 12)
x0 += x4
x12 ~= x0
x12 = util.ROTL32(x12, 8)
x8 += x12
x4 ~= x8
x4 = util.ROTL32(x4, 7)
// quarterround(x, 1, 5, 9, 13)
x1 += x5
x13 ~= x1
x13 = util.ROTL32(x13, 16)
x9 += x13
x5 ~= x9
x5 = util.ROTL32(x5, 12)
x1 += x5
x13 ~= x1
x13 = util.ROTL32(x13, 8)
x9 += x13
x5 ~= x9
x5 = util.ROTL32(x5, 7)
// quarterround(x, 2, 6, 10, 14)
x2 += x6
x14 ~= x2
x14 = util.ROTL32(x14, 16)
x10 += x14
x6 ~= x10
x6 = util.ROTL32(x6, 12)
x2 += x6
x14 ~= x2
x14 = util.ROTL32(x14, 8)
x10 += x14
x6 ~= x10
x6 = util.ROTL32(x6, 7)
// quarterround(x, 3, 7, 11, 15)
x3 += x7
x15 ~= x3
x15 = util.ROTL32(x15, 16)
x11 += x15
x7 ~= x11
x7 = util.ROTL32(x7, 12)
x3 += x7
x15 ~= x3
x15 = util.ROTL32(x15, 8)
x11 += x15
x7 ~= x11
x7 = util.ROTL32(x7, 7)
// quarterround(x, 0, 5, 10, 15)
x0 += x5
x15 ~= x0
x15 = util.ROTL32(x15, 16)
x10 += x15
x5 ~= x10
x5 = util.ROTL32(x5, 12)
x0 += x5
x15 ~= x0
x15 = util.ROTL32(x15, 8)
x10 += x15
x5 ~= x10
x5 = util.ROTL32(x5, 7)
// quarterround(x, 1, 6, 11, 12)
x1 += x6
x12 ~= x1
x12 = util.ROTL32(x12, 16)
x11 += x12
x6 ~= x11
x6 = util.ROTL32(x6, 12)
x1 += x6
x12 ~= x1
x12 = util.ROTL32(x12, 8)
x11 += x12
x6 ~= x11
x6 = util.ROTL32(x6, 7)
// quarterround(x, 2, 7, 8, 13)
x2 += x7
x13 ~= x2
x13 = util.ROTL32(x13, 16)
x8 += x13
x7 ~= x8
x7 = util.ROTL32(x7, 12)
x2 += x7
x13 ~= x2
x13 = util.ROTL32(x13, 8)
x8 += x13
x7 ~= x8
x7 = util.ROTL32(x7, 7)
// quarterround(x, 3, 4, 9, 14)
x3 += x4
x14 ~= x3
x14 = util.ROTL32(x14, 16)
x9 += x14
x4 ~= x9
x4 = util.ROTL32(x4, 12)
x3 += x4
x14 ~= x3
x14 = util.ROTL32(x14, 8)
x9 += x14
x4 ~= x9
x4 = util.ROTL32(x4, 7)
}
// Output: first and last rows of the state, little-endian.
util.PUT_U32_LE(dst[0:4], x0)
util.PUT_U32_LE(dst[4:8], x1)
util.PUT_U32_LE(dst[8:12], x2)
util.PUT_U32_LE(dst[12:16], x3)
util.PUT_U32_LE(dst[16:20], x12)
util.PUT_U32_LE(dst[20:24], x13)
util.PUT_U32_LE(dst[24:28], x14)
util.PUT_U32_LE(dst[28:32], x15)
}

View File

@@ -0,0 +1,146 @@
package chacha20poly1305
import "core:crypto"
import "core:crypto/chacha20"
import "core:crypto/poly1305"
import "core:crypto/util"
import "core:mem"
// KEY_SIZE is the required AEAD key length in bytes (same as ChaCha20).
KEY_SIZE :: chacha20.KEY_SIZE
// NONCE_SIZE is the required AEAD nonce length in bytes (the 12-byte
// ChaCha20 nonce; the 24-byte XChaCha20 nonce is rejected).
NONCE_SIZE :: chacha20.NONCE_SIZE
// TAG_SIZE is the authentication tag length in bytes (same as Poly1305).
TAG_SIZE :: poly1305.TAG_SIZE
// _P_MAX is the largest plaintext/ciphertext length in bytes accepted by
// _validate_common_slice_sizes.
_P_MAX :: 64 * 0xffffffff // 64 * (2^32-1)
// _validate_common_slice_sizes panics unless tag, key and nonce have the
// exact sizes the AEAD requires and, on 64-bit targets, text does not
// exceed _P_MAX bytes.  aad is accepted for symmetry but not checked.
_validate_common_slice_sizes :: proc (tag, key, nonce, aad, text: []byte) {
	switch {
	case len(tag) != TAG_SIZE:
		panic("crypto/chacha20poly1305: invalid destination tag size")
	case len(key) != KEY_SIZE:
		panic("crypto/chacha20poly1305: invalid key size")
	case len(nonce) != NONCE_SIZE:
		panic("crypto/chacha20poly1305: invalid nonce size")
	}

	#assert(size_of(int) == 8 || size_of(int) <= 4)
	when size_of(int) == 8 {
		// A_MAX = 2^64 - 1 due to the length field limit.
		// P_MAX = 64 * (2^32 - 1) due to the IETF ChaCha20 counter limit.
		//
		// A slice length is an int, so A_MAX can never be exceeded and
		// needs no check.  P_MAX only fits in an int when int is 64 bits
		// wide, hence the check is limited to those targets.
		if len(text) > _P_MAX {
			panic("crypto/chacha20poly1305: oversized src data")
		}
	}
}
// _PAD is a block of zero bytes used as the source of MAC padding.
_PAD: [16]byte

// _update_mac_pad16 feeds the MAC as many zero bytes as are needed to
// round x_len up to a multiple of 16.  Nothing is fed when x_len is
// already a multiple of 16.
_update_mac_pad16 :: #force_inline proc (ctx: ^poly1305.Context, x_len: int) {
	tail := x_len & (16-1)
	if tail != 0 {
		poly1305.update(ctx, _PAD[:16 - tail])
	}
}
// encrypt encrypts plaintext into ciphertext and writes the
// authentication tag covering aad and the ciphertext into tag.
//
// Panics if tag, key or nonce have the wrong size, if plaintext is
// oversized, or if ciphertext and plaintext differ in length.
encrypt :: proc (ciphertext, tag, key, nonce, aad, plaintext: []byte) {
	_validate_common_slice_sizes(tag, key, nonce, aad, plaintext)
	if len(ciphertext) != len(plaintext) {
		panic("crypto/chacha20poly1305: invalid destination ciphertext size")
	}
	aad_len, ciphertext_len := len(aad), len(ciphertext)

	cipher: chacha20.Context = ---
	chacha20.init(&cipher, key, nonce)

	// otk = poly1305_key_gen(key, nonce)
	//
	// The one-time key is the start of keystream block 0.  It is wiped as
	// soon as the MAC has been keyed.
	otk: [poly1305.KEY_SIZE]byte = ---
	chacha20.keystream_bytes(&cipher, otk[:])
	mac: poly1305.Context = ---
	poly1305.init(&mac, otk[:])
	mem.zero_explicit(&otk, size_of(otk))

	// There is nothing preventing aad and ciphertext from overlapping
	// so auth the AAD before encrypting (slightly different from the
	// RFC, since the RFC encrypts into a new buffer).
	//
	// mac_data = aad | pad16(aad)
	poly1305.update(&mac, aad)
	_update_mac_pad16(&mac, aad_len)

	// ciphertext = chacha20_encrypt(key, 1, nonce, plaintext)
	chacha20.seek(&cipher, 1)
	chacha20.xor_bytes(&cipher, ciphertext, plaintext)
	chacha20.reset(&cipher) // The stream context is no longer needed.

	// mac_data |= ciphertext | pad16(ciphertext)
	poly1305.update(&mac, ciphertext)
	_update_mac_pad16(&mac, ciphertext_len)

	// mac_data |= num_to_8_le_bytes(aad.length)
	// mac_data |= num_to_8_le_bytes(ciphertext.length)
	lengths := otk[0:16] // The (already wiped) key buffer doubles as scratch.
	util.PUT_U64_LE(lengths[0:8], u64(aad_len))
	util.PUT_U64_LE(lengths[8:16], u64(ciphertext_len))
	poly1305.update(&mac, lengths)

	// tag = poly1305_mac(mac_data, otk)
	poly1305.final(&mac, tag) // Implicitly sanitizes the MAC context.
}
// decrypt authenticates aad and ciphertext against tag and, only if the
// tag is valid, decrypts ciphertext into plaintext and returns true.
//
// On authentication failure plaintext is zeroed and false is returned.
// Panics if tag, key or nonce have the wrong size, if ciphertext is
// oversized, or if ciphertext and plaintext differ in length.
decrypt :: proc (plaintext, tag, key, nonce, aad, ciphertext: []byte) -> bool {
	_validate_common_slice_sizes(tag, key, nonce, aad, ciphertext)
	if len(ciphertext) != len(plaintext) {
		panic("crypto/chacha20poly1305: invalid destination plaintext size")
	}
	aad_len, ciphertext_len := len(aad), len(ciphertext)

	// Note: Unlike encrypt, this can fail early, so use defer for
	// sanitization rather than assuming control flow reaches certain
	// points where needed.

	cipher: chacha20.Context = ---
	chacha20.init(&cipher, key, nonce)

	// otk = poly1305_key_gen(key, nonce)
	otk: [poly1305.KEY_SIZE]byte = ---
	chacha20.keystream_bytes(&cipher, otk[:])
	defer chacha20.reset(&cipher)

	mac: poly1305.Context = ---
	poly1305.init(&mac, otk[:])
	// otk is reused as scratch space below (length block, derived tag),
	// so wipe it on every exit path.
	defer mem.zero_explicit(&otk, size_of(otk))

	// mac_data = aad | pad16(aad)
	poly1305.update(&mac, aad)
	_update_mac_pad16(&mac, aad_len)

	// mac_data |= ciphertext | pad16(ciphertext)
	poly1305.update(&mac, ciphertext)
	_update_mac_pad16(&mac, ciphertext_len)

	// mac_data |= num_to_8_le_bytes(aad.length)
	// mac_data |= num_to_8_le_bytes(ciphertext.length)
	lengths := otk[0:16]
	util.PUT_U64_LE(lengths[0:8], u64(aad_len))
	util.PUT_U64_LE(lengths[8:16], u64(ciphertext_len))
	poly1305.update(&mac, lengths)

	// tag = poly1305_mac(mac_data, otk)
	expected_tag := otk[0:poly1305.TAG_SIZE]
	poly1305.final(&mac, expected_tag) // Implicitly sanitizes the MAC context.

	// Validate the tag in constant time.
	tag_ok := crypto.compare_constant_time(tag, expected_tag) == 1
	if !tag_ok {
		// Zero out the plaintext, as a defense in depth measure.
		mem.zero_explicit(raw_data(plaintext), ciphertext_len)
		return false
	}

	// plaintext = chacha20_decrypt(key, 1, nonce, ciphertext)
	chacha20.seek(&cipher, 1)
	chacha20.xor_bytes(&cipher, plaintext, ciphertext)
	return true
}

52
core/crypto/crypto.odin Normal file
View File

@@ -0,0 +1,52 @@
package crypto
import "core:mem"
// compare_constant_time returns 1 iff a and b are equal, 0 otherwise.
//
// When both slices have the same length, the running time does not depend
// on their contents.  When the lengths differ the routine returns 0
// immediately, which reveals only that the lengths differ.
compare_constant_time :: proc "contextless" (a, b: []byte) -> int {
	// Leaking a length mismatch is acceptable: the routine is primarily
	// intended for comparing things like MACs and password digests.
	if len(a) != len(b) {
		return 0
	}
	return compare_byte_ptrs_constant_time(raw_data(a), raw_data(b), len(a))
}
// compare_byte_ptrs_constant_time returns 1 iff the n bytes pointed to by
// a and b are equal, 0 otherwise.
//
// Every byte is always examined, so the running time does not depend on
// the contents of the memory being compared.
compare_byte_ptrs_constant_time :: proc "contextless" (a, b: ^byte, n: int) -> int {
	lhs, rhs := mem.slice_ptr(a, n), mem.slice_ptr(b, n)

	// Accumulate the XOR of every byte pair; any difference sets a bit.
	diff: byte
	for i in 0..<n {
		diff |= lhs[i] ~ rhs[i]
	}

	// diff == 0 iff the inputs are equal.  Subtracting 1 from the widened
	// value wraps around only in that case, which sets the top bit; that
	// bit is the result.
	widened := u32(diff)
	return int((widened - 1) >> 31)
}
// rand_bytes fills dst with cryptographic entropy from the system entropy
// source, blocking until that source is ready.  Any failure of the system
// entropy source is treated as catastrophic and results in a panic.
rand_bytes :: proc (dst: []byte) {
	// Clear the buffer up front so that stale contents never survive.
	ptr, n := raw_data(dst), len(dst)
	mem.zero_explicit(ptr, n)

	_rand_bytes(dst)
}

View File

@@ -0,0 +1,163 @@
package poly1305
import "core:crypto"
import "core:crypto/util"
import field "core:crypto/_fiat/field_poly1305"
import "core:mem"
// KEY_SIZE is the required one-time key length in bytes; init panics on
// any other length.
KEY_SIZE :: 32
// TAG_SIZE is the length in bytes of the authentication tag.
TAG_SIZE :: 16
// _BLOCK_SIZE is the number of message bytes absorbed per block.
_BLOCK_SIZE :: 16
// sum computes the Poly1305 tag of msg under key in one shot and writes it
// to dst.  key must be KEY_SIZE bytes and dst TAG_SIZE bytes, otherwise
// the underlying init/final calls panic.
sum :: proc (dst, msg, key: []byte) {
	mac: Context = ---

	init(&mac, key)
	update(&mac, msg)
	final(&mac, dst)
}
// verify recomputes the Poly1305 tag of msg under key and reports whether
// it matches tag.  The comparison is done in constant time.
//
// Panics if tag is not TAG_SIZE bytes.
verify :: proc (tag, msg, key: []byte) -> bool {
	if len(tag) != TAG_SIZE {
		panic("crypto/poly1305: invalid tag size")
	}

	mac: Context = ---
	expected: [16]byte = ---

	init(&mac, key)
	update(&mac, msg)
	final(&mac, expected[:])

	return crypto.compare_constant_time(expected[:], tag) == 1
}
// Context holds the state of an incremental Poly1305 computation.  It must
// be set up with init before use.
Context :: struct {
// Clamped multiplier, from key[0:16].
_r: field.Tight_Field_Element,
// Running accumulator.
_a: field.Tight_Field_Element,
// Final addend, from key[16:32].
_s: field.Tight_Field_Element,
// Holds a partial block until enough data arrives to process it.
_buffer: [_BLOCK_SIZE]byte,
// Number of valid bytes currently in _buffer.
_leftover: int,
// Set by init, cleared by reset; asserted by update and final.
_is_initialized: bool,
}
// init prepares ctx for a new computation under the given one-time key.
// key must be KEY_SIZE bytes, otherwise init panics.
init :: proc (ctx: ^Context, key: []byte) {
	if len(key) != KEY_SIZE {
		panic("crypto/poly1305: invalid key size")
	}

	// r = clamp(le_bytes_to_num(key[0..15]))
	//
	// The clamp mask is 0x0ffffffc0ffffffc0ffffffc0fffffff, applied here
	// to each 64-bit half separately.
	r_lo := util.U64_LE(key[0:8]) & 0x0ffffffc0fffffff
	r_hi := util.U64_LE(key[8:16]) & 0xffffffc0ffffffc
	field.fe_from_u64s(&ctx._r, r_lo, r_hi)

	// s = le_bytes_to_num(key[16..31])
	field.fe_from_bytes(&ctx._s, key[16:32], 0)

	// a = 0, and nothing is buffered yet.
	field.fe_zero(&ctx._a)
	ctx._leftover = 0

	ctx._is_initialized = true
}
// update absorbs data into the MAC state.  It may be called any number of
// times; input that does not fill a whole block is buffered until more
// data arrives or final is called.
update :: proc (ctx: ^Context, data: []byte) {
	assert(ctx._is_initialized)

	pending := data

	// Top up a previously buffered partial block first.
	if ctx._leftover > 0 {
		take := min(_BLOCK_SIZE - ctx._leftover, len(pending))
		copy_slice(ctx._buffer[ctx._leftover:], pending[:take])
		pending = pending[take:]
		ctx._leftover += take
		if ctx._leftover < _BLOCK_SIZE {
			// Still not a full block; wait for more input.
			return
		}
		_blocks(ctx, ctx._buffer[:])
		ctx._leftover = 0
	}

	// Process all whole blocks directly from the input.
	if len(pending) >= _BLOCK_SIZE {
		bulk := len(pending) & (~int(_BLOCK_SIZE - 1))
		_blocks(ctx, pending[:bulk])
		pending = pending[bulk:]
	}

	// Buffer whatever is left over.
	if len(pending) > 0 {
		// TODO: While -donna does it this way, I'm fairly sure that
		// `ctx._leftover == 0` is an invariant at this point.
		copy(ctx._buffer[ctx._leftover:], pending)
		ctx._leftover += len(pending)
	}
}
// final completes the MAC computation, writes the TAG_SIZE byte tag to
// dst, and sanitizes ctx via reset.  The context must be re-initialized
// with init before it can be used again.
//
// Panics if dst is not exactly TAG_SIZE bytes.
final :: proc (ctx: ^Context, dst: []byte) {
	assert(ctx._is_initialized)

	if len(dst) != TAG_SIZE {
		panic("poly1305: invalid destination tag size")
	}

	// Process remaining block: append the 0x01 terminator, zero-pad to a
	// full block, and absorb it without the implicit high bit.
	if ctx._leftover > 0 {
		ctx._buffer[ctx._leftover] = 1
		for i := ctx._leftover + 1; i < _BLOCK_SIZE; i = i + 1 {
			ctx._buffer[i] = 0
		}
		_blocks(ctx, ctx._buffer[:], true)
	}

	// a += s
	//
	// This addition is an ordinary integer addition whose result is
	// truncated to 128 bits; it is NOT an addition modulo 2^130 - 5.
	// Performing it in the field would produce a tag that is off by 5
	// (mod 2^128) whenever a + s >= 2^130 - 5.
	//
	// fe_to_bytes yields the canonical (fully reduced) accumulator, and
	// since s < 2^128 its serialization is simply s.
	a_bytes, s_bytes: [32]byte
	field.fe_to_bytes(&a_bytes, &ctx._a)
	field.fe_to_bytes(&s_bytes, &ctx._s)

	// return num_to_16_le_bytes(a)
	//
	// Byte-wise add with carry; the carry out of the last byte is dropped,
	// which is the reduction modulo 2^128.  The loop is branch-free, so it
	// does not leak anything about a or s through timing.
	carry: u16
	for i in 0..<TAG_SIZE {
		carry += u16(a_bytes[i]) + u16(s_bytes[i])
		dst[i] = byte(carry)
		carry >>= 8
	}

	// Both temporaries hold key-dependent values, wipe them.
	mem.zero_explicit(&a_bytes, size_of(a_bytes))
	mem.zero_explicit(&s_bytes, size_of(s_bytes))

	reset(ctx)
}
// reset wipes the key material (r and s), the accumulator and the block
// buffer, and marks ctx as uninitialized.
reset :: proc (ctx: ^Context) {
	ctx._is_initialized = false

	mem.zero_explicit(&ctx._buffer, size_of(ctx._buffer))
	mem.zero_explicit(&ctx._s, size_of(ctx._s))
	mem.zero_explicit(&ctx._a, size_of(ctx._a))
	mem.zero_explicit(&ctx._r, size_of(ctx._r))
}
// _blocks absorbs every complete _BLOCK_SIZE byte block of msg into the
// accumulator; trailing bytes that do not fill a block are ignored.
//
// The byte passed to fe_from_bytes alongside each block is 1 for regular
// blocks and 0 when `final` is set (the caller has then already placed
// the terminator inside the block).
_blocks :: proc (ctx: ^Context, msg: []byte, final := false) {
	block: field.Tight_Field_Element = ---
	high_byte := byte(!final)

	for rest := msg; len(rest) >= _BLOCK_SIZE; rest = rest[_BLOCK_SIZE:] {
		// n = le_bytes_to_num(block | [high_byte])
		field.fe_from_bytes(&block, rest[:_BLOCK_SIZE], high_byte, false)

		// a += n (leaves the accumulator unreduced)
		field.fe_add(field.fe_relax_cast(&ctx._a), &ctx._a, &block)

		// a = (r * a) % p (accumulator reduced again)
		field.fe_carry_mul(&ctx._a, field.fe_relax_cast(&ctx._a), field.fe_relax_cast(&ctx._r))
	}
}

View File

@@ -0,0 +1,7 @@
package crypto
// Fallback used on every target other than Linux: there is no entropy
// source wired up, so any attempt to obtain random bytes aborts.
when ODIN_OS != "linux" {
	_rand_bytes :: proc (dst: []byte) {
		unimplemented("crypto: rand_bytes not supported on this OS")
	}
}

View File

@@ -0,0 +1,37 @@
package crypto
import "core:fmt"
import "core:os"
import "core:sys/unix"
// Largest request handed to a single getrandom call: 2^25 - 1 bytes.
_MAX_PER_CALL_BYTES :: 33554431

// _rand_bytes fills dst with bytes obtained from the getrandom system
// call, issuing as many calls as are needed to cover the whole slice.
//
// Panics if the kernel does not provide getrandom, or on any failure
// other than the call being interrupted.
_rand_bytes :: proc (dst: []byte) {
	remaining := dst
	for len(remaining) > 0 {
		chunk_len := min(len(remaining), _MAX_PER_CALL_BYTES)
		n := unix.sys_getrandom(raw_data(remaining), chunk_len, 0)
		if n >= 0 {
			// Partial reads are fine, just ask for the rest.
			remaining = remaining[n:]
			continue
		}

		// A negative return value is a negated errno.
		switch os.Errno(-n) {
		case os.EINTR:
			// Call interrupted by a signal handler, just retry the
			// request on the next loop iteration.
		case os.ENOSYS:
			// The kernel is apparently prehistoric (< 3.17 circa 2014)
			// and does not support getrandom.
			panic("crypto: getrandom not available in kernel")
		case:
			// All other failures are things that should NEVER happen
			// unless the kernel interface changes (ie: the Linux
			// developers break userland).
			panic(fmt.tprintf("crypto: getrandom failed: %d", n))
		}
	}
}

View File

@@ -0,0 +1,126 @@
package x25519
import field "core:crypto/_fiat/field_curve25519"
import "core:mem"
SCALAR_SIZE :: 32
POINT_SIZE :: 32
_BASE_POINT: [32]byte = {9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
// _scalar_bit returns bit i of the little-endian scalar s as 0 or 1.
// Negative bit positions yield 0.
_scalar_bit :: #force_inline proc "contextless" (s: ^[32]byte, i: int) -> u8 {
	if i >= 0 {
		byte_idx := i >> 3
		shift := uint(i & 7)
		return (s[byte_idx] >> shift) & 1
	}
	return 0
}
// _scalarmult multiplies `point` by `scalar` with a Montgomery ladder and
// writes the 32-byte encoding of the resulting x-coordinate to `out`.
//
// The scalar is used exactly as given: the ladder consumes bits 254 down
// to 0, and no clamping is done here.  Every field element temporary is
// explicitly zeroed before returning.
_scalarmult :: proc (out, scalar, point: ^[32]byte) {
// Montgomery pseudo-multiplication taken from Monocypher.
// decode the input point's bytes into the field element x1
x1: field.Tight_Field_Element = ---
field.fe_from_bytes(&x1, point)
// computes the actual scalar product (the result is in x2 and z2)
x2, x3, z2, z3: field.Tight_Field_Element = ---, ---, ---, ---
t0, t1: field.Loose_Field_Element = ---, ---
// Montgomery ladder
// In projective coordinates, to avoid divisions: x = X / Z
// We don't care about the y coordinate, it's only 1 bit of information
field.fe_one(&x2) // "zero" point
field.fe_zero(&z2)
field.fe_set(&x3, &x1) // "one" point
field.fe_one(&z3)
swap: int
for pos := 255-1; pos >= 0; pos = pos - 1 {
// constant time conditional swap before ladder step
b := int(_scalar_bit(scalar, pos))
swap ~= b // xor trick avoids swapping at the end of the loop
field.fe_cond_swap(&x2, &x3, swap)
field.fe_cond_swap(&z2, &z3, swap)
swap = b // anticipates one last swap after the loop
// Montgomery ladder step: replaces (P2, P3) by (P2*2, P2+P3)
// with differential addition
//
// Note: This deliberately omits reductions after add/sub operations
// if the result is only ever used as the input to a mul/square since
// the implementations of those can deal with non-reduced inputs.
//
// fe_tighten_cast is only used to store a fully reduced
// output in a Loose_Field_Element, or to provide such a
// Loose_Field_Element as a Tight_Field_Element argument.
field.fe_sub(&t0, &x3, &z3)
field.fe_sub(&t1, &x2, &z2)
field.fe_add(field.fe_relax_cast(&x2), &x2, &z2) // x2 - unreduced
field.fe_add(field.fe_relax_cast(&z2), &x3, &z3) // z2 - unreduced
field.fe_carry_mul(&z3, &t0, field.fe_relax_cast(&x2))
field.fe_carry_mul(&z2, field.fe_relax_cast(&z2), &t1) // z2 - reduced
field.fe_carry_square(field.fe_tighten_cast(&t0), &t1) // t0 - reduced
field.fe_carry_square(field.fe_tighten_cast(&t1), field.fe_relax_cast(&x2)) // t1 - reduced
field.fe_add(field.fe_relax_cast(&x3), &z3, &z2) // x3 - unreduced
field.fe_sub(field.fe_relax_cast(&z2), &z3, &z2) // z2 - unreduced
field.fe_carry_mul(&x2, &t1, &t0) // x2 - reduced
field.fe_sub(&t1, field.fe_tighten_cast(&t1), field.fe_tighten_cast(&t0)) // safe - t1/t0 is reduced
field.fe_carry_square(&z2, field.fe_relax_cast(&z2)) // z2 - reduced
field.fe_carry_scmul_121666(&z3, &t1)
field.fe_carry_square(&x3, field.fe_relax_cast(&x3)) // x3 - reduced
field.fe_add(&t0, field.fe_tighten_cast(&t0), &z3) // safe - t0 is reduced
field.fe_carry_mul(&z3, field.fe_relax_cast(&x1), field.fe_relax_cast(&z2))
field.fe_carry_mul(&z2, &t1, &t0)
}
// last swap is necessary to compensate for the xor trick
// Note: after this swap, P3 == P2 + P1.
field.fe_cond_swap(&x2, &x3, swap)
field.fe_cond_swap(&z2, &z3, swap)
// normalises the coordinates: x == X / Z
field.fe_carry_inv(&z2, field.fe_relax_cast(&z2))
field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2))
field.fe_to_bytes(out, &x2)
// wipe every temporary, as they are all derived from the scalar
mem.zero_explicit(&x1, size_of(x1))
mem.zero_explicit(&x2, size_of(x2))
mem.zero_explicit(&x3, size_of(x3))
mem.zero_explicit(&z2, size_of(z2))
mem.zero_explicit(&z3, size_of(z3))
mem.zero_explicit(&t0, size_of(t0))
mem.zero_explicit(&t1, size_of(t1))
}
// scalarmult writes the X25519 product of `scalar` and `point` to dst.
//
// scalar must be SCALAR_SIZE bytes, and both point and dst must be
// POINT_SIZE bytes, otherwise the procedure panics.  The scalar is
// clamped on a private copy; the caller's slice is not modified.
scalarmult :: proc (dst, scalar, point: []byte) {
	switch {
	case len(scalar) != SCALAR_SIZE:
		panic("crypto/x25519: invalid scalar size")
	case len(point) != POINT_SIZE:
		panic("crypto/x25519: invalid point size")
	case len(dst) != POINT_SIZE:
		panic("crypto/x25519: invalid destination point size")
	}

	// "clamp" the scalar: clear the low 3 bits, clear bit 255, set bit 254
	clamped: [32]byte = ---
	copy_slice(clamped[:], scalar)
	clamped[0] &= 248
	clamped[31] = (clamped[31] & 127) | 64

	// Work on fixed-size local copies so the inputs can be passed on as
	// array pointers.
	pt: [32]byte = ---
	copy_slice(pt[:], point)

	product: [32]byte = ---
	_scalarmult(&product, &clamped, &pt)
	copy_slice(dst, product[:])

	// Wipe the secret scalar copy and the local copy of the result.
	mem.zero_explicit(&clamped, size_of(clamped))
	mem.zero_explicit(&product, size_of(product))
}
// scalarmult_basepoint writes the X25519 product of `scalar` and the
// package's base point (_BASE_POINT) to dst.  Size requirements and
// panics are those of scalarmult.
scalarmult_basepoint :: proc (dst, scalar: []byte) {
	// TODO/perf: Switch to using a precomputed table.
	base_point := _BASE_POINT[:]
	scalarmult(dst, scalar, base_point)
}

View File

@@ -4,64 +4,56 @@ package mem_virtual
import "core:c"
import "core:intrinsics"
import "core:sys/unix"
when ODIN_ARCH == "amd64" {
SYS_mmap :: 9
SYS_mprotect :: 10
SYS_munmap :: 11
SYS_madvise :: 28
PROT_NONE :: 0x0
PROT_READ :: 0x1
PROT_WRITE :: 0x2
PROT_EXEC :: 0x4
PROT_GROWSDOWN :: 0x01000000
PROT_GROWSUP :: 0x02000000
PROT_NONE :: 0x0
PROT_READ :: 0x1
PROT_WRITE :: 0x2
PROT_EXEC :: 0x4
PROT_GROWSDOWN :: 0x01000000
PROT_GROWSUP :: 0x02000000
MAP_FIXED :: 0x1
MAP_PRIVATE :: 0x2
MAP_SHARED :: 0x4
MAP_ANONYMOUS :: 0x20
MADV_NORMAL :: 0
MADV_RANDOM :: 1
MADV_SEQUENTIAL :: 2
MADV_WILLNEED :: 3
MADV_DONTNEED :: 4
MADV_FREE :: 8
MADV_REMOVE :: 9
MADV_DONTFORK :: 10
MADV_DOFORK :: 11
MADV_MERGEABLE :: 12
MADV_UNMERGEABLE :: 13
MADV_HUGEPAGE :: 14
MADV_NOHUGEPAGE :: 15
MADV_DONTDUMP :: 16
MADV_DODUMP :: 17
MADV_WIPEONFORK :: 18
MADV_KEEPONFORK :: 19
MADV_HWPOISON :: 100
} else {
#panic("Unsupported architecture")
}
MAP_FIXED :: 0x1
MAP_PRIVATE :: 0x2
MAP_SHARED :: 0x4
MAP_ANONYMOUS :: 0x20
MADV_NORMAL :: 0
MADV_RANDOM :: 1
MADV_SEQUENTIAL :: 2
MADV_WILLNEED :: 3
MADV_DONTNEED :: 4
MADV_FREE :: 8
MADV_REMOVE :: 9
MADV_DONTFORK :: 10
MADV_DOFORK :: 11
MADV_MERGEABLE :: 12
MADV_UNMERGEABLE :: 13
MADV_HUGEPAGE :: 14
MADV_NOHUGEPAGE :: 15
MADV_DONTDUMP :: 16
MADV_DODUMP :: 17
MADV_WIPEONFORK :: 18
MADV_KEEPONFORK :: 19
MADV_HWPOISON :: 100
// mmap issues the raw mmap system call and returns the kernel's result
// reinterpreted as a pointer; no error translation is done here.
//
// NOTE(review): on 386 and arm unix.SYS_mmap is mmap2, whose offset is
// counted in 4096-byte units rather than bytes - confirm that callers
// passing a non-zero offset account for this.
mmap :: proc "contextless" (addr: rawptr, length: uint, prot: c.int, flags: c.int, fd: c.int, offset: uintptr) -> rawptr {
res := intrinsics.syscall(SYS_mmap, uintptr(addr), uintptr(length), uintptr(prot), uintptr(flags), uintptr(fd), offset)
res := intrinsics.syscall(unix.SYS_mmap, uintptr(addr), uintptr(length), uintptr(prot), uintptr(flags), uintptr(fd), offset)
return rawptr(res)
}
// munmap issues the raw munmap system call and returns the kernel's
// result truncated to a c.int.
munmap :: proc "contextless" (addr: rawptr, length: uint) -> c.int {
res := intrinsics.syscall(SYS_munmap, uintptr(addr), uintptr(length))
res := intrinsics.syscall(unix.SYS_munmap, uintptr(addr), uintptr(length))
return c.int(res)
}
// mprotect issues the raw mprotect system call and returns the kernel's
// result truncated to a c.int.
//
// NOTE(review): prot is converted with uint(...) while every other
// syscall argument in this file uses uintptr(...); this looks like it
// should be uintptr(prot) - confirm.
mprotect :: proc "contextless" (addr: rawptr, length: uint, prot: c.int) -> c.int {
res := intrinsics.syscall(SYS_mprotect, uintptr(addr), uintptr(length), uint(prot))
res := intrinsics.syscall(unix.SYS_mprotect, uintptr(addr), uintptr(length), uint(prot))
return c.int(res)
}
// madvise issues the raw madvise system call and returns the kernel's
// result truncated to a c.int.
madvise :: proc "contextless" (addr: rawptr, length: uint, advice: c.int) -> c.int {
res := intrinsics.syscall(SYS_madvise, uintptr(addr), uintptr(length), uintptr(advice))
res := intrinsics.syscall(unix.SYS_madvise, uintptr(addr), uintptr(length), uintptr(advice))
return c.int(res)
}

View File

@@ -8,6 +8,7 @@ import "core:strings"
import "core:c"
import "core:strconv"
import "core:intrinsics"
import "core:sys/unix"
Handle :: distinct i32
File_Time :: distinct u64
@@ -265,8 +266,6 @@ X_OK :: 1 // Test for execute permission
W_OK :: 2 // Test for write permission
R_OK :: 4 // Test for read permission
SYS_GETTID :: 186
foreign libc {
@(link_name="__errno_location") __errno_location :: proc() -> ^int ---
@@ -594,7 +593,7 @@ exit :: proc "contextless" (code: int) -> ! {
}
// current_thread_id returns the result of the gettid system call for the
// calling thread.
current_thread_id :: proc "contextless" () -> int {
return cast(int)intrinsics.syscall(SYS_GETTID)
return unix.sys_gettid()
}
dlopen :: proc(filename: string, flags: int) -> rawptr {

View File

@@ -5,6 +5,7 @@ package sync2
import "core:c"
import "core:time"
import "core:intrinsics"
import "core:sys/unix"
FUTEX_WAIT :: 0
FUTEX_WAKE :: 1
@@ -34,7 +35,7 @@ get_errno :: proc(r: int) -> int {
}
// internal_futex issues the raw futex system call on f with the given
// operation, value and timeout (the remaining two arguments are passed as
// 0), and returns the result after it has been run through get_errno.
internal_futex :: proc(f: ^Futex, op: c.int, val: u32, timeout: rawptr) -> int {
code := int(intrinsics.syscall(202, uintptr(f), uintptr(op), uintptr(val), uintptr(timeout), 0, 0))
code := int(intrinsics.syscall(unix.SYS_futex, uintptr(f), uintptr(op), uintptr(val), uintptr(timeout), 0, 0))
return get_errno(code)
}

View File

@@ -2,9 +2,8 @@
//+private
package sync2
import "core:intrinsics"
import "core:sys/unix"
// _current_thread_id returns the result of the gettid system call for the
// calling thread.
_current_thread_id :: proc "contextless" () -> int {
SYS_GETTID :: 186
return int(intrinsics.syscall(SYS_GETTID))
return unix.sys_gettid()
}

View File

@@ -1,11 +1,9 @@
package sync
import "core:sys/unix"
import "core:intrinsics"
// current_thread_id returns the result of the gettid system call for the
// calling thread.
current_thread_id :: proc "contextless" () -> int {
SYS_GETTID :: 186
return int(intrinsics.syscall(SYS_GETTID))
return unix.sys_gettid()
}

View File

@@ -0,0 +1,60 @@
package unix
import "core:intrinsics"
// Linux has inconsistent system call numbering across architectures,
// for largely historical reasons. This attempts to provide a unified
// Odin-side interface for system calls that are required for the core
// library to work.
// For authoritative system call numbers, the following files in the kernel
// source can be used:
//
// amd64: arch/x86/entry/syscalls/syscall_64.tbl
// arm64: include/uapi/asm-generic/unistd.h
// 386: arch/x86/entry/syscalls/syscall_32.tbl
// arm: arch/arm/tools/syscall.tbl
// Per-architecture system call numbers.  Every supported architecture
// defines the same set of SYS_* constants, typed as uintptr so they can be
// passed straight to intrinsics.syscall; any other architecture is a
// compile time error.
//
// NOTE: on 386 and arm, SYS_mmap is deliberately the mmap2 call, which
// takes its file offset in units of 4096 bytes rather than in bytes.
when ODIN_ARCH == "amd64" {
SYS_mmap : uintptr : 9
SYS_mprotect : uintptr : 10
SYS_munmap : uintptr : 11
SYS_madvise : uintptr : 28
SYS_futex : uintptr : 202
SYS_gettid : uintptr : 186
SYS_getrandom : uintptr : 318
} else when ODIN_ARCH == "arm64" {
// arm64 uses the asm-generic system call table.
SYS_mmap : uintptr : 222
SYS_mprotect : uintptr : 226
SYS_munmap : uintptr : 215
SYS_madvise : uintptr : 233
SYS_futex : uintptr : 98
SYS_gettid : uintptr : 178
SYS_getrandom : uintptr : 278
} else when ODIN_ARCH == "386" {
SYS_mmap : uintptr : 192 // 90 is "sys_old_mmap", we want mmap2
SYS_mprotect : uintptr : 125
SYS_munmap : uintptr : 91
SYS_madvise : uintptr : 219
SYS_futex : uintptr : 240
SYS_gettid : uintptr : 224
SYS_getrandom : uintptr : 355
} else when ODIN_ARCH == "arm" {
SYS_mmap : uintptr : 192 // 90 is "sys_old_mmap", we want mmap2
SYS_mprotect : uintptr : 125
SYS_munmap: uintptr : 91
SYS_madvise: uintptr : 220
SYS_futex : uintptr : 240
SYS_gettid : uintptr: 224
SYS_getrandom : uintptr : 384
} else {
#panic("Unsupported architecture")
}
// sys_gettid issues the gettid system call and returns its result as an
// int.
sys_gettid :: proc "contextless" () -> int {
	tid := intrinsics.syscall(SYS_gettid)
	return int(tid)
}
// sys_getrandom issues the getrandom system call, asking the kernel to
// write up to buflen random bytes to buf.
//
// Inputs:
// - buf:    destination buffer
// - buflen: number of bytes requested
// - flags:  getrandom flags, passed through unchanged
//
// Returns the raw system call result: the number of bytes written, or a
// negated errno value on failure.
sys_getrandom :: proc "contextless" (buf: ^byte, buflen: int, flags: uint) -> int {
	// Every syscall argument is passed as a uintptr, the pointer included,
	// matching the other raw system call wrappers.
	return cast(int)intrinsics.syscall(SYS_getrandom, uintptr(buf), uintptr(buflen), uintptr(flags))
}

View File

@@ -545,8 +545,8 @@ void usage(String argv0) {
print_usage_line(1, "version print version");
print_usage_line(1, "report print information useful to reporting a bug");
print_usage_line(0, "");
print_usage_line(0, "For more information of flags, apply the flag to see what is possible");
print_usage_line(1, "-help");
print_usage_line(0, "For further details on a command, use -help after the command name");
print_usage_line(1, "e.g. odin build -help");
}

View File

@@ -115,6 +115,15 @@ main :: proc() {
test_haval_224(&t)
test_haval_256(&t)
// "modern" crypto tests
test_chacha20(&t)
test_poly1305(&t)
test_chacha20poly1305(&t)
test_x25519(&t)
test_rand_bytes(&t)
bench_modern(&t)
fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
}

View File

@@ -0,0 +1,535 @@
package test_core_crypto
import "core:testing"
import "core:fmt"
import "core:mem"
import "core:time"
import "core:crypto"
import "core:crypto/chacha20"
import "core:crypto/chacha20poly1305"
import "core:crypto/poly1305"
import "core:crypto/x25519"
// _digit_value maps an ASCII digit or letter to its numeric value:
// '0'..'9' map to 0..9, and letters of either case map to 10..35.
// Any other rune yields 16.
_digit_value :: proc(r: rune) -> int {
	code := int(r)
	switch r {
	case '0'..='9':
		return code - '0'
	case 'a'..='z':
		return code - 'a' + 10
	case 'A'..='Z':
		return code - 'A' + 10
	}
	return 16
}
// _decode_hex32 decodes a hex string of up to 64 characters into a
// 32-byte array.  Inputs shorter than 64 characters leave the trailing
// bytes of the result zero.
//
// Malformed input (odd length, more than 64 characters, or a character
// that is not a hex digit) trips an assertion instead of silently
// producing garbage bytes or an out of bounds index.
_decode_hex32 :: proc(s: string) -> [32]byte{
	assert(len(s) % 2 == 0, "_decode_hex32: odd length hex string")
	assert(len(s) <= 64, "_decode_hex32: hex string too long")

	b: [32]byte
	for i := 0; i < len(s); i = i + 2 {
		hi := _digit_value(rune(s[i]))
		lo := _digit_value(rune(s[i+1]))
		// _digit_value also accepts 'g'..'z', and returns 16 for
		// everything else, so anything >= 16 is not a hex digit.
		assert(hi < 16 && lo < 16, "_decode_hex32: invalid hex digit")
		b[i/2] = byte(hi << 4 | lo)
	}
	return b
}
_PLAINTEXT_SUNSCREEN_STR := "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, sunscreen would be it."
// test_chacha20 checks ChaCha20 and XChaCha20 against known answer
// vectors: the sunscreen plaintext is encrypted with the block counter
// starting at 1, and the output is compared with the expected ciphertext.
@(test)
test_chacha20 :: proc(t: ^testing.T) {
	log(t, "Testing (X)ChaCha20")

	// Test cases taken from RFC 8439, and draft-irtf-cfrg-xchacha-03
	plaintext := transmute([]byte)(_PLAINTEXT_SUNSCREEN_STR)

	// --- ChaCha20 (12-byte nonce) ---
	key := [chacha20.KEY_SIZE]byte{
		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
		0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
		0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
	}
	nonce := [chacha20.NONCE_SIZE]byte{
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a,
		0x00, 0x00, 0x00, 0x00,
	}
	expected := [114]byte{
		0x6e, 0x2e, 0x35, 0x9a, 0x25, 0x68, 0xf9, 0x80,
		0x41, 0xba, 0x07, 0x28, 0xdd, 0x0d, 0x69, 0x81,
		0xe9, 0x7e, 0x7a, 0xec, 0x1d, 0x43, 0x60, 0xc2,
		0x0a, 0x27, 0xaf, 0xcc, 0xfd, 0x9f, 0xae, 0x0b,
		0xf9, 0x1b, 0x65, 0xc5, 0x52, 0x47, 0x33, 0xab,
		0x8f, 0x59, 0x3d, 0xab, 0xcd, 0x62, 0xb3, 0x57,
		0x16, 0x39, 0xd6, 0x24, 0xe6, 0x51, 0x52, 0xab,
		0x8f, 0x53, 0x0c, 0x35, 0x9f, 0x08, 0x61, 0xd8,
		0x07, 0xca, 0x0d, 0xbf, 0x50, 0x0d, 0x6a, 0x61,
		0x56, 0xa3, 0x8e, 0x08, 0x8a, 0x22, 0xb6, 0x5e,
		0x52, 0xbc, 0x51, 0x4d, 0x16, 0xcc, 0xf8, 0x06,
		0x81, 0x8c, 0xe9, 0x1a, 0xb7, 0x79, 0x37, 0x36,
		0x5a, 0xf9, 0x0b, 0xbf, 0x74, 0xa3, 0x5b, 0xe6,
		0xb4, 0x0b, 0x8e, 0xed, 0xf2, 0x78, 0x5e, 0x42,
		0x87, 0x4d,
	}
	expected_hex := hex_string(expected[:])

	got: [114]byte
	ctx: chacha20.Context = ---
	chacha20.init(&ctx, key[:], nonce[:])
	chacha20.seek(&ctx, 1) // The test vectors start the counter at 1.
	chacha20.xor_bytes(&ctx, got[:], plaintext[:])

	got_hex := hex_string(got[:])
	expect(t, got_hex == expected_hex, fmt.tprintf("Expected %s for xor_bytes(plaintext_str), but got %s instead", expected_hex, got_hex))

	// --- XChaCha20 (24-byte nonce), reusing the context and output buffer ---
	xkey := [chacha20.KEY_SIZE]byte{
		0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
		0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
		0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
	}
	xnonce := [chacha20.XNONCE_SIZE]byte{
		0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
		0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
		0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
	}
	xexpected := [114]byte{
		0xbd, 0x6d, 0x17, 0x9d, 0x3e, 0x83, 0xd4, 0x3b,
		0x95, 0x76, 0x57, 0x94, 0x93, 0xc0, 0xe9, 0x39,
		0x57, 0x2a, 0x17, 0x00, 0x25, 0x2b, 0xfa, 0xcc,
		0xbe, 0xd2, 0x90, 0x2c, 0x21, 0x39, 0x6c, 0xbb,
		0x73, 0x1c, 0x7f, 0x1b, 0x0b, 0x4a, 0xa6, 0x44,
		0x0b, 0xf3, 0xa8, 0x2f, 0x4e, 0xda, 0x7e, 0x39,
		0xae, 0x64, 0xc6, 0x70, 0x8c, 0x54, 0xc2, 0x16,
		0xcb, 0x96, 0xb7, 0x2e, 0x12, 0x13, 0xb4, 0x52,
		0x2f, 0x8c, 0x9b, 0xa4, 0x0d, 0xb5, 0xd9, 0x45,
		0xb1, 0x1b, 0x69, 0xb9, 0x82, 0xc1, 0xbb, 0x9e,
		0x3f, 0x3f, 0xac, 0x2b, 0xc3, 0x69, 0x48, 0x8f,
		0x76, 0xb2, 0x38, 0x35, 0x65, 0xd3, 0xff, 0xf9,
		0x21, 0xf9, 0x66, 0x4c, 0x97, 0x63, 0x7d, 0xa9,
		0x76, 0x88, 0x12, 0xf6, 0x15, 0xc6, 0x8b, 0x13,
		0xb5, 0x2e,
	}
	xexpected_hex := hex_string(xexpected[:])

	chacha20.init(&ctx, xkey[:], xnonce[:])
	chacha20.seek(&ctx, 1)
	chacha20.xor_bytes(&ctx, got[:], plaintext[:])

	got_hex = hex_string(got[:])
	expect(t, got_hex == xexpected_hex, fmt.tprintf("Expected %s for xor_bytes(plaintext_str), but got %s instead", xexpected_hex, got_hex))
}
// test_poly1305 checks Poly1305 against a known answer vector through
// all three entry points: one-shot verify, one-shot sum, and the
// incremental init/update/final interface fed with uneven chunk sizes.
@(test)
test_poly1305 :: proc(t: ^testing.T) {
	log(t, "Testing poly1305")

	// Test cases taken from poly1305-donna.
	key := [poly1305.KEY_SIZE]byte{
		0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91,
		0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25,
		0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65,
		0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80,
	}
	msg := [131]byte{
		0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73,
		0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce,
		0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4,
		0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a,
		0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b,
		0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72,
		0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2,
		0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38,
		0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a,
		0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae,
		0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea,
		0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda,
		0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde,
		0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3,
		0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6,
		0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74,
		0xe3,0x55,0xa5,
	}
	tag := [poly1305.TAG_SIZE]byte{
		0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5,
		0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9,
	}
	tag_str := hex_string(tag[:])

	// Verify - oneshot + compare
	verified := poly1305.verify(tag[:], msg[:], key[:])
	expect(t, verified, "oneshot verify call failed")

	// Sum - oneshot
	computed: [poly1305.TAG_SIZE]byte
	poly1305.sum(computed[:], msg[:], key[:])
	computed_str := hex_string(computed[:])
	expect(t, computed_str == tag_str, fmt.tprintf("Expected %s for sum(msg, key), but got %s instead", tag_str, computed_str))

	// Incremental: clear the previous result, then feed the message in
	// chunks that add up to the full 131 bytes.
	mem.zero(&computed, size_of(computed))

	ctx: poly1305.Context = ---
	poly1305.init(&ctx, key[:])

	chunk_lens := [11]int{32, 64, 16, 8, 4, 2, 1, 1, 1, 1, 1}
	consumed := 0
	for chunk_len in chunk_lens {
		next := consumed + chunk_len
		poly1305.update(&ctx, msg[consumed:next])
		consumed = next
	}
	poly1305.final(&ctx, computed[:])

	computed_str = hex_string(computed[:])
	expect(t, computed_str == tag_str, fmt.tprintf("Expected %s for init/update/final - incremental, but got %s instead", tag_str, computed_str))
}
// test_chacha20poly1305 checks the AEAD construction against a known
// answer vector: encrypt must reproduce the expected ciphertext and tag,
// decrypt must recover the plaintext, and decrypt must reject both a
// corrupted ciphertext and corrupted additional data.
@(test)
test_chacha20poly1305 :: proc(t: ^testing.T) {
	log(t, "Testing chacha20poly1305")

	plaintext := transmute([]byte)(_PLAINTEXT_SUNSCREEN_STR)

	aad := [12]byte{
		0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3,
		0xc4, 0xc5, 0xc6, 0xc7,
	}

	key := [chacha20poly1305.KEY_SIZE]byte{
		0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
		0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
		0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
	}

	nonce := [chacha20poly1305.NONCE_SIZE]byte{
		0x07, 0x00, 0x00, 0x00,
		0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
	}

	ciphertext := [114]byte{
		0xd3, 0x1a, 0x8d, 0x34, 0x64, 0x8e, 0x60, 0xdb,
		0x7b, 0x86, 0xaf, 0xbc, 0x53, 0xef, 0x7e, 0xc2,
		0xa4, 0xad, 0xed, 0x51, 0x29, 0x6e, 0x08, 0xfe,
		0xa9, 0xe2, 0xb5, 0xa7, 0x36, 0xee, 0x62, 0xd6,
		0x3d, 0xbe, 0xa4, 0x5e, 0x8c, 0xa9, 0x67, 0x12,
		0x82, 0xfa, 0xfb, 0x69, 0xda, 0x92, 0x72, 0x8b,
		0x1a, 0x71, 0xde, 0x0a, 0x9e, 0x06, 0x0b, 0x29,
		0x05, 0xd6, 0xa5, 0xb6, 0x7e, 0xcd, 0x3b, 0x36,
		0x92, 0xdd, 0xbd, 0x7f, 0x2d, 0x77, 0x8b, 0x8c,
		0x98, 0x03, 0xae, 0xe3, 0x28, 0x09, 0x1b, 0x58,
		0xfa, 0xb3, 0x24, 0xe4, 0xfa, 0xd6, 0x75, 0x94,
		0x55, 0x85, 0x80, 0x8b, 0x48, 0x31, 0xd7, 0xbc,
		0x3f, 0xf4, 0xde, 0xf0, 0x8e, 0x4b, 0x7a, 0x9d,
		0xe5, 0x76, 0xd2, 0x65, 0x86, 0xce, 0xc6, 0x4b,
		0x61, 0x16,
	}
	ciphertext_str := hex_string(ciphertext[:])

	tag := [chacha20poly1305.TAG_SIZE]byte{
		0x1a, 0xe1, 0x0b, 0x59, 0x4f, 0x09, 0xe2, 0x6a,
		0x7e, 0x90, 0x2e, 0xcb, 0xd0, 0x60, 0x06, 0x91,
	}
	tag_str := hex_string(tag[:])

	// Encrypt: both the ciphertext and the tag must match the vector.
	derived_tag: [chacha20poly1305.TAG_SIZE]byte
	derived_ciphertext: [114]byte

	chacha20poly1305.encrypt(derived_ciphertext[:], derived_tag[:], key[:], nonce[:], aad[:], plaintext)

	derived_ciphertext_str := hex_string(derived_ciphertext[:])
	expect(t, derived_ciphertext_str == ciphertext_str, fmt.tprintf("Expected ciphertext %s for encrypt(aad, plaintext), but got %s instead", ciphertext_str, derived_ciphertext_str))

	derived_tag_str := hex_string(derived_tag[:])
	expect(t, derived_tag_str == tag_str, fmt.tprintf("Expected tag %s for encrypt(aad, plaintext), but got %s instead", tag_str, derived_tag_str))

	// Decrypt: the known ciphertext and tag must yield the plaintext.
	derived_plaintext: [114]byte
	ok := chacha20poly1305.decrypt(derived_plaintext[:], tag[:], key[:], nonce[:], aad[:], ciphertext[:])
	derived_plaintext_str := string(derived_plaintext[:])
	expect(t, ok, "Expected true for decrypt(tag, aad, ciphertext)")
	expect(t, derived_plaintext_str == _PLAINTEXT_SUNSCREEN_STR, fmt.tprintf("Expected plaintext %s for decrypt(tag, aad, ciphertext), but got %s instead", _PLAINTEXT_SUNSCREEN_STR, derived_plaintext_str))

	// Flipping bits in the ciphertext must cause authentication to fail.
	derived_ciphertext[0] ~= 0xa5
	ok = chacha20poly1305.decrypt(derived_plaintext[:], tag[:], key[:], nonce[:], aad[:], derived_ciphertext[:])
	expect(t, !ok, "Expected false for decrypt(tag, aad, corrupted_ciphertext)")

	// Flipping bits in the additional data must also cause it to fail,
	// even with the pristine ciphertext.
	aad[0] ~= 0xa5
	ok = chacha20poly1305.decrypt(derived_plaintext[:], tag[:], key[:], nonce[:], aad[:], ciphertext[:])
	expect(t, !ok, "Expected false for decrypt(tag, corrupted_aad, ciphertext)")
}
// TestECDH is a single X25519 known answer vector.  All three fields are
// hex encoded strings: the scalar, the input point, and the expected
// scalar * point product.
TestECDH :: struct {
	scalar, point, product: string,
}
// test_x25519 checks x25519.scalarmult against known answer vectors, and
// cross-checks scalarmult_basepoint against an explicit multiplication by
// the base point.
@(test)
test_x25519 :: proc(t: ^testing.T) {
	log(t, "Testing X25519")

	// Test vectors from RFC 7748
	vectors := [?]TestECDH {
		TestECDH{
			scalar  = "a546e36bf0527c9d3b16154b82465edd62144c0ac1fc5a18506a2244ba449ac4",
			point   = "e6db6867583030db3594c1a424b15f7c726624ec26b3353b10a903a6d0ab1c4c",
			product = "c3da55379de9c6908e94ea4df28d084f32eccf03491c71f754b4075577a28552",
		},
		TestECDH{
			scalar  = "4b66e9d4d1b4673c5ad22691957d6af5c11b6421e0ea01d42ca4169e7918ba0d",
			point   = "e5210f12786811d3f4b7959d0538ae2c31dbe7106fc03c3efc4cd549c715a493",
			product = "95cbde9476e8907d7aade45cb4b873f88b595a68799fa152e6f8f7647aac7957",
		},
	}

	for v in vectors {
		scalar := _decode_hex32(v.scalar)
		point := _decode_hex32(v.point)

		product: [x25519.POINT_SIZE]byte
		x25519.scalarmult(product[:], scalar[:], point[:])
		product_str := hex_string(product[:])

		expect(t, product_str == v.product, fmt.tprintf("Expected %s for %s * %s, but got %s instead", v.product, v.scalar, v.point, product_str))

		// Abuse the test vectors to sanity-check the scalar-basepoint multiply.
		via_helper, via_generic: [x25519.POINT_SIZE]byte
		x25519.scalarmult_basepoint(via_helper[:], scalar[:])
		x25519.scalarmult(via_generic[:], scalar[:], x25519._BASE_POINT[:])

		helper_str, generic_str := hex_string(via_helper[:]), hex_string(via_generic[:])
		expect(t, helper_str == generic_str, fmt.tprintf("Expected %s for %s * basepoint, but got %s instead", generic_str, v.scalar, helper_str))
	}

	// TODO/tests: Run the wycheproof test vectors, once I figure out
	// how to work with JSON.
}
// test_rand_bytes is a smoke test for crypto.rand_bytes: it repeatedly
// zeroes and re-randomizes a 2^25 byte buffer until both the first and
// the last byte are non-zero, giving up after 256 attempts.  The test is
// skipped on every OS other than Linux.
@(test)
test_rand_bytes :: proc(t: ^testing.T) {
	log(t, "Testing rand_bytes")

	if ODIN_OS != "linux" {
		log(t, "rand_bytes not supported - skipping")
		return
	}

	allocator := context.allocator

	buf := make([]byte, 1 << 25, allocator)
	defer delete(buf)

	// Testing a CSPRNG for correctness is incredibly involved and
	// beyond the scope of an implementation that offloads
	// responsibility for correctness to the OS.
	//
	// Just attempt to randomize a sufficiently large buffer, where
	// sufficiently large is:
	// * Larger than the maximum getentropy request size (256 bytes).
	// * Larger than the maximum getrandom request size (2^25 - 1 bytes).
	//
	// While theoretically non-deterministic, if this fails, chances
	// are the CSPRNG is busted.
	seems_ok := false
	for attempt := 0; attempt < 256 && !seems_ok; attempt += 1 {
		mem.zero_explicit(raw_data(buf), len(buf))
		crypto.rand_bytes(buf)

		head, tail := buf[0], buf[len(buf)-1]
		seems_ok = head != 0 && tail != 0
	}

	expect(t, seems_ok, "Expected to randomize the head and tail of the buffer within a handful of attempts")
}
// bench_modern runs the benchmarks for the "modern" primitives in a
// fixed order: ChaCha20, Poly1305, ChaCha20-Poly1305, then X25519.
@(test)
bench_modern :: proc(t: ^testing.T) {
	fmt.println("Starting benchmarks:")

	// Symmetric primitives
	bench_chacha20(t)
	bench_poly1305(t)
	bench_chacha20poly1305(t)

	// Key exchange
	bench_x25519(t)
}
// _setup_sized_buf is the benchmark setup hook: it allocates the input
// buffer of options.bytes bytes from allocator.
//
// Returns .Allocation_Error if the resulting buffer does not have the
// requested length, nil otherwise.
_setup_sized_buf :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
	assert(options != nil)

	options.input = make([]u8, options.bytes, allocator)
	if len(options.input) != options.bytes {
		return .Allocation_Error
	}
	return nil
}
// _teardown_sized_buf is the benchmark teardown hook: it releases the
// input buffer allocated by the setup hook.
//
// The buffer is freed with the allocator that is passed in, which is the
// allocator the setup hook was given, rather than with whatever
// context.allocator happens to be at the time of the call.
//
// Always returns nil.
_teardown_sized_buf :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
	assert(options != nil)

	delete(options.input, allocator)
	return nil
}
// _benchmark_chacha20 is the benchmark body for ChaCha20: it XORs the
// keystream into options.input in place, options.rounds times, with a
// fixed key and an all-zero nonce.
//
// On return options.count and options.processed describe the work done.
// Always returns nil.
_benchmark_chacha20 :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
	buf := options.input
	key := [chacha20.KEY_SIZE]byte{
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
	}
	nonce := [chacha20.NONCE_SIZE]byte{
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00,
	}

	ctx: chacha20.Context = ---
	chacha20.init(&ctx, key[:], nonce[:])

	// Half-open range: exactly options.rounds iterations, so that the
	// count and processed figures below match the work actually done.
	for _ in 0..<options.rounds {
		chacha20.xor_bytes(&ctx, buf, buf)
	}
	options.count = options.rounds
	options.processed = options.rounds * options.bytes
	return nil
}
// _benchmark_poly1305 is the benchmark body for Poly1305: it computes the
// one-shot MAC of options.input options.rounds times with a fixed key.
//
// On return options.count and options.processed describe the work done.
// Always returns nil.
_benchmark_poly1305 :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
	buf := options.input
	key := [poly1305.KEY_SIZE]byte{
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
	}

	tag: [poly1305.TAG_SIZE]byte = ---

	// Half-open range: exactly options.rounds iterations, so that the
	// count and processed figures below match the work actually done.
	for _ in 0..<options.rounds {
		poly1305.sum(tag[:], buf, key[:])
	}
	options.count = options.rounds
	options.processed = options.rounds * options.bytes
	//options.hash = u128(h)
	return nil
}
// _benchmark_chacha20poly1305 is the benchmark body for the AEAD: it
// encrypts options.input in place options.rounds times, with a fixed key,
// an all-zero nonce and no additional data.
//
// On return options.count and options.processed describe the work done.
// Always returns nil.
_benchmark_chacha20poly1305 :: proc(options: ^time.Benchmark_Options, allocator := context.allocator) -> (err: time.Benchmark_Error) {
	buf := options.input
	key := [chacha20.KEY_SIZE]byte{
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
		0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
	}
	nonce := [chacha20.NONCE_SIZE]byte{
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00,
	}

	tag: [chacha20poly1305.TAG_SIZE]byte = ---

	// Half-open range: exactly options.rounds iterations, so that the
	// count and processed figures below match the work actually done.
	for _ in 0..<options.rounds {
		chacha20poly1305.encrypt(buf,tag[:], key[:], nonce[:], nil, buf)
	}
	options.count = options.rounds
	options.processed = options.rounds * options.bytes
	return nil
}
// benchmark_print writes a two line summary of a finished benchmark run
// to standard output: rounds, bytes processed and elapsed nanoseconds,
// followed by the rounds/s and MiB/s throughput figures.
benchmark_print :: proc(name: string, options: ^time.Benchmark_Options) {
	elapsed_ns := time.duration_nanoseconds(options.duration)

	fmt.printf(
		"\t[%v] %v rounds, %v bytes processed in %v ns\n\t\t%5.3f rounds/s, %5.3f MiB/s\n",
		name, options.rounds, options.processed, elapsed_ns,
		options.rounds_per_second, options.megabytes_per_second,
	)
}
// bench_chacha20 benchmarks ChaCha20 over 64, 1024 and 65536 byte
// buffers, 1000 rounds each, printing a summary after every run.
bench_chacha20 :: proc(t: ^testing.T) {
	Size_Case :: struct {
		name:  string,
		bytes: int,
	}
	cases := [?]Size_Case{
		{"ChaCha20 64 bytes", 64},
		{"ChaCha20 1024 bytes", 1024},
		{"ChaCha20 65536 bytes", 65536},
	}

	// A single options struct is reused; only the buffer size changes.
	options := &time.Benchmark_Options{
		rounds = 1_000,
		setup = _setup_sized_buf,
		bench = _benchmark_chacha20,
		teardown = _teardown_sized_buf,
	}

	for c in cases {
		options.bytes = c.bytes
		err := time.benchmark(options, context.allocator)
		expect(t, err == nil, c.name)
		benchmark_print(c.name, options)
	}
}
// bench_poly1305 benchmarks Poly1305 over 64 and 1024 byte buffers,
// 1000 rounds each, printing a summary after every run.
bench_poly1305 :: proc(t: ^testing.T) {
	Size_Case :: struct {
		name:  string,
		bytes: int,
	}
	cases := [?]Size_Case{
		{"Poly1305 64 zero bytes", 64},
		{"Poly1305 1024 zero bytes", 1024},
	}

	// A single options struct is reused; only the buffer size changes.
	options := &time.Benchmark_Options{
		rounds = 1_000,
		setup = _setup_sized_buf,
		bench = _benchmark_poly1305,
		teardown = _teardown_sized_buf,
	}

	for c in cases {
		options.bytes = c.bytes
		err := time.benchmark(options, context.allocator)
		expect(t, err == nil, c.name)
		benchmark_print(c.name, options)
	}
}
// bench_chacha20poly1305 benchmarks the AEAD over 64, 1024 and 65536
// byte buffers, 1000 rounds each, printing a summary after every run.
bench_chacha20poly1305 :: proc(t: ^testing.T) {
	Size_Case :: struct {
		name:  string,
		bytes: int,
	}
	cases := [?]Size_Case{
		{"chacha20poly1305 64 bytes", 64},
		{"chacha20poly1305 1024 bytes", 1024},
		{"chacha20poly1305 65536 bytes", 65536},
	}

	// A single options struct is reused; only the buffer size changes.
	options := &time.Benchmark_Options{
		rounds = 1_000,
		setup = _setup_sized_buf,
		bench = _benchmark_chacha20poly1305,
		teardown = _teardown_sized_buf,
	}

	for c in cases {
		options.bytes = c.bytes
		err := time.benchmark(options, context.allocator)
		expect(t, err == nil, c.name)
		benchmark_print(c.name, options)
	}
}
// bench_x25519 times 10000 back to back x25519.scalarmult calls on a
// fixed scalar and point, and logs the average time per call in
// microseconds.
bench_x25519 :: proc(t: ^testing.T) {
	iters :: 10000

	scalar := _decode_hex32("cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe")
	point := _decode_hex32("deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
	out: [x25519.POINT_SIZE]byte = ---

	start := time.now()
	for remaining := iters; remaining > 0; remaining -= 1 {
		x25519.scalarmult(out[:], scalar[:], point[:])
	}
	elapsed := time.since(start)

	us_per_op := time.duration_microseconds(elapsed) / iters
	log(t, fmt.tprintf("x25519.scalarmult: ~%f us/op", us_per_op))
}