core/crypto/x448: Initial import

This commit is contained in:
Yawning Angel
2024-08-20 12:03:04 +09:00
parent bb395aeb41
commit 9fdcc4e39a
6 changed files with 1542 additions and 0 deletions

View File

@@ -0,0 +1,235 @@
package field_curve448
import "core:mem"
// fe_relax_cast reinterprets a pointer to a Tight_Field_Element as a
// pointer to a Loose_Field_Element.  Only the pointer type changes; the
// pointed-to data is neither copied nor modified.
fe_relax_cast :: #force_inline proc "contextless" (
	arg1: ^Tight_Field_Element,
) -> ^Loose_Field_Element {
	return cast(^Loose_Field_Element)arg1
}
// fe_tighten_cast reinterprets a pointer to a Loose_Field_Element as a
// pointer to a Tight_Field_Element.  Only the pointer type changes; no
// reduction or carry is performed on the pointed-to data.
fe_tighten_cast :: #force_inline proc "contextless" (
	arg1: ^Loose_Field_Element,
) -> ^Tight_Field_Element {
	return cast(^Tight_Field_Element)arg1
}
// fe_clear overwrites the field element that arg1 points to with zeroes
// using mem.zero_explicit.
//
// arg1 must be a ^Tight_Field_Element or a ^Loose_Field_Element.
fe_clear :: proc "contextless" (
	arg1: $T,
) where T == ^Tight_Field_Element || T == ^Loose_Field_Element {
	// Size of the pointed-to element, not of the pointer itself.
	element_size := size_of(arg1^)
	mem.zero_explicit(arg1, element_size)
}
// fe_clear_vec calls fe_clear on every field element pointer in arg1.
//
// arg1 must be a slice of ^Tight_Field_Element or of ^Loose_Field_Element.
fe_clear_vec :: proc "contextless" (
	arg1: $T,
) where T == []^Tight_Field_Element || T == []^Loose_Field_Element {
	for i in 0 ..< len(arg1) {
		fe_clear(arg1[i])
	}
}
// fe_carry_mul_small multiplies the field element arg1 by the single
// 64-bit value arg2 and stores the result in out1.
//
// The multiplication itself is delegated to fe_carry_mul.
fe_carry_mul_small :: proc "contextless" (
	out1: ^Tight_Field_Element,
	arg1: ^Loose_Field_Element,
	arg2: u64,
) {
	// Build a field element holding arg2 in limb 0 and zero in every
	// other limb (variables are zero-initialized by default).
	multiplier: Loose_Field_Element
	multiplier[0] = arg2

	fe_carry_mul(out1, arg1, &multiplier)
}
// fe_carry_pow2k sets out1 = arg1^(2^arg2), computed by squaring arg1
// arg2 times.
//
// out1 and arg1 may refer to the same storage; callers in this package
// (e.g. fe_carry_inv) rely on that.
fe_carry_pow2k :: proc "contextless" (
	out1: ^Tight_Field_Element,
	arg1: ^Loose_Field_Element,
	arg2: uint,
) {
	// Special case: `arg1^(2^0) = arg1^1 = arg1`, though this should
	// never happen.  Multiplying by one stores arg1 in out1 via the same
	// fe_carry_mul routine used for every other product in this file.
	if arg2 == 0 {
		one := Loose_Field_Element{1, 0, 0, 0, 0, 0, 0, 0}
		fe_carry_mul(out1, arg1, &one)
		return
	}

	// The first squaring reads from arg1; the remaining arg2 - 1
	// squarings operate in place on out1.
	fe_carry_square(out1, arg1)
	for _ in 1 ..< arg2 {
		fe_carry_square(out1, fe_relax_cast(out1))
	}
}
// fe_carry_inv raises arg1 to the fixed exponent shown in the final step
// comment below and stores the result in out1.  The x448 package uses
// this as `z^(p - 2)`, i.e. as the field inversion.
//
// The exponentiation follows a fixed addition chain, so the sequence of
// squarings and multiplications does not depend on the value of arg1.
// The three temporaries are cleared before returning.
fe_carry_inv :: proc "contextless" (
out1: ^Tight_Field_Element,
arg1: ^Loose_Field_Element,
) {
// Inversion computation is derived from the addition chain:
//
// _10 = 2*1
// _11 = 1 + _10
// _110 = 2*_11
// _111 = 1 + _110
// _111000 = _111 << 3
// _111111 = _111 + _111000
// x12 = _111111 << 6 + _111111
// x24 = x12 << 12 + x12
// i34 = x24 << 6
// x30 = _111111 + i34
// x48 = i34 << 18 + x24
// x96 = x48 << 48 + x48
// x192 = x96 << 96 + x96
// x222 = x192 << 30 + x30
// x223 = 2*x222 + 1
// return (x223 << 223 + x222) << 2 + 1
//
// Operations: 447 squares 13 multiplies
//
// Generated by github.com/mmcloughlin/addchain v0.4.0.
//
// In the step comments below, `x` is arg1 and the step number counts
// the total field operations performed so far.
t0, t1, t2: Tight_Field_Element = ---, ---, ---
// Step 1: t0 = x^0x2
fe_carry_square(&t0, arg1)
// Step 2: t0 = x^0x3
fe_carry_mul(&t0, arg1, fe_relax_cast(&t0))
// Step 3: t0 = x^0x6
fe_carry_square(&t0, fe_relax_cast(&t0))
// Step 4: t0 = x^0x7
fe_carry_mul(&t0, arg1, fe_relax_cast(&t0))
// Step 7: t1 = x^0x38
fe_carry_pow2k(&t1, fe_relax_cast(&t0), 3)
// Step 8: t0 = x^0x3f
fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1))
// Step 14: t1 = x^0xfc0
fe_carry_pow2k(&t1, fe_relax_cast(&t0), 6)
// Step 15: t1 = x^0xfff
fe_carry_mul(&t1, fe_relax_cast(&t0), fe_relax_cast(&t1))
// Step 27: t2 = x^0xfff000
fe_carry_pow2k(&t2, fe_relax_cast(&t1), 12)
// Step 28: t1 = x^0xffffff
fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
// Step 34: t2 = x^0x3fffffc0
fe_carry_pow2k(&t2, fe_relax_cast(&t1), 6)
// Step 35: t0 = x^0x3fffffff
fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t2))
// Step 53: t2 = x^0xffffff000000
fe_carry_pow2k(&t2, fe_relax_cast(&t2), 18)
// Step 54: t1 = x^0xffffffffffff
fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
// Step 102: t2 = x^0xffffffffffff000000000000
fe_carry_pow2k(&t2, fe_relax_cast(&t1), 48)
// Step 103: t1 = x^0xffffffffffffffffffffffff
fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
// Step 199: t2 = x^0xffffffffffffffffffffffff000000000000000000000000
fe_carry_pow2k(&t2, fe_relax_cast(&t1), 96)
// Step 200: t1 = x^0xffffffffffffffffffffffffffffffffffffffffffffffff
fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
// Step 230: t1 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffc0000000
fe_carry_pow2k(&t1, fe_relax_cast(&t1), 30)
// Step 231: t0 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffff
fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1))
// Step 232: t1 = x^0x7ffffffffffffffffffffffffffffffffffffffffffffffffffffffe
fe_carry_square(&t1, fe_relax_cast(&t0))
// Step 233: t1 = x^0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffff
fe_carry_mul(&t1, arg1, fe_relax_cast(&t1))
// Step 456: t1 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000000000000000000000000000000000000000000000000000
fe_carry_pow2k(&t1, fe_relax_cast(&t1), 223)
// Step 457: t0 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff
fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1))
// Step 459: t0 = x^0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffc
fe_carry_pow2k(&t0, fe_relax_cast(&t0), 2)
// Step 460: z = x^0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd
fe_carry_mul(out1, arg1, fe_relax_cast(&t0))
// Wipe the intermediate powers of arg1 from the stack temporaries.
fe_clear_vec([]^Tight_Field_Element{&t0, &t1, &t2})
}
// fe_zero sets every limb of out1 to 0.
fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
	out1^ = Tight_Field_Element{}
}
// fe_one sets limb 0 of out1 to 1 and every other limb to 0.
fe_one :: proc "contextless" (out1: ^Tight_Field_Element) {
	out1^ = Tight_Field_Element{1, 0, 0, 0, 0, 0, 0, 0}
}
// fe_set copies all limbs of arg1 into out1.  out1 and arg1 may be the
// same pointer.
fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
	out1^ = arg1^
}
// fe_cond_swap exchanges the contents of out1 and out2 when arg1 is 1,
// and leaves both unchanged when arg1 is 0.  The same loads, XORs and
// stores are executed in either case; there is no branch on arg1.
//
// NOTE(review): arg1 is expected to be exactly 0 or 1 -- any other value
// yields a mask that is neither all-zero nor all-one.  Confirm callers.
//
// NOTE(review): optimization is disabled and inlining is forbidden for
// this procedure, presumably so that the compiler cannot turn the masked
// swap into a data-dependent branch -- confirm intent.
@(optimization_mode = "none")
fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
// mask is all-ones when arg1 == 1, and zero when arg1 == 0.
mask := (u64(arg1) * 0xffffffffffffffff)
// For each limb: x is (a ~ b) when swapping and 0 otherwise, so
// (a ~ x, b ~ x) is (b, a) when swapping and (a, b) otherwise.
x := (out1[0] ~ out2[0]) & mask
x1, y1 := out1[0] ~ x, out2[0] ~ x
x = (out1[1] ~ out2[1]) & mask
x2, y2 := out1[1] ~ x, out2[1] ~ x
x = (out1[2] ~ out2[2]) & mask
x3, y3 := out1[2] ~ x, out2[2] ~ x
x = (out1[3] ~ out2[3]) & mask
x4, y4 := out1[3] ~ x, out2[3] ~ x
x = (out1[4] ~ out2[4]) & mask
x5, y5 := out1[4] ~ x, out2[4] ~ x
x = (out1[5] ~ out2[5]) & mask
x6, y6 := out1[5] ~ x, out2[5] ~ x
x = (out1[6] ~ out2[6]) & mask
x7, y7 := out1[6] ~ x, out2[6] ~ x
x = (out1[7] ~ out2[7]) & mask
x8, y8 := out1[7] ~ x, out2[7] ~ x
// All limbs have been read; write the (possibly swapped) values back.
out1[0], out2[0] = x1, y1
out1[1], out2[1] = x2, y2
out1[2], out2[2] = x3, y3
out1[3], out2[3] = x4, y4
out1[4], out2[4] = x5, y5
out1[5], out2[5] = x6, y6
out1[6], out2[6] = x7, y7
out1[7], out2[7] = x8, y8
}

File diff suppressed because it is too large Load Diff

161
core/crypto/x448/x448.odin Normal file
View File

@@ -0,0 +1,161 @@
/*
package x448 implements the X448 (aka curve448) Elliptic-Curve
Diffie-Hellman key exchange protocol.
See:
- [[ https://www.rfc-editor.org/rfc/rfc7748 ]]
*/
package x448
import field "core:crypto/_fiat/field_curve448"
import "core:mem"
// SCALAR_SIZE is the size of an X448 scalar (private key) in bytes.
SCALAR_SIZE :: 56
// POINT_SIZE is the size of an X448 point (public key/shared secret) in bytes.
POINT_SIZE :: 56
// _BASE_POINT is the 56-byte encoding of the X448 base point: the first
// byte is 5 and all remaining bytes are zero.
@(private, rodata)
_BASE_POINT: [56]byte = {
	5, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
}
// _scalar_bit returns bit i of the scalar s as 0 or 1, where bit 0 is
// the lowest bit of s[0] and bit 8*k + j is bit j of s[k].
//
// A negative index yields 0.
@(private)
_scalar_bit :: #force_inline proc "contextless" (s: ^[56]byte, i: int) -> u8 {
	if i >= 0 {
		byte_index := i >> 3
		bit_offset := uint(i & 7)
		return (s[byte_index] >> bit_offset) & 1
	}
	return 0
}
// _scalarmult runs the Montgomery ladder over all 448 bits of scalar
// (bit 447 down to bit 0) starting from the u-coordinate decoded from
// point, and writes the encoded result `x_2 * z_2^(p - 2)` to out.
//
// The scalar is used exactly as given; the public scalarmult wrapper
// clamps it before calling this procedure.  All field element
// temporaries are cleared before returning.
@(private)
_scalarmult :: proc "contextless" (out, scalar, point: ^[56]byte) {
// Montgomery pseudo-multiplication, using the RFC 7748 formula.
t1, t2: field.Loose_Field_Element = ---, ---
// x_1 = u
// x_2 = 1
// z_2 = 0
// x_3 = u
// z_3 = 1
x1: field.Tight_Field_Element = ---
field.fe_from_bytes(&x1, point)
x2, x3, z2, z3: field.Tight_Field_Element = ---, ---, ---, ---
field.fe_one(&x2)
field.fe_zero(&z2)
field.fe_set(&x3, &x1)
field.fe_one(&z3)
// swap = 0
swap: int
// For t = bits-1 down to 0:
for t := 448 - 1; t >= 0; t -= 1 {
// k_t = (k >> t) & 1
k_t := int(_scalar_bit(scalar, t))
// swap ^= k_t
swap ~= k_t
// Conditional swap; see text below.
// (x_2, x_3) = cswap(swap, x_2, x_3)
field.fe_cond_swap(&x2, &x3, swap)
// (z_2, z_3) = cswap(swap, z_2, z_3)
field.fe_cond_swap(&z2, &z3, swap)
// swap = k_t
swap = k_t
// Note: This deliberately omits reductions after add/sub operations
// if the result is only ever used as the input to a mul/square since
// the implementations of those can deal with non-reduced inputs.
//
// fe_tighten_cast is only used to store a fully reduced
// output in a Loose_Field_Element, or to provide such a
// Loose_Field_Element as a Tight_Field_Element argument.
// A = x_2 + z_2
field.fe_add(&t1, &x2, &z2)
// B = x_2 - z_2
field.fe_sub(&t2, &x2, &z2)
// D = x_3 - z_3
field.fe_sub(field.fe_relax_cast(&z2), &x3, &z3) // (z2 unreduced)
// DA = D * A
field.fe_carry_mul(&x2, field.fe_relax_cast(&z2), &t1)
// C = x_3 + z_3
field.fe_add(field.fe_relax_cast(&z3), &x3, &z3) // (z3 unreduced)
// CB = C * B
field.fe_carry_mul(&x3, &t2, field.fe_relax_cast(&z3))
// z_3 = x_1 * (DA - CB)^2
field.fe_sub(field.fe_relax_cast(&z3), &x2, &x3) // (z3 unreduced)
field.fe_carry_square(&z3, field.fe_relax_cast(&z3))
field.fe_carry_mul(&z3, field.fe_relax_cast(&x1), field.fe_relax_cast(&z3))
// x_3 = (DA + CB)^2
field.fe_add(field.fe_relax_cast(&z2), &x2, &x3) // (z2 unreduced)
field.fe_carry_square(&x3, field.fe_relax_cast(&z2))
// AA = A^2
field.fe_carry_square(&z2, &t1)
// BB = B^2
field.fe_carry_square(field.fe_tighten_cast(&t1), &t2) // (t1 reduced)
// x_2 = AA * BB
field.fe_carry_mul(&x2, field.fe_relax_cast(&z2), &t1)
// E = AA - BB
field.fe_sub(&t2, &z2, field.fe_tighten_cast(&t1)) // (t1 (input) is reduced)
// z_2 = E * (AA + a24 * E)
// (39081 is the a24 constant of the formula above.)
field.fe_carry_mul_small(field.fe_tighten_cast(&t1), &t2, 39081) // (t1 reduced)
field.fe_add(&t1, &z2, field.fe_tighten_cast(&t1)) // (t1 (input) is reduced)
field.fe_carry_mul(&z2, &t2, &t1)
}
// Conditional swap; see text below.
// (x_2, x_3) = cswap(swap, x_2, x_3)
field.fe_cond_swap(&x2, &x3, swap)
// (z_2, z_3) = cswap(swap, z_2, z_3)
field.fe_cond_swap(&z2, &z3, swap)
// Return x_2 * (z_2^(p - 2))
field.fe_carry_inv(&z2, field.fe_relax_cast(&z2))
field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2))
field.fe_to_bytes(out, &x2)
// Wipe every field element that held scalar-dependent data.
field.fe_clear_vec([]^field.Tight_Field_Element{&x1, &x2, &x3, &z2, &z3})
field.fe_clear_vec([]^field.Loose_Field_Element{&t1, &t2})
}
// scalarmult "multiplies" the provided scalar and point, and writes the
// resulting point to dst.
//
// Panics if scalar is not SCALAR_SIZE bytes long, or if point or dst is
// not POINT_SIZE bytes long.  The caller's scalar and point slices are
// not modified; clamping is applied to a local copy of the scalar.
scalarmult :: proc(dst, scalar, point: []byte) {
	// Validate lengths; the first failing check (in this order) panics.
	switch {
	case len(scalar) != SCALAR_SIZE:
		panic("crypto/x448: invalid scalar size")
	case len(point) != POINT_SIZE:
		panic("crypto/x448: invalid point size")
	case len(dst) != POINT_SIZE:
		panic("crypto/x448: invalid destination point size")
	}

	// Take fixed-size local copies of both inputs before anything is
	// written to dst.
	clamped: [SCALAR_SIZE]byte = ---
	u_coord: [POINT_SIZE]byte = ---
	copy_slice(clamped[:], scalar)
	copy_slice(u_coord[:], point)

	// "clamp" the scalar: clear the two lowest bits of the first byte
	// and set the highest bit of the last byte.
	clamped[0] &= 0xfc
	clamped[SCALAR_SIZE - 1] |= 0x80

	product: [POINT_SIZE]byte = ---
	_scalarmult(&product, &clamped, &u_coord)
	copy_slice(dst, product[:])

	// Wipe the local copies of the scalar and of the result.
	mem.zero_explicit(&clamped, size_of(clamped))
	mem.zero_explicit(&product, size_of(product))
}
// scalarmult_basepoint "multiplies" the provided scalar with the X448
// base point and writes the resulting point to dst.
//
// This is scalarmult with the point argument fixed to _BASE_POINT, so
// the same size requirements on dst and scalar apply.
scalarmult_basepoint :: proc(dst, scalar: []byte) {
	base_point := _BASE_POINT[:]
	scalarmult(dst, scalar, base_point)
}

View File

@@ -48,6 +48,7 @@ import shake "core:crypto/shake"
import sm3 "core:crypto/sm3"
import tuplehash "core:crypto/tuplehash"
import x25519 "core:crypto/x25519"
import x448 "core:crypto/x448"
import pe "core:debug/pe"
import trace "core:debug/trace"
@@ -190,6 +191,7 @@ _ :: shake
_ :: sm3
_ :: tuplehash
_ :: x25519
_ :: x448
_ :: pe
_ :: trace
_ :: dynlib

View File

@@ -14,6 +14,7 @@ import "core:crypto/chacha20poly1305"
import "core:crypto/ed25519"
import "core:crypto/poly1305"
import "core:crypto/x25519"
import "core:crypto/x448"
// Cryptographic primitive benchmarks.
@@ -237,6 +238,26 @@ benchmark_crypto :: proc(t: ^testing.T) {
time.duration_microseconds(elapsed) / iters,
)
}
{
point_str := "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef"
scalar_str := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe"
point, _ := hex.decode(transmute([]byte)(point_str), context.temp_allocator)
scalar, _ := hex.decode(transmute([]byte)(scalar_str), context.temp_allocator)
out: [x448.POINT_SIZE]byte = ---
iters :: 10000
start := time.now()
for i := 0; i < iters; i = i + 1 {
x448.scalarmult(out[:], scalar[:], point[:])
}
elapsed := time.since(start)
fmt.sbprintfln(&str,
"x448.scalarmult: ~%f us/op",
time.duration_microseconds(elapsed) / iters,
)
}
}
@(private)

View File

@@ -7,6 +7,7 @@ import field "core:crypto/_fiat/field_curve25519"
import "core:crypto/ed25519"
import "core:crypto/ristretto255"
import "core:crypto/x25519"
import "core:crypto/x448"
@(test)
test_sqrt_ratio_m1 :: proc(t: ^testing.T) {
@@ -684,6 +685,68 @@ test_x25519 :: proc(t: ^testing.T) {
}
}
// test_x448 checks x448.scalarmult against the RFC 7748 test vectors,
// and checks that x448.scalarmult_basepoint agrees with x448.scalarmult
// applied to an explicit copy of the base point.
@(test)
test_x448 :: proc(t: ^testing.T) {
	// The package does not export its base point, so keep a local copy.
	base_point: [56]byte = {
		5, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0,
		0, 0, 0, 0, 0, 0, 0, 0,
	}

	test_vectors := []struct {
		scalar:  string,
		point:   string,
		product: string,
	} {
		// Test vectors from RFC 7748
		{
			"3d262fddf9ec8e88495266fea19a34d28882acef045104d0d1aae121700a779c984c24f8cdd78fbff44943eba368f54b29259a4f1c600ad3",
			"06fce640fa3487bfda5f6cf2d5263f8aad88334cbd07437f020f08f9814dc031ddbdc38c19c6da2583fa5429db94ada18aa7a7fb4ef8a086",
			"ce3e4ff95a60dc6697da1db1d85e6afbdf79b50a2412d7546d5f239fe14fbaadeb445fc66a01b0779d98223961111e21766282f73dd96b6f",
		},
		{
			"203d494428b8399352665ddca42f9de8fef600908e0d461cb021f8c538345dd77c3e4806e25f46d3315c44e0a5b4371282dd2c8d5be3095f",
			"0fbcc2f993cd56d3305b0b7d9e55d4c1a8fb5dbb52f8e9a1e9b6201b165d015894e56c4d3570bee52fe205e28a78b91cdfbde71ce8d157db",
			"884a02576239ff7a2f2f63b2db6a9ff37047ac13568e1e30fe63c4a7ad1b3ee3a5700df34321d62077e63633c575c1c954514e99da7c179d",
		},
	}

	for v in test_vectors {
		scalar_bytes, _ := hex.decode(transmute([]byte)(v.scalar), context.temp_allocator)
		point_bytes, _ := hex.decode(transmute([]byte)(v.point), context.temp_allocator)

		// Known-answer check: scalar * point must match the vector.
		got: [x448.POINT_SIZE]byte
		x448.scalarmult(got[:], scalar_bytes[:], point_bytes[:])
		got_hex := string(hex.encode(got[:], context.temp_allocator))

		testing.expectf(
			t,
			got_hex == v.product,
			"Expected %s for %s * %s, but got %s instead",
			v.product,
			v.scalar,
			v.point,
			got_hex,
		)

		// Abuse the test vectors to sanity-check the scalar-basepoint multiply.
		via_basepoint, via_generic: [x448.POINT_SIZE]byte
		x448.scalarmult_basepoint(via_basepoint[:], scalar_bytes[:])
		x448.scalarmult(via_generic[:], scalar_bytes[:], base_point[:])

		via_basepoint_hex := string(hex.encode(via_basepoint[:], context.temp_allocator))
		via_generic_hex := string(hex.encode(via_generic[:], context.temp_allocator))

		testing.expectf(
			t,
			via_basepoint_hex == via_generic_hex,
			"Expected %s for %s * basepoint, but got %s instead",
			via_generic_hex,
			v.scalar,
			via_basepoint_hex,
		)
	}
}
@(private)
ge_str :: proc(ge: ^ristretto255.Group_Element) -> string {
b: [ristretto255.ELEMENT_SIZE]byte