core/crypto/x448: Initial import

2026-01-07 21:43:15 +00:00 · 2024-08-20 12:03:04 +09:00
parent bb395aeb41
commit 9fdcc4e39a
6 changed files with 1542 additions and 0 deletions
--- a/core/crypto/_fiat/field_curve448/field.odin
+++ b/core/crypto/_fiat/field_curve448/field.odin
@@ -0,0 +1,235 @@
+package field_curve448
+
+import "core:mem"
+
+// fe_relax_cast reinterprets a pointer to a Tight_Field_Element as a
+// pointer to a Loose_Field_Element.  Nothing is copied or modified; the
+// returned pointer aliases arg1.
+fe_relax_cast :: #force_inline proc "contextless" (
+	arg1: ^Tight_Field_Element,
+) -> ^Loose_Field_Element {
+	return cast(^Loose_Field_Element)arg1
+}
+
+// fe_tighten_cast reinterprets a pointer to a Loose_Field_Element as a
+// pointer to a Tight_Field_Element.  Nothing is copied, modified, or
+// reduced; the returned pointer aliases arg1.
+fe_tighten_cast :: #force_inline proc "contextless" (
+	arg1: ^Loose_Field_Element,
+) -> ^Tight_Field_Element {
+	return cast(^Tight_Field_Element)arg1
+}
+
+// fe_clear overwrites the field element pointed to by arg1 with zero
+// bytes, using mem.zero_explicit.
+fe_clear :: proc "contextless" (
+	arg1: $T,
+) where T == ^Tight_Field_Element || T == ^Loose_Field_Element {
+	n_bytes := size_of(arg1^)
+	mem.zero_explicit(arg1, n_bytes)
+}
+
+// fe_clear_vec calls fe_clear on every field element pointer in arg1.
+fe_clear_vec :: proc "contextless" (
+	arg1: $T,
+) where T == []^Tight_Field_Element || T == []^Loose_Field_Element {
+	for idx in 0 ..< len(arg1) {
+		fe_clear(arg1[idx])
+	}
+}
+
+// fe_carry_mul_small sets `out1 = arg1 * arg2`, where arg2 is a small
+// integer constant rather than a full field element.
+fe_carry_mul_small :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+	arg2: u64,
+) {
+	// Build a field element whose lowest limb is arg2 and whose
+	// remaining limbs are zero, then defer to the general multiply.
+	multiplier: Loose_Field_Element
+	multiplier[0] = arg2
+	fe_carry_mul(out1, arg1, &multiplier)
+}
+
+// fe_carry_pow2k sets `out1 = arg1^(2^arg2)`, i.e. it squares arg1 a
+// total of arg2 times.  out1 may alias arg1.
+fe_carry_pow2k :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+	arg2: uint,
+) {
+	// Special case: `arg1^(2^0) = arg1`, though this should never happen.
+	// Multiplying by 1 stores arg1 in out1 through the same carrying
+	// multiply that every other path uses.
+	if arg2 == 0 {
+		fe_carry_mul_small(out1, arg1, 1)
+		return
+	}
+
+	// The first squaring reads from arg1, the remaining `arg2 - 1`
+	// squarings operate on out1 in place.
+	fe_carry_square(out1, arg1)
+	for _ in 1 ..< arg2 {
+		fe_carry_square(out1, fe_relax_cast(out1))
+	}
+}
+
+// fe_carry_inv sets out1 to arg1 raised to the fixed exponent listed at
+// Step 460 below, computed with a hard-coded addition chain.  The X448
+// ladder uses this as `z^(p - 2)`, the field inversion.
+//
+// Three temporaries hold intermediate powers of arg1; they are wiped with
+// fe_clear_vec before returning.
+fe_carry_inv :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+) {
+	// Inversion computation is derived from the addition chain:
+	//
+	//	_10     = 2*1
+	//	_11     = 1 + _10
+	//	_110    = 2*_11
+	//	_111    = 1 + _110
+	//	_111000 = _111 << 3
+	//	_111111 = _111 + _111000
+	//	x12     = _111111 << 6 + _111111
+	//	x24     = x12 << 12 + x12
+	//	i34     = x24 << 6
+	//	x30     = _111111 + i34
+	//	x48     = i34 << 18 + x24
+	//	x96     = x48 << 48 + x48
+	//	x192    = x96 << 96 + x96
+	//	x222    = x192 << 30 + x30
+	//	x223    = 2*x222 + 1
+	//	return    (x223 << 223 + x222) << 2 + 1
+	//
+	// Operations: 447 squares 13 multiplies
+	//
+	// Generated by github.com/mmcloughlin/addchain v0.4.0.
+
+	t0, t1, t2: Tight_Field_Element = ---, ---, ---
+
+	// Step 1: t0 = x^0x2
+	fe_carry_square(&t0, arg1)
+
+	// Step 2: t0 = x^0x3
+	fe_carry_mul(&t0, arg1, fe_relax_cast(&t0))
+
+	// Step 3: t0 = x^0x6
+	fe_carry_square(&t0, fe_relax_cast(&t0))
+
+	// Step 4: t0 = x^0x7
+	fe_carry_mul(&t0, arg1, fe_relax_cast(&t0))
+
+	// Step 7: t1 = x^0x38
+	fe_carry_pow2k(&t1, fe_relax_cast(&t0), 3)
+
+	// Step 8: t0 = x^0x3f
+	fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1))
+
+	// Step 14: t1 = x^0xfc0
+	fe_carry_pow2k(&t1, fe_relax_cast(&t0), 6)
+
+	// Step 15: t1 = x^0xfff
+	fe_carry_mul(&t1, fe_relax_cast(&t0), fe_relax_cast(&t1))
+
+	// Step 27: t2 = x^0xfff000
+	fe_carry_pow2k(&t2, fe_relax_cast(&t1), 12)
+
+	// Step 28: t1 = x^0xffffff
+	fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
+
+	// Step 34: t2 = x^0x3fffffc0
+	fe_carry_pow2k(&t2, fe_relax_cast(&t1), 6)
+
+	// Step 35: t0 = x^0x3fffffff
+	fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t2))
+
+	// Step 53: t2 = x^0xffffff000000
+	fe_carry_pow2k(&t2, fe_relax_cast(&t2), 18)
+
+	// Step 54: t1 = x^0xffffffffffff
+	fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
+
+	// Step 102: t2 = x^0xffffffffffff000000000000
+	fe_carry_pow2k(&t2, fe_relax_cast(&t1), 48)
+
+	// Step 103: t1 = x^0xffffffffffffffffffffffff
+	fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
+
+	// Step 199: t2 = x^0xffffffffffffffffffffffff000000000000000000000000
+	fe_carry_pow2k(&t2, fe_relax_cast(&t1), 96)
+
+	// Step 200: t1 = x^0xffffffffffffffffffffffffffffffffffffffffffffffff
+	fe_carry_mul(&t1, fe_relax_cast(&t1), fe_relax_cast(&t2))
+
+	// Step 230: t1 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffc0000000
+	fe_carry_pow2k(&t1, fe_relax_cast(&t1), 30)
+
+	// Step 231: t0 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffff
+	fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1))
+
+	// Step 232: t1 = x^0x7ffffffffffffffffffffffffffffffffffffffffffffffffffffffe
+	fe_carry_square(&t1, fe_relax_cast(&t0))
+
+	// Step 233: t1 = x^0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffff
+	fe_carry_mul(&t1, arg1, fe_relax_cast(&t1))
+
+	// Step 456: t1 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffff80000000000000000000000000000000000000000000000000000000
+	fe_carry_pow2k(&t1, fe_relax_cast(&t1), 223)
+
+	// Step 457: t0 = x^0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffffffffffffffffffffffffffffffffffffffffff
+	fe_carry_mul(&t0, fe_relax_cast(&t0), fe_relax_cast(&t1))
+
+	// Step 459: t0 = x^0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffc
+	fe_carry_pow2k(&t0, fe_relax_cast(&t0), 2)
+
+	// Step 460: z = x^0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffffffffffffffffffffffffffffffffffffffffffffffffffffd
+	fe_carry_mul(out1, arg1, fe_relax_cast(&t0))
+
+	// Wipe the intermediate powers of the (potentially secret) input.
+	fe_clear_vec([]^Tight_Field_Element{&t0, &t1, &t2})
+}
+
+// fe_zero sets all eight limbs of out1 to 0.
+fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
+	for limb in 0 ..< 8 {
+		out1[limb] = 0
+	}
+}
+
+// fe_one sets out1 to the field element 1: the lowest limb is 1 and the
+// remaining seven limbs are 0.
+fe_one :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 1
+	for limb in 1 ..< 8 {
+		out1[limb] = 0
+	}
+}
+
+// fe_set copies the eight limbs of arg1 into out1.  All limbs are read
+// before any limb is written.
+fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
+	snapshot := arg1^
+	for limb in 0 ..< 8 {
+		out1[limb] = snapshot[limb]
+	}
+}
+
+// fe_cond_swap exchanges the contents of out1 and out2 when arg1 is 1, and
+// leaves both unchanged when arg1 is 0, without branching on arg1.
+//
+// NOTE(review): arg1 is assumed to be exactly 0 or 1 (the X448 ladder
+// passes a single scalar bit); other values produce a partial mask.
+// The optimization/inlining attributes are presumably there to stop the
+// compiler from rewriting the masking into a branch -- confirm.
+@(optimization_mode = "none")
+fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
+	// mask is all-ones when arg1 == 1 and all-zeroes when arg1 == 0.
+	mask := (u64(arg1) * 0xffffffffffffffff)
+	// For each limb: x = (a ~ b) & mask, so `a ~ x` is b when the mask
+	// is set and a otherwise (and symmetrically for `b ~ x`).
+	x := (out1[0] ~ out2[0]) & mask
+	x1, y1 := out1[0] ~ x, out2[0] ~ x
+	x = (out1[1] ~ out2[1]) & mask
+	x2, y2 := out1[1] ~ x, out2[1] ~ x
+	x = (out1[2] ~ out2[2]) & mask
+	x3, y3 := out1[2] ~ x, out2[2] ~ x
+	x = (out1[3] ~ out2[3]) & mask
+	x4, y4 := out1[3] ~ x, out2[3] ~ x
+	x = (out1[4] ~ out2[4]) & mask
+	x5, y5 := out1[4] ~ x, out2[4] ~ x
+	x = (out1[5] ~ out2[5]) & mask
+	x6, y6 := out1[5] ~ x, out2[5] ~ x
+	x = (out1[6] ~ out2[6]) & mask
+	x7, y7 := out1[6] ~ x, out2[6] ~ x
+	x = (out1[7] ~ out2[7]) & mask
+	x8, y8 := out1[7] ~ x, out2[7] ~ x
+	// Write back only after every limb has been computed.
+	out1[0], out2[0] = x1, y1
+	out1[1], out2[1] = x2, y2
+	out1[2], out2[2] = x3, y3
+	out1[3], out2[3] = x4, y4
+	out1[4], out2[4] = x5, y5
+	out1[5], out2[5] = x6, y6
+	out1[6], out2[6] = x7, y7
+	out1[7], out2[7] = x8, y8
+}
--- a/core/crypto/_fiat/field_curve448/field51.odin
+++ b/core/crypto/_fiat/field_curve448/field51.odin
--- a/core/crypto/x448/x448.odin
+++ b/core/crypto/x448/x448.odin
@@ -0,0 +1,161 @@
+/*
+package x448 implements the X448 (aka curve448) Elliptic-Curve
+Diffie-Hellman key exchange protocol.
+
+See:
+- [[ https://www.rfc-editor.org/rfc/rfc7748 ]]
+*/
+package x448
+
+import field "core:crypto/_fiat/field_curve448"
+import "core:mem"
+
+// SCALAR_SIZE is the size of an X448 scalar (private key) in bytes.
+SCALAR_SIZE :: 56
+// POINT_SIZE is the size of an X448 point (public key/shared secret) in bytes.
+POINT_SIZE :: 56
+
+@(private, rodata)
+_BASE_POINT: [56]byte = {
+	5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+}
+
+// _scalar_bit returns bit i of the little-endian scalar s (bit 0 is the
+// least significant bit of s[0]).  Negative indices yield 0.
+@(private)
+_scalar_bit :: #force_inline proc "contextless" (s: ^[56]byte, i: int) -> u8 {
+	if i < 0 {
+		return 0
+	}
+	byte_index := i >> 3
+	bit_offset := uint(i & 7)
+	return (s[byte_index] >> bit_offset) & 1
+}
+
+// _scalarmult runs the Montgomery ladder over all 448 scalar bits (most
+// significant first) and writes the encoded result to out.
+//
+// The scalar is used as-is, so clamping is the caller's responsibility.
+// All field element temporaries are wiped before returning.
+@(private)
+_scalarmult :: proc "contextless" (out, scalar, point: ^[56]byte) {
+	// Montgomery pseudo-multiplication, using the RFC 7748 formula.
+	t1, t2: field.Loose_Field_Element = ---, ---
+
+	// x_1 = u
+	// x_2 = 1
+	// z_2 = 0
+	// x_3 = u
+	// z_3 = 1
+	x1: field.Tight_Field_Element = ---
+	field.fe_from_bytes(&x1, point)
+
+	x2, x3, z2, z3: field.Tight_Field_Element = ---, ---, ---, ---
+	field.fe_one(&x2)
+	field.fe_zero(&z2)
+	field.fe_set(&x3, &x1)
+	field.fe_one(&z3)
+
+	// swap = 0
+	swap: int
+
+	// For t = bits-1 down to 0:
+	for t := 448 - 1; t >= 0; t -= 1 {
+		// k_t = (k >> t) & 1
+		k_t := int(_scalar_bit(scalar, t))
+		// swap ^= k_t
+		swap ~= k_t
+		// Conditional swap; see text below.
+		// (x_2, x_3) = cswap(swap, x_2, x_3)
+		field.fe_cond_swap(&x2, &x3, swap)
+		// (z_2, z_3) = cswap(swap, z_2, z_3)
+		field.fe_cond_swap(&z2, &z3, swap)
+		// swap = k_t
+		swap = k_t
+
+		// Note: This deliberately omits reductions after add/sub operations
+		// if the result is only ever used as the input to a mul/square since
+		// the implementations of those can deal with non-reduced inputs.
+		//
+		// fe_tighten_cast is only used to store a fully reduced
+		// output in a Loose_Field_Element, or to provide such a
+		// Loose_Field_Element as a Tight_Field_Element argument.
+
+		// A = x_2 + z_2
+		field.fe_add(&t1, &x2, &z2)
+		// B = x_2 - z_2
+		field.fe_sub(&t2, &x2, &z2)
+		// D = x_3 - z_3
+		field.fe_sub(field.fe_relax_cast(&z2), &x3, &z3) // (z2 unreduced)
+		// DA = D * A
+		field.fe_carry_mul(&x2, field.fe_relax_cast(&z2), &t1)
+		// C = x_3 + z_3
+		field.fe_add(field.fe_relax_cast(&z3), &x3, &z3) // (z3 unreduced)
+		// CB = C * B
+		field.fe_carry_mul(&x3, &t2, field.fe_relax_cast(&z3))
+		// z_3 = x_1 * (DA - CB)^2
+		field.fe_sub(field.fe_relax_cast(&z3), &x2, &x3) // (z3 unreduced)
+		field.fe_carry_square(&z3, field.fe_relax_cast(&z3))
+		field.fe_carry_mul(&z3, field.fe_relax_cast(&x1), field.fe_relax_cast(&z3))
+		// x_3 = (DA + CB)^2
+		field.fe_add(field.fe_relax_cast(&z2), &x2, &x3) // (z2 unreduced)
+		field.fe_carry_square(&x3, field.fe_relax_cast(&z2))
+
+		// AA = A^2
+		field.fe_carry_square(&z2, &t1)
+		// BB = B^2
+		field.fe_carry_square(field.fe_tighten_cast(&t1), &t2) // (t1 reduced)
+		// x_2 = AA * BB
+		field.fe_carry_mul(&x2, field.fe_relax_cast(&z2), &t1)
+		// E = AA - BB
+		field.fe_sub(&t2, &z2, field.fe_tighten_cast(&t1)) // (t1 (input) is reduced)
+		// z_2 = E * (AA + a24 * E), with a24 = 39081
+		field.fe_carry_mul_small(field.fe_tighten_cast(&t1), &t2, 39081) // (t1 reduced)
+		field.fe_add(&t1, &z2, field.fe_tighten_cast(&t1)) // (t1 (input) is reduced)
+		field.fe_carry_mul(&z2, &t2, &t1)
+	}
+
+	// Conditional swap; see text below.
+	// (x_2, x_3) = cswap(swap, x_2, x_3)
+	field.fe_cond_swap(&x2, &x3, swap)
+	// (z_2, z_3) = cswap(swap, z_2, z_3)
+	field.fe_cond_swap(&z2, &z3, swap)
+
+	// Return x_2 * (z_2^(p - 2))
+	field.fe_carry_inv(&z2, field.fe_relax_cast(&z2))
+	field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2))
+	field.fe_to_bytes(out, &x2)
+
+	// Wipe every intermediate that was derived from the scalar.
+	field.fe_clear_vec([]^field.Tight_Field_Element{&x1, &x2, &x3, &z2, &z3})
+	field.fe_clear_vec([]^field.Loose_Field_Element{&t1, &t2})
+}
+
+// scalarmult "multiplies" the provided scalar and point, and writes the
+// resulting point to dst.
+//
+// scalar must be SCALAR_SIZE bytes, and point and dst must be POINT_SIZE
+// bytes; any other length panics.  The scalar is clamped on a local copy,
+// so the caller's buffer is left untouched.
+scalarmult :: proc(dst, scalar, point: []byte) {
+	if len(scalar) != SCALAR_SIZE {
+		panic("crypto/x448: invalid scalar size")
+	}
+	if len(point) != POINT_SIZE {
+		panic("crypto/x448: invalid point size")
+	}
+	if len(dst) != POINT_SIZE {
+		panic("crypto/x448: invalid destination point size")
+	}
+
+	// "clamp" a private copy of the scalar: clear the two lowest bits
+	// and set the highest bit.
+	clamped: [SCALAR_SIZE]byte = ---
+	copy_slice(clamped[:], scalar)
+	clamped[0] &= 0xfc
+	clamped[SCALAR_SIZE - 1] |= 0x80
+
+	u_coord: [POINT_SIZE]byte = ---
+	copy_slice(u_coord[:], point)
+
+	product: [POINT_SIZE]byte = ---
+	_scalarmult(&product, &clamped, &u_coord)
+	copy_slice(dst, product[:])
+
+	// Wipe the local copies of the scalar and the result.
+	mem.zero_explicit(&clamped, size_of(clamped))
+	mem.zero_explicit(&product, size_of(product))
+}
+
+// scalarmult_basepoint "multiplies" the provided scalar with the X448
+// base point and writes the resulting point to dst.
+//
+// This is scalarmult with the point fixed to the package's base point, so
+// the same length requirements on dst and scalar apply.
+scalarmult_basepoint :: proc(dst, scalar: []byte) {
+	base_point := _BASE_POINT[:]
+	scalarmult(dst, scalar, base_point)
+}
--- a/examples/all/all_main.odin
+++ b/examples/all/all_main.odin
@@ -48,6 +48,7 @@ import shake            "core:crypto/shake"
 import sm3              "core:crypto/sm3"
 import tuplehash        "core:crypto/tuplehash"
 import x25519           "core:crypto/x25519"
+import x448             "core:crypto/x448"

 import pe               "core:debug/pe"
 import trace            "core:debug/trace"
@@ -190,6 +191,7 @@ _ :: shake
 _ :: sm3
 _ :: tuplehash
 _ :: x25519
+_ :: x448
 _ :: pe
 _ :: trace
 _ :: dynlib
--- a/tests/benchmark/crypto/benchmark_crypto.odin
+++ b/tests/benchmark/crypto/benchmark_crypto.odin
@@ -14,6 +14,7 @@ import "core:crypto/chacha20poly1305"
 import "core:crypto/ed25519"
 import "core:crypto/poly1305"
 import "core:crypto/x25519"
+import "core:crypto/x448"

 // Cryptographic primitive benchmarks.

@@ -237,6 +238,26 @@ benchmark_crypto :: proc(t: ^testing.T) {
 			time.duration_microseconds(elapsed) / iters,
 		)
 	}
+	{
+		point_str := "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef"
+		scalar_str := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe"
+
+		point, _ := hex.decode(transmute([]byte)(point_str), context.temp_allocator)
+		scalar, _ := hex.decode(transmute([]byte)(scalar_str), context.temp_allocator)
+		out: [x448.POINT_SIZE]byte = ---
+
+		iters :: 10000
+		start := time.now()
+		for i := 0; i < iters; i = i + 1 {
+			x448.scalarmult(out[:], scalar[:], point[:])
+		}
+		elapsed := time.since(start)
+
+		fmt.sbprintfln(&str,
+			"x448.scalarmult: ~%f us/op",
+			time.duration_microseconds(elapsed) / iters,
+		)
+	}
 }

@(private)
--- a/tests/core/crypto/test_core_crypto_ecc25519.odin
+++ b/tests/core/crypto/test_core_crypto_ecc25519.odin
@@ -7,6 +7,7 @@ import field "core:crypto/_fiat/field_curve25519"
 import "core:crypto/ed25519"
 import "core:crypto/ristretto255"
 import "core:crypto/x25519"
+import "core:crypto/x448"

@(test)
 test_sqrt_ratio_m1 :: proc(t: ^testing.T) {
@@ -684,6 +685,68 @@ test_x25519 :: proc(t: ^testing.T) {
 	}
 }

+// test_x448 checks x448.scalarmult against the RFC 7748 test vectors, and
+// checks that x448.scalarmult_basepoint agrees with an explicit multiply
+// by the base point.
+@(test)
+test_x448 :: proc(t: ^testing.T) {
+	// Local copy of the base point (u = 5, little-endian) so that it
+	// doesn't need to be exported.
+	base_point: [56]byte
+	base_point[0] = 5
+
+	test_vectors := []struct {
+		scalar:  string,
+		point:   string,
+		product: string,
+	} {
+		// Test vectors from RFC 7748
+		{
+			"3d262fddf9ec8e88495266fea19a34d28882acef045104d0d1aae121700a779c984c24f8cdd78fbff44943eba368f54b29259a4f1c600ad3",
+			"06fce640fa3487bfda5f6cf2d5263f8aad88334cbd07437f020f08f9814dc031ddbdc38c19c6da2583fa5429db94ada18aa7a7fb4ef8a086",
+			"ce3e4ff95a60dc6697da1db1d85e6afbdf79b50a2412d7546d5f239fe14fbaadeb445fc66a01b0779d98223961111e21766282f73dd96b6f",
+		},
+		{
+			"203d494428b8399352665ddca42f9de8fef600908e0d461cb021f8c538345dd77c3e4806e25f46d3315c44e0a5b4371282dd2c8d5be3095f",
+			"0fbcc2f993cd56d3305b0b7d9e55d4c1a8fb5dbb52f8e9a1e9b6201b165d015894e56c4d3570bee52fe205e28a78b91cdfbde71ce8d157db",
+			"884a02576239ff7a2f2f63b2db6a9ff37047ac13568e1e30fe63c4a7ad1b3ee3a5700df34321d62077e63633c575c1c954514e99da7c179d",
+		},
+	}
+	for vec in test_vectors {
+		scalar_bytes, _ := hex.decode(transmute([]byte)(vec.scalar), context.temp_allocator)
+		point_bytes, _ := hex.decode(transmute([]byte)(vec.point), context.temp_allocator)
+
+		// scalar * point must match the expected product.
+		product: [x448.POINT_SIZE]byte
+		x448.scalarmult(product[:], scalar_bytes[:], point_bytes[:])
+		product_hex := string(hex.encode(product[:], context.temp_allocator))
+
+		testing.expectf(
+			t,
+			product_hex == vec.product,
+			"Expected %s for %s * %s, but got %s instead",
+			vec.product,
+			vec.scalar,
+			vec.point,
+			product_hex,
+		)
+
+		// Abuse the test vectors to sanity-check the scalar-basepoint multiply.
+		via_basepoint, via_generic: [x448.POINT_SIZE]byte
+		x448.scalarmult_basepoint(via_basepoint[:], scalar_bytes[:])
+		x448.scalarmult(via_generic[:], scalar_bytes[:], base_point[:])
+		via_basepoint_hex := string(hex.encode(via_basepoint[:], context.temp_allocator))
+		via_generic_hex := string(hex.encode(via_generic[:], context.temp_allocator))
+		testing.expectf(
+			t,
+			via_basepoint_hex == via_generic_hex,
+			"Expected %s for %s * basepoint, but got %s instead",
+			via_generic_hex,
+			vec.scalar,
+			via_basepoint_hex,
+		)
+	}
+}
+
@(private)
 ge_str :: proc(ge: ^ristretto255.Group_Element) -> string {
 	b: [ristretto255.ELEMENT_SIZE]byte