From a43a5b053c1d1e931eeb56d65e6a40f634a0b94f Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Sun, 24 Mar 2024 22:52:21 +0900 Subject: [PATCH 01/14] core/crypto: Add more documentation about assumptions (NFC) --- core/crypto/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/core/crypto/README.md b/core/crypto/README.md index 1e4e41fb8..303b1f625 100644 --- a/core/crypto/README.md +++ b/core/crypto/README.md @@ -14,6 +14,14 @@ constant-time byte comparison. - Best-effort is make to mitigate timing side-channels on reasonable architectures. Architectures that are known to be unreasonable include but are not limited to i386, i486, and WebAssembly. +- Implementations assume a 64-bit architecture (64-bit integer arithmetic + is fast, and includes add-with-carry, sub-with-borrow, and full-result + multiply). +- Hardware side-channels are explicitly out of scope for this package. + Notable examples include but are not limited to: + - Power/RF side-channels etc. + - Fault injection attacks etc. + - Hardware vulnerabilities ("apply mitigations or buy a new CPU"). - The packages attempt to santize sensitive data, however this is, and will remain a "best-effort" implementation decision. As Thomas Pornin puts it "In general, such memory cleansing is a fool's quest." From b155fdf8c96d6269fe0f56a3fda76a3df1e5a7c8 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Sun, 17 Mar 2024 10:29:59 +0900 Subject: [PATCH 02/14] core/crypto: Add `has_rand_bytes` This allows runtime detection as to if `rand_bytes` is supported or not, and lets us enable the test-case on all of the supported targets. 
--- core/crypto/crypto.odin | 10 ++++++++++ core/crypto/rand_bsd.odin | 4 ++++ core/crypto/rand_darwin.odin | 4 ++++ core/crypto/rand_generic.odin | 4 ++++ core/crypto/rand_js.odin | 4 ++++ core/crypto/rand_linux.odin | 4 ++++ core/crypto/rand_windows.odin | 4 ++++ tests/core/crypto/test_core_crypto.odin | 2 +- 8 files changed, 35 insertions(+), 1 deletion(-) diff --git a/core/crypto/crypto.odin b/core/crypto/crypto.odin index 6cdcacb9c..05f25111a 100644 --- a/core/crypto/crypto.odin +++ b/core/crypto/crypto.odin @@ -1,3 +1,7 @@ +/* +package crypto implements a selection of cryptography algorithms and useful +helper routines. +*/ package crypto import "core:mem" @@ -51,3 +55,9 @@ rand_bytes :: proc (dst: []byte) { _rand_bytes(dst) } + +// has_rand_bytes returns true iff the target has support for accessing the +// system entropy source. +has_rand_bytes :: proc () -> bool { + return _has_rand_bytes() +} diff --git a/core/crypto/rand_bsd.odin b/core/crypto/rand_bsd.odin index 8e2be1d95..61eaf652f 100644 --- a/core/crypto/rand_bsd.odin +++ b/core/crypto/rand_bsd.odin @@ -10,3 +10,7 @@ foreign libc { _rand_bytes :: proc(dst: []byte) { arc4random_buf(raw_data(dst), len(dst)) } + +_has_rand_bytes :: proc () -> bool { + return true +} diff --git a/core/crypto/rand_darwin.odin b/core/crypto/rand_darwin.odin index ec44c1491..2864b46dd 100644 --- a/core/crypto/rand_darwin.odin +++ b/core/crypto/rand_darwin.odin @@ -10,3 +10,7 @@ _rand_bytes :: proc(dst: []byte) { panic(fmt.tprintf("crypto/rand_bytes: SecRandomCopyBytes returned non-zero result: %v %s", res, msg)) } } + +_has_rand_bytes :: proc () -> bool { + return true +} diff --git a/core/crypto/rand_generic.odin b/core/crypto/rand_generic.odin index bf7abbbe2..006ca51fe 100644 --- a/core/crypto/rand_generic.odin +++ b/core/crypto/rand_generic.odin @@ -9,3 +9,7 @@ package crypto _rand_bytes :: proc(dst: []byte) { unimplemented("crypto: rand_bytes not supported on this OS") } + +_has_rand_bytes :: proc () -> bool { + 
return false +} diff --git a/core/crypto/rand_js.odin b/core/crypto/rand_js.odin index 353b1e6b9..cb2711404 100644 --- a/core/crypto/rand_js.odin +++ b/core/crypto/rand_js.odin @@ -18,3 +18,7 @@ _rand_bytes :: proc(dst: []byte) { dst = dst[to_read:] } } + +_has_rand_bytes :: proc () -> bool { + return true +} diff --git a/core/crypto/rand_linux.odin b/core/crypto/rand_linux.odin index 86fc425d6..05c05597d 100644 --- a/core/crypto/rand_linux.odin +++ b/core/crypto/rand_linux.odin @@ -34,3 +34,7 @@ _rand_bytes :: proc (dst: []byte) { dst = dst[n_read:] } } + +_has_rand_bytes :: proc () -> bool { + return true +} diff --git a/core/crypto/rand_windows.odin b/core/crypto/rand_windows.odin index 53b58c776..e1d9f6118 100644 --- a/core/crypto/rand_windows.odin +++ b/core/crypto/rand_windows.odin @@ -21,3 +21,7 @@ _rand_bytes :: proc(dst: []byte) { } } } + +_has_rand_bytes :: proc () -> bool { + return true +} diff --git a/tests/core/crypto/test_core_crypto.odin b/tests/core/crypto/test_core_crypto.odin index a6d399097..742e3cc04 100644 --- a/tests/core/crypto/test_core_crypto.odin +++ b/tests/core/crypto/test_core_crypto.odin @@ -347,7 +347,7 @@ test_x25519 :: proc(t: ^testing.T) { test_rand_bytes :: proc(t: ^testing.T) { tc.log(t, "Testing rand_bytes") - if ODIN_OS != .Linux { + if !crypto.has_rand_bytes() { tc.log(t, "rand_bytes not supported - skipping") return } From f9b9521bf07ffece22b24ac02ae4261e3d8b3c50 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Sun, 17 Mar 2024 10:39:10 +0900 Subject: [PATCH 03/14] core/crypto/_fiat/field_curve25519: Use multiply to calculate the mask Largely for consistency with the generic code, either is valid with Odin semantics, but this is easier to comprehend. 
--- core/crypto/_fiat/field_curve25519/field51.odin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/crypto/_fiat/field_curve25519/field51.odin b/core/crypto/_fiat/field_curve25519/field51.odin index 0be94eb51..1a731b31b 100644 --- a/core/crypto/_fiat/field_curve25519/field51.odin +++ b/core/crypto/_fiat/field_curve25519/field51.odin @@ -599,7 +599,7 @@ fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) { @(optimization_mode="none") fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) { - mask := -u64(arg1) + mask := (u64(arg1) * 0xffffffffffffffff) x := (out1[0] ~ out2[0]) & mask x1, y1 := out1[0] ~ x, out2[0] ~ x x = (out1[1] ~ out2[1]) & mask From 9a418fd27bcf5600ac16d74649f01e35bb8e626c Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Sun, 17 Mar 2024 19:02:11 +0900 Subject: [PATCH 04/14] core/crypto/_fiat/field_curve25519: Move routines (NFC) --- core/crypto/_fiat/field_curve25519/field.odin | 49 +++++++++++++++++ .../_fiat/field_curve25519/field51.odin | 53 ------------------- 2 files changed, 49 insertions(+), 53 deletions(-) diff --git a/core/crypto/_fiat/field_curve25519/field.odin b/core/crypto/_fiat/field_curve25519/field.odin index faf8ae3f7..a8e0a0316 100644 --- a/core/crypto/_fiat/field_curve25519/field.odin +++ b/core/crypto/_fiat/field_curve25519/field.odin @@ -136,3 +136,52 @@ fe_carry_inv :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { mem.zero_explicit(&tmp1, size_of(tmp1)) } + +fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) { + out1[0] = 0 + out1[1] = 0 + out1[2] = 0 + out1[3] = 0 + out1[4] = 0 +} + +fe_one :: proc "contextless" (out1: ^Tight_Field_Element) { + out1[0] = 1 + out1[1] = 0 + out1[2] = 0 + out1[3] = 0 + out1[4] = 0 +} + +fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) { + x1 := arg1[0] + x2 := arg1[1] + x3 := arg1[2] + x4 := arg1[3] + x5 := arg1[4] + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = 
x4 + out1[4] = x5 +} + +@(optimization_mode="none") +fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) { + mask := (u64(arg1) * 0xffffffffffffffff) + x := (out1[0] ~ out2[0]) & mask + x1, y1 := out1[0] ~ x, out2[0] ~ x + x = (out1[1] ~ out2[1]) & mask + x2, y2 := out1[1] ~ x, out2[1] ~ x + x = (out1[2] ~ out2[2]) & mask + x3, y3 := out1[2] ~ x, out2[2] ~ x + x = (out1[3] ~ out2[3]) & mask + x4, y4 := out1[3] ~ x, out2[3] ~ x + x = (out1[4] ~ out2[4]) & mask + x5, y5 := out1[4] ~ x, out2[4] ~ x + out1[0], out2[0] = x1, y1 + out1[1], out2[1] = x2, y2 + out1[2], out2[2] = x3, y3 + out1[3], out2[3] = x4, y4 + out1[4], out2[4] = x5, y5 +} diff --git a/core/crypto/_fiat/field_curve25519/field51.odin b/core/crypto/_fiat/field_curve25519/field51.odin index 1a731b31b..3cbc296b7 100644 --- a/core/crypto/_fiat/field_curve25519/field51.odin +++ b/core/crypto/_fiat/field_curve25519/field51.odin @@ -30,8 +30,6 @@ package field_curve25519 // // While the base implementation is provably correct, this implementation // makes no such claims as the port and optimizations were done by hand. -// At some point, it may be worth adding support to fiat-crypto for -// generating Odin output. // // TODO: // * When fiat-crypto supports it, using a saturated 64-bit limbs @@ -565,54 +563,3 @@ fe_carry_scmul_121666 :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_El out1[3] = x27 out1[4] = x32 } - -// The following routines were added by hand, and do not come from fiat-crypto. 
- -fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) { - out1[0] = 0 - out1[1] = 0 - out1[2] = 0 - out1[3] = 0 - out1[4] = 0 -} - -fe_one :: proc "contextless" (out1: ^Tight_Field_Element) { - out1[0] = 1 - out1[1] = 0 - out1[2] = 0 - out1[3] = 0 - out1[4] = 0 -} - -fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) { - x1 := arg1[0] - x2 := arg1[1] - x3 := arg1[2] - x4 := arg1[3] - x5 := arg1[4] - out1[0] = x1 - out1[1] = x2 - out1[2] = x3 - out1[3] = x4 - out1[4] = x5 -} - -@(optimization_mode="none") -fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) { - mask := (u64(arg1) * 0xffffffffffffffff) - x := (out1[0] ~ out2[0]) & mask - x1, y1 := out1[0] ~ x, out2[0] ~ x - x = (out1[1] ~ out2[1]) & mask - x2, y2 := out1[1] ~ x, out2[1] ~ x - x = (out1[2] ~ out2[2]) & mask - x3, y3 := out1[2] ~ x, out2[2] ~ x - x = (out1[3] ~ out2[3]) & mask - x4, y4 := out1[3] ~ x, out2[3] ~ x - x = (out1[4] ~ out2[4]) & mask - x5, y5 := out1[4] ~ x, out2[4] ~ x - out1[0], out2[0] = x1, y1 - out1[1], out2[1] = x2, y2 - out1[2], out2[2] = x3, y3 - out1[3], out2[3] = x4, y4 - out1[4], out2[4] = x5, y5 -} From 31aba5a7280dd5c8fe70d960058002fd682baa57 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Sun, 17 Mar 2024 19:06:50 +0900 Subject: [PATCH 05/14] core/crypto/_fiat/field_poly1305: Move routines (NFC) --- core/crypto/_fiat/field_poly1305/field.odin | 29 +++++++++++++++++ .../_fiat/field_poly1305/field4344.odin | 31 ------------------- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/core/crypto/_fiat/field_poly1305/field.odin b/core/crypto/_fiat/field_poly1305/field.odin index a103f6fc7..9b00ff3ec 100644 --- a/core/crypto/_fiat/field_poly1305/field.odin +++ b/core/crypto/_fiat/field_poly1305/field.odin @@ -51,3 +51,32 @@ fe_from_u64s :: proc "contextless" (out1: ^Tight_Field_Element, lo, hi: u64) { // This routine is only used to deserialize `r` which is confidential. 
mem.zero_explicit(&tmp, size_of(tmp)) } + +fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) { + out1[0] = 0 + out1[1] = 0 + out1[2] = 0 +} + +fe_set :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) { + x1 := arg1[0] + x2 := arg1[1] + x3 := arg1[2] + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 +} + +@(optimization_mode="none") +fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: bool) { + mask := -u64(arg1) + x := (out1[0] ~ out2[0]) & mask + x1, y1 := out1[0] ~ x, out2[0] ~ x + x = (out1[1] ~ out2[1]) & mask + x2, y2 := out1[1] ~ x, out2[1] ~ x + x = (out1[2] ~ out2[2]) & mask + x3, y3 := out1[2] ~ x, out2[2] ~ x + out1[0], out2[0] = x1, y1 + out1[1], out2[1] = x2, y2 + out1[2], out2[2] = x3, y3 +} diff --git a/core/crypto/_fiat/field_poly1305/field4344.odin b/core/crypto/_fiat/field_poly1305/field4344.odin index 8e8a7cc78..ddc10de52 100644 --- a/core/crypto/_fiat/field_poly1305/field4344.odin +++ b/core/crypto/_fiat/field_poly1305/field4344.odin @@ -325,34 +325,3 @@ fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_E out1[1] = x2 out1[2] = x3 } - -// The following routines were added by hand, and do not come from fiat-crypto. 
- -fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) { - out1[0] = 0 - out1[1] = 0 - out1[2] = 0 -} - -fe_set :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) { - x1 := arg1[0] - x2 := arg1[1] - x3 := arg1[2] - out1[0] = x1 - out1[1] = x2 - out1[2] = x3 -} - -@(optimization_mode="none") -fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: bool) { - mask := -u64(arg1) - x := (out1[0] ~ out2[0]) & mask - x1, y1 := out1[0] ~ x, out2[0] ~ x - x = (out1[1] ~ out2[1]) & mask - x2, y2 := out1[1] ~ x, out2[1] ~ x - x = (out1[2] ~ out2[2]) & mask - x3, y3 := out1[2] ~ x, out2[2] ~ x - out1[0], out2[0] = x1, y1 - out1[1], out2[1] = x2, y2 - out1[2], out2[2] = x3, y3 -} From c951cbdbbcb1bce484ee79ffcf3c288fde64b802 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Sun, 17 Mar 2024 19:08:30 +0900 Subject: [PATCH 06/14] core/crypto/_fiat: odinfmt (NFC) --- core/crypto/_fiat/fiat.odin | 4 +- core/crypto/_fiat/field_curve25519/field.odin | 20 +++++---- .../_fiat/field_curve25519/field51.odin | 31 +++++++++---- core/crypto/_fiat/field_poly1305/field.odin | 17 +++++--- .../_fiat/field_poly1305/field4344.odin | 43 +++++++++++++++---- 5 files changed, 84 insertions(+), 31 deletions(-) diff --git a/core/crypto/_fiat/fiat.odin b/core/crypto/_fiat/fiat.odin index f0551722f..cc73c6927 100644 --- a/core/crypto/_fiat/fiat.odin +++ b/core/crypto/_fiat/fiat.odin @@ -9,7 +9,7 @@ package fiat u1 :: distinct u8 i1 :: distinct i8 -@(optimization_mode="none") +@(optimization_mode = "none") cmovznz_u64 :: proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) { x1 := (u64(arg1) * 0xffffffffffffffff) x2 := ((x1 & arg3) | ((~x1) & arg2)) @@ -17,7 +17,7 @@ cmovznz_u64 :: proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) { return } -@(optimization_mode="none") +@(optimization_mode = "none") cmovznz_u32 :: proc "contextless" (arg1: u1, arg2, arg3: u32) -> (out1: u32) { x1 := (u32(arg1) * 0xffffffff) x2 := ((x1 & 
arg3) | ((~x1) & arg2)) diff --git a/core/crypto/_fiat/field_curve25519/field.odin b/core/crypto/_fiat/field_curve25519/field.odin index a8e0a0316..cf7f694bc 100644 --- a/core/crypto/_fiat/field_curve25519/field.odin +++ b/core/crypto/_fiat/field_curve25519/field.odin @@ -3,11 +3,15 @@ package field_curve25519 import "core:crypto" import "core:mem" -fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element { +fe_relax_cast :: #force_inline proc "contextless" ( + arg1: ^Tight_Field_Element, +) -> ^Loose_Field_Element { return transmute(^Loose_Field_Element)(arg1) } -fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element { +fe_tighten_cast :: #force_inline proc "contextless" ( + arg1: ^Loose_Field_Element, +) -> ^Tight_Field_Element { return transmute(^Tight_Field_Element)(arg1) } @@ -46,7 +50,7 @@ fe_equal_bytes :: proc "contextless" (arg1: ^Tight_Field_Element, arg2: ^[32]byt return ret } -fe_carry_pow2k :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element, arg2: uint) { +fe_carry_pow2k :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element, arg2: uint) { // Special case: `arg1^(2 * 0) = 1`, though this should never happen. if arg2 == 0 { fe_one(out1) @@ -54,7 +58,7 @@ fe_carry_pow2k :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element, } fe_carry_square(out1, arg1) - for _ in 1.. int { +fe_carry_invsqrt :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) -> int { // Inverse square root taken from Monocypher. 
tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, --- @@ -116,7 +120,7 @@ fe_carry_invsqrt :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element // then isr = x^((p-1)/4) * sqrt(-1) // else isr = x^((p-1)/4) fe_carry_mul(out1, fe_relax_cast(&tmp1), fe_relax_cast(&SQRT_M1)) - fe_cond_assign(out1, &tmp1, (m1|ms) ~ 1) + fe_cond_assign(out1, &tmp1, (m1 | ms) ~ 1) mem.zero_explicit(&tmp1, size_of(tmp1)) mem.zero_explicit(&tmp2, size_of(tmp2)) @@ -126,7 +130,7 @@ fe_carry_invsqrt :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element return p1 | m1 } -fe_carry_inv :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { +fe_carry_inv :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { tmp1: Tight_Field_Element fe_carry_square(&tmp1, arg1) @@ -166,7 +170,7 @@ fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) { out1[4] = x5 } -@(optimization_mode="none") +@(optimization_mode = "none") fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) { mask := (u64(arg1) * 0xffffffffffffffff) x := (out1[0] ~ out2[0]) & mask diff --git a/core/crypto/_fiat/field_curve25519/field51.odin b/core/crypto/_fiat/field_curve25519/field51.odin index 3cbc296b7..4cda96c81 100644 --- a/core/crypto/_fiat/field_curve25519/field51.odin +++ b/core/crypto/_fiat/field_curve25519/field51.odin @@ -42,7 +42,7 @@ import "core:math/bits" Loose_Field_Element :: distinct [5]u64 Tight_Field_Element :: distinct [5]u64 -SQRT_M1 := Tight_Field_Element{ +SQRT_M1 := Tight_Field_Element { 1718705420411056, 234908883556509, 2233514472574048, @@ -50,7 +50,13 @@ SQRT_M1 := Tight_Field_Element{ 765476049583133, } -_addcarryx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) { +_addcarryx_u51 :: #force_inline proc "contextless" ( + arg1: fiat.u1, + arg2, arg3: u64, +) -> ( + out1: u64, + out2: fiat.u1, +) { x1 := ((u64(arg1) + arg2) + arg3) x2 := (x1 & 0x7ffffffffffff) x3 
:= fiat.u1((x1 >> 51)) @@ -59,7 +65,13 @@ _addcarryx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u return } -_subborrowx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) { +_subborrowx_u51 :: #force_inline proc "contextless" ( + arg1: fiat.u1, + arg2, arg3: u64, +) -> ( + out1: u64, + out2: fiat.u1, +) { x1 := ((i64(arg2) - i64(arg1)) - i64(arg3)) x2 := fiat.i1((x1 >> 51)) x3 := (u64(x1) & 0x7ffffffffffff) @@ -68,7 +80,7 @@ _subborrowx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: return } -fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) { +fe_carry_mul :: proc(out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) { x2, x1 := bits.mul_u64(arg1[4], (arg2[4] * 0x13)) x4, x3 := bits.mul_u64(arg1[4], (arg2[3] * 0x13)) x6, x5 := bits.mul_u64(arg1[4], (arg2[2] * 0x13)) @@ -167,7 +179,7 @@ fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Eleme out1[4] = x152 } -fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { +fe_carry_square :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { x1 := (arg1[4] * 0x13) x2 := (x1 * 0x2) x3 := (arg1[4] * 0x2) @@ -303,8 +315,11 @@ fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Ele out1[4] = x5 } -@(optimization_mode="none") -fe_cond_assign :: #force_no_inline proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: int) { +@(optimization_mode = "none") +fe_cond_assign :: #force_no_inline proc "contextless" ( + out1, arg1: ^Tight_Field_Element, + arg2: int, +) { x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0]) x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1]) x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2]) @@ -525,7 +540,7 @@ fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_E out1[4] = x5 } -fe_carry_scmul_121666 :: proc (out1: ^Tight_Field_Element, 
arg1: ^Loose_Field_Element) { +fe_carry_scmul_121666 :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { x2, x1 := bits.mul_u64(0x1db42, arg1[4]) x4, x3 := bits.mul_u64(0x1db42, arg1[3]) x6, x5 := bits.mul_u64(0x1db42, arg1[2]) diff --git a/core/crypto/_fiat/field_poly1305/field.odin b/core/crypto/_fiat/field_poly1305/field.odin index 9b00ff3ec..f5557cf5f 100644 --- a/core/crypto/_fiat/field_poly1305/field.odin +++ b/core/crypto/_fiat/field_poly1305/field.odin @@ -3,15 +3,19 @@ package field_poly1305 import "core:encoding/endian" import "core:mem" -fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element { +fe_relax_cast :: #force_inline proc "contextless" ( + arg1: ^Tight_Field_Element, +) -> ^Loose_Field_Element { return transmute(^Loose_Field_Element)(arg1) } -fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element { +fe_tighten_cast :: #force_inline proc "contextless" ( + arg1: ^Loose_Field_Element, +) -> ^Tight_Field_Element { return transmute(^Tight_Field_Element)(arg1) } -fe_from_bytes :: #force_inline proc (out1: ^Tight_Field_Element, arg1: []byte, arg2: byte) { +fe_from_bytes :: #force_inline proc(out1: ^Tight_Field_Element, arg1: []byte, arg2: byte) { // fiat-crypto's deserialization routine effectively processes a // single byte at a time, and wants 256-bits of input for a value // that will be 128-bits or 129-bits. 
@@ -67,8 +71,11 @@ fe_set :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) { out1[2] = x3 } -@(optimization_mode="none") -fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: bool) { +@(optimization_mode = "none") +fe_cond_swap :: #force_no_inline proc "contextless" ( + out1, out2: ^Tight_Field_Element, + arg1: bool, +) { mask := -u64(arg1) x := (out1[0] ~ out2[0]) & mask x1, y1 := out1[0] ~ x, out2[0] ~ x diff --git a/core/crypto/_fiat/field_poly1305/field4344.odin b/core/crypto/_fiat/field_poly1305/field4344.odin index ddc10de52..bddb0c322 100644 --- a/core/crypto/_fiat/field_poly1305/field4344.odin +++ b/core/crypto/_fiat/field_poly1305/field4344.odin @@ -39,7 +39,13 @@ import "core:math/bits" Loose_Field_Element :: distinct [3]u64 Tight_Field_Element :: distinct [3]u64 -_addcarryx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) { +_addcarryx_u44 :: #force_inline proc "contextless" ( + arg1: fiat.u1, + arg2, arg3: u64, +) -> ( + out1: u64, + out2: fiat.u1, +) { x1 := ((u64(arg1) + arg2) + arg3) x2 := (x1 & 0xfffffffffff) x3 := fiat.u1((x1 >> 44)) @@ -48,7 +54,13 @@ _addcarryx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u return } -_subborrowx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) { +_subborrowx_u44 :: #force_inline proc "contextless" ( + arg1: fiat.u1, + arg2, arg3: u64, +) -> ( + out1: u64, + out2: fiat.u1, +) { x1 := ((i64(arg2) - i64(arg1)) - i64(arg3)) x2 := fiat.i1((x1 >> 44)) x3 := (u64(x1) & 0xfffffffffff) @@ -57,7 +69,13 @@ _subborrowx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: return } -_addcarryx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) { +_addcarryx_u43 :: #force_inline proc "contextless" ( + arg1: fiat.u1, + arg2, arg3: u64, +) -> ( + out1: u64, + out2: fiat.u1, 
+) { x1 := ((u64(arg1) + arg2) + arg3) x2 := (x1 & 0x7ffffffffff) x3 := fiat.u1((x1 >> 43)) @@ -66,7 +84,13 @@ _addcarryx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u return } -_subborrowx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) { +_subborrowx_u43 :: #force_inline proc "contextless" ( + arg1: fiat.u1, + arg2, arg3: u64, +) -> ( + out1: u64, + out2: fiat.u1, +) { x1 := ((i64(arg2) - i64(arg1)) - i64(arg3)) x2 := fiat.i1((x1 >> 43)) x3 := (u64(x1) & 0x7ffffffffff) @@ -75,7 +99,7 @@ _subborrowx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: return } -fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) { +fe_carry_mul :: proc(out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) { x2, x1 := bits.mul_u64(arg1[2], (arg2[2] * 0x5)) x4, x3 := bits.mul_u64(arg1[2], (arg2[1] * 0xa)) x6, x5 := bits.mul_u64(arg1[1], (arg2[2] * 0xa)) @@ -120,7 +144,7 @@ fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Eleme out1[2] = x62 } -fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { +fe_carry_square :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { x1 := (arg1[2] * 0x5) x2 := (x1 * 0x2) x3 := (arg1[2] * 0x2) @@ -201,8 +225,11 @@ fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Ele out1[2] = x3 } -@(optimization_mode="none") -fe_cond_assign :: #force_no_inline proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: bool) { +@(optimization_mode = "none") +fe_cond_assign :: #force_no_inline proc "contextless" ( + out1, arg1: ^Tight_Field_Element, + arg2: bool, +) { x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0]) x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1]) x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2]) From 1ce279e6a1dd59f4bffc33acc4cc281e4c45d441 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Wed, 20 Mar 
2024 23:17:05 +0900 Subject: [PATCH 07/14] core/crypto/_fiat/field_curve25519: Mark more functions contextless --- core/crypto/_fiat/field_curve25519/field.odin | 13 ++++++++++--- core/crypto/_fiat/field_curve25519/field51.odin | 9 ++++++--- core/crypto/x25519/x25519.odin | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/core/crypto/_fiat/field_curve25519/field.odin b/core/crypto/_fiat/field_curve25519/field.odin index cf7f694bc..64f9f8a1f 100644 --- a/core/crypto/_fiat/field_curve25519/field.odin +++ b/core/crypto/_fiat/field_curve25519/field.odin @@ -50,7 +50,11 @@ fe_equal_bytes :: proc "contextless" (arg1: ^Tight_Field_Element, arg2: ^[32]byt return ret } -fe_carry_pow2k :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element, arg2: uint) { +fe_carry_pow2k :: proc "contextless" ( + out1: ^Tight_Field_Element, + arg1: ^Loose_Field_Element, + arg2: uint, +) { // Special case: `arg1^(2 * 0) = 1`, though this should never happen. if arg2 == 0 { fe_one(out1) @@ -68,7 +72,10 @@ fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Eleme fe_carry(out1, fe_relax_cast(out1)) } -fe_carry_invsqrt :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) -> int { +fe_carry_invsqrt :: proc "contextless" ( + out1: ^Tight_Field_Element, + arg1: ^Loose_Field_Element, +) -> int { // Inverse square root taken from Monocypher. 
tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, --- @@ -130,7 +137,7 @@ fe_carry_invsqrt :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) return p1 | m1 } -fe_carry_inv :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { +fe_carry_inv :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { tmp1: Tight_Field_Element fe_carry_square(&tmp1, arg1) diff --git a/core/crypto/_fiat/field_curve25519/field51.odin b/core/crypto/_fiat/field_curve25519/field51.odin index 4cda96c81..81dca19e2 100644 --- a/core/crypto/_fiat/field_curve25519/field51.odin +++ b/core/crypto/_fiat/field_curve25519/field51.odin @@ -80,7 +80,7 @@ _subborrowx_u51 :: #force_inline proc "contextless" ( return } -fe_carry_mul :: proc(out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) { +fe_carry_mul :: proc "contextless" (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) { x2, x1 := bits.mul_u64(arg1[4], (arg2[4] * 0x13)) x4, x3 := bits.mul_u64(arg1[4], (arg2[3] * 0x13)) x6, x5 := bits.mul_u64(arg1[4], (arg2[2] * 0x13)) @@ -179,7 +179,7 @@ fe_carry_mul :: proc(out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Elemen out1[4] = x152 } -fe_carry_square :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { +fe_carry_square :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { x1 := (arg1[4] * 0x13) x2 := (x1 * 0x2) x3 := (arg1[4] * 0x2) @@ -540,7 +540,10 @@ fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_E out1[4] = x5 } -fe_carry_scmul_121666 :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { +fe_carry_scmul_121666 :: proc "contextless" ( + out1: ^Tight_Field_Element, + arg1: ^Loose_Field_Element, +) { x2, x1 := bits.mul_u64(0x1db42, arg1[4]) x4, x3 := bits.mul_u64(0x1db42, arg1[3]) x6, x5 := bits.mul_u64(0x1db42, arg1[2]) diff --git a/core/crypto/x25519/x25519.odin b/core/crypto/x25519/x25519.odin index 285666a32..3cd247cf8 100644 --- 
a/core/crypto/x25519/x25519.odin +++ b/core/crypto/x25519/x25519.odin @@ -27,7 +27,7 @@ _scalar_bit :: #force_inline proc "contextless" (s: ^[32]byte, i: int) -> u8 { } @(private) -_scalarmult :: proc(out, scalar, point: ^[32]byte) { +_scalarmult :: proc "contextless" (out, scalar, point: ^[32]byte) { // Montgomery pseduo-multiplication taken from Monocypher. // computes the scalar product From 36f3001d59f0c4e1d00f3f75431830c3b463e9f6 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Wed, 20 Mar 2024 08:02:20 +0900 Subject: [PATCH 08/14] core/crypto/_fiat/field_poly1305: Use multiply to calculate the mask --- core/crypto/_fiat/field_poly1305/field.odin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/crypto/_fiat/field_poly1305/field.odin b/core/crypto/_fiat/field_poly1305/field.odin index f5557cf5f..f4eccc476 100644 --- a/core/crypto/_fiat/field_poly1305/field.odin +++ b/core/crypto/_fiat/field_poly1305/field.odin @@ -76,7 +76,7 @@ fe_cond_swap :: #force_no_inline proc "contextless" ( out1, out2: ^Tight_Field_Element, arg1: bool, ) { - mask := -u64(arg1) + mask := (u64(arg1) * 0xffffffffffffffff) x := (out1[0] ~ out2[0]) & mask x1, y1 := out1[0] ~ x, out2[0] ~ x x = (out1[1] ~ out2[1]) & mask From 4defe88decb740e73aaac3f5fe197a84d32b4c1e Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Wed, 20 Mar 2024 23:21:27 +0900 Subject: [PATCH 09/14] core/crypto/_fiat/field_poly1305: Mark more functions contextless --- core/crypto/_fiat/field_poly1305/field.odin | 11 +++++++++-- core/crypto/_fiat/field_poly1305/field4344.odin | 4 ++-- core/crypto/poly1305/poly1305.odin | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/core/crypto/_fiat/field_poly1305/field.odin b/core/crypto/_fiat/field_poly1305/field.odin index f4eccc476..c50a56b0c 100644 --- a/core/crypto/_fiat/field_poly1305/field.odin +++ b/core/crypto/_fiat/field_poly1305/field.odin @@ -1,5 +1,6 @@ package field_poly1305 +import "base:intrinsics" import "core:encoding/endian" 
import "core:mem" @@ -15,7 +16,11 @@ fe_tighten_cast :: #force_inline proc "contextless" ( return transmute(^Tight_Field_Element)(arg1) } -fe_from_bytes :: #force_inline proc(out1: ^Tight_Field_Element, arg1: []byte, arg2: byte) { +fe_from_bytes :: #force_inline proc "contextless" ( + out1: ^Tight_Field_Element, + arg1: []byte, + arg2: byte, +) { // fiat-crypto's deserialization routine effectively processes a // single byte at a time, and wants 256-bits of input for a value // that will be 128-bits or 129-bits. @@ -24,7 +29,9 @@ fe_from_bytes :: #force_inline proc(out1: ^Tight_Field_Element, arg1: []byte, ar // makes implementing the actual MAC block processing considerably // neater. - assert(len(arg1) == 16) + if len(arg1) != 16 { + intrinsics.trap() + } // While it may be unwise to do deserialization here on our // own when fiat-crypto provides equivalent functionality, diff --git a/core/crypto/_fiat/field_poly1305/field4344.odin b/core/crypto/_fiat/field_poly1305/field4344.odin index bddb0c322..6a7a19d69 100644 --- a/core/crypto/_fiat/field_poly1305/field4344.odin +++ b/core/crypto/_fiat/field_poly1305/field4344.odin @@ -99,7 +99,7 @@ _subborrowx_u43 :: #force_inline proc "contextless" ( return } -fe_carry_mul :: proc(out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) { +fe_carry_mul :: proc "contextless" (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) { x2, x1 := bits.mul_u64(arg1[2], (arg2[2] * 0x5)) x4, x3 := bits.mul_u64(arg1[2], (arg2[1] * 0xa)) x6, x5 := bits.mul_u64(arg1[1], (arg2[2] * 0xa)) @@ -144,7 +144,7 @@ fe_carry_mul :: proc(out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Elemen out1[2] = x62 } -fe_carry_square :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { +fe_carry_square :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { x1 := (arg1[2] * 0x5) x2 := (x1 * 0x2) x3 := (arg1[2] * 0x2) diff --git a/core/crypto/poly1305/poly1305.odin b/core/crypto/poly1305/poly1305.odin 
index 4ca4f75e1..443917a6a 100644 --- a/core/crypto/poly1305/poly1305.odin +++ b/core/crypto/poly1305/poly1305.odin @@ -168,7 +168,7 @@ reset :: proc(ctx: ^Context) { } @(private) -_blocks :: proc(ctx: ^Context, msg: []byte, final := false) { +_blocks :: proc "contextless" (ctx: ^Context, msg: []byte, final := false) { n: field.Tight_Field_Element = --- final_byte := byte(!final) From fec42a6d741bfda489a5ab2423644f3169ff4128 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Tue, 19 Mar 2024 12:23:16 +0900 Subject: [PATCH 10/14] core/crypto/_fiat/field_scalar25519: Initial import --- .../crypto/_fiat/field_scalar25519/field.odin | 149 +++++ .../_fiat/field_scalar25519/field64.odin | 535 ++++++++++++++++++ 2 files changed, 684 insertions(+) create mode 100644 core/crypto/_fiat/field_scalar25519/field.odin create mode 100644 core/crypto/_fiat/field_scalar25519/field64.odin diff --git a/core/crypto/_fiat/field_scalar25519/field.odin b/core/crypto/_fiat/field_scalar25519/field.odin new file mode 100644 index 000000000..c741d30cf --- /dev/null +++ b/core/crypto/_fiat/field_scalar25519/field.odin @@ -0,0 +1,149 @@ +package field_scalar25519 + +import "base:intrinsics" +import "core:encoding/endian" +import "core:math/bits" +import "core:mem" + +@(private) +_TWO_168 := Montgomery_Domain_Field_Element { + 0x5b8ab432eac74798, + 0x38afddd6de59d5d7, + 0xa2c131b399411b7c, + 0x6329a7ed9ce5a30, +} +@(private) +_TWO_336 := Montgomery_Domain_Field_Element { + 0xbd3d108e2b35ecc5, + 0x5c3a3718bdf9c90b, + 0x63aa97a331b4f2ee, + 0x3d217f5be65cb5c, +} + +fe_from_bytes :: proc "contextless" ( + out1: ^Montgomery_Domain_Field_Element, + arg1: ^[32]byte, + unsafe_assume_canonical := false, +) -> bool { + tmp := Non_Montgomery_Domain_Field_Element { + endian.unchecked_get_u64le(arg1[0:]), + endian.unchecked_get_u64le(arg1[8:]), + endian.unchecked_get_u64le(arg1[16:]), + endian.unchecked_get_u64le(arg1[24:]), + } + defer mem.zero_explicit(&tmp, size_of(tmp)) + + // Check that tmp is in the 
range [0, ELL). + if !unsafe_assume_canonical { + _, borrow := bits.sub_u64(ELL[0] - 1, tmp[0], 0) + _, borrow = bits.sub_u64(ELL[1], tmp[1], borrow) + _, borrow = bits.sub_u64(ELL[2], tmp[2], borrow) + _, borrow = bits.sub_u64(ELL[3], tmp[3], borrow) + if borrow != 0 { + return false + } + } + + fe_to_montgomery(out1, &tmp) + + return true +} + +fe_from_bytes_rfc8032 :: proc "contextless" ( + out1: ^Montgomery_Domain_Field_Element, + arg1: ^[32]byte, +) { + tmp: [64]byte + copy(tmp[:], arg1[:]) + + // Apply "clamping" as in RFC 8032. + tmp[0] &= 248 + tmp[31] &= 127 + tmp[31] |= 64 // Sets the 254th bit, so the encoding is non-canonical. + + fe_from_bytes_wide(out1, &tmp) + + mem.zero_explicit(&tmp, size_of(tmp)) +} + +fe_from_bytes_wide :: proc "contextless" ( + out1: ^Montgomery_Domain_Field_Element, + arg1: ^[64]byte, +) { + tmp: Montgomery_Domain_Field_Element + // Use Frank Denis' trick, as documented by Filippo Valsorda + // at https://words.filippo.io/dispatches/wide-reduction/ + // + // x = c * 2^336 + b * 2^168 + a mod l + _fe_from_bytes_short(out1, arg1[:21]) // a + + _fe_from_bytes_short(&tmp, arg1[21:42]) // b + fe_mul(&tmp, &tmp, &_TWO_168) // b * 2^168 + fe_add(out1, out1, &tmp) // a + b * 2^168 + + _fe_from_bytes_short(&tmp, arg1[42:]) // c + fe_mul(&tmp, &tmp, &_TWO_336) // c * 2^336 + fe_add(out1, out1, &tmp) // a + b * 2^168 + c * 2^336 + + mem.zero_explicit(&tmp, size_of(tmp)) +} + +@(private) +_fe_from_bytes_short :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element, arg1: []byte) { + // INVARIANT: len(arg1) < 32.
+ if len(arg1) >= 32 { + intrinsics.trap() + } + tmp: [32]byte + copy(tmp[:], arg1) + + _ = fe_from_bytes(out1, &tmp, true) + mem.zero_explicit(&tmp, size_of(tmp)) +} + +fe_to_bytes :: proc "contextless" (out1: []byte, arg1: ^Montgomery_Domain_Field_Element) { + if len(out1) != 32 { + intrinsics.trap() + } + + tmp: Non_Montgomery_Domain_Field_Element + fe_from_montgomery(&tmp, arg1) + + endian.unchecked_put_u64le(out1[0:], tmp[0]) + endian.unchecked_put_u64le(out1[8:], tmp[1]) + endian.unchecked_put_u64le(out1[16:], tmp[2]) + endian.unchecked_put_u64le(out1[24:], tmp[3]) + + mem.zero_explicit(&tmp, size_of(tmp)) +} + +fe_equal :: proc "contextless" (arg1, arg2: ^Montgomery_Domain_Field_Element) -> int { + tmp: Montgomery_Domain_Field_Element + fe_sub(&tmp, arg1, arg2) + + // This will only underflow iff arg1 == arg2, and we return the borrow, + // which will be 1. + _, borrow := bits.sub_u64(fe_non_zero(&tmp), 1, 0) + + mem.zero_explicit(&tmp, size_of(tmp)) + + return int(borrow) +} + +fe_zero :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element) { + out1[0] = 0 + out1[1] = 0 + out1[2] = 0 + out1[3] = 0 +} + +fe_set :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) { + x1 := arg1[0] + x2 := arg1[1] + x3 := arg1[2] + x4 := arg1[3] + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = x4 +} diff --git a/core/crypto/_fiat/field_scalar25519/field64.odin b/core/crypto/_fiat/field_scalar25519/field64.odin new file mode 100644 index 000000000..268752e5b --- /dev/null +++ b/core/crypto/_fiat/field_scalar25519/field64.odin @@ -0,0 +1,535 @@ +// The BSD 1-Clause License (BSD-1-Clause) +// +// Copyright (c) 2015-2020 the fiat-crypto authors (see the AUTHORS file) +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design, +// Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package field_scalar25519 + +// The file provides arithmetic on the field Z/(2^252+27742317777372353535851937790883648493) +// using a 64-bit Montgomery form internal representation. It is derived +// primarily from the machine generated Golang output from the fiat-crypto +// project. +// +// While the base implementation is provably correct, this implementation +// makes no such claims as the port and optimizations were done by hand. + +import fiat "core:crypto/_fiat" +import "core:math/bits" + +// ELL is the saturated representation of the field order, least-significant +// limb first. 
+ELL :: [4]u64{0x5812631a5cf5d3ed, 0x14def9dea2f79cd6, 0x0, 0x1000000000000000} + +Montgomery_Domain_Field_Element :: distinct [4]u64 +Non_Montgomery_Domain_Field_Element :: distinct [4]u64 + +fe_mul :: proc "contextless" (out1, arg1, arg2: ^Montgomery_Domain_Field_Element) { + x1 := arg1[1] + x2 := arg1[2] + x3 := arg1[3] + x4 := arg1[0] + x6, x5 := bits.mul_u64(x4, arg2[3]) + x8, x7 := bits.mul_u64(x4, arg2[2]) + x10, x9 := bits.mul_u64(x4, arg2[1]) + x12, x11 := bits.mul_u64(x4, arg2[0]) + x13, x14 := bits.add_u64(x12, x9, u64(0x0)) + x15, x16 := bits.add_u64(x10, x7, u64(fiat.u1(x14))) + x17, x18 := bits.add_u64(x8, x5, u64(fiat.u1(x16))) + x19 := (u64(fiat.u1(x18)) + x6) + _, x20 := bits.mul_u64(x11, 0xd2b51da312547e1b) + x23, x22 := bits.mul_u64(x20, 0x1000000000000000) + x25, x24 := bits.mul_u64(x20, 0x14def9dea2f79cd6) + x27, x26 := bits.mul_u64(x20, 0x5812631a5cf5d3ed) + x28, x29 := bits.add_u64(x27, x24, u64(0x0)) + x30 := (u64(fiat.u1(x29)) + x25) + _, x32 := bits.add_u64(x11, x26, u64(0x0)) + x33, x34 := bits.add_u64(x13, x28, u64(fiat.u1(x32))) + x35, x36 := bits.add_u64(x15, x30, u64(fiat.u1(x34))) + x37, x38 := bits.add_u64(x17, x22, u64(fiat.u1(x36))) + x39, x40 := bits.add_u64(x19, x23, u64(fiat.u1(x38))) + x42, x41 := bits.mul_u64(x1, arg2[3]) + x44, x43 := bits.mul_u64(x1, arg2[2]) + x46, x45 := bits.mul_u64(x1, arg2[1]) + x48, x47 := bits.mul_u64(x1, arg2[0]) + x49, x50 := bits.add_u64(x48, x45, u64(0x0)) + x51, x52 := bits.add_u64(x46, x43, u64(fiat.u1(x50))) + x53, x54 := bits.add_u64(x44, x41, u64(fiat.u1(x52))) + x55 := (u64(fiat.u1(x54)) + x42) + x56, x57 := bits.add_u64(x33, x47, u64(0x0)) + x58, x59 := bits.add_u64(x35, x49, u64(fiat.u1(x57))) + x60, x61 := bits.add_u64(x37, x51, u64(fiat.u1(x59))) + x62, x63 := bits.add_u64(x39, x53, u64(fiat.u1(x61))) + x64, x65 := bits.add_u64(u64(fiat.u1(x40)), x55, u64(fiat.u1(x63))) + _, x66 := bits.mul_u64(x56, 0xd2b51da312547e1b) + x69, x68 := bits.mul_u64(x66, 0x1000000000000000) + x71, x70 := 
bits.mul_u64(x66, 0x14def9dea2f79cd6) + x73, x72 := bits.mul_u64(x66, 0x5812631a5cf5d3ed) + x74, x75 := bits.add_u64(x73, x70, u64(0x0)) + x76 := (u64(fiat.u1(x75)) + x71) + _, x78 := bits.add_u64(x56, x72, u64(0x0)) + x79, x80 := bits.add_u64(x58, x74, u64(fiat.u1(x78))) + x81, x82 := bits.add_u64(x60, x76, u64(fiat.u1(x80))) + x83, x84 := bits.add_u64(x62, x68, u64(fiat.u1(x82))) + x85, x86 := bits.add_u64(x64, x69, u64(fiat.u1(x84))) + x87 := (u64(fiat.u1(x86)) + u64(fiat.u1(x65))) + x89, x88 := bits.mul_u64(x2, arg2[3]) + x91, x90 := bits.mul_u64(x2, arg2[2]) + x93, x92 := bits.mul_u64(x2, arg2[1]) + x95, x94 := bits.mul_u64(x2, arg2[0]) + x96, x97 := bits.add_u64(x95, x92, u64(0x0)) + x98, x99 := bits.add_u64(x93, x90, u64(fiat.u1(x97))) + x100, x101 := bits.add_u64(x91, x88, u64(fiat.u1(x99))) + x102 := (u64(fiat.u1(x101)) + x89) + x103, x104 := bits.add_u64(x79, x94, u64(0x0)) + x105, x106 := bits.add_u64(x81, x96, u64(fiat.u1(x104))) + x107, x108 := bits.add_u64(x83, x98, u64(fiat.u1(x106))) + x109, x110 := bits.add_u64(x85, x100, u64(fiat.u1(x108))) + x111, x112 := bits.add_u64(x87, x102, u64(fiat.u1(x110))) + _, x113 := bits.mul_u64(x103, 0xd2b51da312547e1b) + x116, x115 := bits.mul_u64(x113, 0x1000000000000000) + x118, x117 := bits.mul_u64(x113, 0x14def9dea2f79cd6) + x120, x119 := bits.mul_u64(x113, 0x5812631a5cf5d3ed) + x121, x122 := bits.add_u64(x120, x117, u64(0x0)) + x123 := (u64(fiat.u1(x122)) + x118) + _, x125 := bits.add_u64(x103, x119, u64(0x0)) + x126, x127 := bits.add_u64(x105, x121, u64(fiat.u1(x125))) + x128, x129 := bits.add_u64(x107, x123, u64(fiat.u1(x127))) + x130, x131 := bits.add_u64(x109, x115, u64(fiat.u1(x129))) + x132, x133 := bits.add_u64(x111, x116, u64(fiat.u1(x131))) + x134 := (u64(fiat.u1(x133)) + u64(fiat.u1(x112))) + x136, x135 := bits.mul_u64(x3, arg2[3]) + x138, x137 := bits.mul_u64(x3, arg2[2]) + x140, x139 := bits.mul_u64(x3, arg2[1]) + x142, x141 := bits.mul_u64(x3, arg2[0]) + x143, x144 := bits.add_u64(x142, x139, 
u64(0x0)) + x145, x146 := bits.add_u64(x140, x137, u64(fiat.u1(x144))) + x147, x148 := bits.add_u64(x138, x135, u64(fiat.u1(x146))) + x149 := (u64(fiat.u1(x148)) + x136) + x150, x151 := bits.add_u64(x126, x141, u64(0x0)) + x152, x153 := bits.add_u64(x128, x143, u64(fiat.u1(x151))) + x154, x155 := bits.add_u64(x130, x145, u64(fiat.u1(x153))) + x156, x157 := bits.add_u64(x132, x147, u64(fiat.u1(x155))) + x158, x159 := bits.add_u64(x134, x149, u64(fiat.u1(x157))) + _, x160 := bits.mul_u64(x150, 0xd2b51da312547e1b) + x163, x162 := bits.mul_u64(x160, 0x1000000000000000) + x165, x164 := bits.mul_u64(x160, 0x14def9dea2f79cd6) + x167, x166 := bits.mul_u64(x160, 0x5812631a5cf5d3ed) + x168, x169 := bits.add_u64(x167, x164, u64(0x0)) + x170 := (u64(fiat.u1(x169)) + x165) + _, x172 := bits.add_u64(x150, x166, u64(0x0)) + x173, x174 := bits.add_u64(x152, x168, u64(fiat.u1(x172))) + x175, x176 := bits.add_u64(x154, x170, u64(fiat.u1(x174))) + x177, x178 := bits.add_u64(x156, x162, u64(fiat.u1(x176))) + x179, x180 := bits.add_u64(x158, x163, u64(fiat.u1(x178))) + x181 := (u64(fiat.u1(x180)) + u64(fiat.u1(x159))) + x182, x183 := bits.sub_u64(x173, 0x5812631a5cf5d3ed, u64(0x0)) + x184, x185 := bits.sub_u64(x175, 0x14def9dea2f79cd6, u64(fiat.u1(x183))) + x186, x187 := bits.sub_u64(x177, u64(0x0), u64(fiat.u1(x185))) + x188, x189 := bits.sub_u64(x179, 0x1000000000000000, u64(fiat.u1(x187))) + _, x191 := bits.sub_u64(x181, u64(0x0), u64(fiat.u1(x189))) + x192 := fiat.cmovznz_u64(fiat.u1(x191), x182, x173) + x193 := fiat.cmovznz_u64(fiat.u1(x191), x184, x175) + x194 := fiat.cmovznz_u64(fiat.u1(x191), x186, x177) + x195 := fiat.cmovznz_u64(fiat.u1(x191), x188, x179) + out1[0] = x192 + out1[1] = x193 + out1[2] = x194 + out1[3] = x195 +} + +fe_square :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) { + x1 := arg1[1] + x2 := arg1[2] + x3 := arg1[3] + x4 := arg1[0] + x6, x5 := bits.mul_u64(x4, arg1[3]) + x8, x7 := bits.mul_u64(x4, arg1[2]) + x10, x9 := bits.mul_u64(x4, 
arg1[1]) + x12, x11 := bits.mul_u64(x4, arg1[0]) + x13, x14 := bits.add_u64(x12, x9, u64(0x0)) + x15, x16 := bits.add_u64(x10, x7, u64(fiat.u1(x14))) + x17, x18 := bits.add_u64(x8, x5, u64(fiat.u1(x16))) + x19 := (u64(fiat.u1(x18)) + x6) + _, x20 := bits.mul_u64(x11, 0xd2b51da312547e1b) + x23, x22 := bits.mul_u64(x20, 0x1000000000000000) + x25, x24 := bits.mul_u64(x20, 0x14def9dea2f79cd6) + x27, x26 := bits.mul_u64(x20, 0x5812631a5cf5d3ed) + x28, x29 := bits.add_u64(x27, x24, u64(0x0)) + x30 := (u64(fiat.u1(x29)) + x25) + _, x32 := bits.add_u64(x11, x26, u64(0x0)) + x33, x34 := bits.add_u64(x13, x28, u64(fiat.u1(x32))) + x35, x36 := bits.add_u64(x15, x30, u64(fiat.u1(x34))) + x37, x38 := bits.add_u64(x17, x22, u64(fiat.u1(x36))) + x39, x40 := bits.add_u64(x19, x23, u64(fiat.u1(x38))) + x42, x41 := bits.mul_u64(x1, arg1[3]) + x44, x43 := bits.mul_u64(x1, arg1[2]) + x46, x45 := bits.mul_u64(x1, arg1[1]) + x48, x47 := bits.mul_u64(x1, arg1[0]) + x49, x50 := bits.add_u64(x48, x45, u64(0x0)) + x51, x52 := bits.add_u64(x46, x43, u64(fiat.u1(x50))) + x53, x54 := bits.add_u64(x44, x41, u64(fiat.u1(x52))) + x55 := (u64(fiat.u1(x54)) + x42) + x56, x57 := bits.add_u64(x33, x47, u64(0x0)) + x58, x59 := bits.add_u64(x35, x49, u64(fiat.u1(x57))) + x60, x61 := bits.add_u64(x37, x51, u64(fiat.u1(x59))) + x62, x63 := bits.add_u64(x39, x53, u64(fiat.u1(x61))) + x64, x65 := bits.add_u64(u64(fiat.u1(x40)), x55, u64(fiat.u1(x63))) + _, x66 := bits.mul_u64(x56, 0xd2b51da312547e1b) + x69, x68 := bits.mul_u64(x66, 0x1000000000000000) + x71, x70 := bits.mul_u64(x66, 0x14def9dea2f79cd6) + x73, x72 := bits.mul_u64(x66, 0x5812631a5cf5d3ed) + x74, x75 := bits.add_u64(x73, x70, u64(0x0)) + x76 := (u64(fiat.u1(x75)) + x71) + _, x78 := bits.add_u64(x56, x72, u64(0x0)) + x79, x80 := bits.add_u64(x58, x74, u64(fiat.u1(x78))) + x81, x82 := bits.add_u64(x60, x76, u64(fiat.u1(x80))) + x83, x84 := bits.add_u64(x62, x68, u64(fiat.u1(x82))) + x85, x86 := bits.add_u64(x64, x69, u64(fiat.u1(x84))) + x87 := 
(u64(fiat.u1(x86)) + u64(fiat.u1(x65))) + x89, x88 := bits.mul_u64(x2, arg1[3]) + x91, x90 := bits.mul_u64(x2, arg1[2]) + x93, x92 := bits.mul_u64(x2, arg1[1]) + x95, x94 := bits.mul_u64(x2, arg1[0]) + x96, x97 := bits.add_u64(x95, x92, u64(0x0)) + x98, x99 := bits.add_u64(x93, x90, u64(fiat.u1(x97))) + x100, x101 := bits.add_u64(x91, x88, u64(fiat.u1(x99))) + x102 := (u64(fiat.u1(x101)) + x89) + x103, x104 := bits.add_u64(x79, x94, u64(0x0)) + x105, x106 := bits.add_u64(x81, x96, u64(fiat.u1(x104))) + x107, x108 := bits.add_u64(x83, x98, u64(fiat.u1(x106))) + x109, x110 := bits.add_u64(x85, x100, u64(fiat.u1(x108))) + x111, x112 := bits.add_u64(x87, x102, u64(fiat.u1(x110))) + _, x113 := bits.mul_u64(x103, 0xd2b51da312547e1b) + x116, x115 := bits.mul_u64(x113, 0x1000000000000000) + x118, x117 := bits.mul_u64(x113, 0x14def9dea2f79cd6) + x120, x119 := bits.mul_u64(x113, 0x5812631a5cf5d3ed) + x121, x122 := bits.add_u64(x120, x117, u64(0x0)) + x123 := (u64(fiat.u1(x122)) + x118) + _, x125 := bits.add_u64(x103, x119, u64(0x0)) + x126, x127 := bits.add_u64(x105, x121, u64(fiat.u1(x125))) + x128, x129 := bits.add_u64(x107, x123, u64(fiat.u1(x127))) + x130, x131 := bits.add_u64(x109, x115, u64(fiat.u1(x129))) + x132, x133 := bits.add_u64(x111, x116, u64(fiat.u1(x131))) + x134 := (u64(fiat.u1(x133)) + u64(fiat.u1(x112))) + x136, x135 := bits.mul_u64(x3, arg1[3]) + x138, x137 := bits.mul_u64(x3, arg1[2]) + x140, x139 := bits.mul_u64(x3, arg1[1]) + x142, x141 := bits.mul_u64(x3, arg1[0]) + x143, x144 := bits.add_u64(x142, x139, u64(0x0)) + x145, x146 := bits.add_u64(x140, x137, u64(fiat.u1(x144))) + x147, x148 := bits.add_u64(x138, x135, u64(fiat.u1(x146))) + x149 := (u64(fiat.u1(x148)) + x136) + x150, x151 := bits.add_u64(x126, x141, u64(0x0)) + x152, x153 := bits.add_u64(x128, x143, u64(fiat.u1(x151))) + x154, x155 := bits.add_u64(x130, x145, u64(fiat.u1(x153))) + x156, x157 := bits.add_u64(x132, x147, u64(fiat.u1(x155))) + x158, x159 := bits.add_u64(x134, x149, 
u64(fiat.u1(x157))) + _, x160 := bits.mul_u64(x150, 0xd2b51da312547e1b) + x163, x162 := bits.mul_u64(x160, 0x1000000000000000) + x165, x164 := bits.mul_u64(x160, 0x14def9dea2f79cd6) + x167, x166 := bits.mul_u64(x160, 0x5812631a5cf5d3ed) + x168, x169 := bits.add_u64(x167, x164, u64(0x0)) + x170 := (u64(fiat.u1(x169)) + x165) + _, x172 := bits.add_u64(x150, x166, u64(0x0)) + x173, x174 := bits.add_u64(x152, x168, u64(fiat.u1(x172))) + x175, x176 := bits.add_u64(x154, x170, u64(fiat.u1(x174))) + x177, x178 := bits.add_u64(x156, x162, u64(fiat.u1(x176))) + x179, x180 := bits.add_u64(x158, x163, u64(fiat.u1(x178))) + x181 := (u64(fiat.u1(x180)) + u64(fiat.u1(x159))) + x182, x183 := bits.sub_u64(x173, 0x5812631a5cf5d3ed, u64(0x0)) + x184, x185 := bits.sub_u64(x175, 0x14def9dea2f79cd6, u64(fiat.u1(x183))) + x186, x187 := bits.sub_u64(x177, u64(0x0), u64(fiat.u1(x185))) + x188, x189 := bits.sub_u64(x179, 0x1000000000000000, u64(fiat.u1(x187))) + _, x191 := bits.sub_u64(x181, u64(0x0), u64(fiat.u1(x189))) + x192 := fiat.cmovznz_u64(fiat.u1(x191), x182, x173) + x193 := fiat.cmovznz_u64(fiat.u1(x191), x184, x175) + x194 := fiat.cmovznz_u64(fiat.u1(x191), x186, x177) + x195 := fiat.cmovznz_u64(fiat.u1(x191), x188, x179) + out1[0] = x192 + out1[1] = x193 + out1[2] = x194 + out1[3] = x195 +} + +fe_add :: proc "contextless" (out1, arg1, arg2: ^Montgomery_Domain_Field_Element) { + x1, x2 := bits.add_u64(arg1[0], arg2[0], u64(0x0)) + x3, x4 := bits.add_u64(arg1[1], arg2[1], u64(fiat.u1(x2))) + x5, x6 := bits.add_u64(arg1[2], arg2[2], u64(fiat.u1(x4))) + x7, x8 := bits.add_u64(arg1[3], arg2[3], u64(fiat.u1(x6))) + x9, x10 := bits.sub_u64(x1, 0x5812631a5cf5d3ed, u64(0x0)) + x11, x12 := bits.sub_u64(x3, 0x14def9dea2f79cd6, u64(fiat.u1(x10))) + x13, x14 := bits.sub_u64(x5, u64(0x0), u64(fiat.u1(x12))) + x15, x16 := bits.sub_u64(x7, 0x1000000000000000, u64(fiat.u1(x14))) + _, x18 := bits.sub_u64(u64(fiat.u1(x8)), u64(0x0), u64(fiat.u1(x16))) + x19 := fiat.cmovznz_u64(fiat.u1(x18), x9, 
x1) + x20 := fiat.cmovznz_u64(fiat.u1(x18), x11, x3) + x21 := fiat.cmovznz_u64(fiat.u1(x18), x13, x5) + x22 := fiat.cmovznz_u64(fiat.u1(x18), x15, x7) + out1[0] = x19 + out1[1] = x20 + out1[2] = x21 + out1[3] = x22 +} + +fe_sub :: proc "contextless" (out1, arg1, arg2: ^Montgomery_Domain_Field_Element) { + x1, x2 := bits.sub_u64(arg1[0], arg2[0], u64(0x0)) + x3, x4 := bits.sub_u64(arg1[1], arg2[1], u64(fiat.u1(x2))) + x5, x6 := bits.sub_u64(arg1[2], arg2[2], u64(fiat.u1(x4))) + x7, x8 := bits.sub_u64(arg1[3], arg2[3], u64(fiat.u1(x6))) + x9 := fiat.cmovznz_u64(fiat.u1(x8), u64(0x0), 0xffffffffffffffff) + x10, x11 := bits.add_u64(x1, (x9 & 0x5812631a5cf5d3ed), u64(0x0)) + x12, x13 := bits.add_u64(x3, (x9 & 0x14def9dea2f79cd6), u64(fiat.u1(x11))) + x14, x15 := bits.add_u64(x5, u64(0x0), u64(fiat.u1(x13))) + x16, _ := bits.add_u64(x7, (x9 & 0x1000000000000000), u64(fiat.u1(x15))) + out1[0] = x10 + out1[1] = x12 + out1[2] = x14 + out1[3] = x16 +} + +fe_opp :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) { + x1, x2 := bits.sub_u64(u64(0x0), arg1[0], u64(0x0)) + x3, x4 := bits.sub_u64(u64(0x0), arg1[1], u64(fiat.u1(x2))) + x5, x6 := bits.sub_u64(u64(0x0), arg1[2], u64(fiat.u1(x4))) + x7, x8 := bits.sub_u64(u64(0x0), arg1[3], u64(fiat.u1(x6))) + x9 := fiat.cmovznz_u64(fiat.u1(x8), u64(0x0), 0xffffffffffffffff) + x10, x11 := bits.add_u64(x1, (x9 & 0x5812631a5cf5d3ed), u64(0x0)) + x12, x13 := bits.add_u64(x3, (x9 & 0x14def9dea2f79cd6), u64(fiat.u1(x11))) + x14, x15 := bits.add_u64(x5, u64(0x0), u64(fiat.u1(x13))) + x16, _ := bits.add_u64(x7, (x9 & 0x1000000000000000), u64(fiat.u1(x15))) + out1[0] = x10 + out1[1] = x12 + out1[2] = x14 + out1[3] = x16 +} + +fe_one :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element) { + out1[0] = 0xd6ec31748d98951d + out1[1] = 0xc6ef5bf4737dcf70 + out1[2] = 0xfffffffffffffffe + out1[3] = 0xfffffffffffffff +} + +fe_non_zero :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) -> u64 { + return arg1[0] | 
(arg1[1] | (arg1[2] | arg1[3])) +} + +@(optimization_mode = "none") +fe_cond_assign :: #force_no_inline proc "contextless" ( + out1, arg1: ^Montgomery_Domain_Field_Element, + arg2: int, +) { + x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0]) + x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1]) + x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2]) + x4 := fiat.cmovznz_u64(fiat.u1(arg2), out1[3], arg1[3]) + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = x4 +} + +fe_from_montgomery :: proc "contextless" ( + out1: ^Non_Montgomery_Domain_Field_Element, + arg1: ^Montgomery_Domain_Field_Element, +) { + x1 := arg1[0] + _, x2 := bits.mul_u64(x1, 0xd2b51da312547e1b) + x5, x4 := bits.mul_u64(x2, 0x1000000000000000) + x7, x6 := bits.mul_u64(x2, 0x14def9dea2f79cd6) + x9, x8 := bits.mul_u64(x2, 0x5812631a5cf5d3ed) + x10, x11 := bits.add_u64(x9, x6, u64(0x0)) + _, x13 := bits.add_u64(x1, x8, u64(0x0)) + x14, x15 := bits.add_u64(u64(0x0), x10, u64(fiat.u1(x13))) + x16, x17 := bits.add_u64(x14, arg1[1], u64(0x0)) + _, x18 := bits.mul_u64(x16, 0xd2b51da312547e1b) + x21, x20 := bits.mul_u64(x18, 0x1000000000000000) + x23, x22 := bits.mul_u64(x18, 0x14def9dea2f79cd6) + x25, x24 := bits.mul_u64(x18, 0x5812631a5cf5d3ed) + x26, x27 := bits.add_u64(x25, x22, u64(0x0)) + _, x29 := bits.add_u64(x16, x24, u64(0x0)) + x30, x31 := bits.add_u64( + (u64(fiat.u1(x17)) + (u64(fiat.u1(x15)) + (u64(fiat.u1(x11)) + x7))), + x26, + u64(fiat.u1(x29)), + ) + x32, x33 := bits.add_u64(x4, (u64(fiat.u1(x27)) + x23), u64(fiat.u1(x31))) + x34, x35 := bits.add_u64(x5, x20, u64(fiat.u1(x33))) + x36, x37 := bits.add_u64(x30, arg1[2], u64(0x0)) + x38, x39 := bits.add_u64(x32, u64(0x0), u64(fiat.u1(x37))) + x40, x41 := bits.add_u64(x34, u64(0x0), u64(fiat.u1(x39))) + _, x42 := bits.mul_u64(x36, 0xd2b51da312547e1b) + x45, x44 := bits.mul_u64(x42, 0x1000000000000000) + x47, x46 := bits.mul_u64(x42, 0x14def9dea2f79cd6) + x49, x48 := bits.mul_u64(x42, 0x5812631a5cf5d3ed) + x50, x51 := 
bits.add_u64(x49, x46, u64(0x0)) + _, x53 := bits.add_u64(x36, x48, u64(0x0)) + x54, x55 := bits.add_u64(x38, x50, u64(fiat.u1(x53))) + x56, x57 := bits.add_u64(x40, (u64(fiat.u1(x51)) + x47), u64(fiat.u1(x55))) + x58, x59 := bits.add_u64( + (u64(fiat.u1(x41)) + (u64(fiat.u1(x35)) + x21)), + x44, + u64(fiat.u1(x57)), + ) + x60, x61 := bits.add_u64(x54, arg1[3], u64(0x0)) + x62, x63 := bits.add_u64(x56, u64(0x0), u64(fiat.u1(x61))) + x64, x65 := bits.add_u64(x58, u64(0x0), u64(fiat.u1(x63))) + _, x66 := bits.mul_u64(x60, 0xd2b51da312547e1b) + x69, x68 := bits.mul_u64(x66, 0x1000000000000000) + x71, x70 := bits.mul_u64(x66, 0x14def9dea2f79cd6) + x73, x72 := bits.mul_u64(x66, 0x5812631a5cf5d3ed) + x74, x75 := bits.add_u64(x73, x70, u64(0x0)) + _, x77 := bits.add_u64(x60, x72, u64(0x0)) + x78, x79 := bits.add_u64(x62, x74, u64(fiat.u1(x77))) + x80, x81 := bits.add_u64(x64, (u64(fiat.u1(x75)) + x71), u64(fiat.u1(x79))) + x82, x83 := bits.add_u64( + (u64(fiat.u1(x65)) + (u64(fiat.u1(x59)) + x45)), + x68, + u64(fiat.u1(x81)), + ) + x84 := (u64(fiat.u1(x83)) + x69) + x85, x86 := bits.sub_u64(x78, 0x5812631a5cf5d3ed, u64(0x0)) + x87, x88 := bits.sub_u64(x80, 0x14def9dea2f79cd6, u64(fiat.u1(x86))) + x89, x90 := bits.sub_u64(x82, u64(0x0), u64(fiat.u1(x88))) + x91, x92 := bits.sub_u64(x84, 0x1000000000000000, u64(fiat.u1(x90))) + _, x94 := bits.sub_u64(u64(0x0), u64(0x0), u64(fiat.u1(x92))) + x95 := fiat.cmovznz_u64(fiat.u1(x94), x85, x78) + x96 := fiat.cmovznz_u64(fiat.u1(x94), x87, x80) + x97 := fiat.cmovznz_u64(fiat.u1(x94), x89, x82) + x98 := fiat.cmovznz_u64(fiat.u1(x94), x91, x84) + out1[0] = x95 + out1[1] = x96 + out1[2] = x97 + out1[3] = x98 +} + +fe_to_montgomery :: proc "contextless" ( + out1: ^Montgomery_Domain_Field_Element, + arg1: ^Non_Montgomery_Domain_Field_Element, +) { + x1 := arg1[1] + x2 := arg1[2] + x3 := arg1[3] + x4 := arg1[0] + x6, x5 := bits.mul_u64(x4, 0x399411b7c309a3d) + x8, x7 := bits.mul_u64(x4, 0xceec73d217f5be65) + x10, x9 := bits.mul_u64(x4, 
0xd00e1ba768859347) + x12, x11 := bits.mul_u64(x4, 0xa40611e3449c0f01) + x13, x14 := bits.add_u64(x12, x9, u64(0x0)) + x15, x16 := bits.add_u64(x10, x7, u64(fiat.u1(x14))) + x17, x18 := bits.add_u64(x8, x5, u64(fiat.u1(x16))) + _, x19 := bits.mul_u64(x11, 0xd2b51da312547e1b) + x22, x21 := bits.mul_u64(x19, 0x1000000000000000) + x24, x23 := bits.mul_u64(x19, 0x14def9dea2f79cd6) + x26, x25 := bits.mul_u64(x19, 0x5812631a5cf5d3ed) + x27, x28 := bits.add_u64(x26, x23, u64(0x0)) + _, x30 := bits.add_u64(x11, x25, u64(0x0)) + x31, x32 := bits.add_u64(x13, x27, u64(fiat.u1(x30))) + x33, x34 := bits.add_u64(x15, (u64(fiat.u1(x28)) + x24), u64(fiat.u1(x32))) + x35, x36 := bits.add_u64(x17, x21, u64(fiat.u1(x34))) + x38, x37 := bits.mul_u64(x1, 0x399411b7c309a3d) + x40, x39 := bits.mul_u64(x1, 0xceec73d217f5be65) + x42, x41 := bits.mul_u64(x1, 0xd00e1ba768859347) + x44, x43 := bits.mul_u64(x1, 0xa40611e3449c0f01) + x45, x46 := bits.add_u64(x44, x41, u64(0x0)) + x47, x48 := bits.add_u64(x42, x39, u64(fiat.u1(x46))) + x49, x50 := bits.add_u64(x40, x37, u64(fiat.u1(x48))) + x51, x52 := bits.add_u64(x31, x43, u64(0x0)) + x53, x54 := bits.add_u64(x33, x45, u64(fiat.u1(x52))) + x55, x56 := bits.add_u64(x35, x47, u64(fiat.u1(x54))) + x57, x58 := bits.add_u64( + ((u64(fiat.u1(x36)) + (u64(fiat.u1(x18)) + x6)) + x22), + x49, + u64(fiat.u1(x56)), + ) + _, x59 := bits.mul_u64(x51, 0xd2b51da312547e1b) + x62, x61 := bits.mul_u64(x59, 0x1000000000000000) + x64, x63 := bits.mul_u64(x59, 0x14def9dea2f79cd6) + x66, x65 := bits.mul_u64(x59, 0x5812631a5cf5d3ed) + x67, x68 := bits.add_u64(x66, x63, u64(0x0)) + _, x70 := bits.add_u64(x51, x65, u64(0x0)) + x71, x72 := bits.add_u64(x53, x67, u64(fiat.u1(x70))) + x73, x74 := bits.add_u64(x55, (u64(fiat.u1(x68)) + x64), u64(fiat.u1(x72))) + x75, x76 := bits.add_u64(x57, x61, u64(fiat.u1(x74))) + x78, x77 := bits.mul_u64(x2, 0x399411b7c309a3d) + x80, x79 := bits.mul_u64(x2, 0xceec73d217f5be65) + x82, x81 := bits.mul_u64(x2, 0xd00e1ba768859347) + x84, 
x83 := bits.mul_u64(x2, 0xa40611e3449c0f01) + x85, x86 := bits.add_u64(x84, x81, u64(0x0)) + x87, x88 := bits.add_u64(x82, x79, u64(fiat.u1(x86))) + x89, x90 := bits.add_u64(x80, x77, u64(fiat.u1(x88))) + x91, x92 := bits.add_u64(x71, x83, u64(0x0)) + x93, x94 := bits.add_u64(x73, x85, u64(fiat.u1(x92))) + x95, x96 := bits.add_u64(x75, x87, u64(fiat.u1(x94))) + x97, x98 := bits.add_u64( + ((u64(fiat.u1(x76)) + (u64(fiat.u1(x58)) + (u64(fiat.u1(x50)) + x38))) + x62), + x89, + u64(fiat.u1(x96)), + ) + _, x99 := bits.mul_u64(x91, 0xd2b51da312547e1b) + x102, x101 := bits.mul_u64(x99, 0x1000000000000000) + x104, x103 := bits.mul_u64(x99, 0x14def9dea2f79cd6) + x106, x105 := bits.mul_u64(x99, 0x5812631a5cf5d3ed) + x107, x108 := bits.add_u64(x106, x103, u64(0x0)) + _, x110 := bits.add_u64(x91, x105, u64(0x0)) + x111, x112 := bits.add_u64(x93, x107, u64(fiat.u1(x110))) + x113, x114 := bits.add_u64(x95, (u64(fiat.u1(x108)) + x104), u64(fiat.u1(x112))) + x115, x116 := bits.add_u64(x97, x101, u64(fiat.u1(x114))) + x118, x117 := bits.mul_u64(x3, 0x399411b7c309a3d) + x120, x119 := bits.mul_u64(x3, 0xceec73d217f5be65) + x122, x121 := bits.mul_u64(x3, 0xd00e1ba768859347) + x124, x123 := bits.mul_u64(x3, 0xa40611e3449c0f01) + x125, x126 := bits.add_u64(x124, x121, u64(0x0)) + x127, x128 := bits.add_u64(x122, x119, u64(fiat.u1(x126))) + x129, x130 := bits.add_u64(x120, x117, u64(fiat.u1(x128))) + x131, x132 := bits.add_u64(x111, x123, u64(0x0)) + x133, x134 := bits.add_u64(x113, x125, u64(fiat.u1(x132))) + x135, x136 := bits.add_u64(x115, x127, u64(fiat.u1(x134))) + x137, x138 := bits.add_u64( + ((u64(fiat.u1(x116)) + (u64(fiat.u1(x98)) + (u64(fiat.u1(x90)) + x78))) + x102), + x129, + u64(fiat.u1(x136)), + ) + _, x139 := bits.mul_u64(x131, 0xd2b51da312547e1b) + x142, x141 := bits.mul_u64(x139, 0x1000000000000000) + x144, x143 := bits.mul_u64(x139, 0x14def9dea2f79cd6) + x146, x145 := bits.mul_u64(x139, 0x5812631a5cf5d3ed) + x147, x148 := bits.add_u64(x146, x143, u64(0x0)) + _, x150 
:= bits.add_u64(x131, x145, u64(0x0)) + x151, x152 := bits.add_u64(x133, x147, u64(fiat.u1(x150))) + x153, x154 := bits.add_u64(x135, (u64(fiat.u1(x148)) + x144), u64(fiat.u1(x152))) + x155, x156 := bits.add_u64(x137, x141, u64(fiat.u1(x154))) + x157 := ((u64(fiat.u1(x156)) + (u64(fiat.u1(x138)) + (u64(fiat.u1(x130)) + x118))) + x142) + x158, x159 := bits.sub_u64(x151, 0x5812631a5cf5d3ed, u64(0x0)) + x160, x161 := bits.sub_u64(x153, 0x14def9dea2f79cd6, u64(fiat.u1(x159))) + x162, x163 := bits.sub_u64(x155, u64(0x0), u64(fiat.u1(x161))) + x164, x165 := bits.sub_u64(x157, 0x1000000000000000, u64(fiat.u1(x163))) + _, x167 := bits.sub_u64(u64(0x0), u64(0x0), u64(fiat.u1(x165))) + x168 := fiat.cmovznz_u64(fiat.u1(x167), x158, x151) + x169 := fiat.cmovznz_u64(fiat.u1(x167), x160, x153) + x170 := fiat.cmovznz_u64(fiat.u1(x167), x162, x155) + x171 := fiat.cmovznz_u64(fiat.u1(x167), x164, x157) + out1[0] = x168 + out1[1] = x169 + out1[2] = x170 + out1[3] = x171 +} From 563c52741903d3a930fd4c4f8128c275fefc1399 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Thu, 21 Mar 2024 02:17:59 +0900 Subject: [PATCH 11/14] core/crypto/_edwards25519: Initial import --- core/crypto/_edwards25519/edwards25519.odin | 428 ++++++++++++++++++ .../_edwards25519/edwards25519_scalar.odin | 61 +++ .../edwards25519_scalar_mul.odin | 288 ++++++++++++ core/crypto/_fiat/field_curve25519/field.odin | 134 ++++-- .../_fiat/field_curve25519/field51.odin | 5 +- .../crypto/_fiat/field_scalar25519/field.odin | 8 +- core/crypto/x25519/x25519.odin | 10 +- 7 files changed, 885 insertions(+), 49 deletions(-) create mode 100644 core/crypto/_edwards25519/edwards25519.odin create mode 100644 core/crypto/_edwards25519/edwards25519_scalar.odin create mode 100644 core/crypto/_edwards25519/edwards25519_scalar_mul.odin diff --git a/core/crypto/_edwards25519/edwards25519.odin b/core/crypto/_edwards25519/edwards25519.odin new file mode 100644 index 000000000..952bb9ef8 --- /dev/null +++ 
b/core/crypto/_edwards25519/edwards25519.odin @@ -0,0 +1,428 @@ +package _edwards25519 + +/* +This implements the edwards25519 composite-order group, primarily for +the purpose of implementing X25519, Ed25519, and ristretto255. Use of +this package for other purposes is NOT RECOMMENDED. + +See: +- https://eprint.iacr.org/2011/368.pdf +- https://datatracker.ietf.org/doc/html/rfc8032 +- https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html +*/ + +import "base:intrinsics" +import "core:crypto" +import field "core:crypto/_fiat/field_curve25519" +import "core:mem" + +// Group_Element is an edwards25519 group element, as extended homogeneous +// coordinates, which represents the affine point `(x, y)` as `(X, Y, Z, T)`, +// with the relations `x = X/Z`, `y = Y/Z`, and `x * y = T/Z`. +// +// d = -121665/121666 = 37095705934669439343138083508754565189542113879843219016388785533085940283555 +// a = -1 +// +// Notes: +// - There is considerable scope for optimization, however that +// will not change the external API, and this is simple and reasonably +// performant. +// - The API deliberately makes it hard to create arbitrary group +// elements that are not on the curve. +// - The group element decoding routine takes the opinionated stance of +// rejecting non-canonical encodings. 
+
+FE_D := field.Tight_Field_Element {
+	929955233495203,
+	466365720129213,
+	1662059464998953,
+	2033849074728123,
+	1442794654840575,
+}
+@(private)
+FE_A := field.Tight_Field_Element {
+	2251799813685228,
+	2251799813685247,
+	2251799813685247,
+	2251799813685247,
+	2251799813685247,
+}
+@(private)
+FE_D2 := field.Tight_Field_Element {
+	1859910466990425,
+	932731440258426,
+	1072319116312658,
+	1815898335770999,
+	633789495995903,
+}
+@(private)
+GE_BASEPOINT := Group_Element {
+	field.Tight_Field_Element {
+		1738742601995546,
+		1146398526822698,
+		2070867633025821,
+		562264141797630,
+		587772402128613,
+	},
+	field.Tight_Field_Element {
+		1801439850948184,
+		1351079888211148,
+		450359962737049,
+		900719925474099,
+		1801439850948198,
+	},
+	field.Tight_Field_Element{1, 0, 0, 0, 0},
+	field.Tight_Field_Element {
+		1841354044333475,
+		16398895984059,
+		755974180946558,
+		900171276175154,
+		1821297809914039,
+	},
+}
+GE_IDENTITY := Group_Element {
+	field.Tight_Field_Element{0, 0, 0, 0, 0},
+	field.Tight_Field_Element{1, 0, 0, 0, 0},
+	field.Tight_Field_Element{1, 0, 0, 0, 0},
+	field.Tight_Field_Element{0, 0, 0, 0, 0},
+}
+
+Group_Element :: struct {
+	x: field.Tight_Field_Element,
+	y: field.Tight_Field_Element,
+	z: field.Tight_Field_Element,
+	t: field.Tight_Field_Element,
+}
+
+ge_clear :: proc "contextless" (ge: ^Group_Element) {
+	mem.zero_explicit(ge, size_of(Group_Element))
+}
+
+ge_set :: proc "contextless" (ge, a: ^Group_Element) {
+	field.fe_set(&ge.x, &a.x)
+	field.fe_set(&ge.y, &a.y)
+	field.fe_set(&ge.z, &a.z)
+	field.fe_set(&ge.t, &a.t)
+}
+
+@(require_results)
+ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
+	if len(b) != 32 {
+		intrinsics.trap()
+	}
+	b_ := transmute(^[32]byte)(raw_data(b))
+
+	// Do the work in a scratch element, so that ge is unchanged on
+	// failure.
+	tmp: Group_Element = ---
+	defer ge_clear(&tmp)
+	field.fe_one(&tmp.z) // Z = 1
+
+	// The encoding is the y-coordinate, with the x-coordinate polarity
+	// (odd/even) encoded in the MSB.
+	field.fe_from_bytes(&tmp.y, b_) // ignores high bit
+
+	// Recover the candidate x-coordinate via the curve equation:
+	// x^2 = (y^2 - 1) / (d * y^2 + 1) (mod p)
+
+	fe_tmp := &tmp.t // Use this to store intermediaries.
+	fe_one := &tmp.z
+
+	// x = num = y^2 - 1
+	field.fe_carry_square(fe_tmp, field.fe_relax_cast(&tmp.y)) // fe_tmp = y^2
+	field.fe_carry_sub(&tmp.x, fe_tmp, fe_one)
+
+	// den = d * y^2 + 1
+	field.fe_carry_mul(fe_tmp, field.fe_relax_cast(fe_tmp), field.fe_relax_cast(&FE_D))
+	field.fe_carry_add(fe_tmp, fe_tmp, fe_one)
+
+	// x = invsqrt(den/num)
+	is_square := field.fe_carry_sqrt_ratio_m1(
+		&tmp.x,
+		field.fe_relax_cast(&tmp.x),
+		field.fe_relax_cast(fe_tmp),
+	)
+	if is_square == 0 {
+		return false
+	}
+
+	// Pick the right x-coordinate.
+	field.fe_cond_negate(&tmp.x, &tmp.x, int(b[31] >> 7))
+
+	// t = x * y
+	field.fe_carry_mul(&tmp.t, field.fe_relax_cast(&tmp.x), field.fe_relax_cast(&tmp.y))
+
+	// Reject non-canonical encodings of ge.
+	buf: [32]byte = ---
+	field.fe_to_bytes(&buf, &tmp.y)
+	buf[31] |= byte(field.fe_is_negative(&tmp.x)) << 7
+	is_canonical := crypto.compare_constant_time(b, buf[:])
+
+	ge_cond_assign(ge, &tmp, is_canonical)
+
+	mem.zero_explicit(&buf, size_of(buf))
+
+	return is_canonical == 1
+}
+
+ge_bytes :: proc "contextless" (ge: ^Group_Element, dst: []byte) {
+	if len(dst) != 32 {
+		intrinsics.trap()
+	}
+	dst_ := transmute(^[32]byte)(raw_data(dst))
+
+	// Convert the element to affine (x, y) representation.
+	x, y, z_inv: field.Tight_Field_Element = ---, ---, ---
+	field.fe_carry_inv(&z_inv, field.fe_relax_cast(&ge.z))
+	field.fe_carry_mul(&x, field.fe_relax_cast(&ge.x), field.fe_relax_cast(&z_inv))
+	field.fe_carry_mul(&y, field.fe_relax_cast(&ge.y), field.fe_relax_cast(&z_inv))
+
+	// Encode the y-coordinate.
+	field.fe_to_bytes(dst_, &y)
+
+	// Copy the sign of x (the low bit of its *canonical* encoding, not of the
+	// unreduced limb x[0]) to the most significant bit of the encoded y.
+	dst_[31] |= byte(field.fe_is_negative(&x)) << 7
+
+	field.fe_clear_vec([]^field.Tight_Field_Element{&x, &y, &z_inv})
+}
+
+ge_identity :: proc "contextless" (ge: ^Group_Element) {
+	field.fe_zero(&ge.x)
+	field.fe_one(&ge.y)
+	field.fe_one(&ge.z)
+	field.fe_zero(&ge.t)
+}
+
+ge_generator :: proc "contextless" (ge: ^Group_Element) {
+	ge_set(ge, &GE_BASEPOINT)
+}
+
+@(private)
+Addend_Group_Element :: struct {
+	y2_minus_x2: field.Loose_Field_Element, // t1
+	y2_plus_x2: field.Loose_Field_Element, // t3
+	k_times_t2: field.Tight_Field_Element, // t4
+	two_times_z2: field.Loose_Field_Element, // t5
+}
+
+@(private)
+ge_addend_set :: proc "contextless" (ge_a: ^Addend_Group_Element, ge: ^Group_Element) {
+	field.fe_sub(&ge_a.y2_minus_x2, &ge.y, &ge.x)
+	field.fe_add(&ge_a.y2_plus_x2, &ge.y, &ge.x)
+	field.fe_carry_mul(&ge_a.k_times_t2, field.fe_relax_cast(&FE_D2), field.fe_relax_cast(&ge.t))
+	field.fe_add(&ge_a.two_times_z2, &ge.z, &ge.z)
+}
+
+@(private)
+ge_addend_conditional_assign :: proc "contextless" (ge_a, a: ^Addend_Group_Element, ctrl: int) {
+	field.fe_cond_select(&ge_a.y2_minus_x2, &ge_a.y2_minus_x2, &a.y2_minus_x2, ctrl)
+	field.fe_cond_select(&ge_a.y2_plus_x2, &ge_a.y2_plus_x2, &a.y2_plus_x2, ctrl)
+	field.fe_cond_select(&ge_a.k_times_t2, &ge_a.k_times_t2, &a.k_times_t2, ctrl)
+	field.fe_cond_select(&ge_a.two_times_z2, &ge_a.two_times_z2, &a.two_times_z2, ctrl)
+}
+
+@(private)
+Add_Scratch :: struct {
+	A, B, C, D: field.Tight_Field_Element,
+	E, F, G, H: field.Loose_Field_Element,
+	t0, t2: field.Loose_Field_Element,
+}
+
+ge_add :: proc "contextless" (ge, a, b: ^Group_Element) {
+	b_: Addend_Group_Element = ---
+	ge_addend_set(&b_, b)
+
+	scratch: Add_Scratch = ---
+	ge_add_addend(ge, a, &b_, &scratch)
+
+	mem.zero_explicit(&b_, size_of(Addend_Group_Element))
+	mem.zero_explicit(&scratch, size_of(Add_Scratch))
+}
+
+@(private)
+ge_add_addend :: proc "contextless" (
+	ge, a: ^Group_Element,
+	b: ^Addend_Group_Element,
+	scratch: ^Add_Scratch,
+) {
+	// https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#addition-add-2008-hwcd-3
+	// Assumptions: k=2*d.
+	//
+	// t0 = Y1-X1
+	// t1 = Y2-X2
+	// A = t0*t1
+	// t2 = Y1+X1
+	// t3 = Y2+X2
+	// B = t2*t3
+	// t4 = k*T2
+	// C = T1*t4
+	// t5 = 2*Z2
+	// D = Z1*t5
+	// E = B-A
+	// F = D-C
+	// G = D+C
+	// H = B+A
+	// X3 = E*F
+	// Y3 = G*H
+	// T3 = E*H
+	// Z3 = F*G
+	//
+	// In order to make the scalar multiply faster, the addend is provided
+	// as a `Addend_Group_Element` with t1, t3, t4, and t5 precomputed, as
+	// it is trivially obvious that those are the only values used by the
+	// formula that are directly dependent on `b`, and are only dependent
+	// on `b` and constants. This saves 1 sub, 2 adds, and 1 multiply,
+	// each time the intermediate representation can be reused.
+
+	A, B, C, D := &scratch.A, &scratch.B, &scratch.C, &scratch.D
+	E, F, G, H := &scratch.E, &scratch.F, &scratch.G, &scratch.H
+	t0, t2 := &scratch.t0, &scratch.t2
+
+	field.fe_sub(t0, &a.y, &a.x)
+	t1 := &b.y2_minus_x2
+	field.fe_carry_mul(A, t0, t1)
+	field.fe_add(t2, &a.y, &a.x)
+	t3 := &b.y2_plus_x2
+	field.fe_carry_mul(B, t2, t3)
+	t4 := &b.k_times_t2
+	field.fe_carry_mul(C, field.fe_relax_cast(&a.t), field.fe_relax_cast(t4))
+	t5 := &b.two_times_z2
+	field.fe_carry_mul(D, field.fe_relax_cast(&a.z), t5)
+	field.fe_sub(E, B, A)
+	field.fe_sub(F, D, C)
+	field.fe_add(G, D, C)
+	field.fe_add(H, B, A)
+	field.fe_carry_mul(&ge.x, E, F)
+	field.fe_carry_mul(&ge.y, G, H)
+	field.fe_carry_mul(&ge.t, E, H)
+	field.fe_carry_mul(&ge.z, F, G)
+}
+
+@(private)
+Double_Scratch :: struct {
+	A, B, C, D, G: field.Tight_Field_Element,
+	t0, t2, t3: field.Tight_Field_Element,
+	E, F, H: field.Loose_Field_Element,
+	t1: field.Loose_Field_Element,
+}
+
+ge_double :: proc "contextless" (ge, a: ^Group_Element, scratch: ^Double_Scratch = nil) {
+	// https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-dbl-2008-hwcd
+	//
+	// A = X1^2
+	// B = Y1^2
+	// t0 = Z1^2
+	// C = 2*t0
+	// D = a*A
+	// t1 = X1+Y1
+	// t2 = t1^2
+	// t3 = t2-A
+	// E = t3-B
+	// G = D+B
+	// F = G-C
+	// H = D-B
+	// X3 = E*F
+	// Y3 = G*H
+	// T3 = E*H
+	// Z3 = F*G
+
+	tmp: Double_Scratch = --- // Procedure scope: it is referenced after the `if` below.
+	sanitize, scratch := scratch == nil, scratch
+	if sanitize {
+		scratch = &tmp
+	}
+
+	A, B, C, D, G := &scratch.A, &scratch.B, &scratch.C, &scratch.D, &scratch.G
+	t0, t2, t3 := &scratch.t0, &scratch.t2, &scratch.t3
+	E, F, H := &scratch.E, &scratch.F, &scratch.H
+	t1 := &scratch.t1
+
+	field.fe_carry_square(A, field.fe_relax_cast(&a.x))
+	field.fe_carry_square(B, field.fe_relax_cast(&a.y))
+	field.fe_carry_square(t0, field.fe_relax_cast(&a.z))
+	field.fe_carry_add(C, t0, t0)
+	field.fe_carry_mul(D, field.fe_relax_cast(&FE_A), field.fe_relax_cast(A))
+	field.fe_add(t1, &a.x, &a.y)
+	field.fe_carry_square(t2, t1)
+	field.fe_carry_sub(t3, t2, A)
+	field.fe_sub(E, t3, B)
+	field.fe_carry_add(G, D, B)
+	field.fe_sub(F, G, C)
+	field.fe_sub(H, D, B)
+	G_ := field.fe_relax_cast(G)
+	field.fe_carry_mul(&ge.x, E, F)
+	field.fe_carry_mul(&ge.y, G_, H)
+	field.fe_carry_mul(&ge.t, E, H)
+	field.fe_carry_mul(&ge.z, F, G_)
+
+	if sanitize {
+		mem.zero_explicit(scratch, size_of(Double_Scratch))
+	}
+}
+
+ge_negate :: proc "contextless" (ge, a: ^Group_Element) {
+	field.fe_carry_opp(&ge.x, &a.x)
+	field.fe_set(&ge.y, &a.y)
+	field.fe_set(&ge.z, &a.z)
+	field.fe_carry_opp(&ge.t, &a.t)
+}
+
+ge_cond_negate :: proc "contextless" (ge, a: ^Group_Element, ctrl: int) {
+	tmp: Group_Element = ---
+	ge_negate(&tmp, a)
+	ge_cond_select(ge, a, &tmp, ctrl) // ge = ctrl ? -a : a, also when ge != a.
+
+	ge_clear(&tmp)
+}
+
+ge_cond_assign :: proc "contextless" (ge, a: ^Group_Element, ctrl: int) {
+	field.fe_cond_assign(&ge.x, &a.x, ctrl)
+	field.fe_cond_assign(&ge.y, &a.y, ctrl)
+	field.fe_cond_assign(&ge.z, &a.z, ctrl)
+	field.fe_cond_assign(&ge.t, &a.t, ctrl)
+}
+
+ge_cond_select :: proc "contextless" (ge, a, b: ^Group_Element, ctrl: int) {
+	field.fe_cond_select(&ge.x, &a.x, &b.x, ctrl)
+	field.fe_cond_select(&ge.y, &a.y, &b.y, ctrl)
+	field.fe_cond_select(&ge.z, &a.z, &b.z, ctrl)
+	field.fe_cond_select(&ge.t, &a.t, &b.t, ctrl)
+}
+
+@(require_results)
+ge_equal :: proc "contextless" (a, b: ^Group_Element) -> int {
+	// (x, y) ?= (x', y') -> (X/Z, Y/Z) ?= (X'/Z', Y'/Z')
+	// X/Z ?= X'/Z', Y/Z ?= Y'/Z' -> X*Z' ?= X'*Z, Y*Z' ?= Y'*Z
+	ax_bz, bx_az, ay_bz, by_az: field.Tight_Field_Element = ---, ---, ---, ---
+	field.fe_carry_mul(&ax_bz, field.fe_relax_cast(&a.x), field.fe_relax_cast(&b.z))
+	field.fe_carry_mul(&bx_az, field.fe_relax_cast(&b.x), field.fe_relax_cast(&a.z))
+	field.fe_carry_mul(&ay_bz, field.fe_relax_cast(&a.y), field.fe_relax_cast(&b.z))
+	field.fe_carry_mul(&by_az, field.fe_relax_cast(&b.y), field.fe_relax_cast(&a.z))
+
+	ret := field.fe_equal(&ax_bz, &bx_az) & field.fe_equal(&ay_bz, &by_az)
+
+	field.fe_clear_vec([]^field.Tight_Field_Element{&ax_bz, &ay_bz, &bx_az, &by_az})
+
+	return ret
+}
+
+@(require_results)
+ge_is_small_order :: proc "contextless" (ge: ^Group_Element) -> bool {
+	tmp: Group_Element = ---
+	ge_double(&tmp, ge)
+	ge_double(&tmp, &tmp)
+	ge_double(&tmp, &tmp)
+	return ge_equal(&tmp, &GE_IDENTITY) == 1
+}
+
+@(require_results)
+ge_in_prime_order_subgroup_vartime :: proc "contextless" (ge: ^Group_Element) -> bool {
+	// This is currently *very* expensive. The faster method would be
+	// something like (https://eprint.iacr.org/2022/1164.pdf), however
+	// that is a ~50% speedup, and a lot of added complexity for something
+	// that is better solved by "just use ristretto255".
+	tmp: Group_Element = ---
+	_ge_scalarmult(&tmp, ge, &SC_ELL, true)
+	return ge_equal(&tmp, &GE_IDENTITY) == 1
+}
diff --git a/core/crypto/_edwards25519/edwards25519_scalar.odin b/core/crypto/_edwards25519/edwards25519_scalar.odin
new file mode 100644
index 000000000..2644fe5f7
--- /dev/null
+++ b/core/crypto/_edwards25519/edwards25519_scalar.odin
@@ -0,0 +1,61 @@
+package _edwards25519
+
+import "base:intrinsics"
+import field "core:crypto/_fiat/field_scalar25519"
+import "core:mem"
+
+Scalar :: field.Montgomery_Domain_Field_Element
+
+// WARNING: This is non-canonical and only to be used when checking if
+// a group element is on the prime-order subgroup.
+@(private)
+SC_ELL := field.Non_Montgomery_Domain_Field_Element {
+	field.ELL[0],
+	field.ELL[1],
+	field.ELL[2],
+	field.ELL[3],
+}
+
+sc_set_u64 :: proc "contextless" (sc: ^Scalar, i: u64) {
+	tmp := field.Non_Montgomery_Domain_Field_Element{i, 0, 0, 0}
+	field.fe_to_montgomery(sc, &tmp)
+
+	mem.zero_explicit(&tmp, size_of(tmp))
+}
+
+@(require_results)
+sc_set_bytes :: proc "contextless" (sc: ^Scalar, b: []byte) -> bool {
+	if len(b) != 32 {
+		intrinsics.trap()
+	}
+	b_ := transmute(^[32]byte)(raw_data(b))
+	return field.fe_from_bytes(sc, b_)
+}
+
+sc_set_bytes_rfc8032 :: proc "contextless" (sc: ^Scalar, b: []byte) {
+	if len(b) != 32 {
+		intrinsics.trap()
+	}
+	b_ := transmute(^[32]byte)(raw_data(b))
+	field.fe_from_bytes_rfc8032(sc, b_)
+}
+
+sc_clear :: proc "contextless" (sc: ^Scalar) {
+	mem.zero_explicit(sc, size_of(Scalar))
+}
+
+sc_set :: field.fe_set
+sc_set_bytes_wide :: field.fe_from_bytes_wide
+sc_bytes :: field.fe_to_bytes
+
+sc_zero :: field.fe_zero
+sc_one :: field.fe_one
+
+sc_add :: field.fe_add
+sc_sub :: field.fe_sub
+sc_negate :: field.fe_opp
+sc_mul :: field.fe_mul
+sc_square :: field.fe_square
+
+sc_cond_assign :: field.fe_cond_assign
+sc_equal :: field.fe_equal
diff --git a/core/crypto/_edwards25519/edwards25519_scalar_mul.odin b/core/crypto/_edwards25519/edwards25519_scalar_mul.odin
new file mode 100644 index 000000000..757a51257 --- /dev/null +++ b/core/crypto/_edwards25519/edwards25519_scalar_mul.odin @@ -0,0 +1,288 @@ +package _edwards25519 + +import field "core:crypto/_fiat/field_scalar25519" +import "core:math/bits" +import "core:mem" + +// GE_BASEPOINT_TABLE is 1 * G, ... 15 * G, in precomputed format. +// +// Note: When generating, the values were reduced to Tight_Field_Element +// ranges, even though that is not required. +@(private) +GE_BASEPOINT_TABLE := Multiply_Table { + { + {62697248952638, 204681361388450, 631292143396476, 338455783676468, 1213667448819585}, + {1288382639258501, 245678601348599, 269427782077623, 1462984067271730, 137412439391563}, + {301289933810280, 1259582250014073, 1422107436869536, 796239922652654, 1953934009299142}, + {2, 0, 0, 0, 0}, + }, + { + {1519297034332653, 1098796920435767, 1823476547744119, 808144629470969, 2110930855619772}, + {338005982828284, 1667856962156925, 100399270107451, 1604566703601691, 1950338038771369}, + {1920505767731247, 1443759578976892, 1659852098357048, 1484431291070208, 275018744912646}, + {763163817085987, 2195095074806923, 2167883174351839, 1868059999999762, 911071066608705}, + }, + { + {960627541894068, 1314966688943942, 1126875971034044, 2059608312958945, 605975666152586}, + {1714478358025626, 2209607666607510, 1600912834284834, 496072478982142, 481970031861896}, + {851735079403194, 1088965826757164, 141569479297499, 602804610059257, 2004026468601520}, + {197585529552380, 324719066578543, 564481854250498, 1173818332764578, 35452976395676}, + }, + { + {1152980410747203, 2196804280851952, 25745194962557, 1915167295473129, 1266299690309224}, + {809905889679060, 979732230071345, 1509972345538142, 188492426534402, 818965583123815}, + {997685409185036, 1451818320876327, 2126681166774509, 2000509606057528, 235432372486854}, + {887734189279642, 1460338685162044, 877378220074262, 102436391401299, 153369156847490}, + }, + { + {2056621900836770, 1821657694132497, 1627986892909426, 
1163363868678833, 1108873376459226}, + {1187697490593623, 1066539945237335, 885654531892000, 1357534489491782, 359370291392448}, + {1509033452137525, 1305318174298508, 613642471748944, 1987256352550234, 1044283663101541}, + {220105720697037, 387661783287620, 328296827867762, 360035589590664, 795213236824054}, + }, + { + {1820794733038396, 1612235121681074, 757405923441402, 1094031020892801, 231025333128907}, + {1639067873254194, 1484176557946322, 300800382144789, 1329915446659183, 1211704578730455}, + {641900794791527, 1711751746971612, 179044712319955, 576455585963824, 1852617592509865}, + {743549047192397, 685091042550147, 1952415336873496, 1965124675654685, 513364998442917}, + }, + { + {1004557076870448, 1762911374844520, 1330807633622723, 384072910939787, 953849032243810}, + {2178275058221458, 257933183722891, 376684351537894, 2010189102001786, 1981824297484148}, + {1332915663881114, 1286540505502549, 1741691283561518, 977214932156314, 1764059494778091}, + {429702949064027, 1368332611650677, 2019867176450999, 2212258376161746, 526160996742554}, + }, + { + {2098932988258576, 2203688382075948, 2120400160059479, 1748488020948146, 1203264167282624}, + {677131386735829, 1850249298025188, 672782146532031, 2144145693078904, 2088656272813787}, + {1065622343976192, 1573853211848116, 223560413590068, 333846833073379, 27832122205830}, + {1781008836504573, 917619542051793, 544322748939913, 882577394308384, 1720521246471195}, + }, + { + {660120928379860, 2081944024858618, 1878411111349191, 424587356517195, 2111317439894005}, + {1834193977811532, 1864164086863319, 797334633289424, 150410812403062, 2085177078466389}, + {1438117271371866, 783915531014482, 388731514584658, 292113935417795, 1945855002546714}, + {1678140823166658, 679103239148744, 614102761596238, 1052962498997885, 1863983323810390}, + }, + { + {1690309392496233, 1116333140326275, 1377242323631039, 717196888780674, 82724646713353}, + {1722370213432106, 74265192976253, 264239578448472, 1714909985012994, 
2216984958602173}, + {2010482366920922, 1294036471886319, 566466395005815, 1631955803657320, 1751698647538458}, + {1073230604155753, 1159087041338551, 1664057985455483, 127472702826203, 1339591128522371}, + }, + { + {478053307175577, 2179515791720985, 21146535423512, 1831683844029536, 462805561553981}, + {1945267486565588, 1298536818409655, 2214511796262989, 1904981051429012, 252904800782086}, + {268945954671210, 222740425595395, 1208025911856230, 1080418823003555, 75929831922483}, + {1884784014268948, 643868448202966, 978736549726821, 46385971089796, 1296884812292320}, + }, + { + {1861159462859103, 7077532564710, 963010365896826, 1938780006785270, 766241051941647}, + {1778966986051906, 1713995999765361, 1394565822271816, 1366699246468722, 1213407027149475}, + {1978989286560907, 2135084162045594, 1951565508865477, 671788336314416, 293123929458176}, + {902608944504080, 2167765718046481, 1285718473078022, 1222562171329269, 492109027844479}, + }, + { + {1820807832746213, 1029220580458586, 1101997555432203, 1039081975563572, 202477981158221}, + {1866134980680205, 2222325502763386, 1830284629571201, 1046966214478970, 418381946936795}, + {1783460633291322, 1719505443254998, 1810489639976220, 877049370713018, 2187801198742619}, + {197118243000763, 305493867565736, 518814410156522, 1656246186645170, 901894734874934}, + }, + { + {225454942125915, 478410476654509, 600524586037746, 643450007230715, 1018615928259319}, + {1733330584845708, 881092297970296, 507039890129464, 496397090721598, 2230888519577628}, + {690155664737246, 1010454785646677, 753170144375012, 1651277613844874, 1622648796364156}, + {1321310321891618, 1089655277873603, 235891750867089, 815878279563688, 1709264240047556}, + }, + { + {805027036551342, 1387174275567452, 1156538511461704, 1465897486692171, 1208567094120903}, + {2228417017817483, 202885584970535, 2182114782271881, 2077405042592934, 1029684358182774}, + {460447547653983, 627817697755692, 524899434670834, 1228019344939427, 740684787777653}, + 
{849757462467675, 447476306919899, 422618957298818, 302134659227815, 675831828440895}, + }, +} + +ge_scalarmult :: proc "contextless" (ge, p: ^Group_Element, sc: ^Scalar) { + tmp: field.Non_Montgomery_Domain_Field_Element + field.fe_from_montgomery(&tmp, sc) + + _ge_scalarmult(ge, p, &tmp) + + mem.zero_explicit(&tmp, size_of(tmp)) +} + +ge_scalarmult_basepoint :: proc "contextless" (ge: ^Group_Element, sc: ^Scalar) { + // Something like the comb method from "Fast and compact elliptic-curve + // cryptography" Section 3.3, would be more performant, but more + // complex. + // + // - https://eprint.iacr.org/2012/309 + ge_scalarmult(ge, &GE_BASEPOINT, sc) +} + +ge_scalarmult_vartime :: proc "contextless" (ge, p: ^Group_Element, sc: ^Scalar) { + tmp: field.Non_Montgomery_Domain_Field_Element + field.fe_from_montgomery(&tmp, sc) + + _ge_scalarmult(ge, p, &tmp, true) +} + +ge_double_scalarmult_basepoint_vartime :: proc "contextless" ( + ge: ^Group_Element, + a: ^Scalar, + A: ^Group_Element, + b: ^Scalar, +) { + // Strauss-Shamir, commonly referred to as the "Shamir trick", + // saves half the doublings, relative to doing this the naive way. + // + // ABGLSV-Pornin (https://eprint.iacr.org/2020/454) is faster, + // but significantly more complex, and has incompatibilities with + // mixed-order group elements. 
+ + tmp_add: Add_Scratch = --- + tmp_addend: Addend_Group_Element = --- + tmp_dbl: Double_Scratch = --- + tmp: Group_Element = --- + + A_tbl: Multiply_Table = --- + mul_tbl_set(&A_tbl, A, &tmp_add) + + sc_a, sc_b: field.Non_Montgomery_Domain_Field_Element + field.fe_from_montgomery(&sc_a, a) + field.fe_from_montgomery(&sc_b, b) + + ge_identity(&tmp) + for i := 31; i >= 0; i = i - 1 { + limb := i / 8 + shift := uint(i & 7) * 8 + + limb_byte_a := sc_a[limb] >> shift + limb_byte_b := sc_b[limb] >> shift + + hi_a, lo_a := (limb_byte_a >> 4) & 0x0f, limb_byte_a & 0x0f + hi_b, lo_b := (limb_byte_b >> 4) & 0x0f, limb_byte_b & 0x0f + + if i != 31 { + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + } + mul_tbl_add(&tmp, &A_tbl, hi_a, &tmp_add, &tmp_addend, true) + mul_tbl_add(&tmp, &GE_BASEPOINT_TABLE, hi_b, &tmp_add, &tmp_addend, true) + + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + mul_tbl_add(&tmp, &A_tbl, lo_a, &tmp_add, &tmp_addend, true) + mul_tbl_add(&tmp, &GE_BASEPOINT_TABLE, lo_b, &tmp_add, &tmp_addend, true) + } + + ge_set(ge, &tmp) +} + +@(private) +_ge_scalarmult :: proc "contextless" ( + ge, p: ^Group_Element, + sc: ^field.Non_Montgomery_Domain_Field_Element, + unsafe_is_vartime := false, +) { + // Do the simplest possible thing that works and provides adequate, + // performance, which is windowed add-then-multiply. 
+ + tmp_add: Add_Scratch = --- + tmp_addend: Addend_Group_Element = --- + tmp_dbl: Double_Scratch = --- + tmp: Group_Element = --- + + p_tbl: Multiply_Table = --- + mul_tbl_set(&p_tbl, p, &tmp_add) + + ge_identity(&tmp) + for i := 31; i >= 0; i = i - 1 { + limb := i / 8 + shift := uint(i & 7) * 8 + limb_byte := sc[limb] >> shift + + hi, lo := (limb_byte >> 4) & 0x0f, limb_byte & 0x0f + + if i != 31 { + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + } + mul_tbl_add(&tmp, &p_tbl, hi, &tmp_add, &tmp_addend, unsafe_is_vartime) + + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + mul_tbl_add(&tmp, &p_tbl, lo, &tmp_add, &tmp_addend, unsafe_is_vartime) + } + + ge_set(ge, &tmp) + + if !unsafe_is_vartime { + ge_clear(&tmp) + mem.zero_explicit(&tmp_add, size_of(Add_Scratch)) + mem.zero_explicit(&tmp_addend, size_of(Addend_Group_Element)) + mem.zero_explicit(&tmp_dbl, size_of(Double_Scratch)) + } +} + +@(private) +Multiply_Table :: [15]Addend_Group_Element // 0 = inf, which is implicit. + +@(private) +mul_tbl_set :: proc "contextless" ( + tbl: ^Multiply_Table, + ge: ^Group_Element, + tmp_add: ^Add_Scratch, +) { + tmp: Group_Element = --- + ge_set(&tmp, ge) + + ge_addend_set(&tbl[0], ge) + for i := 1; i < 15; i = i + 1 { + ge_add_addend(&tmp, &tmp, &tbl[0], tmp_add) + ge_addend_set(&tbl[i], &tmp) + } + + ge_clear(&tmp) +} + +@(private) +mul_tbl_add :: proc "contextless" ( + ge: ^Group_Element, + tbl: ^Multiply_Table, + idx: u64, + tmp_add: ^Add_Scratch, + tmp_addend: ^Addend_Group_Element, + unsafe_is_vartime: bool, +) { + // Variable time lookup, with the addition omitted entirely if idx == 0. + if unsafe_is_vartime { + // Skip adding the point at infinity. + if idx != 0 { + ge_add_addend(ge, ge, &tbl[idx - 1], tmp_add) + } + return + } + + // Constant time lookup. 
+ tmp_addend^ = { + // Point at infinity (0, 1, 1, 0) in precomputed form + {1, 0, 0, 0, 0}, // y - x + {1, 0, 0, 0, 0}, // y + x + {0, 0, 0, 0, 0}, // t * 2d + {2, 0, 0, 0, 0}, // z * 2 + } + for i := u64(1); i < 16; i = i + 1 { + _, ctrl := bits.sub_u64(0, (i ~ idx), 0) + ge_addend_conditional_assign(tmp_addend, &tbl[i - 1], int(~ctrl) & 1) + } + ge_add_addend(ge, ge, tmp_addend, tmp_add) +} diff --git a/core/crypto/_fiat/field_curve25519/field.odin b/core/crypto/_fiat/field_curve25519/field.odin index 64f9f8a1f..6b2d3b595 100644 --- a/core/crypto/_fiat/field_curve25519/field.odin +++ b/core/crypto/_fiat/field_curve25519/field.odin @@ -15,6 +15,20 @@ fe_tighten_cast :: #force_inline proc "contextless" ( return transmute(^Tight_Field_Element)(arg1) } +fe_clear :: proc "contextless" ( + arg1: $T, +) where T == ^Tight_Field_Element || T == ^Loose_Field_Element { + mem.zero_explicit(arg1, size_of(arg1^)) +} + +fe_clear_vec :: proc "contextless" ( + arg1: $T, +) where T == []^Tight_Field_Element || T == []^Loose_Field_Element { + for fe in arg1 { + fe_clear(fe) + } +} + fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) { // Ignore the unused bit by copying the input and masking the bit off // prior to deserialization. 
@@ -27,12 +41,25 @@ fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte mem.zero_explicit(&tmp1, size_of(tmp1)) } +fe_is_negative :: proc "contextless" (arg1: ^Tight_Field_Element) -> int { + tmp1: [32]byte = --- + + fe_to_bytes(&tmp1, arg1) + ret := tmp1[0] & 1 + + mem.zero_explicit(&tmp1, size_of(tmp1)) + + return int(ret) +} + fe_equal :: proc "contextless" (arg1, arg2: ^Tight_Field_Element) -> int { - tmp2: [32]byte = --- + tmp1, tmp2: [32]byte = ---, --- + fe_to_bytes(&tmp1, arg1) fe_to_bytes(&tmp2, arg2) - ret := fe_equal_bytes(arg1, &tmp2) + ret := crypto.compare_constant_time(tmp1[:], tmp2[:]) + mem.zero_explicit(&tmp1, size_of(tmp1)) mem.zero_explicit(&tmp2, size_of(tmp2)) return ret @@ -67,25 +94,37 @@ fe_carry_pow2k :: proc "contextless" ( } } +fe_carry_add :: #force_inline proc "contextless" (out1, arg1, arg2: ^Tight_Field_Element) { + fe_add(fe_relax_cast(out1), arg1, arg2) + fe_carry(out1, fe_relax_cast(out1)) +} + +fe_carry_sub :: #force_inline proc "contextless" (out1, arg1, arg2: ^Tight_Field_Element) { + fe_sub(fe_relax_cast(out1), arg1, arg2) + fe_carry(out1, fe_relax_cast(out1)) +} + fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) { fe_opp(fe_relax_cast(out1), arg1) fe_carry(out1, fe_relax_cast(out1)) } -fe_carry_invsqrt :: proc "contextless" ( +fe_carry_sqrt_ratio_m1 :: proc "contextless" ( out1: ^Tight_Field_Element, - arg1: ^Loose_Field_Element, + arg1: ^Loose_Field_Element, // u + arg2: ^Loose_Field_Element, // v ) -> int { - // Inverse square root taken from Monocypher. + // SQRT_RATIO_M1(u, v) from RFC 9496 - 4.2, based on the inverse + // square root from Monocypher. + w: Tight_Field_Element = --- + fe_carry_mul(&w, arg1, arg2) // u * v + + // r = tmp1 = u * w^((p-5)/8) tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, --- - - // t0 = x^((p-5)/8) - // Can be achieved with a simple double & add ladder, - // but it would be slower. 
- fe_carry_pow2k(&tmp1, arg1, 1) + fe_carry_pow2k(&tmp1, fe_relax_cast(&w), 1) fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 2) - fe_carry_mul(&tmp2, arg1, fe_relax_cast(&tmp2)) + fe_carry_mul(&tmp2, fe_relax_cast(&w), fe_relax_cast(&tmp2)) fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&tmp2)) fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 1) fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1)) @@ -104,48 +143,49 @@ fe_carry_invsqrt :: proc "contextless" ( fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 50) fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1)) fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 2) - fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1) + fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&w)) // w^((p-5)/8) - // quartic = x^((p-1)/4) - quartic := &tmp2 - fe_carry_square(quartic, fe_relax_cast(&tmp1)) - fe_carry_mul(quartic, fe_relax_cast(quartic), arg1) + fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1) // u * w^((p-5)/8) - // Serialize quartic once to save on repeated serialization/sanitization. - quartic_buf: [32]byte = --- - fe_to_bytes(&quartic_buf, quartic) - check := &tmp3 + // Serialize `check` once to save on repeated serialization. 
+ r, check := &tmp1, &tmp2 + b: [32]byte = --- + fe_carry_square(check, fe_relax_cast(r)) + fe_carry_mul(check, fe_relax_cast(check), arg2) // check * v + fe_to_bytes(&b, check) - fe_one(check) - p1 := fe_equal_bytes(check, &quartic_buf) - fe_carry_opp(check, check) - m1 := fe_equal_bytes(check, &quartic_buf) - fe_carry_opp(check, &SQRT_M1) - ms := fe_equal_bytes(check, &quartic_buf) + u, neg_u, neg_u_i := &tmp3, &w, check + fe_carry(u, arg1) + fe_carry_opp(neg_u, u) + fe_carry_mul(neg_u_i, fe_relax_cast(neg_u), fe_relax_cast(&FE_SQRT_M1)) - // if quartic == -1 or sqrt(-1) - // then isr = x^((p-1)/4) * sqrt(-1) - // else isr = x^((p-1)/4) - fe_carry_mul(out1, fe_relax_cast(&tmp1), fe_relax_cast(&SQRT_M1)) - fe_cond_assign(out1, &tmp1, (m1 | ms) ~ 1) + correct_sign_sqrt := fe_equal_bytes(u, &b) + flipped_sign_sqrt := fe_equal_bytes(neg_u, &b) + flipped_sign_sqrt_i := fe_equal_bytes(neg_u_i, &b) - mem.zero_explicit(&tmp1, size_of(tmp1)) - mem.zero_explicit(&tmp2, size_of(tmp2)) - mem.zero_explicit(&tmp3, size_of(tmp3)) - mem.zero_explicit(&quartic_buf, size_of(quartic_buf)) + r_prime := check + fe_carry_mul(r_prime, fe_relax_cast(r), fe_relax_cast(&FE_SQRT_M1)) + fe_cond_assign(r, r_prime, flipped_sign_sqrt | flipped_sign_sqrt_i) - return p1 | m1 + // Pick the non-negative square root. 
+ fe_carry_opp(r_prime, r) + fe_cond_select(out1, r, r_prime, fe_is_negative(r)) + + fe_clear_vec([]^Tight_Field_Element{&w, &tmp1, &tmp2, &tmp3}) + mem.zero_explicit(&b, size_of(b)) + + return correct_sign_sqrt | flipped_sign_sqrt } fe_carry_inv :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) { tmp1: Tight_Field_Element fe_carry_square(&tmp1, arg1) - _ = fe_carry_invsqrt(&tmp1, fe_relax_cast(&tmp1)) + _ = fe_carry_sqrt_ratio_m1(&tmp1, fe_relax_cast(&FE_ONE), fe_relax_cast(&tmp1)) fe_carry_square(&tmp1, fe_relax_cast(&tmp1)) fe_carry_mul(out1, fe_relax_cast(&tmp1), arg1) - mem.zero_explicit(&tmp1, size_of(tmp1)) + fe_clear(&tmp1) } fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) { @@ -196,3 +236,21 @@ fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_El out1[3], out2[3] = x4, y4 out1[4], out2[4] = x5, y5 } + +@(optimization_mode = "none") +fe_cond_select :: #force_no_inline proc "contextless" ( + out1, arg1, arg2: $T, + arg3: int, +) where T == ^Tight_Field_Element || T == ^Loose_Field_Element { + mask := (u64(arg3) * 0xffffffffffffffff) + x1 := ((mask & arg2[0]) | ((~mask) & arg1[0])) + x2 := ((mask & arg2[1]) | ((~mask) & arg1[1])) + x3 := ((mask & arg2[2]) | ((~mask) & arg1[2])) + x4 := ((mask & arg2[3]) | ((~mask) & arg1[3])) + x5 := ((mask & arg2[4]) | ((~mask) & arg1[4])) + out1[0] = x1 + out1[1] = x2 + out1[2] = x3 + out1[3] = x4 + out1[4] = x5 +} diff --git a/core/crypto/_fiat/field_curve25519/field51.odin b/core/crypto/_fiat/field_curve25519/field51.odin index 81dca19e2..d039bd411 100644 --- a/core/crypto/_fiat/field_curve25519/field51.odin +++ b/core/crypto/_fiat/field_curve25519/field51.odin @@ -42,7 +42,10 @@ import "core:math/bits" Loose_Field_Element :: distinct [5]u64 Tight_Field_Element :: distinct [5]u64 -SQRT_M1 := Tight_Field_Element { +FE_ZERO := Tight_Field_Element{0, 0, 0, 0, 0} +FE_ONE := Tight_Field_Element{1, 0, 0, 0, 0} + +FE_SQRT_M1 := Tight_Field_Element { 
1718705420411056, 234908883556509, 2233514472574048, diff --git a/core/crypto/_fiat/field_scalar25519/field.odin b/core/crypto/_fiat/field_scalar25519/field.odin index c741d30cf..9b40661b7 100644 --- a/core/crypto/_fiat/field_scalar25519/field.odin +++ b/core/crypto/_fiat/field_scalar25519/field.odin @@ -20,6 +20,10 @@ _TWO_336 := Montgomery_Domain_Field_Element { 0x3d217f5be65cb5c, } +fe_clear :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) { + mem.zero_explicit(arg1, size_of(Montgomery_Domain_Field_Element)) +} + fe_from_bytes :: proc "contextless" ( out1: ^Montgomery_Domain_Field_Element, arg1: ^[32]byte, @@ -85,7 +89,7 @@ fe_from_bytes_wide :: proc "contextless" ( fe_mul(&tmp, &tmp, &_TWO_336) // c * 2^336 fe_add(out1, out1, &tmp) // a + b * 2^168 + c * 2^336 - mem.zero_explicit(&tmp, size_of(tmp)) + fe_clear(&tmp) } @(private) @@ -125,7 +129,7 @@ fe_equal :: proc "contextless" (arg1, arg2: ^Montgomery_Domain_Field_Element) -> // which will be 1. _, borrow := bits.sub_u64(fe_non_zero(&tmp), 1, 0) - mem.zero_explicit(&tmp, size_of(tmp)) + fe_clear(&tmp) return int(borrow) } diff --git a/core/crypto/x25519/x25519.odin b/core/crypto/x25519/x25519.odin index 3cd247cf8..f8a301810 100644 --- a/core/crypto/x25519/x25519.odin +++ b/core/crypto/x25519/x25519.odin @@ -94,13 +94,8 @@ _scalarmult :: proc "contextless" (out, scalar, point: ^[32]byte) { field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2)) field.fe_to_bytes(out, &x2) - mem.zero_explicit(&x1, size_of(x1)) - mem.zero_explicit(&x2, size_of(x2)) - mem.zero_explicit(&x3, size_of(x3)) - mem.zero_explicit(&z2, size_of(z2)) - mem.zero_explicit(&z3, size_of(z3)) - mem.zero_explicit(&t0, size_of(t0)) - mem.zero_explicit(&t1, size_of(t1)) + field.fe_clear_vec([]^field.Tight_Field_Element{&x1, &x2, &x3, &z2, &z3}) + field.fe_clear_vec([]^field.Loose_Field_Element{&t0, &t1}) } // scalarmult "multiplies" the provided scalar and point, and writes the @@ -137,6 +132,5 @@ scalarmult :: 
proc(dst, scalar, point: []byte) { // scalarmult_basepoint "multiplies" the provided scalar with the X25519 // base point and writes the resulting point to dst. scalarmult_basepoint :: proc(dst, scalar: []byte) { - // TODO/perf: Switch to using a precomputed table. scalarmult(dst, scalar, _BASE_POINT[:]) } From d96f8bb5c1f5f7b24a23383f88c1b9a637b586b2 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Tue, 26 Mar 2024 13:05:50 +0900 Subject: [PATCH 12/14] core/crypto/ristretto255: Initial import --- core/crypto/_fiat/field_curve25519/field.odin | 15 +- core/crypto/ristretto255/ristretto255.odin | 510 ++++++++++++++++++ .../ristretto255/ristretto255_scalar.odin | 97 ++++ examples/all/all_main.odin | 2 + 4 files changed, 622 insertions(+), 2 deletions(-) create mode 100644 core/crypto/ristretto255/ristretto255.odin create mode 100644 core/crypto/ristretto255/ristretto255_scalar.odin diff --git a/core/crypto/_fiat/field_curve25519/field.odin b/core/crypto/_fiat/field_curve25519/field.odin index 6b2d3b595..8a8202ac4 100644 --- a/core/crypto/_fiat/field_curve25519/field.odin +++ b/core/crypto/_fiat/field_curve25519/field.odin @@ -109,6 +109,10 @@ fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Eleme fe_carry(out1, fe_relax_cast(out1)) } +fe_carry_abs :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) { + fe_cond_negate(out1, arg1, fe_is_negative(arg1)) +} + fe_carry_sqrt_ratio_m1 :: proc "contextless" ( out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element, // u @@ -168,8 +172,7 @@ fe_carry_sqrt_ratio_m1 :: proc "contextless" ( fe_cond_assign(r, r_prime, flipped_sign_sqrt | flipped_sign_sqrt_i) // Pick the non-negative square root. 
-	fe_carry_opp(r_prime, r)
-	fe_cond_select(out1, r, r_prime, fe_is_negative(r))
+	fe_carry_abs(out1, r)
 
 	fe_clear_vec([]^Tight_Field_Element{&w, &tmp1, &tmp2, &tmp3})
 	mem.zero_explicit(&b, size_of(b))
@@ -254,3 +257,11 @@ fe_cond_select :: #force_no_inline proc "contextless" (
 	out1[3] = x4
 	out1[4] = x5
 }
+
+fe_cond_negate :: proc "contextless" (out1, arg1: ^Tight_Field_Element, ctrl: int) {
+	tmp1: Tight_Field_Element = ---
+	fe_carry_opp(&tmp1, arg1)
+	fe_cond_select(out1, arg1, &tmp1, ctrl)
+
+	fe_clear(&tmp1)
+}
diff --git a/core/crypto/ristretto255/ristretto255.odin b/core/crypto/ristretto255/ristretto255.odin
new file mode 100644
index 000000000..d1f2b6ee5
--- /dev/null
+++ b/core/crypto/ristretto255/ristretto255.odin
@@ -0,0 +1,510 @@
+/*
+package ristretto255 implements the ristretto255 prime-order group.
+
+See:
+- https://www.rfc-editor.org/rfc/rfc9496
+*/
+package ristretto255
+
+import grp "core:crypto/_edwards25519"
+import field "core:crypto/_fiat/field_curve25519"
+import "core:mem"
+
+// ELEMENT_SIZE is the size of a byte-encoded ristretto255 group element.
+ELEMENT_SIZE :: 32
+// WIDE_ELEMENT_SIZE is the size of a wide byte-encoded ristretto255
+// group element.
+WIDE_ELEMENT_SIZE :: 64
+
+// Field constants used by the encode (ge_bytes) and MAP (ge_map)
+// procedures below.  The names mirror the constants in the RFC
+// pseudocode quoted in those procedures (FE_D_MINUS_ONE_SQUARED is
+// written D_MINUS_ONE_SQ there, and FE_NEG_ONE is the `-1` selected
+// for `c` in ge_map).
+@(private)
+FE_NEG_ONE := field.Tight_Field_Element {
+	2251799813685228,
+	2251799813685247,
+	2251799813685247,
+	2251799813685247,
+	2251799813685247,
+}
+@(private)
+FE_INVSQRT_A_MINUS_D := field.Tight_Field_Element {
+	278908739862762,
+	821645201101625,
+	8113234426968,
+	1777959178193151,
+	2118520810568447,
+}
+@(private)
+FE_ONE_MINUS_D_SQ := field.Tight_Field_Element {
+	1136626929484150,
+	1998550399581263,
+	496427632559748,
+	118527312129759,
+	45110755273534,
+}
+@(private)
+FE_D_MINUS_ONE_SQUARED := field.Tight_Field_Element {
+	1507062230895904,
+	1572317787530805,
+	683053064812840,
+	317374165784489,
+	1572899562415810,
+}
+@(private)
+FE_SQRT_AD_MINUS_ONE := field.Tight_Field_Element {
+	2241493124984347,
+	425987919032274,
+	2207028919301688,
+	1220490630685848,
+	974799131293748,
+}
+// GE_IDENTITY is a pre-initialized identity element; ge_is_identity
+// compares against it.
+@(private)
+GE_IDENTITY := Group_Element{grp.GE_IDENTITY, true}
+
+// Group_Element is a ristretto255 group element.  The zero-initialized
+// value is invalid.
+Group_Element :: struct {
+	// WARNING: While the internal representation is an Edwards25519
+	// group element, this is not guaranteed to always be the case,
+	// and your code *WILL* break if you mess with `_p`.
+	_p: grp.Group_Element,
+	// Set to true by every routine in this file that writes the element,
+	// and checked by _ge_assert_initialized before an element is read.
+	_is_initialized: bool,
+}
+
+// ge_clear clears ge to the uninitialized state.
+//
+// The whole structure is zeroed, including `_is_initialized`.
+ge_clear :: proc "contextless" (ge: ^Group_Element) {
+	mem.zero_explicit(ge, size_of(Group_Element))
+}
+
+// ge_set sets `ge = a`.  Panics if a is uninitialized.
+ge_set :: proc(ge, a: ^Group_Element) {
+	_ge_assert_initialized([]^Group_Element{a})
+
+	grp.ge_set(&ge._p, &a._p)
+	ge._is_initialized = true
+}
+
+// ge_identity sets ge to the identity (neutral) element.
+ge_identity :: proc "contextless" (ge: ^Group_Element) {
+	grp.ge_identity(&ge._p)
+	ge._is_initialized = true
+}
+
+// ge_generator sets ge to the group generator.
+ge_generator :: proc "contextless" (ge: ^Group_Element) {
+	grp.ge_generator(&ge._p)
+	ge._is_initialized = true
+}
+
+// ge_set_bytes sets ge to the result of decoding b as a ristretto255
+// group element, and returns true on success.
+//
+// An input of the wrong length is rejected by returning false rather
+// than panicking.  ge is only written when decoding succeeds.
+@(require_results)
+ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
+	// 1.  Interpret the string as an unsigned integer s in little-endian
+	//     representation.  If the length of the string is not 32 bytes or
+	//     if the resulting value is >= p, decoding fails.
+	//
+	// 2.  If IS_NEGATIVE(s) returns TRUE, decoding fails.
+
+	if len(b) != ELEMENT_SIZE {
+		return false
+	}
+	if b[31] & 128 != 0 || b[0] & 1 != 0 {
+		// Fail early if b is clearly > p, or negative.
+		return false
+	}
+
+	b_ := transmute(^[32]byte)(raw_data(b))
+
+	s: field.Tight_Field_Element = ---
+	defer field.fe_clear(&s)
+
+	field.fe_from_bytes(&s, b_)
+	if field.fe_equal_bytes(&s, b_) != 1 {
+		// Reject non-canonical encodings of s.
+		return false
+	}
+
+	// 3.  Process s as follows:
+	v, u1, u2: field.Loose_Field_Element = ---, ---, ---
+	tmp, u2_sqr: field.Tight_Field_Element = ---, ---
+
+	// ss = s^2
+	// u1 = 1 - ss
+	// u2 = 1 + ss
+	// u2_sqr = u2^2
+	field.fe_carry_square(&tmp, field.fe_relax_cast(&s))
+	field.fe_sub(&u1, &field.FE_ONE, &tmp)
+	field.fe_add(&u2, &field.FE_ONE, &tmp)
+	field.fe_carry_square(&u2_sqr, &u2)
+
+	// v = -(D * u1^2) - u2_sqr
+	// (computed as the negation of `D * u1^2 + u2_sqr`)
+	field.fe_carry_square(&tmp, &u1)
+	field.fe_carry_mul(&tmp, field.fe_relax_cast(&grp.FE_D), field.fe_relax_cast(&tmp))
+	field.fe_carry_add(&tmp, &tmp, &u2_sqr)
+	field.fe_opp(&v, &tmp)
+
+	// (was_square, invsqrt) = SQRT_RATIO_M1(1, v * u2_sqr)
+	// (tmp is both the input and the output, and holds invsqrt afterwards)
+	field.fe_carry_mul(&tmp, &v, field.fe_relax_cast(&u2_sqr))
+	was_square := field.fe_carry_sqrt_ratio_m1(
+		&tmp,
+		field.fe_relax_cast(&field.FE_ONE),
+		field.fe_relax_cast(&tmp),
+	)
+
+	// den_x = invsqrt * u2
+	// den_y = invsqrt * den_x * v
+	x, y, t: field.Tight_Field_Element = ---, ---, ---
+	field.fe_carry_mul(&x, field.fe_relax_cast(&tmp), &u2)
+	field.fe_carry_mul(&y, field.fe_relax_cast(&tmp), field.fe_relax_cast(&x))
+	field.fe_carry_mul(&y, field.fe_relax_cast(&y), &v)
+
+	// x = CT_ABS(2 * s * den_x)
+	field.fe_carry_mul(&x, field.fe_relax_cast(&s), field.fe_relax_cast(&x))
+	field.fe_carry_add(&x, &x, &x)
+	field.fe_carry_abs(&x, &x)
+
+	// y = u1 * den_y
+	field.fe_carry_mul(&y, &u1, field.fe_relax_cast(&y))
+
+	// t = x * y
+	field.fe_carry_mul(&t, field.fe_relax_cast(&x), field.fe_relax_cast(&y))
+
+	// x, y and t are still needed below, so their wipe is deferred to
+	// procedure exit; the other temporaries are wiped immediately.
+	field.fe_clear_vec([]^field.Loose_Field_Element{&v, &u1, &u2})
+	field.fe_clear_vec([]^field.Tight_Field_Element{&tmp, &u2_sqr})
+	defer field.fe_clear_vec([]^field.Tight_Field_Element{&x, &y, &t})
+
+	// 4.  If was_square is FALSE, IS_NEGATIVE(t) returns TRUE, or y = 0,
+	//     decoding fails.  Otherwise, return the group element represented
+	//     by the internal representation (x, y, 1, t) as the result of
+	//     decoding.
+
+	switch {
+	case was_square == 0:
+		// Not sure why the RFC doesn't have this just fail early.
+		return false
+	case field.fe_is_negative(&t) != 0:
+		return false
+	case field.fe_equal(&y, &field.FE_ZERO) != 0:
+		return false
+	}
+
+	field.fe_set(&ge._p.x, &x)
+	field.fe_set(&ge._p.y, &y)
+	field.fe_one(&ge._p.z)
+	field.fe_set(&ge._p.t, &t)
+	ge._is_initialized = true
+
+	return true
+}
+
+// ge_set_wide_bytes sets ge to the result of deriving a ristretto255
+// group element, from a wide (512-bit) byte string.
+//
+// Panics if len(b) != WIDE_ELEMENT_SIZE.
+ge_set_wide_bytes :: proc(ge: ^Group_Element, b: []byte) {
+	if len(b) != WIDE_ELEMENT_SIZE {
+		panic("crypto/ristretto255: invalid wide input size")
+	}
+
+	// The element derivation function on an input string b proceeds as
+	// follows:
+	//
+	// 1.  Compute P1 as MAP(b[0:32]).
+	// 2.  Compute P2 as MAP(b[32:64]).
+	// 3.  Return P1 + P2.
+
+	p1, p2: Group_Element = ---, ---
+	ge_map(&p1, b[0:32])
+	ge_map(&p2, b[32:64])
+
+	ge_add(ge, &p1, &p2)
+
+	ge_clear(&p1)
+	ge_clear(&p2)
+}
+
+// ge_bytes sets dst to the canonical encoding of ge.
+//
+// Panics if ge is uninitialized or len(dst) != ELEMENT_SIZE.
+ge_bytes :: proc(ge: ^Group_Element, dst: []byte) {
+	_ge_assert_initialized([]^Group_Element{ge})
+	if len(dst) != ELEMENT_SIZE {
+		panic("crypto/ristretto255: invalid destination size")
+	}
+
+	x0, y0, z0, t0 := &ge._p.x, &ge._p.y, &ge._p.z, &ge._p.t
+
+	// 1.  Process the internal representation into a field element s as
+	//     follows:
+
+	// u1 = (z0 + y0) * (z0 - y0)
+	// u2 = x0 * y0
+	u1, u2: field.Tight_Field_Element = ---, ---
+	tmp1, tmp2: field.Loose_Field_Element = ---, ---
+	field.fe_add(&tmp1, z0, y0)
+	field.fe_sub(&tmp2, z0, y0)
+	field.fe_carry_mul(&u1, &tmp1, &tmp2)
+	field.fe_carry_mul(&u2, field.fe_relax_cast(x0), field.fe_relax_cast(y0))
+
+	// Ignore was_square since this is always square.
+	// (_, invsqrt) = SQRT_RATIO_M1(1, u1 * u2^2)
+	tmp: field.Tight_Field_Element = ---
+	field.fe_carry_square(&tmp, field.fe_relax_cast(&u2))
+	field.fe_carry_mul(&tmp, field.fe_relax_cast(&u1), field.fe_relax_cast(&tmp))
+	_ = field.fe_carry_sqrt_ratio_m1(
+		&tmp,
+		field.fe_relax_cast(&field.FE_ONE),
+		field.fe_relax_cast(&tmp),
+	)
+
+	// den1 = invsqrt * u1
+	// den2 = invsqrt * u2
+	// z_inv = den1 * den2 * t0
+	//
+	// den1/den2 alias u1/u2, so the two products below overwrite u1 and
+	// u2 in place.
+	den1, den2 := &u1, &u2
+	z_inv: field.Tight_Field_Element = ---
+	field.fe_carry_mul(den1, field.fe_relax_cast(&tmp), field.fe_relax_cast(&u1))
+	field.fe_carry_mul(den2, field.fe_relax_cast(&tmp), field.fe_relax_cast(&u2))
+	field.fe_carry_mul(&z_inv, field.fe_relax_cast(den1), field.fe_relax_cast(den2))
+	field.fe_carry_mul(&z_inv, field.fe_relax_cast(&z_inv), field.fe_relax_cast(t0))
+
+	// rotate = IS_NEGATIVE(t0 * z_inv)
+	// Note: Reordered from the RFC because invsqrt is no longer needed.
+	field.fe_carry_mul(&tmp, field.fe_relax_cast(t0), field.fe_relax_cast(&z_inv))
+	rotate := field.fe_is_negative(&tmp)
+
+	// ix0 = x0 * SQRT_M1
+	// iy0 = y0 * SQRT_M1
+	// enchanted_denominator = den1 * INVSQRT_A_MINUS_D
+	// (tmp is reused to hold enchanted_denominator)
+	ix0, iy0: field.Tight_Field_Element = ---, ---
+	field.fe_carry_mul(&ix0, field.fe_relax_cast(x0), field.fe_relax_cast(&field.FE_SQRT_M1))
+	field.fe_carry_mul(&iy0, field.fe_relax_cast(y0), field.fe_relax_cast(&field.FE_SQRT_M1))
+	field.fe_carry_mul(&tmp, field.fe_relax_cast(den1), field.fe_relax_cast(&FE_INVSQRT_A_MINUS_D))
+
+	// Conditionally rotate x and y.
+	// x = CT_SELECT(iy0 IF rotate ELSE x0)
+	// y = CT_SELECT(ix0 IF rotate ELSE y0)
+	// z = z0
+	// den_inv = CT_SELECT(enchanted_denominator IF rotate ELSE den2)
+	// (after the third select, tmp holds den_inv)
+	x, y: field.Tight_Field_Element = ---, ---
+	field.fe_cond_select(&x, x0, &iy0, rotate)
+	field.fe_cond_select(&y, y0, &ix0, rotate)
+	field.fe_cond_select(&tmp, den2, &tmp, rotate)
+
+	// y = CT_SELECT(-y IF IS_NEGATIVE(x * z_inv) ELSE y)
+	field.fe_carry_mul(&x, field.fe_relax_cast(&x), field.fe_relax_cast(&z_inv))
+	field.fe_cond_negate(&y, &y, field.fe_is_negative(&x))
+
+	// s = CT_ABS(den_inv * (z - y))
+	field.fe_sub(&tmp1, z0, &y)
+	field.fe_carry_mul(&tmp, field.fe_relax_cast(&tmp), &tmp1)
+	field.fe_carry_abs(&tmp, &tmp)
+
+	// 2.  Return the 32-byte little-endian encoding of s.  More
+	//     specifically, this is the encoding of the canonical
+	//     representation of s as an integer between 0 and p-1, inclusive.
+	dst_ := transmute(^[32]byte)(raw_data(dst))
+	field.fe_to_bytes(dst_, &tmp)
+
+	field.fe_clear_vec([]^field.Tight_Field_Element{&u1, &u2, &tmp, &z_inv, &ix0, &iy0, &x, &y})
+	field.fe_clear_vec([]^field.Loose_Field_Element{&tmp1, &tmp2})
+}
+
+// ge_add sets `ge = a + b`.  Panics if a or b is uninitialized.
+ge_add :: proc(ge, a, b: ^Group_Element) {
+	_ge_assert_initialized([]^Group_Element{a, b})
+
+	grp.ge_add(&ge._p, &a._p, &b._p)
+	ge._is_initialized = true
+}
+
+// ge_double sets `ge = a + a`.  Panics if a is uninitialized.
+ge_double :: proc(ge, a: ^Group_Element) {
+	_ge_assert_initialized([]^Group_Element{a})
+
+	grp.ge_double(&ge._p, &a._p)
+	ge._is_initialized = true
+}
+
+// ge_negate sets `ge = -a`.  Panics if a is uninitialized.
+ge_negate :: proc(ge, a: ^Group_Element) {
+	_ge_assert_initialized([]^Group_Element{a})
+
+	grp.ge_negate(&ge._p, &a._p)
+	ge._is_initialized = true
+}
+
+// ge_scalarmult sets `ge = A * sc`.
+ge_scalarmult :: proc(ge, A: ^Group_Element, sc: ^Scalar) {
+	_ge_assert_initialized([]^Group_Element{A})
+
+	grp.ge_scalarmult(&ge._p, &A._p, sc)
+	ge._is_initialized = true
+}
+
+// ge_scalarmult_generator sets `ge = G * sc`.
+ge_scalarmult_generator :: proc "contextless" (ge: ^Group_Element, sc: ^Scalar) {
+	grp.ge_scalarmult_basepoint(&ge._p, sc)
+	ge._is_initialized = true
+}
+
+// ge_scalarmult_vartime sets `ge = A * sc` in variable time.
+// Panics if A is uninitialized.
+ge_scalarmult_vartime :: proc(ge, A: ^Group_Element, sc: ^Scalar) {
+	_ge_assert_initialized([]^Group_Element{A})
+
+	grp.ge_scalarmult_vartime(&ge._p, &A._p, sc)
+	ge._is_initialized = true
+}
+
+// ge_double_scalarmult_generator_vartime sets `ge = A * a + G * b` in variable
+// time.  Panics if A is uninitialized.
+ge_double_scalarmult_generator_vartime :: proc(
+	ge: ^Group_Element,
+	a: ^Scalar,
+	A: ^Group_Element,
+	b: ^Scalar,
+) {
+	_ge_assert_initialized([]^Group_Element{A})
+
+	grp.ge_double_scalarmult_basepoint_vartime(&ge._p, a, &A._p, b)
+	ge._is_initialized = true
+}
+
+// ge_cond_negate sets `ge = a` iff `ctrl == 0` and `ge = -a` iff `ctrl == 1`.
+// Behavior for all other values of ctrl is undefined.
+// Panics if a is uninitialized.
+ge_cond_negate :: proc(ge, a: ^Group_Element, ctrl: int) {
+	_ge_assert_initialized([]^Group_Element{a})
+
+	grp.ge_cond_negate(&ge._p, &a._p, ctrl)
+	ge._is_initialized = true
+}
+
+// ge_cond_assign sets `ge = ge` iff `ctrl == 0` and `ge = a` iff `ctrl == 1`.
+// Behavior for all other values of ctrl is undefined.
+//
+// Unlike the other setters, ge must already be initialized (it may be
+// left unchanged); panics if either ge or a is uninitialized.
+ge_cond_assign :: proc(ge, a: ^Group_Element, ctrl: int) {
+	_ge_assert_initialized([]^Group_Element{ge, a})
+
+	grp.ge_cond_assign(&ge._p, &a._p, ctrl)
+}
+
+// ge_cond_select sets `ge = a` iff `ctrl == 0` and `ge = b` iff `ctrl == 1`.
+// Behavior for all other values of ctrl is undefined.
+// Behavior for all other values of ctrl are undefined, +ge_cond_select :: proc(ge, a, b: ^Group_Element, ctrl: int) { + _ge_assert_initialized([]^Group_Element{a, b}) + + grp.ge_cond_select(&ge._p, &a._p, &b._p, ctrl) + ge._is_initialized = true +} + +// ge_equal returns 1 iff `a == b`, and 0 otherwise. +@(require_results) +ge_equal :: proc(a, b: ^Group_Element) -> int { + _ge_assert_initialized([]^Group_Element{a, b}) + + // CT_EQ(x1 * y2, y1 * x2) | CT_EQ(y1 * y2, x1 * x2) + ax_by, ay_bx, ay_by, ax_bx: field.Tight_Field_Element = ---, ---, ---, --- + field.fe_carry_mul(&ax_by, field.fe_relax_cast(&a._p.x), field.fe_relax_cast(&b._p.y)) + field.fe_carry_mul(&ay_bx, field.fe_relax_cast(&a._p.y), field.fe_relax_cast(&b._p.x)) + field.fe_carry_mul(&ay_by, field.fe_relax_cast(&a._p.y), field.fe_relax_cast(&b._p.y)) + field.fe_carry_mul(&ax_bx, field.fe_relax_cast(&a._p.x), field.fe_relax_cast(&b._p.x)) + + ret := field.fe_equal(&ax_by, &ay_bx) | field.fe_equal(&ay_by, &ax_bx) + + field.fe_clear_vec([]^field.Tight_Field_Element{&ax_by, &ay_bx, &ay_by, &ax_bx}) + + return ret +} + +// ge_is_identity returns 1 iff `ge` is the identity element, and 0 otherwise. +@(require_results) +ge_is_identity :: proc(ge: ^Group_Element) -> int { + return ge_equal(ge, &GE_IDENTITY) +} + +@(private) +ge_map :: proc "contextless" (ge: ^Group_Element, b: []byte) { + b_ := transmute(^[32]byte)(raw_data(b)) + + // The MAP function is defined on 32-byte strings as: + // + // 1. Mask the most significant bit in the final byte of the string, + // and interpret the string as an unsigned integer r in little- + // endian representation. Reduce r modulo p to obtain a field + // element t. + // * Masking the most significant bit is equivalent to interpreting + // the whole string as an unsigned integer in little-endian + // representation and then reducing it modulo 2^255. + t: field.Tight_Field_Element = --- + field.fe_from_bytes(&t, b_) + + // 2. 
Process t as follows: + // + // r = SQRT_M1 * t^2 + // u = (r + 1) * ONE_MINUS_D_SQ + // v = (-1 - r*D) * (r + D) + tmp1: field.Loose_Field_Element = --- + r, u, v: field.Tight_Field_Element = ---, ---, --- + + field.fe_carry_square(&r, field.fe_relax_cast(&t)) + field.fe_carry_mul(&r, field.fe_relax_cast(&field.FE_SQRT_M1), field.fe_relax_cast(&r)) + + field.fe_add(&tmp1, &field.FE_ONE, &r) + field.fe_carry_mul(&u, &tmp1, field.fe_relax_cast(&FE_ONE_MINUS_D_SQ)) + + field.fe_carry_mul(&v, field.fe_relax_cast(&r), field.fe_relax_cast(&grp.FE_D)) + field.fe_carry_add(&v, &field.FE_ONE, &v) + field.fe_carry_opp(&v, &v) + field.fe_add(&tmp1, &r, &grp.FE_D) + field.fe_carry_mul(&v, field.fe_relax_cast(&v), &tmp1) + + // (was_square, s) = SQRT_RATIO_M1(u, v) + // s_prime = -CT_ABS(s*t) + // s = CT_SELECT(s IF was_square ELSE s_prime) + // c = CT_SELECT(-1 IF was_square ELSE r) + s, s_prime, c: field.Tight_Field_Element = ---, ---, --- + was_square := field.fe_carry_sqrt_ratio_m1( + &s, + field.fe_relax_cast(&u), + field.fe_relax_cast(&v), + ) + field.fe_carry_mul(&s_prime, field.fe_relax_cast(&s), field.fe_relax_cast(&t)) + field.fe_carry_abs(&s_prime, &s_prime) + field.fe_carry_opp(&s_prime, &s_prime) + field.fe_cond_select(&s, &s_prime, &s, was_square) + field.fe_cond_select(&c, &r, &FE_NEG_ONE, was_square) + + // N = c * (r - 1) * D_MINUS_ONE_SQ - v + N: field.Tight_Field_Element = --- + field.fe_sub(&tmp1, &r, &field.FE_ONE) + field.fe_carry_mul(&N, field.fe_relax_cast(&c), &tmp1) + field.fe_carry_mul(&N, field.fe_relax_cast(&N), field.fe_relax_cast(&FE_D_MINUS_ONE_SQUARED)) + field.fe_carry_sub(&N, &N, &v) + + // w0 = 2 * s * v + // w1 = N * SQRT_AD_MINUS_ONE + // w2 = 1 - s^2 + // w3 = 1 + s^2 + w0, w1: field.Tight_Field_Element = ---, --- + w2, w3: field.Loose_Field_Element = ---, --- + field.fe_carry_mul(&w0, field.fe_relax_cast(&s), field.fe_relax_cast(&v)) + field.fe_carry_add(&w0, &w0, &w0) + field.fe_carry_mul(&w1, field.fe_relax_cast(&N), 
field.fe_relax_cast(&FE_SQRT_AD_MINUS_ONE)) + field.fe_carry_square(&s, field.fe_relax_cast(&s)) + field.fe_sub(&w2, &field.FE_ONE, &s) + field.fe_add(&w3, &field.FE_ONE, &s) + + // 3. Return the group element represented by the internal + // representation (w0*w3, w2*w1, w1*w3, w0*w2). + + field.fe_carry_mul(&ge._p.x, field.fe_relax_cast(&w0), &w3) + field.fe_carry_mul(&ge._p.y, &w2, field.fe_relax_cast(&w1)) + field.fe_carry_mul(&ge._p.z, field.fe_relax_cast(&w1), &w3) + field.fe_carry_mul(&ge._p.t, field.fe_relax_cast(&w0), &w2) + ge._is_initialized = true + + field.fe_clear_vec([]^field.Tight_Field_Element{&r, &u, &v, &s, &s_prime, &c, &N, &w0, &w1}) + field.fe_clear_vec([]^field.Loose_Field_Element{&tmp1, &w2, &w3}) +} + +@(private) +_ge_assert_initialized :: proc(ges: []^Group_Element) { + for ge in ges { + if !ge._is_initialized { + panic("crypto/ristretto255: uninitialized group element") + } + } +} diff --git a/core/crypto/ristretto255/ristretto255_scalar.odin b/core/crypto/ristretto255/ristretto255_scalar.odin new file mode 100644 index 000000000..f581e5963 --- /dev/null +++ b/core/crypto/ristretto255/ristretto255_scalar.odin @@ -0,0 +1,97 @@ +package ristretto255 + +import grp "core:crypto/_edwards25519" + +// SCALAR_SIZE is the size of a byte-encoded ristretto255 scalar. +SCALAR_SIZE :: 32 +// WIDE_SCALAR_SIZE is the size of a wide byte-encoded ristretto255 +// scalar. +WIDE_SCALAR_SIZE :: 64 + +// Scalar is a ristretto255 scalar. The zero-initialized value is valid, +// and represents `0`. +Scalar :: grp.Scalar + +// sc_clear clears sc to the uninitialized state. +sc_clear :: proc "contextless" (sc: ^Scalar) { + grp.sc_clear(sc) +} + +// sc_set sets `sc = a`. +sc_set :: proc "contextless" (sc, a: ^Scalar) { + grp.sc_set(sc, a) +} + +// sc_set_u64 sets `sc = i`. 
+sc_set_u64 :: proc "contextless" (sc: ^Scalar, i: u64) {
+	grp.sc_set_u64(sc, i)
+}
+
+// sc_set_bytes sets sc to the result of decoding b as a ristretto255
+// scalar, and returns true on success.
+//
+// An input whose length is not SCALAR_SIZE is rejected by returning
+// false rather than panicking.
+@(require_results)
+sc_set_bytes :: proc(sc: ^Scalar, b: []byte) -> bool {
+	if len(b) != SCALAR_SIZE {
+		return false
+	}
+
+	return grp.sc_set_bytes(sc, b)
+}
+
+// sc_set_bytes_wide sets sc to the result of deriving a ristretto255
+// scalar, from a wide (512-bit) byte string by interpreting b as a
+// little-endian value, and reducing it mod the group order.
+//
+// Panics if len(b) != WIDE_SCALAR_SIZE.
+sc_set_bytes_wide :: proc(sc: ^Scalar, b: []byte) {
+	if len(b) != WIDE_SCALAR_SIZE {
+		panic("crypto/ristretto255: invalid wide input size")
+	}
+
+	b_ := transmute(^[WIDE_SCALAR_SIZE]byte)(raw_data(b))
+	grp.sc_set_bytes_wide(sc, b_)
+}
+
+// sc_bytes sets dst to the canonical encoding of sc.
+//
+// Panics if len(dst) != SCALAR_SIZE.
+sc_bytes :: proc(sc: ^Scalar, dst: []byte) {
+	if len(dst) != SCALAR_SIZE {
+		panic("crypto/ristretto255: invalid destination size")
+	}
+
+	grp.sc_bytes(dst, sc)
+}
+
+// sc_add sets `sc = a + b`.
+sc_add :: proc "contextless" (sc, a, b: ^Scalar) {
+	grp.sc_add(sc, a, b)
+}
+
+// sc_sub sets `sc = a - b`.
+sc_sub :: proc "contextless" (sc, a, b: ^Scalar) {
+	grp.sc_sub(sc, a, b)
+}
+
+// sc_negate sets `sc = -a`.
+sc_negate :: proc "contextless" (sc, a: ^Scalar) {
+	grp.sc_negate(sc, a)
+}
+
+// sc_mul sets `sc = a * b`.
+sc_mul :: proc "contextless" (sc, a, b: ^Scalar) {
+	grp.sc_mul(sc, a, b)
+}
+
+// sc_square sets `sc = a^2`.
+sc_square :: proc "contextless" (sc, a: ^Scalar) {
+	grp.sc_square(sc, a)
+}
+
+// sc_cond_assign sets `sc = sc` iff `ctrl == 0` and `sc = a` iff `ctrl == 1`.
+// Behavior for all other values of ctrl is undefined.
+sc_cond_assign :: proc(sc, a: ^Scalar, ctrl: int) {
+	grp.sc_cond_assign(sc, a, ctrl)
+}
+
+// sc_equal returns 1 iff `a == b`, and 0 otherwise.
+@(require_results) +sc_equal :: proc(a, b: ^Scalar) -> int { + return grp.sc_equal(a, b) +} diff --git a/examples/all/all_main.odin b/examples/all/all_main.odin index bc1aff607..cc0005840 100644 --- a/examples/all/all_main.odin +++ b/examples/all/all_main.odin @@ -37,6 +37,7 @@ import md5 "core:crypto/legacy/md5" import sha1 "core:crypto/legacy/sha1" import pbkdf2 "core:crypto/pbkdf2" import poly1305 "core:crypto/poly1305" +import ristretto255 "core:crypto/ristretto255" import sha2 "core:crypto/sha2" import sha3 "core:crypto/sha3" import shake "core:crypto/shake" @@ -158,6 +159,7 @@ _ :: keccak _ :: md5 _ :: pbkdf2 _ :: poly1305 +_ :: ristretto255 _ :: sha1 _ :: sha2 _ :: sha3 From 893c3bef9a45fd58da38a11daa8ec9b0c6c323fe Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Fri, 5 Apr 2024 20:03:46 +0900 Subject: [PATCH 13/14] core/crypto/ed25519: Initial import --- core/crypto/ed25519/ed25519.odin | 314 +++++++++++++++++++++++++++++++ examples/all/all_main.odin | 2 + 2 files changed, 316 insertions(+) create mode 100644 core/crypto/ed25519/ed25519.odin diff --git a/core/crypto/ed25519/ed25519.odin b/core/crypto/ed25519/ed25519.odin new file mode 100644 index 000000000..86da35669 --- /dev/null +++ b/core/crypto/ed25519/ed25519.odin @@ -0,0 +1,314 @@ +/* +package ed25519 implements the Ed25519 EdDSA signature algorithm. + +See: +- https://datatracker.ietf.org/doc/html/rfc8032 +- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.186-5.pdf +- https://eprint.iacr.org/2020/1244.pdf +*/ +package ed25519 + +import "core:crypto" +import grp "core:crypto/_edwards25519" +import "core:crypto/sha2" +import "core:mem" + +// PRIVATE_KEY_SIZE is the byte-encoded private key size. +PRIVATE_KEY_SIZE :: 32 +// PUBLIC_KEY_SIZE is the byte-encoded public key size. +PUBLIC_KEY_SIZE :: 32 +// SIGNATURE_SIZE is the byte-encoded signature size. +SIGNATURE_SIZE :: 64 + +@(private) +NONCE_SIZE :: 32 + +// Private_Key is an Ed25519 private key. 
+Private_Key :: struct {
+	// WARNING: All of the members are to be treated as internal (ie:
+	// the Private_Key structure is intended to be opaque).  There are
+	// subtle vulnerabilities that can be introduced if the internal
+	// values are allowed to be altered.
+	//
+	// See: https://github.com/MystenLabs/ed25519-unsafe-libs
+
+	// Byte-encoded private key, as passed to private_key_set_bytes.
+	_b: [PRIVATE_KEY_SIZE]byte,
+	// Secret scalar, derived from the first 32 bytes of SHA-512(_b).
+	_s: grp.Scalar,
+	// The last 32 bytes of SHA-512(_b); hashed ahead of the message in sign.
+	_nonce: [NONCE_SIZE]byte,
+	// Public key corresponding to _s.
+	_pub_key: Public_Key,
+	_is_initialized: bool,
+}
+
+// Public_Key is an Ed25519 public key.
+Public_Key :: struct {
+	// WARNING: All of the members are to be treated as internal (ie:
+	// the Public_Key structure is intended to be opaque).
+
+	// Byte-encoded public key A.
+	_b: [PUBLIC_KEY_SIZE]byte,
+	// -A, stored negated.
+	_neg_A: grp.Group_Element,
+	// False iff A is a small-order point.
+	_is_valid: bool,
+	_is_initialized: bool,
+}
+
+// private_key_set_bytes decodes a byte-encoded private key, and returns
+// true iff the operation was successful.
+//
+// The only failure is len(b) != PRIVATE_KEY_SIZE, in which case priv_key
+// is left untouched.  On success the corresponding public key is derived
+// and cached inside priv_key.
+private_key_set_bytes :: proc(priv_key: ^Private_Key, b: []byte) -> bool {
+	if len(b) != PRIVATE_KEY_SIZE {
+		return false
+	}
+
+	// Derive the private key.
+	ctx: sha2.Context_512 = ---
+	h_bytes: [sha2.DIGEST_SIZE_512]byte = ---
+	sha2.init_512(&ctx)
+	sha2.update(&ctx, b)
+	sha2.final(&ctx, h_bytes[:])
+
+	copy(priv_key._b[:], b)
+	copy(priv_key._nonce[:], h_bytes[32:])
+	grp.sc_set_bytes_rfc8032(&priv_key._s, h_bytes[:32])
+
+	// h_bytes holds the material the secret scalar and nonce were just
+	// derived from; wipe the stack copy now that both are stored.
+	mem.zero_explicit(&h_bytes, size_of(h_bytes))
+
+	// Derive the corresponding public key.
+	A: grp.Group_Element = ---
+	grp.ge_scalarmult_basepoint(&A, &priv_key._s)
+	grp.ge_bytes(&A, priv_key._pub_key._b[:])
+	grp.ge_negate(&priv_key._pub_key._neg_A, &A)
+	priv_key._pub_key._is_valid = !grp.ge_is_small_order(&A)
+	priv_key._pub_key._is_initialized = true
+
+	priv_key._is_initialized = true
+
+	return true
+}
+
+// private_key_bytes sets dst to byte-encoding of priv_key.
+private_key_bytes :: proc(priv_key: ^Private_Key, dst: []byte) {
+	if !priv_key._is_initialized {
+		panic("crypto/ed25519: uninitialized private key")
+	}
+	if len(dst) != PRIVATE_KEY_SIZE {
+		panic("crypto/ed25519: invalid destination size")
+	}
+
+	copy(dst, priv_key._b[:])
+}
+
+// private_key_clear clears priv_key to the uninitialized state.
+//
+// The whole structure is zeroed, including the cached public key.
+private_key_clear :: proc "contextless" (priv_key: ^Private_Key) {
+	mem.zero_explicit(priv_key, size_of(Private_Key))
+}
+
+// sign writes the signature by priv_key over msg to sig.
+//
+// Panics if priv_key is uninitialized or len(sig) != SIGNATURE_SIZE.
+//
+// Note: R is written into sig[:32] before msg is hashed for the second
+// time, so msg must not overlap sig.
+sign :: proc(priv_key: ^Private_Key, msg, sig: []byte) {
+	if !priv_key._is_initialized {
+		panic("crypto/ed25519: uninitialized private key")
+	}
+	if len(sig) != SIGNATURE_SIZE {
+		panic("crypto/ed25519: invalid destination size")
+	}
+
+	// 1. Compute the hash of the private key d, H(d) = (h_0, h_1, ..., h_2b-1)
+	// using SHA-512 for Ed25519.  H(d) may be precomputed.
+	//
+	// 2. Using the second half of the digest hdigest2 = hb || ... || h2b-1,
+	// define:
+	//
+	// 2.1 For Ed25519, r = SHA-512(hdigest2 || M); Interpret r as a
+	// 64-octet little-endian integer.
+	//
+	// (H(d) is precomputed here: private_key_set_bytes stores its second
+	// half as `_nonce`.)
+	ctx: sha2.Context_512 = ---
+	digest_bytes: [sha2.DIGEST_SIZE_512]byte = ---
+	sha2.init_512(&ctx)
+	sha2.update(&ctx, priv_key._nonce[:])
+	sha2.update(&ctx, msg)
+	sha2.final(&ctx, digest_bytes[:])
+
+	r: grp.Scalar = ---
+	grp.sc_set_bytes_wide(&r, &digest_bytes)
+
+	// 3. Compute the point [r]G. The octet string R is the encoding of
+	// the point [r]G.
+	R: grp.Group_Element = ---
+	R_bytes := sig[:32]
+	grp.ge_scalarmult_basepoint(&R, &r)
+	grp.ge_bytes(&R, R_bytes)
+
+	// 4. Derive s from H(d) as in the key pair generation algorithm.
+	// Use octet strings R, Q, and M to define:
+	//
+	// 4.1 For Ed25519, digest = SHA-512(R || Q || M).
+	// Interpret digest as a little-endian integer.
+	//
+	// (digest_bytes is reused, which overwrites the digest r was
+	// derived from.)
+	sha2.init_512(&ctx)
+	sha2.update(&ctx, R_bytes)
+	sha2.update(&ctx, priv_key._pub_key._b[:]) // Q in NIST terminology.
+	sha2.update(&ctx, msg)
+	sha2.final(&ctx, digest_bytes[:])
+
+	sc: grp.Scalar = --- // `digest` in NIST terminology.
+	grp.sc_set_bytes_wide(&sc, &digest_bytes)
+
+	// 5. Compute S = (r + digest × s) mod n. The octet string S is the
+	// encoding of the resultant integer.
+	grp.sc_mul(&sc, &sc, &priv_key._s)
+	grp.sc_add(&sc, &sc, &r)
+
+	// 6. Form the signature as the concatenation of the octet strings
+	// R and S.
+	grp.sc_bytes(sig[32:], &sc)
+
+	// r is the per-signature secret; sc now holds S, which is written
+	// out as part of the signature.
+	grp.sc_clear(&r)
+}
+
+// public_key_set_bytes decodes a byte-encoded public key, and returns
+// true iff the operation was successful.
+//
+// pub_key is only modified on success.  A small-order A is still
+// accepted here; it is recorded by leaving `_is_valid` false.
+public_key_set_bytes :: proc "contextless" (pub_key: ^Public_Key, b: []byte) -> bool {
+	if len(b) != PUBLIC_KEY_SIZE {
+		return false
+	}
+
+	A: grp.Group_Element = ---
+	if !grp.ge_set_bytes(&A, b) {
+		return false
+	}
+
+	copy(pub_key._b[:], b)
+	grp.ge_negate(&pub_key._neg_A, &A)
+	pub_key._is_valid = !grp.ge_is_small_order(&A)
+	pub_key._is_initialized = true
+
+	return true
+}
+
+// public_key_set_priv sets pub_key to the public component of priv_key.
+//
+// Panics if priv_key is uninitialized.
+public_key_set_priv :: proc(pub_key: ^Public_Key, priv_key: ^Private_Key) {
+	if !priv_key._is_initialized {
+		// The private key is the object being checked here.
+		panic("crypto/ed25519: uninitialized private key")
+	}
+
+	src := &priv_key._pub_key
+	copy(pub_key._b[:], src._b[:])
+	grp.ge_set(&pub_key._neg_A, &src._neg_A)
+	pub_key._is_valid = src._is_valid
+	pub_key._is_initialized = src._is_initialized
+}
+
+// public_key_bytes sets dst to byte-encoding of pub_key.
+//
+// Panics if pub_key is uninitialized or len(dst) != PUBLIC_KEY_SIZE.
+public_key_bytes :: proc(pub_key: ^Public_Key, dst: []byte) {
+	if !pub_key._is_initialized {
+		panic("crypto/ed25519: uninitialized public key")
+	}
+	if len(dst) != PUBLIC_KEY_SIZE {
+		panic("crypto/ed25519: invalid destination size")
+	}
+
+	copy(dst, pub_key._b[:])
+}
+
+// public_key_equal returns true iff pub_key is equal to other.
+public_key_equal :: proc(pub_key, other: ^Public_Key) -> bool { + if !pub_key._is_initialized || !other._is_initialized { + panic("crypto/ed25519: uninitialized public key") + } + + return crypto.compare_constant_time(pub_key._b[:], other._b[:]) == 1 +} + +// verify returns true iff sig is a valid signature by pub_key over msg. +// +// The optional `allow_small_order_A` parameter will make this +// implementation strictly compatible with FIPS 186-5, at the expense of +// SBS-security. Doing so is NOT recommended, and the disallowed +// public keys all have a known discrete-log. +verify :: proc(pub_key: ^Public_Key, msg, sig: []byte, allow_small_order_A := false) -> bool { + switch { + case !pub_key._is_initialized: + return false + case len(sig) != SIGNATURE_SIZE: + return false + } + + // TLDR: Just use ristretto255. + // + // While there are two "standards" for EdDSA, existing implementations + // diverge (sometimes dramatically). This implementation opts for + // "Algorithm 2" from "Taming the Many EdDSAs", which provides the + // strongest notion of security (SUF-CMA + SBS). + // + // The relevant properties are: + // - Reject non-canonical S. + // - Reject non-canonical A/R. + // - Reject small-order A (Extra non-standard check). + // - Cofactored verification equation. + // + // There are 19 possible non-canonical group element encodings of + // which: + // - 2 are small order + // - 10 are mixed order + // - 7 are not on the curve + // + // While historical implementations have been lax about enforcing + // that A/R are canonically encoded, that behavior is mandated by + // both the RFC and FIPS specification. No valid key generation + // or sign implementation will ever produce non-canonically encoded + // public keys or signatures. + // + // There are 8 small-order group elements, 1 which is in the + // prime-order sub-group, and thus the probability that a properly + // generated A is small-order is cryptographically insignificant. 
+ // + // While both the RFC and FIPS standard allow for either the + // cofactored or non-cofactored equation, it is possible to + // artificially produce signatures that are valid for the former + // but not the latter. This will NEVER occur with a valid sign + // implementation. The choice of the former is to be compatible + // with ABGLSV-Pornin, batch verification, and FROST (among other + // things). + + s_bytes, r_bytes := sig[32:], sig[:32] + + // 1. Reject the signature if S is not in the range [0, L). + s: grp.Scalar = --- + if !grp.sc_set_bytes(&s, s_bytes) { + return false + } + + // 2. Reject the signature if the public key A is one of 8 small + // order points. + // + // As this check is optional and not part of the standard, we allow + // the caller to bypass it if desired. Disabling the check makes + // the scheme NOT SBS-secure. + if !pub_key._is_valid && !allow_small_order_A { + return false + } + + // 3. Reject the signature if A or R are non-canonical. + // + // Note: All initialized public keys are guaranteed to be canonical. + neg_R: grp.Group_Element = --- + if !grp.ge_set_bytes(&neg_R, r_bytes) { + return false + } + grp.ge_negate(&neg_R, &neg_R) + + // 4. Compute the hash SHA512(R||A||M) and reduce it mod L to get a + // scalar h. + ctx: sha2.Context_512 = --- + h_bytes: [sha2.DIGEST_SIZE_512]byte = --- + sha2.init_512(&ctx) + sha2.update(&ctx, r_bytes) + sha2.update(&ctx, pub_key._b[:]) + sha2.update(&ctx, msg) + sha2.final(&ctx, h_bytes[:]) + + h: grp.Scalar = --- + grp.sc_set_bytes_wide(&h, &h_bytes) + + // 5.
Accept if 8(s * G) - 8R - 8(h * A) = 0 + // + // > first compute V = SB − R − hA and then accept if V is one of + // > 8 small order points (or alternatively compute 8V with 3 + // > doublings and check against the neutral element) + V: grp.Group_Element = --- + grp.ge_double_scalarmult_basepoint_vartime(&V, &h, &pub_key._neg_A, &s) + grp.ge_add(&V, &V, &neg_R) + + return grp.ge_is_small_order(&V) +} diff --git a/examples/all/all_main.odin b/examples/all/all_main.odin index cc0005840..f60088823 100644 --- a/examples/all/all_main.odin +++ b/examples/all/all_main.odin @@ -29,6 +29,7 @@ import blake2s "core:crypto/blake2s" import chacha20 "core:crypto/chacha20" import chacha20poly1305 "core:crypto/chacha20poly1305" import crypto_hash "core:crypto/hash" +import ed25519 "core:crypto/ed25519" import hkdf "core:crypto/hkdf" import hmac "core:crypto/hmac" import kmac "core:crypto/kmac" @@ -152,6 +153,7 @@ _ :: blake2b _ :: blake2s _ :: chacha20 _ :: chacha20poly1305 +_ :: ed25519 _ :: hmac _ :: hkdf _ :: kmac From fa1d681e65c3a22c8f4fa45bad42c6de8b028c66 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Fri, 29 Mar 2024 19:05:13 +0900 Subject: [PATCH 14/14] tests/core/crypto: Start adding comprehensive curve25519 tests --- tests/core/crypto/test_core_crypto.odin | 72 +- .../crypto/test_core_crypto_ecc25519.odin | 766 ++++++++++++++++++ tests/core/crypto/test_core_crypto_hash.odin | 3 + tests/core/crypto/test_core_crypto_kdf.odin | 3 + tests/core/crypto/test_core_crypto_mac.odin | 3 + .../test_core_crypto_sha3_variants.odin | 3 + tests/core/crypto/test_crypto_benchmark.odin | 63 ++ 7 files changed, 842 insertions(+), 71 deletions(-) create mode 100644 tests/core/crypto/test_core_crypto_ecc25519.odin diff --git a/tests/core/crypto/test_core_crypto.odin b/tests/core/crypto/test_core_crypto.odin index 742e3cc04..72d8e7c78 100644 --- a/tests/core/crypto/test_core_crypto.odin +++ b/tests/core/crypto/test_core_crypto.odin @@ -20,7 +20,6 @@ import "core:testing" import 
"core:crypto" import "core:crypto/chacha20" import "core:crypto/chacha20poly1305" -import "core:crypto/x25519" import tc "tests:common" @@ -32,10 +31,10 @@ main :: proc() { test_hash(&t) test_mac(&t) test_kdf(&t) // After hash/mac tests because those should pass first. + test_ecc25519(&t) test_chacha20(&t) test_chacha20poly1305(&t) - test_x25519(&t) test_sha3_variants(&t) bench_crypto(&t) @@ -274,75 +273,6 @@ test_chacha20poly1305 :: proc(t: ^testing.T) { tc.expect(t, !ok, "Expected false for decrypt(tag, corrupted_aad, ciphertext)") } -@(test) -test_x25519 :: proc(t: ^testing.T) { - tc.log(t, "Testing X25519") - - // Local copy of this so that the base point doesn't need to be exported. - _BASE_POINT: [32]byte = { - 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - } - - test_vectors := []struct{ - scalar: string, - point: string, - product: string, - } { - // Test vectors from RFC 7748 - { - "a546e36bf0527c9d3b16154b82465edd62144c0ac1fc5a18506a2244ba449ac4", - "e6db6867583030db3594c1a424b15f7c726624ec26b3353b10a903a6d0ab1c4c", - "c3da55379de9c6908e94ea4df28d084f32eccf03491c71f754b4075577a28552", - }, - { - "4b66e9d4d1b4673c5ad22691957d6af5c11b6421e0ea01d42ca4169e7918ba0d", - "e5210f12786811d3f4b7959d0538ae2c31dbe7106fc03c3efc4cd549c715a493", - "95cbde9476e8907d7aade45cb4b873f88b595a68799fa152e6f8f7647aac7957", - }, - } - for v, _ in test_vectors { - scalar, _ := hex.decode(transmute([]byte)(v.scalar), context.temp_allocator) - point, _ := hex.decode(transmute([]byte)(v.point), context.temp_allocator) - - derived_point: [x25519.POINT_SIZE]byte - x25519.scalarmult(derived_point[:], scalar[:], point[:]) - derived_point_str := string(hex.encode(derived_point[:], context.temp_allocator)) - - tc.expect( - t, - derived_point_str == v.product, - fmt.tprintf( - "Expected %s for %s * %s, but got %s instead", - v.product, - v.scalar, - v.point, - derived_point_str, - ), - ) - - // Abuse the test vectors to sanity-check the 
scalar-basepoint multiply. - p1, p2: [x25519.POINT_SIZE]byte - x25519.scalarmult_basepoint(p1[:], scalar[:]) - x25519.scalarmult(p2[:], scalar[:], _BASE_POINT[:]) - p1_str := string(hex.encode(p1[:], context.temp_allocator)) - p2_str := string(hex.encode(p2[:], context.temp_allocator)) - tc.expect( - t, - p1_str == p2_str, - fmt.tprintf( - "Expected %s for %s * basepoint, but got %s instead", - p2_str, - v.scalar, - p1_str, - ), - ) - } - - // TODO/tests: Run the wycheproof test vectors, once I figure out - // how to work with JSON. -} - @(test) test_rand_bytes :: proc(t: ^testing.T) { tc.log(t, "Testing rand_bytes") diff --git a/tests/core/crypto/test_core_crypto_ecc25519.odin b/tests/core/crypto/test_core_crypto_ecc25519.odin new file mode 100644 index 000000000..5ea008f90 --- /dev/null +++ b/tests/core/crypto/test_core_crypto_ecc25519.odin @@ -0,0 +1,766 @@ +package test_core_crypto + +import "base:runtime" +import "core:encoding/hex" +import "core:fmt" +import "core:testing" + +import field "core:crypto/_fiat/field_curve25519" +import "core:crypto/ed25519" +import "core:crypto/ristretto255" +import "core:crypto/x25519" + +import tc "tests:common" + +@(test) +test_ecc25519 :: proc(t: ^testing.T) { + runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD() + + tc.log(t, "Testing curve25519 ECC") + + test_sqrt_ratio_m1(t) + test_ristretto255(t) + + test_ed25519(t) + test_x25519(t) +} + +@(test) +test_sqrt_ratio_m1 :: proc(t: ^testing.T) { + tc.log(t, "Testing sqrt_ratio_m1") + + test_vectors := []struct { + u: string, + v: string, + r: string, + was_square: bool, + } { + { + "0000000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000000", + true, + }, + { + "0000000000000000000000000000000000000000000000000000000000000000", + "0100000000000000000000000000000000000000000000000000000000000000", + 
"0000000000000000000000000000000000000000000000000000000000000000", + true, + }, + { + "0100000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000000", + false, + }, + { + "0200000000000000000000000000000000000000000000000000000000000000", + "0100000000000000000000000000000000000000000000000000000000000000", + "3c5ff1b5d8e4113b871bd052f9e7bcd0582804c266ffb2d4f4203eb07fdb7c54", + false, + }, + { + "0400000000000000000000000000000000000000000000000000000000000000", + "0100000000000000000000000000000000000000000000000000000000000000", + "0200000000000000000000000000000000000000000000000000000000000000", + true, + }, + { + "0100000000000000000000000000000000000000000000000000000000000000", + "0400000000000000000000000000000000000000000000000000000000000000", + "f6ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff3f", + true, + }, + } + for v, _ in test_vectors { + u_bytes, _ := hex.decode(transmute([]byte)(v.u), context.temp_allocator) + v_bytes, _ := hex.decode(transmute([]byte)(v.v), context.temp_allocator) + r_bytes, _ := hex.decode(transmute([]byte)(v.r), context.temp_allocator) + + u_ := transmute(^[32]byte)(raw_data(u_bytes)) + v_ := transmute(^[32]byte)(raw_data(v_bytes)) + r_ := transmute(^[32]byte)(raw_data(r_bytes)) + + u, vee, r: field.Tight_Field_Element + field.fe_from_bytes(&u, u_) + field.fe_from_bytes(&vee, v_) + was_square := field.fe_carry_sqrt_ratio_m1( + &r, + field.fe_relax_cast(&u), + field.fe_relax_cast(&vee), + ) + + tc.expect( + t, + (was_square == 1) == v.was_square && field.fe_equal_bytes(&r, r_) == 1, + fmt.tprintf( + "Expected (%v, %s) for SQRT_RATIO_M1(%s, %s), got %s", + v.was_square, + v.r, + v.u, + v.v, + fe_str(&r), + ), + ) + } +} + +@(test) +test_ristretto255 :: proc(t: ^testing.T) { + tc.log(t, "Testing ristretto255") + + ge_gen: ristretto255.Group_Element + 
ristretto255.ge_generator(&ge_gen) + + // Invalid encodings. + bad_encodings := []string { + // Non-canonical field encodings. + "00ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f", + "f3ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f", + "edffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f", + + // Negative field elements. + "0100000000000000000000000000000000000000000000000000000000000000", + "01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f", + "ed57ffd8c914fb201471d1c3d245ce3c746fcbe63a3679d51b6a516ebebe0e20", + "c34c4e1826e5d403b78e246e88aa051c36ccf0aafebffe137d148a2bf9104562", + "c940e5a4404157cfb1628b108db051a8d439e1a421394ec4ebccb9ec92a8ac78", + "47cfc5497c53dc8e61c91d17fd626ffb1c49e2bca94eed052281b510b1117a24", + "f1c6165d33367351b0da8f6e4511010c68174a03b6581212c71c0e1d026c3c72", + "87260f7a2f12495118360f02c26a470f450dadf34a413d21042b43b9d93e1309", + + // Non-square x^2. + "26948d35ca62e643e26a83177332e6b6afeb9d08e4268b650f1f5bbd8d81d371", + "4eac077a713c57b4f4397629a4145982c661f48044dd3f96427d40b147d9742f", + "de6a7b00deadc788eb6b6c8d20c0ae96c2f2019078fa604fee5b87d6e989ad7b", + "bcab477be20861e01e4a0e295284146a510150d9817763caf1a6f4b422d67042", + "2a292df7e32cababbd9de088d1d1abec9fc0440f637ed2fba145094dc14bea08", + "f4a9e534fc0d216c44b218fa0c42d99635a0127ee2e53c712f70609649fdff22", + "8268436f8c4126196cf64b3c7ddbda90746a378625f9813dd9b8457077256731", + "2810e5cbc2cc4d4eece54f61c6f69758e289aa7ab440b3cbeaa21995c2f4232b", + + // Negative x * y value. 
+ "3eb858e78f5a7254d8c9731174a94f76755fd3941c0ac93735c07ba14579630e", + "a45fdc55c76448c049a1ab33f17023edfb2be3581e9c7aade8a6125215e04220", + "d483fe813c6ba647ebbfd3ec41adca1c6130c2beeee9d9bf065c8d151c5f396e", + "8a2e1d30050198c65a54483123960ccc38aef6848e1ec8f5f780e8523769ba32", + "32888462f8b486c68ad7dd9610be5192bbeaf3b443951ac1a8118419d9fa097b", + "227142501b9d4355ccba290404bde41575b037693cef1f438c47f8fbf35d1165", + "5c37cc491da847cfeb9281d407efc41e15144c876e0170b499a96a22ed31e01e", + "445425117cb8c90edcbc7c1cc0e74f747f2c1efa5630a967c64f287792a48a4b", + + // s = -1, which causes y = 0. + "ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f", + } + for x, _ in bad_encodings { + b, _ := hex.decode(transmute([]byte)(x), context.temp_allocator) + + ge: ristretto255.Group_Element + ok := ristretto255.ge_set_bytes(&ge, b) + tc.expect(t, !ok, fmt.tprintf("Expected false for %s", x)) + } + + generator_multiples := []string { + "0000000000000000000000000000000000000000000000000000000000000000", + "e2f2ae0a6abc4e71a884a961c500515f58e30b6aa582dd8db6a65945e08d2d76", + "6a493210f7499cd17fecb510ae0cea23a110e8d5b901f8acadd3095c73a3b919", + "94741f5d5d52755ece4f23f044ee27d5d1ea1e2bd196b462166b16152a9d0259", + "da80862773358b466ffadfe0b3293ab3d9fd53c5ea6c955358f568322daf6a57", + "e882b131016b52c1d3337080187cf768423efccbb517bb495ab812c4160ff44e", + "f64746d3c92b13050ed8d80236a7f0007c3b3f962f5ba793d19a601ebb1df403", + "44f53520926ec81fbd5a387845beb7df85a96a24ece18738bdcfa6a7822a176d", + "903293d8f2287ebe10e2374dc1a53e0bc887e592699f02d077d5263cdd55601c", + "02622ace8f7303a31cafc63f8fc48fdc16e1c8c8d234b2f0d6685282a9076031", + "20706fd788b2720a1ed2a5dad4952b01f413bcf0e7564de8cdc816689e2db95f", + "bce83f8ba5dd2fa572864c24ba1810f9522bc6004afe95877ac73241cafdab42", + "e4549ee16b9aa03099ca208c67adafcafa4c3f3e4e5303de6026e3ca8ff84460", + "aa52e000df2e16f55fb1032fc33bc42742dad6bd5a8fc0be0167436c5948501f", + "46376b80f409b29dc2b5f6f0c52591990896e5716f41477cd30085ab7f10301e", + 
"e0c418f7c8d9c4cdd7395b93ea124f3ad99021bb681dfc3302a9d99a2e53e64e", + } + ges: [16]ristretto255.Group_Element + for x, i in generator_multiples { + b, _ := hex.decode(transmute([]byte)(x), context.temp_allocator) + + ge := &ges[i] + ok := ristretto255.ge_set_bytes(ge, b) + tc.expect(t, ok, fmt.tprintf("Expected true for %s", x)) + + x_check := ge_str(ge) + + tc.expect( + t, + x == x_check, + fmt.tprintf( + "Expected %s (round-trip) but got %s instead", + x, + x_check, + ), + ) + + if i == 1 { + tc.expect( + t, + ristretto255.ge_equal(ge, &ge_gen) == 1, + "Expected element 1 to be the generator", + ) + } + } + + // Addition/Multiplication. + for _, i in ges { + sc: ristretto255.Scalar + ristretto255.sc_set_u64(&sc, u64(i)) + + ge_check: ristretto255.Group_Element + + ristretto255.ge_scalarmult_generator(&ge_check, &sc) + x_check := ge_str(&ge_check) + tc.expect( + t, + x_check == generator_multiples[i], + fmt.tprintf( + "Expected %s for G * %d (specialized), got %s", + generator_multiples[i], + i, + x_check, + ), + ) + + ristretto255.ge_scalarmult(&ge_check, &ges[1], &sc) + x_check = ge_str(&ge_check) + tc.expect( + t, + x_check == generator_multiples[i], + fmt.tprintf( + "Expected %s for G * %d (generic), got %s (slow compare)", + generator_multiples[i], + i, + x_check, + ), + ) + + ristretto255.ge_scalarmult_vartime(&ge_check, &ges[1], &sc) + x_check = ge_str(&ge_check) + tc.expect( + t, + x_check == generator_multiples[i], + fmt.tprintf( + "Expected %s for G * %d (generic vartime), got %s (slow compare)", + generator_multiples[i], + i, + x_check, + ), + ) + + switch i { + case 0: + case: + ge_prev := &ges[i-1] + ristretto255.ge_add(&ge_check, ge_prev, &ge_gen) + + x_check = ge_str(&ge_check) + tc.expect( + t, + x_check == generator_multiples[i], + fmt.tprintf( + "Expected %s for ges[%d] + ges[%d], got %s (slow compare)", + generator_multiples[i], + i-1, + 1, + x_check, + ), + ) + + tc.expect( + t, + ristretto255.ge_equal(&ges[i], &ge_check) == 1, + fmt.tprintf( + 
"Expected %s for ges[%d] + ges[%d], got %s (fast compare)", + generator_multiples[i], + i-1, + 1, + x_check, + ), + ) + } + } + + wide_test_vectors := []struct { + input: string, + output: string, + } { + { + "5d1be09e3d0c82fc538112490e35701979d99e06ca3e2b5b54bffe8b4dc772c14d98b696a1bbfb5ca32c436cc61c16563790306c79eaca7705668b47dffe5bb6", + "3066f82a1a747d45120d1740f14358531a8f04bbffe6a819f86dfe50f44a0a46", + }, + { + "f116b34b8f17ceb56e8732a60d913dd10cce47a6d53bee9204be8b44f6678b270102a56902e2488c46120e9276cfe54638286b9e4b3cdb470b542d46c2068d38", + "f26e5b6f7d362d2d2a94c5d0e7602cb4773c95a2e5c31a64f133189fa76ed61b", + }, + { + "8422e1bbdaab52938b81fd602effb6f89110e1e57208ad12d9ad767e2e25510c27140775f9337088b982d83d7fcf0b2fa1edffe51952cbe7365e95c86eaf325c", + "006ccd2a9e6867e6a2c5cea83d3302cc9de128dd2a9a57dd8ee7b9d7ffe02826", + }, + { + "ac22415129b61427bf464e17baee8db65940c233b98afce8d17c57beeb7876c2150d15af1cb1fb824bbd14955f2b57d08d388aab431a391cfc33d5bafb5dbbaf", + "f8f0c87cf237953c5890aec3998169005dae3eca1fbb04548c635953c817f92a", + }, + { + "165d697a1ef3d5cf3c38565beefcf88c0f282b8e7dbd28544c483432f1cec7675debea8ebb4e5fe7d6f6e5db15f15587ac4d4d4a1de7191e0c1ca6664abcc413", + "ae81e7dedf20a497e10c304a765c1767a42d6e06029758d2d7e8ef7cc4c41179", + }, + { + "a836e6c9a9ca9f1e8d486273ad56a78c70cf18f0ce10abb1c7172ddd605d7fd2979854f47ae1ccf204a33102095b4200e5befc0465accc263175485f0e17ea5c", + "e2705652ff9f5e44d3e841bf1c251cf7dddb77d140870d1ab2ed64f1a9ce8628", + }, + { + "2cdc11eaeb95daf01189417cdddbf95952993aa9cb9c640eb5058d09702c74622c9965a697a3b345ec24ee56335b556e677b30e6f90ac77d781064f866a3c982", + "80bd07262511cdde4863f8a7434cef696750681cb9510eea557088f76d9e5065", + }, + // These all produce the same output. 
+ { + "edffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff1200000000000000000000000000000000000000000000000000000000000000", + "304282791023b73128d277bdcb5c7746ef2eac08dde9f2983379cb8e5ef0517f", + }, + { + "edffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + "304282791023b73128d277bdcb5c7746ef2eac08dde9f2983379cb8e5ef0517f", + }, + { + "0000000000000000000000000000000000000000000000000000000000000080ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f", + "304282791023b73128d277bdcb5c7746ef2eac08dde9f2983379cb8e5ef0517f", + }, + { + "00000000000000000000000000000000000000000000000000000000000000001200000000000000000000000000000000000000000000000000000000000080", + "304282791023b73128d277bdcb5c7746ef2eac08dde9f2983379cb8e5ef0517f", + }, + } + for v, _ in wide_test_vectors { + in_bytes, _ := hex.decode(transmute([]byte)(v.input), context.temp_allocator) + + ge: ristretto255.Group_Element + ristretto255.ge_set_wide_bytes(&ge, in_bytes) + + ge_check := ge_str(&ge) + tc.expect( + t, + ge_check == v.output, + fmt.tprintf( + "Expected %s for %s, got %s", + v.output, + ge_check, + ), + ) + } +} + +@(test) +test_ed25519 :: proc(t: ^testing.T) { + tc.log(t, "Testing ed25519") + + test_vectors_rfc := []struct { + priv_key: string, + pub_key: string, + msg: string, + sig: string, + } { + // Test vectors from RFC 8032 + { + "9d61b19deffd5a60ba844af492ec2cc44449c5697b326919703bac031cae7f60", + "d75a980182b10ab7d54bfed3c964073a0ee172f3daa62325af021a68f707511a", + "", + "e5564300c360ac729086e2cc806e828a84877f1eb8e5d974d873e065224901555fb8821590a33bacc61e39701cf9b46bd25bf5f0595bbe24655141438e7a100b", + }, + { + "4ccd089b28ff96da9db6c346ec114e0f5b8a319f35aba624da8cf6ed4fb8a6fb", + "3d4017c3e843895a92b70aa74d1b7ebc9c982ccf2ec4968cc0cd55f12af4660c", + "72", + 
"92a009a9f0d4cab8720e820b5f642540a2b27b5416503f8fb3762223ebdb69da085ac1e43e15996e458f3613d0f11d8c387b2eaeb4302aeeb00d291612bb0c00", + }, + { + "c5aa8df43f9f837bedb7442f31dcb7b166d38535076f094b85ce3a2e0b4458f7", + "fc51cd8e6218a1a38da47ed00230f0580816ed13ba3303ac5deb911548908025", + "af82", + "6291d657deec24024827e69c3abe01a30ce548a284743a445e3680d7db5ac3ac18ff9b538d16f290ae67f760984dc6594a7c15e9716ed28dc027beceea1ec40a", + }, + // TEST 1024 omitted for brevity, because all that does is add more to SHA-512 + { + "833fe62409237b9d62ec77587520911e9a759cec1d19755b7da901b96dca3d42", + "ec172b93ad5e563bf4932c70e1245034c35467ef2efd4d64ebf819683467e2bf", + "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f", + "dc2a4459e7369633a52b1bf277839a00201009a3efbf3ecb69bea2186c26b58909351fc9ac90b3ecfdfbc7c66431e0303dca179c138ac17ad9bef1177331a704", + }, + } + for v, _ in test_vectors_rfc { + priv_bytes, _ := hex.decode(transmute([]byte)(v.priv_key), context.temp_allocator) + pub_bytes, _ := hex.decode(transmute([]byte)(v.pub_key), context.temp_allocator) + msg_bytes, _ := hex.decode(transmute([]byte)(v.msg), context.temp_allocator) + sig_bytes, _ := hex.decode(transmute([]byte)(v.sig), context.temp_allocator) + + priv_key: ed25519.Private_Key + ok := ed25519.private_key_set_bytes(&priv_key, priv_bytes) + tc.expect( + t, + ok, + fmt.tprintf( + "Expected %s to be a valid private key", + v.priv_key, + ), + ) + + key_bytes: [32]byte + ed25519.private_key_bytes(&priv_key, key_bytes[:]) + tc.expect( + t, + ok, + fmt.tprintf( + "Expected private key %s round-trip, got %s", + v.priv_key, + string(hex.encode(key_bytes[:], context.temp_allocator)), + ), + ) + + pub_key: ed25519.Public_Key + ok = ed25519.public_key_set_bytes(&pub_key, pub_bytes) + tc.expect( + t, + ok, + fmt.tprintf( + "Expected %s to be a valid public key (priv->pub: %s)", + v.pub_key, + string(hex.encode(priv_key._pub_key._b[:], 
context.temp_allocator)), + ), + ) + + ed25519.public_key_bytes(&pub_key, key_bytes[:]) + tc.expect( + t, + ok, + fmt.tprintf( + "Expected public key %s round-trip, got %s", + v.pub_key, + string(hex.encode(key_bytes[:], context.temp_allocator)), + ), + ) + + sig: [ed25519.SIGNATURE_SIZE]byte + ed25519.sign(&priv_key, msg_bytes, sig[:]) + x := string(hex.encode(sig[:], context.temp_allocator)) + tc.expect( + t, + x == v.sig, + fmt.tprintf( + "Expected %s for sign(%s, %s), got %s", + v.sig, + v.priv_key, + v.msg, + x, + ), + ) + + ok = ed25519.verify(&pub_key, msg_bytes, sig_bytes) + tc.expect( + t, + ok, + fmt.tprintf( + "Expected true for verify(%s, %s, %s)", + v.pub_key, + v.msg, + v.sig, + ), + ) + + ok = ed25519.verify(&priv_key._pub_key, msg_bytes, sig_bytes) + tc.expect( + t, + ok, + fmt.tprintf( + "Expected true for verify(pub(%s), %s %s)", + v.priv_key, + v.msg, + v.sig, + ), + ) + + // Corrupt the message and make sure verification fails. + switch len(msg_bytes) { + case 0: + tmp_msg := []byte{69} + msg_bytes = tmp_msg[:] + case: + msg_bytes[0] = msg_bytes[0] ~ 69 + } + ok = ed25519.verify(&pub_key, msg_bytes, sig_bytes) + tc.expect( + t, + ok == false, + fmt.tprintf( + "Expected false for verify(%s, %s (corrupted), %s)", + v.pub_key, + v.msg, + v.sig, + ), + ) + } + + // Test cases from "Taming the many EdDSAs", which aim to exercise + // all of the ed25519 edge cases/implementation differences. + // + // - https://eprint.iacr.org/2020/1244 + // - https://github.com/novifinancial/ed25519-speccheck + test_vectors_speccheck := []struct { + pub_key: string, + msg: string, + sig: string, + pub_key_ok: bool, + sig_ok: bool, + sig_ok_relaxed: bool, // Ok if the small-order A check is relaxed. 
+ } { + // S = 0, small-order A, small-order R + { + "c7176a703d4dd84fba3c0b760d10670f2a2053fa2c39ccc64ec7fd7792ac03fa", + "8c93255d71dcab10e8f379c26200f3c7bd5f09d9bc3068d3ef4edeb4853022b6", + "c7176a703d4dd84fba3c0b760d10670f2a2053fa2c39ccc64ec7fd7792ac037a0000000000000000000000000000000000000000000000000000000000000000", + true, + false, + true, + }, + // 0 < S < L, small-order A, mixed-order R + { + "c7176a703d4dd84fba3c0b760d10670f2a2053fa2c39ccc64ec7fd7792ac03fa", + "9bd9f44f4dcc75bd531b56b2cd280b0bb38fc1cd6d1230e14861d861de092e79", + "f7badec5b8abeaf699583992219b7b223f1df3fbbea919844e3f7c554a43dd43a5bb704786be79fc476f91d3f3f89b03984d8068dcf1bb7dfc6637b45450ac04", + true, + false, + true, + }, + // 0 < S < L, mixed-order A, small-order R + { + "f7badec5b8abeaf699583992219b7b223f1df3fbbea919844e3f7c554a43dd43", + "aebf3f2601a0c8c5d39cc7d8911642f740b78168218da8471772b35f9d35b9ab", + "c7176a703d4dd84fba3c0b760d10670f2a2053fa2c39ccc64ec7fd7792ac03fa8c4bd45aecaca5b24fb97bc10ac27ac8751a7dfe1baff8b953ec9f5833ca260e", + true, + true, + true, + }, + // 0 < S < L, mixed-order A, mixed-order R + { + "cdb267ce40c5cd45306fa5d2f29731459387dbf9eb933b7bd5aed9a765b88d4d", + "9bd9f44f4dcc75bd531b56b2cd280b0bb38fc1cd6d1230e14861d861de092e79", + "9046a64750444938de19f227bb80485e92b83fdb4b6506c160484c016cc1852f87909e14428a7a1d62e9f22f3d3ad7802db02eb2e688b6c52fcd6648a98bd009", + true, + true, + true, + }, + // 0 < S < L, mixed-order A, mixed-order R + { + "cdb267ce40c5cd45306fa5d2f29731459387dbf9eb933b7bd5aed9a765b88d4d", + "e47d62c63f830dc7a6851a0b1f33ae4bb2f507fb6cffec4011eaccd55b53f56c", + "160a1cb0dc9c0258cd0a7d23e94d8fa878bcb1925f2c64246b2dee1796bed5125ec6bc982a269b723e0668e540911a9a6a58921d6925e434ab10aa7940551a09", + true, + true, // cofactored-only + true, + }, + // 0 < S < L, mixed-order A, L-order R + { + "cdb267ce40c5cd45306fa5d2f29731459387dbf9eb933b7bd5aed9a765b88d4d", + "e47d62c63f830dc7a6851a0b1f33ae4bb2f507fb6cffec4011eaccd55b53f56c", + 
"21122a84e0b5fca4052f5b1235c80a537878b38f3142356b2c2384ebad4668b7e40bc836dac0f71076f9abe3a53f9c03c1ceeeddb658d0030494ace586687405", + true, + true, // cofactored only, (fail if 8h is pre-reduced) + true, + }, + // S > L, L-order A, L-order R + { + "442aad9f089ad9e14647b1ef9099a1ff4798d78589e66f28eca69c11f582a623", + "85e241a07d148b41e47d62c63f830dc7a6851a0b1f33ae4bb2f507fb6cffec40", + "e96f66be976d82e60150baecff9906684aebb1ef181f67a7189ac78ea23b6c0e547f7690a0e2ddcd04d87dbc3490dc19b3b3052f7ff0538cb68afb369ba3a514", + true, + false, + false, + }, + // S >> L, L-order A, L-order R + { + "442aad9f089ad9e14647b1ef9099a1ff4798d78589e66f28eca69c11f582a623", + "85e241a07d148b41e47d62c63f830dc7a6851a0b1f33ae4bb2f507fb6cffec40", + "8ce5b96c8f26d0ab6c47958c9e68b937104cd36e13c33566acd2fe8d38aa19427e71f98a473474f2f13f06f97c20d58cc3f54b8bd0d272f42b695dd7e89a8c22", + true, + false, + false, + }, + // 0 < S < L, mixed-order A, small-order R (non-canonical R, reduced for hash) + { + "f7badec5b8abeaf699583992219b7b223f1df3fbbea919844e3f7c554a43dd43", + "9bedc267423725d473888631ebf45988bad3db83851ee85c85e241a07d148b41", + "ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff03be9678ac102edcd92b0210bb34d7428d12ffc5df5f37e359941266a4e35f0f", + true, + false, + false, + }, + // 0 < S < L, mixed-order A, small-order R (non-canonical R, not reduced for hash) + { + "f7badec5b8abeaf699583992219b7b223f1df3fbbea919844e3f7c554a43dd43", + "9bedc267423725d473888631ebf45988bad3db83851ee85c85e241a07d148b41", + "ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffca8c5b64cd208982aa38d4936621a4775aa233aa0505711d8fdcfdaa943d4908", + true, + false, + false, + }, + // 0 < S < L, small-order A, mixed-order R (non-canonical A, reduced for hash) + { + "ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + "e96b7021eb39c1a163b6da4e3093dcd3f21387da4cc4572be588fafae23c155b", + 
"a9d55260f765261eb9b84e106f665e00b867287a761990d7135963ee0a7d59dca5bb704786be79fc476f91d3f3f89b03984d8068dcf1bb7dfc6637b45450ac04", + false, + false, + false, + }, + // 0 < S < L, small-order A, mixed-order R (non-canonical A, not reduced for hash) + { + "ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + "39a591f5321bbe07fd5a23dc2f39d025d74526615746727ceefd6e82ae65c06f", + "a9d55260f765261eb9b84e106f665e00b867287a761990d7135963ee0a7d59dca5bb704786be79fc476f91d3f3f89b03984d8068dcf1bb7dfc6637b45450ac04", + false, + false, + false, + }, + } + for v, i in test_vectors_speccheck { + pub_bytes, _ := hex.decode(transmute([]byte)(v.pub_key), context.temp_allocator) + msg_bytes, _ := hex.decode(transmute([]byte)(v.msg), context.temp_allocator) + sig_bytes, _ := hex.decode(transmute([]byte)(v.sig), context.temp_allocator) + + pub_key: ed25519.Public_Key + ok := ed25519.public_key_set_bytes(&pub_key, pub_bytes) + tc.expect( + t, + ok == v.pub_key_ok, + fmt.tprintf( + "speccheck/%d: Expected %s to be a (in)valid public key, got %v", + i, + v.pub_key, + ok, + ), + ) + + // If A is rejected for being non-canonical, skip signature check. + if !v.pub_key_ok { + continue + } + + ok = ed25519.verify(&pub_key, msg_bytes, sig_bytes) + tc.expect( + t, + ok == v.sig_ok, + fmt.tprintf( + "speccheck/%d Expected %v for verify(%s, %s, %s)", + i, + v.sig_ok, + v.pub_key, + v.msg, + v.sig, + ), + ) + + // If the signature is accepted, skip the relaxed signature check. + if v.sig_ok { + continue + } + + ok = ed25519.verify(&pub_key, msg_bytes, sig_bytes, true) + tc.expect( + t, + ok == v.sig_ok_relaxed, + fmt.tprintf( + "speccheck/%d Expected %v for verify(%s, %s, %s, true)", + i, + v.sig_ok_relaxed, + v.pub_key, + v.msg, + v.sig, + ), + ) + } +} + +@(test) +test_x25519 :: proc(t: ^testing.T) { + tc.log(t, "Testing X25519") + + // Local copy of this so that the base point doesn't need to be exported. 
+ _BASE_POINT: [32]byte = { + 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + } + + test_vectors := []struct { + scalar: string, + point: string, + product: string, + } { + // Test vectors from RFC 7748 + { + "a546e36bf0527c9d3b16154b82465edd62144c0ac1fc5a18506a2244ba449ac4", + "e6db6867583030db3594c1a424b15f7c726624ec26b3353b10a903a6d0ab1c4c", + "c3da55379de9c6908e94ea4df28d084f32eccf03491c71f754b4075577a28552", + }, + { + "4b66e9d4d1b4673c5ad22691957d6af5c11b6421e0ea01d42ca4169e7918ba0d", + "e5210f12786811d3f4b7959d0538ae2c31dbe7106fc03c3efc4cd549c715a493", + "95cbde9476e8907d7aade45cb4b873f88b595a68799fa152e6f8f7647aac7957", + }, + } + for v, _ in test_vectors { + scalar, _ := hex.decode(transmute([]byte)(v.scalar), context.temp_allocator) + point, _ := hex.decode(transmute([]byte)(v.point), context.temp_allocator) + + derived_point: [x25519.POINT_SIZE]byte + x25519.scalarmult(derived_point[:], scalar[:], point[:]) + derived_point_str := string(hex.encode(derived_point[:], context.temp_allocator)) + + tc.expect( + t, + derived_point_str == v.product, + fmt.tprintf( + "Expected %s for %s * %s, but got %s instead", + v.product, + v.scalar, + v.point, + derived_point_str, + ), + ) + + // Abuse the test vectors to sanity-check the scalar-basepoint multiply. 
+ p1, p2: [x25519.POINT_SIZE]byte + x25519.scalarmult_basepoint(p1[:], scalar[:]) + x25519.scalarmult(p2[:], scalar[:], _BASE_POINT[:]) + p1_str := string(hex.encode(p1[:], context.temp_allocator)) + p2_str := string(hex.encode(p2[:], context.temp_allocator)) + tc.expect( + t, + p1_str == p2_str, + fmt.tprintf( + "Expected %s for %s * basepoint, but got %s instead", + p2_str, + v.scalar, + p1_str, + ), + ) + } +} + +@(private) +ge_str :: proc(ge: ^ristretto255.Group_Element) -> string { + b: [ristretto255.ELEMENT_SIZE]byte + ristretto255.ge_bytes(ge, b[:]) + return string(hex.encode(b[:], context.temp_allocator)) +} + +@(private) +fe_str :: proc(fe: ^field.Tight_Field_Element) -> string { + b: [32]byte + field.fe_to_bytes(&b, fe) + return string(hex.encode(b[:], context.temp_allocator)) +} diff --git a/tests/core/crypto/test_core_crypto_hash.odin b/tests/core/crypto/test_core_crypto_hash.odin index bd40a9b23..c4e8e8dd7 100644 --- a/tests/core/crypto/test_core_crypto_hash.odin +++ b/tests/core/crypto/test_core_crypto_hash.odin @@ -1,5 +1,6 @@ package test_core_crypto +import "base:runtime" import "core:bytes" import "core:encoding/hex" import "core:fmt" @@ -12,6 +13,8 @@ import tc "tests:common" @(test) test_hash :: proc(t: ^testing.T) { + runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD() + tc.log(t, "Testing Hashes") // TODO: diff --git a/tests/core/crypto/test_core_crypto_kdf.odin b/tests/core/crypto/test_core_crypto_kdf.odin index d9e9a8501..73177d8be 100644 --- a/tests/core/crypto/test_core_crypto_kdf.odin +++ b/tests/core/crypto/test_core_crypto_kdf.odin @@ -1,5 +1,6 @@ package test_core_crypto +import "base:runtime" import "core:encoding/hex" import "core:fmt" import "core:testing" @@ -12,6 +13,8 @@ import tc "tests:common" @(test) test_kdf :: proc(t: ^testing.T) { + runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD() + tc.log(t, "Testing KDFs") test_hkdf(t) diff --git a/tests/core/crypto/test_core_crypto_mac.odin b/tests/core/crypto/test_core_crypto_mac.odin index 
f0e6fa1bf..f2eeacb19 100644 --- a/tests/core/crypto/test_core_crypto_mac.odin +++ b/tests/core/crypto/test_core_crypto_mac.odin @@ -1,5 +1,6 @@ package test_core_crypto +import "base:runtime" import "core:encoding/hex" import "core:fmt" import "core:mem" @@ -14,6 +15,8 @@ import tc "tests:common" @(test) test_mac :: proc(t: ^testing.T) { + runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD() + tc.log(t, "Testing MACs") test_hmac(t) diff --git a/tests/core/crypto/test_core_crypto_sha3_variants.odin b/tests/core/crypto/test_core_crypto_sha3_variants.odin index ec2d24331..8e44996bc 100644 --- a/tests/core/crypto/test_core_crypto_sha3_variants.odin +++ b/tests/core/crypto/test_core_crypto_sha3_variants.odin @@ -1,5 +1,6 @@ package test_core_crypto +import "base:runtime" import "core:encoding/hex" import "core:fmt" import "core:testing" @@ -12,6 +13,8 @@ import tc "tests:common" @(test) test_sha3_variants :: proc(t: ^testing.T) { + runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD() + tc.log(t, "Testing SHA3 derived functions") test_shake(t) diff --git a/tests/core/crypto/test_crypto_benchmark.odin b/tests/core/crypto/test_crypto_benchmark.odin index 494913b6b..cc69cb16d 100644 --- a/tests/core/crypto/test_crypto_benchmark.odin +++ b/tests/core/crypto/test_crypto_benchmark.odin @@ -1,5 +1,6 @@ package test_core_crypto +import "base:runtime" import "core:encoding/hex" import "core:fmt" import "core:testing" @@ -7,6 +8,7 @@ import "core:time" import "core:crypto/chacha20" import "core:crypto/chacha20poly1305" +import "core:crypto/ed25519" import "core:crypto/poly1305" import "core:crypto/x25519" @@ -16,11 +18,14 @@ import tc "tests:common" @(test) bench_crypto :: proc(t: ^testing.T) { + runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD() + fmt.println("Starting benchmarks:") bench_chacha20(t) bench_poly1305(t) bench_chacha20poly1305(t) + bench_ed25519(t) bench_x25519(t) } @@ -216,6 +221,64 @@ bench_chacha20poly1305 :: proc(t: ^testing.T) { benchmark_print(name, options) } +bench_ed25519 :: 
proc(t: ^testing.T) { + iters :: 10000 + + priv_str := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe" + priv_bytes, _ := hex.decode(transmute([]byte)(priv_str), context.temp_allocator) + priv_key: ed25519.Private_Key + start := time.now() + for i := 0; i < iters; i = i + 1 { + ok := ed25519.private_key_set_bytes(&priv_key, priv_bytes) + assert(ok, "private key should deserialize") + } + elapsed := time.since(start) + tc.log( + t, + fmt.tprintf( + "ed25519.private_key_set_bytes: ~%f us/op", + time.duration_microseconds(elapsed) / iters, + ), + ) + + pub_bytes := priv_key._pub_key._b[:] // "I know what I am doing" + pub_key: ed25519.Public_Key + start = time.now() + for i := 0; i < iters; i = i + 1 { + ok := ed25519.public_key_set_bytes(&pub_key, pub_bytes[:]) + assert(ok, "public key should deserialize") + } + elapsed = time.since(start) + tc.log( + t, + fmt.tprintf( + "ed25519.public_key_set_bytes: ~%f us/op", + time.duration_microseconds(elapsed) / iters, + ), + ) + + msg := "Got a job for you, 621." + sig_bytes: [ed25519.SIGNATURE_SIZE]byte + msg_bytes := transmute([]byte)(msg) + start = time.now() + for i := 0; i < iters; i = i + 1 { + ed25519.sign(&priv_key, msg_bytes, sig_bytes[:]) + } + elapsed = time.since(start) + tc.log(t, fmt.tprintf("ed25519.sign: ~%f us/op", time.duration_microseconds(elapsed) / iters)) + + start = time.now() + for i := 0; i < iters; i = i + 1 { + ok := ed25519.verify(&pub_key, msg_bytes, sig_bytes[:]) + assert(ok, "signature should validate") + } + elapsed = time.since(start) + tc.log( + t, + fmt.tprintf("ed25519.verify: ~%f us/op", time.duration_microseconds(elapsed) / iters), + ) +} + bench_x25519 :: proc(t: ^testing.T) { point_str := "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef" scalar_str := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe"