From a43a5b053c1d1e931eeb56d65e6a40f634a0b94f Mon Sep 17 00:00:00 2001
From: Yawning Angel <yawning@schwanenlied.me>
Date: Sun, 24 Mar 2024 22:52:21 +0900
Subject: [PATCH 01/14] core/crypto: Add more documentation about assumptions
 (NFC)

---
 core/crypto/README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/core/crypto/README.md b/core/crypto/README.md
index 1e4e41fb8..303b1f625 100644
--- a/core/crypto/README.md
+++ b/core/crypto/README.md
@@ -14,6 +14,14 @@ constant-time byte comparison.
 - Best-effort is make to mitigate timing side-channels on reasonable
   architectures.  Architectures that are known to be unreasonable include
   but are not limited to i386, i486, and WebAssembly.
+- Implementations assume a 64-bit architecture (64-bit integer arithmetic
+  is fast, and includes add-with-carry, sub-with-borrow, and full-result
+  multiply).
+- Hardware sidechannels are explicitly out of scope for this package.
+  Notable examples include but are not limited to:
+  - Power/RF side-channels etc.
+  - Fault injection attacks etc.
+  - Hardware vulnerabilities ("apply mitigations or buy a new CPU").
 - The packages attempt to santize sensitive data, however this is, and
   will remain a "best-effort" implementation decision.  As Thomas Pornin
   puts it "In general, such memory cleansing is a fool's quest."

From b155fdf8c96d6269fe0f56a3fda76a3df1e5a7c8 Mon Sep 17 00:00:00 2001
From: Yawning Angel <yawning@schwanenlied.me>
Date: Sun, 17 Mar 2024 10:29:59 +0900
Subject: [PATCH 02/14] core/crypto: Add `has_rand_bytes`

This allows runtime detection as to if `rand_bytes` is supported or not,
and lets us enable the test-case on all of the supported targets.
---
 core/crypto/crypto.odin                 | 10 ++++++++++
 core/crypto/rand_bsd.odin               |  4 ++++
 core/crypto/rand_darwin.odin            |  4 ++++
 core/crypto/rand_generic.odin           |  4 ++++
 core/crypto/rand_js.odin                |  4 ++++
 core/crypto/rand_linux.odin             |  4 ++++
 core/crypto/rand_windows.odin           |  4 ++++
 tests/core/crypto/test_core_crypto.odin |  2 +-
 8 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/core/crypto/crypto.odin b/core/crypto/crypto.odin
index 6cdcacb9c..05f25111a 100644
--- a/core/crypto/crypto.odin
+++ b/core/crypto/crypto.odin
@@ -1,3 +1,7 @@
+/*
+package crypto implements a selection of cryptography algorithms and useful
+helper routines.
+*/
 package crypto
 
 import "core:mem"
@@ -51,3 +55,9 @@ rand_bytes :: proc (dst: []byte) {
 
 	_rand_bytes(dst)
 }
+
+// has_rand_bytes returns true iff the target has support for accessing the
+// system entropty source.
+has_rand_bytes :: proc () -> bool {
+	return _has_rand_bytes()
+}
diff --git a/core/crypto/rand_bsd.odin b/core/crypto/rand_bsd.odin
index 8e2be1d95..61eaf652f 100644
--- a/core/crypto/rand_bsd.odin
+++ b/core/crypto/rand_bsd.odin
@@ -10,3 +10,7 @@ foreign libc {
 _rand_bytes :: proc(dst: []byte) {
 	arc4random_buf(raw_data(dst), len(dst))
 }
+
+_has_rand_bytes :: proc () -> bool {
+	return true
+}
diff --git a/core/crypto/rand_darwin.odin b/core/crypto/rand_darwin.odin
index ec44c1491..2864b46dd 100644
--- a/core/crypto/rand_darwin.odin
+++ b/core/crypto/rand_darwin.odin
@@ -10,3 +10,7 @@ _rand_bytes :: proc(dst: []byte) {
 		panic(fmt.tprintf("crypto/rand_bytes: SecRandomCopyBytes returned non-zero result: %v %s", res, msg))
 	}
 }
+
+_has_rand_bytes :: proc () -> bool {
+	return true
+}
diff --git a/core/crypto/rand_generic.odin b/core/crypto/rand_generic.odin
index bf7abbbe2..006ca51fe 100644
--- a/core/crypto/rand_generic.odin
+++ b/core/crypto/rand_generic.odin
@@ -9,3 +9,7 @@ package crypto
 _rand_bytes :: proc(dst: []byte) {
 	unimplemented("crypto: rand_bytes not supported on this OS")
 }
+
+_has_rand_bytes :: proc () -> bool {
+	return false
+}
diff --git a/core/crypto/rand_js.odin b/core/crypto/rand_js.odin
index 353b1e6b9..cb2711404 100644
--- a/core/crypto/rand_js.odin
+++ b/core/crypto/rand_js.odin
@@ -18,3 +18,7 @@ _rand_bytes :: proc(dst: []byte) {
 		dst = dst[to_read:]
 	}
 }
+
+_has_rand_bytes :: proc () -> bool {
+	return true
+}
diff --git a/core/crypto/rand_linux.odin b/core/crypto/rand_linux.odin
index 86fc425d6..05c05597d 100644
--- a/core/crypto/rand_linux.odin
+++ b/core/crypto/rand_linux.odin
@@ -34,3 +34,7 @@ _rand_bytes :: proc (dst: []byte) {
 		dst = dst[n_read:]
 	}
 }
+
+_has_rand_bytes :: proc () -> bool {
+	return true
+}
diff --git a/core/crypto/rand_windows.odin b/core/crypto/rand_windows.odin
index 53b58c776..e1d9f6118 100644
--- a/core/crypto/rand_windows.odin
+++ b/core/crypto/rand_windows.odin
@@ -21,3 +21,7 @@ _rand_bytes :: proc(dst: []byte) {
 		}
 	}
 }
+
+_has_rand_bytes :: proc () -> bool {
+	return true
+}
diff --git a/tests/core/crypto/test_core_crypto.odin b/tests/core/crypto/test_core_crypto.odin
index a6d399097..742e3cc04 100644
--- a/tests/core/crypto/test_core_crypto.odin
+++ b/tests/core/crypto/test_core_crypto.odin
@@ -347,7 +347,7 @@ test_x25519 :: proc(t: ^testing.T) {
 test_rand_bytes :: proc(t: ^testing.T) {
 	tc.log(t, "Testing rand_bytes")
 
-	if ODIN_OS != .Linux {
+	if !crypto.has_rand_bytes() {
 		tc.log(t, "rand_bytes not supported - skipping")
 		return
 	}

From f9b9521bf07ffece22b24ac02ae4261e3d8b3c50 Mon Sep 17 00:00:00 2001
From: Yawning Angel <yawning@schwanenlied.me>
Date: Sun, 17 Mar 2024 10:39:10 +0900
Subject: [PATCH 03/14] core/crypto/_fiat/field_curve25519: Use multiply to
 calculate the mask

Largely for consistency with the generic code, either is valid with Odin
semantics, but this is easier to comprehend.
---
 core/crypto/_fiat/field_curve25519/field51.odin | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/crypto/_fiat/field_curve25519/field51.odin b/core/crypto/_fiat/field_curve25519/field51.odin
index 0be94eb51..1a731b31b 100644
--- a/core/crypto/_fiat/field_curve25519/field51.odin
+++ b/core/crypto/_fiat/field_curve25519/field51.odin
@@ -599,7 +599,7 @@ fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
 
 @(optimization_mode="none")
 fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
-	mask := -u64(arg1)
+	mask := (u64(arg1) * 0xffffffffffffffff)
 	x := (out1[0] ~ out2[0]) & mask
 	x1, y1 := out1[0] ~ x, out2[0] ~ x
 	x = (out1[1] ~ out2[1]) & mask

From 9a418fd27bcf5600ac16d74649f01e35bb8e626c Mon Sep 17 00:00:00 2001
From: Yawning Angel <yawning@schwanenlied.me>
Date: Sun, 17 Mar 2024 19:02:11 +0900
Subject: [PATCH 04/14] core/crypto/_fiat/field_curve25519: Move routines (NFC)

---
 core/crypto/_fiat/field_curve25519/field.odin | 49 +++++++++++++++++
 .../_fiat/field_curve25519/field51.odin       | 53 -------------------
 2 files changed, 49 insertions(+), 53 deletions(-)

diff --git a/core/crypto/_fiat/field_curve25519/field.odin b/core/crypto/_fiat/field_curve25519/field.odin
index faf8ae3f7..a8e0a0316 100644
--- a/core/crypto/_fiat/field_curve25519/field.odin
+++ b/core/crypto/_fiat/field_curve25519/field.odin
@@ -136,3 +136,52 @@ fe_carry_inv :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 
 	mem.zero_explicit(&tmp1, size_of(tmp1))
 }
+
+fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 0
+	out1[1] = 0
+	out1[2] = 0
+	out1[3] = 0
+	out1[4] = 0
+}
+
+fe_one :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 1
+	out1[1] = 0
+	out1[2] = 0
+	out1[3] = 0
+	out1[4] = 0
+}
+
+fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
+	x1 := arg1[0]
+	x2 := arg1[1]
+	x3 := arg1[2]
+	x4 := arg1[3]
+	x5 := arg1[4]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+}
+
+@(optimization_mode="none")
+fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
+	mask := (u64(arg1) * 0xffffffffffffffff)
+	x := (out1[0] ~ out2[0]) & mask
+	x1, y1 := out1[0] ~ x, out2[0] ~ x
+	x = (out1[1] ~ out2[1]) & mask
+	x2, y2 := out1[1] ~ x, out2[1] ~ x
+	x = (out1[2] ~ out2[2]) & mask
+	x3, y3 := out1[2] ~ x, out2[2] ~ x
+	x = (out1[3] ~ out2[3]) & mask
+	x4, y4 := out1[3] ~ x, out2[3] ~ x
+	x = (out1[4] ~ out2[4]) & mask
+	x5, y5 := out1[4] ~ x, out2[4] ~ x
+	out1[0], out2[0] = x1, y1
+	out1[1], out2[1] = x2, y2
+	out1[2], out2[2] = x3, y3
+	out1[3], out2[3] = x4, y4
+	out1[4], out2[4] = x5, y5
+}
diff --git a/core/crypto/_fiat/field_curve25519/field51.odin b/core/crypto/_fiat/field_curve25519/field51.odin
index 1a731b31b..3cbc296b7 100644
--- a/core/crypto/_fiat/field_curve25519/field51.odin
+++ b/core/crypto/_fiat/field_curve25519/field51.odin
@@ -30,8 +30,6 @@ package field_curve25519
 //
 // While the base implementation is provably correct, this implementation
 // makes no such claims as the port and optimizations were done by hand.
-// At some point, it may be worth adding support to fiat-crypto for
-// generating Odin output.
 //
 // TODO:
 //  * When fiat-crypto supports it, using a saturated 64-bit limbs
@@ -565,54 +563,3 @@ fe_carry_scmul_121666 :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_El
 	out1[3] = x27
 	out1[4] = x32
 }
-
-// The following routines were added by hand, and do not come from fiat-crypto.
-
-fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
-	out1[0] = 0
-	out1[1] = 0
-	out1[2] = 0
-	out1[3] = 0
-	out1[4] = 0
-}
-
-fe_one :: proc "contextless" (out1: ^Tight_Field_Element) {
-	out1[0] = 1
-	out1[1] = 0
-	out1[2] = 0
-	out1[3] = 0
-	out1[4] = 0
-}
-
-fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
-	x1 := arg1[0]
-	x2 := arg1[1]
-	x3 := arg1[2]
-	x4 := arg1[3]
-	x5 := arg1[4]
-	out1[0] = x1
-	out1[1] = x2
-	out1[2] = x3
-	out1[3] = x4
-	out1[4] = x5
-}
-
-@(optimization_mode="none")
-fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
-	mask := (u64(arg1) * 0xffffffffffffffff)
-	x := (out1[0] ~ out2[0]) & mask
-	x1, y1 := out1[0] ~ x, out2[0] ~ x
-	x = (out1[1] ~ out2[1]) & mask
-	x2, y2 := out1[1] ~ x, out2[1] ~ x
-	x = (out1[2] ~ out2[2]) & mask
-	x3, y3 := out1[2] ~ x, out2[2] ~ x
-	x = (out1[3] ~ out2[3]) & mask
-	x4, y4 := out1[3] ~ x, out2[3] ~ x
-	x = (out1[4] ~ out2[4]) & mask
-	x5, y5 := out1[4] ~ x, out2[4] ~ x
-	out1[0], out2[0] = x1, y1
-	out1[1], out2[1] = x2, y2
-	out1[2], out2[2] = x3, y3
-	out1[3], out2[3] = x4, y4
-	out1[4], out2[4] = x5, y5
-}

From 31aba5a7280dd5c8fe70d960058002fd682baa57 Mon Sep 17 00:00:00 2001
From: Yawning Angel <yawning@schwanenlied.me>
Date: Sun, 17 Mar 2024 19:06:50 +0900
Subject: [PATCH 05/14] core/crypto/_fiat/field_poly1305: Move routines (NFC)

---
 core/crypto/_fiat/field_poly1305/field.odin   | 29 +++++++++++++++++
 .../_fiat/field_poly1305/field4344.odin       | 31 -------------------
 2 files changed, 29 insertions(+), 31 deletions(-)

diff --git a/core/crypto/_fiat/field_poly1305/field.odin b/core/crypto/_fiat/field_poly1305/field.odin
index a103f6fc7..9b00ff3ec 100644
--- a/core/crypto/_fiat/field_poly1305/field.odin
+++ b/core/crypto/_fiat/field_poly1305/field.odin
@@ -51,3 +51,32 @@ fe_from_u64s :: proc "contextless" (out1: ^Tight_Field_Element, lo, hi: u64) {
 	// This routine is only used to deserialize `r` which is confidential.
 	mem.zero_explicit(&tmp, size_of(tmp))
 }
+
+fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
+	out1[0] = 0
+	out1[1] = 0
+	out1[2] = 0
+}
+
+fe_set :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
+	x1 := arg1[0]
+	x2 := arg1[1]
+	x3 := arg1[2]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+}
+
+@(optimization_mode="none")
+fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: bool) {
+	mask := -u64(arg1)
+	x := (out1[0] ~ out2[0]) & mask
+	x1, y1 := out1[0] ~ x, out2[0] ~ x
+	x = (out1[1] ~ out2[1]) & mask
+	x2, y2 := out1[1] ~ x, out2[1] ~ x
+	x = (out1[2] ~ out2[2]) & mask
+	x3, y3 := out1[2] ~ x, out2[2] ~ x
+	out1[0], out2[0] = x1, y1
+	out1[1], out2[1] = x2, y2
+	out1[2], out2[2] = x3, y3
+}
diff --git a/core/crypto/_fiat/field_poly1305/field4344.odin b/core/crypto/_fiat/field_poly1305/field4344.odin
index 8e8a7cc78..ddc10de52 100644
--- a/core/crypto/_fiat/field_poly1305/field4344.odin
+++ b/core/crypto/_fiat/field_poly1305/field4344.odin
@@ -325,34 +325,3 @@ fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_E
 	out1[1] = x2
 	out1[2] = x3
 }
-
-// The following routines were added by hand, and do not come from fiat-crypto.
-
-fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
-	out1[0] = 0
-	out1[1] = 0
-	out1[2] = 0
-}
-
-fe_set :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
-	x1 := arg1[0]
-	x2 := arg1[1]
-	x3 := arg1[2]
-	out1[0] = x1
-	out1[1] = x2
-	out1[2] = x3
-}
-
-@(optimization_mode="none")
-fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: bool) {
-	mask := -u64(arg1)
-	x := (out1[0] ~ out2[0]) & mask
-	x1, y1 := out1[0] ~ x, out2[0] ~ x
-	x = (out1[1] ~ out2[1]) & mask
-	x2, y2 := out1[1] ~ x, out2[1] ~ x
-	x = (out1[2] ~ out2[2]) & mask
-	x3, y3 := out1[2] ~ x, out2[2] ~ x
-	out1[0], out2[0] = x1, y1
-	out1[1], out2[1] = x2, y2
-	out1[2], out2[2] = x3, y3
-}

From c951cbdbbcb1bce484ee79ffcf3c288fde64b802 Mon Sep 17 00:00:00 2001
From: Yawning Angel <yawning@schwanenlied.me>
Date: Sun, 17 Mar 2024 19:08:30 +0900
Subject: [PATCH 06/14] core/crypto/_fiat: odinfmt (NFC)

---
 core/crypto/_fiat/fiat.odin                   |  4 +-
 core/crypto/_fiat/field_curve25519/field.odin | 20 +++++----
 .../_fiat/field_curve25519/field51.odin       | 31 +++++++++----
 core/crypto/_fiat/field_poly1305/field.odin   | 17 +++++---
 .../_fiat/field_poly1305/field4344.odin       | 43 +++++++++++++++----
 5 files changed, 84 insertions(+), 31 deletions(-)

diff --git a/core/crypto/_fiat/fiat.odin b/core/crypto/_fiat/fiat.odin
index f0551722f..cc73c6927 100644
--- a/core/crypto/_fiat/fiat.odin
+++ b/core/crypto/_fiat/fiat.odin
@@ -9,7 +9,7 @@ package fiat
 u1 :: distinct u8
 i1 :: distinct i8
 
-@(optimization_mode="none")
+@(optimization_mode = "none")
 cmovznz_u64 :: proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) {
 	x1 := (u64(arg1) * 0xffffffffffffffff)
 	x2 := ((x1 & arg3) | ((~x1) & arg2))
@@ -17,7 +17,7 @@ cmovznz_u64 :: proc "contextless" (arg1: u1, arg2, arg3: u64) -> (out1: u64) {
 	return
 }
 
-@(optimization_mode="none")
+@(optimization_mode = "none")
 cmovznz_u32 :: proc "contextless" (arg1: u1, arg2, arg3: u32) -> (out1: u32) {
 	x1 := (u32(arg1) * 0xffffffff)
 	x2 := ((x1 & arg3) | ((~x1) & arg2))
diff --git a/core/crypto/_fiat/field_curve25519/field.odin b/core/crypto/_fiat/field_curve25519/field.odin
index a8e0a0316..cf7f694bc 100644
--- a/core/crypto/_fiat/field_curve25519/field.odin
+++ b/core/crypto/_fiat/field_curve25519/field.odin
@@ -3,11 +3,15 @@ package field_curve25519
 import "core:crypto"
 import "core:mem"
 
-fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element {
+fe_relax_cast :: #force_inline proc "contextless" (
+	arg1: ^Tight_Field_Element,
+) -> ^Loose_Field_Element {
 	return transmute(^Loose_Field_Element)(arg1)
 }
 
-fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element {
+fe_tighten_cast :: #force_inline proc "contextless" (
+	arg1: ^Loose_Field_Element,
+) -> ^Tight_Field_Element {
 	return transmute(^Tight_Field_Element)(arg1)
 }
 
@@ -46,7 +50,7 @@ fe_equal_bytes :: proc "contextless" (arg1: ^Tight_Field_Element, arg2: ^[32]byt
 	return ret
 }
 
-fe_carry_pow2k :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element, arg2: uint) {
+fe_carry_pow2k :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element, arg2: uint) {
 	// Special case: `arg1^(2 * 0) = 1`, though this should never happen.
 	if arg2 == 0 {
 		fe_one(out1)
@@ -54,7 +58,7 @@ fe_carry_pow2k :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element,
 	}
 
 	fe_carry_square(out1, arg1)
-	for _ in 1..<arg2 {
+	for _ in 1 ..< arg2 {
 		fe_carry_square(out1, fe_relax_cast(out1))
 	}
 }
@@ -64,7 +68,7 @@ fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Eleme
 	fe_carry(out1, fe_relax_cast(out1))
 }
 
-fe_carry_invsqrt :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) -> int {
+fe_carry_invsqrt :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) -> int {
 	// Inverse square root taken from Monocypher.
 
 	tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, ---
@@ -116,7 +120,7 @@ fe_carry_invsqrt :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element
 	// then  isr = x^((p-1)/4) * sqrt(-1)
 	// else  isr = x^((p-1)/4)
 	fe_carry_mul(out1, fe_relax_cast(&tmp1), fe_relax_cast(&SQRT_M1))
-	fe_cond_assign(out1, &tmp1, (m1|ms) ~ 1)
+	fe_cond_assign(out1, &tmp1, (m1 | ms) ~ 1)
 
 	mem.zero_explicit(&tmp1, size_of(tmp1))
 	mem.zero_explicit(&tmp2, size_of(tmp2))
@@ -126,7 +130,7 @@ fe_carry_invsqrt :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element
 	return p1 | m1
 }
 
-fe_carry_inv :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_inv :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 	tmp1: Tight_Field_Element
 
 	fe_carry_square(&tmp1, arg1)
@@ -166,7 +170,7 @@ fe_set :: proc "contextless" (out1, arg1: ^Tight_Field_Element) {
 	out1[4] = x5
 }
 
-@(optimization_mode="none")
+@(optimization_mode = "none")
 fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: int) {
 	mask := (u64(arg1) * 0xffffffffffffffff)
 	x := (out1[0] ~ out2[0]) & mask
diff --git a/core/crypto/_fiat/field_curve25519/field51.odin b/core/crypto/_fiat/field_curve25519/field51.odin
index 3cbc296b7..4cda96c81 100644
--- a/core/crypto/_fiat/field_curve25519/field51.odin
+++ b/core/crypto/_fiat/field_curve25519/field51.odin
@@ -42,7 +42,7 @@ import "core:math/bits"
 Loose_Field_Element :: distinct [5]u64
 Tight_Field_Element :: distinct [5]u64
 
-SQRT_M1 := Tight_Field_Element{
+SQRT_M1 := Tight_Field_Element {
 	1718705420411056,
 	234908883556509,
 	2233514472574048,
@@ -50,7 +50,13 @@ SQRT_M1 := Tight_Field_Element{
 	765476049583133,
 }
 
-_addcarryx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_addcarryx_u51 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((u64(arg1) + arg2) + arg3)
 	x2 := (x1 & 0x7ffffffffffff)
 	x3 := fiat.u1((x1 >> 51))
@@ -59,7 +65,13 @@ _addcarryx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u
 	return
 }
 
-_subborrowx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_subborrowx_u51 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
 	x2 := fiat.i1((x1 >> 51))
 	x3 := (u64(x1) & 0x7ffffffffffff)
@@ -68,7 +80,7 @@ _subborrowx_u51 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3:
 	return
 }
 
-fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
+fe_carry_mul :: proc(out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
 	x2, x1 := bits.mul_u64(arg1[4], (arg2[4] * 0x13))
 	x4, x3 := bits.mul_u64(arg1[4], (arg2[3] * 0x13))
 	x6, x5 := bits.mul_u64(arg1[4], (arg2[2] * 0x13))
@@ -167,7 +179,7 @@ fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Eleme
 	out1[4] = x152
 }
 
-fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_square :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 	x1 := (arg1[4] * 0x13)
 	x2 := (x1 * 0x2)
 	x3 := (arg1[4] * 0x2)
@@ -303,8 +315,11 @@ fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Ele
 	out1[4] = x5
 }
 
-@(optimization_mode="none")
-fe_cond_assign :: #force_no_inline proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: int) {
+@(optimization_mode = "none")
+fe_cond_assign :: #force_no_inline proc "contextless" (
+	out1, arg1: ^Tight_Field_Element,
+	arg2: int,
+) {
 	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
 	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
 	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
@@ -525,7 +540,7 @@ fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_E
 	out1[4] = x5
 }
 
-fe_carry_scmul_121666 :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_scmul_121666 :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 	x2, x1 := bits.mul_u64(0x1db42, arg1[4])
 	x4, x3 := bits.mul_u64(0x1db42, arg1[3])
 	x6, x5 := bits.mul_u64(0x1db42, arg1[2])
diff --git a/core/crypto/_fiat/field_poly1305/field.odin b/core/crypto/_fiat/field_poly1305/field.odin
index 9b00ff3ec..f5557cf5f 100644
--- a/core/crypto/_fiat/field_poly1305/field.odin
+++ b/core/crypto/_fiat/field_poly1305/field.odin
@@ -3,15 +3,19 @@ package field_poly1305
 import "core:encoding/endian"
 import "core:mem"
 
-fe_relax_cast :: #force_inline proc "contextless" (arg1: ^Tight_Field_Element) -> ^Loose_Field_Element {
+fe_relax_cast :: #force_inline proc "contextless" (
+	arg1: ^Tight_Field_Element,
+) -> ^Loose_Field_Element {
 	return transmute(^Loose_Field_Element)(arg1)
 }
 
-fe_tighten_cast :: #force_inline proc "contextless" (arg1: ^Loose_Field_Element) -> ^Tight_Field_Element {
+fe_tighten_cast :: #force_inline proc "contextless" (
+	arg1: ^Loose_Field_Element,
+) -> ^Tight_Field_Element {
 	return transmute(^Tight_Field_Element)(arg1)
 }
 
-fe_from_bytes :: #force_inline proc (out1: ^Tight_Field_Element, arg1: []byte, arg2: byte) {
+fe_from_bytes :: #force_inline proc(out1: ^Tight_Field_Element, arg1: []byte, arg2: byte) {
 	// fiat-crypto's deserialization routine effectively processes a
 	// single byte at a time, and wants 256-bits of input for a value
 	// that will be 128-bits or 129-bits.
@@ -67,8 +71,11 @@ fe_set :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
 	out1[2] = x3
 }
 
-@(optimization_mode="none")
-fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_Element, arg1: bool) {
+@(optimization_mode = "none")
+fe_cond_swap :: #force_no_inline proc "contextless" (
+	out1, out2: ^Tight_Field_Element,
+	arg1: bool,
+) {
 	mask := -u64(arg1)
 	x := (out1[0] ~ out2[0]) & mask
 	x1, y1 := out1[0] ~ x, out2[0] ~ x
diff --git a/core/crypto/_fiat/field_poly1305/field4344.odin b/core/crypto/_fiat/field_poly1305/field4344.odin
index ddc10de52..bddb0c322 100644
--- a/core/crypto/_fiat/field_poly1305/field4344.odin
+++ b/core/crypto/_fiat/field_poly1305/field4344.odin
@@ -39,7 +39,13 @@ import "core:math/bits"
 Loose_Field_Element :: distinct [3]u64
 Tight_Field_Element :: distinct [3]u64
 
-_addcarryx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_addcarryx_u44 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((u64(arg1) + arg2) + arg3)
 	x2 := (x1 & 0xfffffffffff)
 	x3 := fiat.u1((x1 >> 44))
@@ -48,7 +54,13 @@ _addcarryx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u
 	return
 }
 
-_subborrowx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_subborrowx_u44 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
 	x2 := fiat.i1((x1 >> 44))
 	x3 := (u64(x1) & 0xfffffffffff)
@@ -57,7 +69,13 @@ _subborrowx_u44 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3:
 	return
 }
 
-_addcarryx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_addcarryx_u43 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((u64(arg1) + arg2) + arg3)
 	x2 := (x1 & 0x7ffffffffff)
 	x3 := fiat.u1((x1 >> 43))
@@ -66,7 +84,13 @@ _addcarryx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u
 	return
 }
 
-_subborrowx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3: u64) -> (out1: u64, out2: fiat.u1) {
+_subborrowx_u43 :: #force_inline proc "contextless" (
+	arg1: fiat.u1,
+	arg2, arg3: u64,
+) -> (
+	out1: u64,
+	out2: fiat.u1,
+) {
 	x1 := ((i64(arg2) - i64(arg1)) - i64(arg3))
 	x2 := fiat.i1((x1 >> 43))
 	x3 := (u64(x1) & 0x7ffffffffff)
@@ -75,7 +99,7 @@ _subborrowx_u43 :: #force_inline proc "contextless" (arg1: fiat.u1, arg2, arg3:
 	return
 }
 
-fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
+fe_carry_mul :: proc(out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
 	x2, x1 := bits.mul_u64(arg1[2], (arg2[2] * 0x5))
 	x4, x3 := bits.mul_u64(arg1[2], (arg2[1] * 0xa))
 	x6, x5 := bits.mul_u64(arg1[1], (arg2[2] * 0xa))
@@ -120,7 +144,7 @@ fe_carry_mul :: proc (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Eleme
 	out1[2] = x62
 }
 
-fe_carry_square :: proc (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_square :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 	x1 := (arg1[2] * 0x5)
 	x2 := (x1 * 0x2)
 	x3 := (arg1[2] * 0x2)
@@ -201,8 +225,11 @@ fe_opp :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_Ele
 	out1[2] = x3
 }
 
-@(optimization_mode="none")
-fe_cond_assign :: #force_no_inline proc "contextless" (out1, arg1: ^Tight_Field_Element, arg2: bool) {
+@(optimization_mode = "none")
+fe_cond_assign :: #force_no_inline proc "contextless" (
+	out1, arg1: ^Tight_Field_Element,
+	arg2: bool,
+) {
 	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
 	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
 	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])

From 1ce279e6a1dd59f4bffc33acc4cc281e4c45d441 Mon Sep 17 00:00:00 2001
From: Yawning Angel <yawning@schwanenlied.me>
Date: Wed, 20 Mar 2024 23:17:05 +0900
Subject: [PATCH 07/14] core/crypto/_fiat/field_curve25519: Mark more functions
 contextless

---
 core/crypto/_fiat/field_curve25519/field.odin   | 13 ++++++++++---
 core/crypto/_fiat/field_curve25519/field51.odin |  9 ++++++---
 core/crypto/x25519/x25519.odin                  |  2 +-
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/core/crypto/_fiat/field_curve25519/field.odin b/core/crypto/_fiat/field_curve25519/field.odin
index cf7f694bc..64f9f8a1f 100644
--- a/core/crypto/_fiat/field_curve25519/field.odin
+++ b/core/crypto/_fiat/field_curve25519/field.odin
@@ -50,7 +50,11 @@ fe_equal_bytes :: proc "contextless" (arg1: ^Tight_Field_Element, arg2: ^[32]byt
 	return ret
 }
 
-fe_carry_pow2k :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element, arg2: uint) {
+fe_carry_pow2k :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+	arg2: uint,
+) {
 	// Special case: `arg1^(2 * 0) = 1`, though this should never happen.
 	if arg2 == 0 {
 		fe_one(out1)
@@ -68,7 +72,10 @@ fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Eleme
 	fe_carry(out1, fe_relax_cast(out1))
 }
 
-fe_carry_invsqrt :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) -> int {
+fe_carry_invsqrt :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+) -> int {
 	// Inverse square root taken from Monocypher.
 
 	tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, ---
@@ -130,7 +137,7 @@ fe_carry_invsqrt :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element)
 	return p1 | m1
 }
 
-fe_carry_inv :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_inv :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 	tmp1: Tight_Field_Element
 
 	fe_carry_square(&tmp1, arg1)
diff --git a/core/crypto/_fiat/field_curve25519/field51.odin b/core/crypto/_fiat/field_curve25519/field51.odin
index 4cda96c81..81dca19e2 100644
--- a/core/crypto/_fiat/field_curve25519/field51.odin
+++ b/core/crypto/_fiat/field_curve25519/field51.odin
@@ -80,7 +80,7 @@ _subborrowx_u51 :: #force_inline proc "contextless" (
 	return
 }
 
-fe_carry_mul :: proc(out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
+fe_carry_mul :: proc "contextless" (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
 	x2, x1 := bits.mul_u64(arg1[4], (arg2[4] * 0x13))
 	x4, x3 := bits.mul_u64(arg1[4], (arg2[3] * 0x13))
 	x6, x5 := bits.mul_u64(arg1[4], (arg2[2] * 0x13))
@@ -179,7 +179,7 @@ fe_carry_mul :: proc(out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Elemen
 	out1[4] = x152
 }
 
-fe_carry_square :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_square :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 	x1 := (arg1[4] * 0x13)
 	x2 := (x1 * 0x2)
 	x3 := (arg1[4] * 0x2)
@@ -540,7 +540,10 @@ fe_relax :: proc "contextless" (out1: ^Loose_Field_Element, arg1: ^Tight_Field_E
 	out1[4] = x5
 }
 
-fe_carry_scmul_121666 :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_scmul_121666 :: proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: ^Loose_Field_Element,
+) {
 	x2, x1 := bits.mul_u64(0x1db42, arg1[4])
 	x4, x3 := bits.mul_u64(0x1db42, arg1[3])
 	x6, x5 := bits.mul_u64(0x1db42, arg1[2])
diff --git a/core/crypto/x25519/x25519.odin b/core/crypto/x25519/x25519.odin
index 285666a32..3cd247cf8 100644
--- a/core/crypto/x25519/x25519.odin
+++ b/core/crypto/x25519/x25519.odin
@@ -27,7 +27,7 @@ _scalar_bit :: #force_inline proc "contextless" (s: ^[32]byte, i: int) -> u8 {
 }
 
 @(private)
-_scalarmult :: proc(out, scalar, point: ^[32]byte) {
+_scalarmult :: proc "contextless" (out, scalar, point: ^[32]byte) {
 	// Montgomery pseduo-multiplication taken from Monocypher.
 
 	// computes the scalar product

From 36f3001d59f0c4e1d00f3f75431830c3b463e9f6 Mon Sep 17 00:00:00 2001
From: Yawning Angel <yawning@schwanenlied.me>
Date: Wed, 20 Mar 2024 08:02:20 +0900
Subject: [PATCH 08/14] core/crypto/_fiat/field_poly1305: Use multiply to
 calculate the mask

---
 core/crypto/_fiat/field_poly1305/field.odin | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/crypto/_fiat/field_poly1305/field.odin b/core/crypto/_fiat/field_poly1305/field.odin
index f5557cf5f..f4eccc476 100644
--- a/core/crypto/_fiat/field_poly1305/field.odin
+++ b/core/crypto/_fiat/field_poly1305/field.odin
@@ -76,7 +76,7 @@ fe_cond_swap :: #force_no_inline proc "contextless" (
 	out1, out2: ^Tight_Field_Element,
 	arg1: bool,
 ) {
-	mask := -u64(arg1)
+	mask := (u64(arg1) * 0xffffffffffffffff)
 	x := (out1[0] ~ out2[0]) & mask
 	x1, y1 := out1[0] ~ x, out2[0] ~ x
 	x = (out1[1] ~ out2[1]) & mask

From 4defe88decb740e73aaac3f5fe197a84d32b4c1e Mon Sep 17 00:00:00 2001
From: Yawning Angel <yawning@schwanenlied.me>
Date: Wed, 20 Mar 2024 23:21:27 +0900
Subject: [PATCH 09/14] core/crypto/_fiat/field_poly1305: Mark more functions
 contextless

---
 core/crypto/_fiat/field_poly1305/field.odin     | 11 +++++++++--
 core/crypto/_fiat/field_poly1305/field4344.odin |  4 ++--
 core/crypto/poly1305/poly1305.odin              |  2 +-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/core/crypto/_fiat/field_poly1305/field.odin b/core/crypto/_fiat/field_poly1305/field.odin
index f4eccc476..c50a56b0c 100644
--- a/core/crypto/_fiat/field_poly1305/field.odin
+++ b/core/crypto/_fiat/field_poly1305/field.odin
@@ -1,5 +1,6 @@
 package field_poly1305
 
+import "base:intrinsics"
 import "core:encoding/endian"
 import "core:mem"
 
@@ -15,7 +16,11 @@ fe_tighten_cast :: #force_inline proc "contextless" (
 	return transmute(^Tight_Field_Element)(arg1)
 }
 
-fe_from_bytes :: #force_inline proc(out1: ^Tight_Field_Element, arg1: []byte, arg2: byte) {
+fe_from_bytes :: #force_inline proc "contextless" (
+	out1: ^Tight_Field_Element,
+	arg1: []byte,
+	arg2: byte,
+) {
 	// fiat-crypto's deserialization routine effectively processes a
 	// single byte at a time, and wants 256-bits of input for a value
 	// that will be 128-bits or 129-bits.
@@ -24,7 +29,9 @@ fe_from_bytes :: #force_inline proc(out1: ^Tight_Field_Element, arg1: []byte, ar
 	// makes implementing the actual MAC block processing considerably
 	// neater.
 
-	assert(len(arg1) == 16)
+	if len(arg1) != 16 {
+		intrinsics.trap()
+	}
 
 	// While it may be unwise to do deserialization here on our
 	// own when fiat-crypto provides equivalent functionality,
diff --git a/core/crypto/_fiat/field_poly1305/field4344.odin b/core/crypto/_fiat/field_poly1305/field4344.odin
index bddb0c322..6a7a19d69 100644
--- a/core/crypto/_fiat/field_poly1305/field4344.odin
+++ b/core/crypto/_fiat/field_poly1305/field4344.odin
@@ -99,7 +99,7 @@ _subborrowx_u43 :: #force_inline proc "contextless" (
 	return
 }
 
-fe_carry_mul :: proc(out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
+fe_carry_mul :: proc "contextless" (out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Element) {
 	x2, x1 := bits.mul_u64(arg1[2], (arg2[2] * 0x5))
 	x4, x3 := bits.mul_u64(arg1[2], (arg2[1] * 0xa))
 	x6, x5 := bits.mul_u64(arg1[1], (arg2[2] * 0xa))
@@ -144,7 +144,7 @@ fe_carry_mul :: proc(out1: ^Tight_Field_Element, arg1, arg2: ^Loose_Field_Elemen
 	out1[2] = x62
 }
 
-fe_carry_square :: proc(out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
+fe_carry_square :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 	x1 := (arg1[2] * 0x5)
 	x2 := (x1 * 0x2)
 	x3 := (arg1[2] * 0x2)
diff --git a/core/crypto/poly1305/poly1305.odin b/core/crypto/poly1305/poly1305.odin
index 4ca4f75e1..443917a6a 100644
--- a/core/crypto/poly1305/poly1305.odin
+++ b/core/crypto/poly1305/poly1305.odin
@@ -168,7 +168,7 @@ reset :: proc(ctx: ^Context) {
 }
 
 @(private)
-_blocks :: proc(ctx: ^Context, msg: []byte, final := false) {
+_blocks :: proc "contextless" (ctx: ^Context, msg: []byte, final := false) {
 	n: field.Tight_Field_Element = ---
 	final_byte := byte(!final)
 

From fec42a6d741bfda489a5ab2423644f3169ff4128 Mon Sep 17 00:00:00 2001
From: Yawning Angel <yawning@schwanenlied.me>
Date: Tue, 19 Mar 2024 12:23:16 +0900
Subject: [PATCH 10/14] core/crypto/_fiat/field_scalar25519: Initial import

---
 .../crypto/_fiat/field_scalar25519/field.odin | 149 +++++
 .../_fiat/field_scalar25519/field64.odin      | 535 ++++++++++++++++++
 2 files changed, 684 insertions(+)
 create mode 100644 core/crypto/_fiat/field_scalar25519/field.odin
 create mode 100644 core/crypto/_fiat/field_scalar25519/field64.odin

diff --git a/core/crypto/_fiat/field_scalar25519/field.odin b/core/crypto/_fiat/field_scalar25519/field.odin
new file mode 100644
index 000000000..c741d30cf
--- /dev/null
+++ b/core/crypto/_fiat/field_scalar25519/field.odin
@@ -0,0 +1,149 @@
+package field_scalar25519
+
+import "base:intrinsics"
+import "core:encoding/endian"
+import "core:math/bits"
+import "core:mem"
+
+@(private)
+_TWO_168 := Montgomery_Domain_Field_Element {
+	0x5b8ab432eac74798,
+	0x38afddd6de59d5d7,
+	0xa2c131b399411b7c,
+	0x6329a7ed9ce5a30,
+}
+@(private)
+_TWO_336 := Montgomery_Domain_Field_Element {
+	0xbd3d108e2b35ecc5,
+	0x5c3a3718bdf9c90b,
+	0x63aa97a331b4f2ee,
+	0x3d217f5be65cb5c,
+}
+
+fe_from_bytes :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: ^[32]byte,
+	unsafe_assume_canonical := false,
+) -> bool {
+	tmp := Non_Montgomery_Domain_Field_Element {
+		endian.unchecked_get_u64le(arg1[0:]),
+		endian.unchecked_get_u64le(arg1[8:]),
+		endian.unchecked_get_u64le(arg1[16:]),
+		endian.unchecked_get_u64le(arg1[24:]),
+	}
+	defer mem.zero_explicit(&tmp, size_of(tmp))
+
+	// Check that tmp is in the the range [0, ELL).
+	if !unsafe_assume_canonical {
+		_, borrow := bits.sub_u64(ELL[0] - 1, tmp[0], 0)
+		_, borrow = bits.sub_u64(ELL[1], tmp[1], borrow)
+		_, borrow = bits.sub_u64(ELL[2], tmp[2], borrow)
+		_, borrow = bits.sub_u64(ELL[3], tmp[3], borrow)
+		if borrow != 0 {
+			return false
+		}
+	}
+
+	fe_to_montgomery(out1, &tmp)
+
+	return true
+}
+
+fe_from_bytes_rfc8032 :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: ^[32]byte,
+) {
+	tmp: [64]byte
+	copy(tmp[:], arg1[:])
+
+	// Apply "clamping" as in RFC 8032.
+	tmp[0] &= 248
+	tmp[31] &= 127
+	tmp[31] |= 64 // Sets the 254th bit, so the encoding is non-canonical.
+
+	fe_from_bytes_wide(out1, &tmp)
+
+	mem.zero_explicit(&tmp, size_of(tmp))
+}
+
+fe_from_bytes_wide :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: ^[64]byte,
+) {
+	tmp: Montgomery_Domain_Field_Element
+	// Use Frank Denis' trick, as documented by Filippo Valsorda
+	// at https://words.filippo.io/dispatches/wide-reduction/
+	//
+	// x = c * 2^336 + b * 2^168 + a  mod l
+	_fe_from_bytes_short(out1, arg1[:21]) // a
+
+	_fe_from_bytes_short(&tmp, arg1[21:42]) // b
+	fe_mul(&tmp, &tmp, &_TWO_168) // b * 2^168
+	fe_add(out1, out1, &tmp) // a + b * 2^168
+
+	_fe_from_bytes_short(&tmp, arg1[42:]) // c
+	fe_mul(&tmp, &tmp, &_TWO_336) // c * 2^336
+	fe_add(out1, out1, &tmp) // a + b * 2^168 + c * 2^336
+
+	mem.zero_explicit(&tmp, size_of(tmp))
+}
+
+@(private)
+_fe_from_bytes_short :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element, arg1: []byte) {
+	// INVARIANT: len(arg1) < 32.
+	if len(arg1) >= 32 {
+		intrinsics.trap()
+	}
+	tmp: [32]byte
+	copy(tmp[:], arg1)
+
+	_ = fe_from_bytes(out1, &tmp, true)
+	mem.zero_explicit(&tmp, size_of(tmp))
+}
+
+fe_to_bytes :: proc "contextless" (out1: []byte, arg1: ^Montgomery_Domain_Field_Element) {
+	if len(out1) != 32 {
+		intrinsics.trap()
+	}
+
+	tmp: Non_Montgomery_Domain_Field_Element
+	fe_from_montgomery(&tmp, arg1)
+
+	endian.unchecked_put_u64le(out1[0:], tmp[0])
+	endian.unchecked_put_u64le(out1[8:], tmp[1])
+	endian.unchecked_put_u64le(out1[16:], tmp[2])
+	endian.unchecked_put_u64le(out1[24:], tmp[3])
+
+	mem.zero_explicit(&tmp, size_of(tmp))
+}
+
+fe_equal :: proc "contextless" (arg1, arg2: ^Montgomery_Domain_Field_Element) -> int {
+	tmp: Montgomery_Domain_Field_Element
+	fe_sub(&tmp, arg1, arg2)
+
+	// This will only underflow iff arg1 == arg2, and we return the borrow,
+	// which will be 1.
+	_, borrow := bits.sub_u64(fe_non_zero(&tmp), 1, 0)
+
+	mem.zero_explicit(&tmp, size_of(tmp))
+
+	return int(borrow)
+}
+
+fe_zero :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element) {
+	out1[0] = 0
+	out1[1] = 0
+	out1[2] = 0
+	out1[3] = 0
+}
+
+fe_set :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) {
+	x1 := arg1[0]
+	x2 := arg1[1]
+	x3 := arg1[2]
+	x4 := arg1[3]
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+}
diff --git a/core/crypto/_fiat/field_scalar25519/field64.odin b/core/crypto/_fiat/field_scalar25519/field64.odin
new file mode 100644
index 000000000..268752e5b
--- /dev/null
+++ b/core/crypto/_fiat/field_scalar25519/field64.odin
@@ -0,0 +1,535 @@
+// The BSD 1-Clause License (BSD-1-Clause)
+//
+// Copyright (c) 2015-2020 the fiat-crypto authors (see the AUTHORS file)
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     1. Redistributions of source code must retain the above copyright
+//        notice, this list of conditions and the following disclaimer.
+//
+// THIS SOFTWARE IS PROVIDED BY the fiat-crypto authors "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design,
+// Inc. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package field_scalar25519
+
+// The file provides arithmetic on the field Z/(2^252+27742317777372353535851937790883648493)
+// using a 64-bit Montgomery form internal representation.  It is derived
+// primarily from the machine generated Golang output from the fiat-crypto
+// project.
+//
+// While the base implementation is provably correct, this implementation
+// makes no such claims as the port and optimizations were done by hand.
+
+import fiat "core:crypto/_fiat"
+import "core:math/bits"
+
+// ELL is the saturated representation of the field order, least-significant
+// limb first.
+ELL :: [4]u64{0x5812631a5cf5d3ed, 0x14def9dea2f79cd6, 0x0, 0x1000000000000000}
+
+Montgomery_Domain_Field_Element :: distinct [4]u64
+Non_Montgomery_Domain_Field_Element :: distinct [4]u64
+
+fe_mul :: proc "contextless" (out1, arg1, arg2: ^Montgomery_Domain_Field_Element) {
+	x1 := arg1[1]
+	x2 := arg1[2]
+	x3 := arg1[3]
+	x4 := arg1[0]
+	x6, x5 := bits.mul_u64(x4, arg2[3])
+	x8, x7 := bits.mul_u64(x4, arg2[2])
+	x10, x9 := bits.mul_u64(x4, arg2[1])
+	x12, x11 := bits.mul_u64(x4, arg2[0])
+	x13, x14 := bits.add_u64(x12, x9, u64(0x0))
+	x15, x16 := bits.add_u64(x10, x7, u64(fiat.u1(x14)))
+	x17, x18 := bits.add_u64(x8, x5, u64(fiat.u1(x16)))
+	x19 := (u64(fiat.u1(x18)) + x6)
+	_, x20 := bits.mul_u64(x11, 0xd2b51da312547e1b)
+	x23, x22 := bits.mul_u64(x20, 0x1000000000000000)
+	x25, x24 := bits.mul_u64(x20, 0x14def9dea2f79cd6)
+	x27, x26 := bits.mul_u64(x20, 0x5812631a5cf5d3ed)
+	x28, x29 := bits.add_u64(x27, x24, u64(0x0))
+	x30 := (u64(fiat.u1(x29)) + x25)
+	_, x32 := bits.add_u64(x11, x26, u64(0x0))
+	x33, x34 := bits.add_u64(x13, x28, u64(fiat.u1(x32)))
+	x35, x36 := bits.add_u64(x15, x30, u64(fiat.u1(x34)))
+	x37, x38 := bits.add_u64(x17, x22, u64(fiat.u1(x36)))
+	x39, x40 := bits.add_u64(x19, x23, u64(fiat.u1(x38)))
+	x42, x41 := bits.mul_u64(x1, arg2[3])
+	x44, x43 := bits.mul_u64(x1, arg2[2])
+	x46, x45 := bits.mul_u64(x1, arg2[1])
+	x48, x47 := bits.mul_u64(x1, arg2[0])
+	x49, x50 := bits.add_u64(x48, x45, u64(0x0))
+	x51, x52 := bits.add_u64(x46, x43, u64(fiat.u1(x50)))
+	x53, x54 := bits.add_u64(x44, x41, u64(fiat.u1(x52)))
+	x55 := (u64(fiat.u1(x54)) + x42)
+	x56, x57 := bits.add_u64(x33, x47, u64(0x0))
+	x58, x59 := bits.add_u64(x35, x49, u64(fiat.u1(x57)))
+	x60, x61 := bits.add_u64(x37, x51, u64(fiat.u1(x59)))
+	x62, x63 := bits.add_u64(x39, x53, u64(fiat.u1(x61)))
+	x64, x65 := bits.add_u64(u64(fiat.u1(x40)), x55, u64(fiat.u1(x63)))
+	_, x66 := bits.mul_u64(x56, 0xd2b51da312547e1b)
+	x69, x68 := bits.mul_u64(x66, 0x1000000000000000)
+	x71, x70 := bits.mul_u64(x66, 0x14def9dea2f79cd6)
+	x73, x72 := bits.mul_u64(x66, 0x5812631a5cf5d3ed)
+	x74, x75 := bits.add_u64(x73, x70, u64(0x0))
+	x76 := (u64(fiat.u1(x75)) + x71)
+	_, x78 := bits.add_u64(x56, x72, u64(0x0))
+	x79, x80 := bits.add_u64(x58, x74, u64(fiat.u1(x78)))
+	x81, x82 := bits.add_u64(x60, x76, u64(fiat.u1(x80)))
+	x83, x84 := bits.add_u64(x62, x68, u64(fiat.u1(x82)))
+	x85, x86 := bits.add_u64(x64, x69, u64(fiat.u1(x84)))
+	x87 := (u64(fiat.u1(x86)) + u64(fiat.u1(x65)))
+	x89, x88 := bits.mul_u64(x2, arg2[3])
+	x91, x90 := bits.mul_u64(x2, arg2[2])
+	x93, x92 := bits.mul_u64(x2, arg2[1])
+	x95, x94 := bits.mul_u64(x2, arg2[0])
+	x96, x97 := bits.add_u64(x95, x92, u64(0x0))
+	x98, x99 := bits.add_u64(x93, x90, u64(fiat.u1(x97)))
+	x100, x101 := bits.add_u64(x91, x88, u64(fiat.u1(x99)))
+	x102 := (u64(fiat.u1(x101)) + x89)
+	x103, x104 := bits.add_u64(x79, x94, u64(0x0))
+	x105, x106 := bits.add_u64(x81, x96, u64(fiat.u1(x104)))
+	x107, x108 := bits.add_u64(x83, x98, u64(fiat.u1(x106)))
+	x109, x110 := bits.add_u64(x85, x100, u64(fiat.u1(x108)))
+	x111, x112 := bits.add_u64(x87, x102, u64(fiat.u1(x110)))
+	_, x113 := bits.mul_u64(x103, 0xd2b51da312547e1b)
+	x116, x115 := bits.mul_u64(x113, 0x1000000000000000)
+	x118, x117 := bits.mul_u64(x113, 0x14def9dea2f79cd6)
+	x120, x119 := bits.mul_u64(x113, 0x5812631a5cf5d3ed)
+	x121, x122 := bits.add_u64(x120, x117, u64(0x0))
+	x123 := (u64(fiat.u1(x122)) + x118)
+	_, x125 := bits.add_u64(x103, x119, u64(0x0))
+	x126, x127 := bits.add_u64(x105, x121, u64(fiat.u1(x125)))
+	x128, x129 := bits.add_u64(x107, x123, u64(fiat.u1(x127)))
+	x130, x131 := bits.add_u64(x109, x115, u64(fiat.u1(x129)))
+	x132, x133 := bits.add_u64(x111, x116, u64(fiat.u1(x131)))
+	x134 := (u64(fiat.u1(x133)) + u64(fiat.u1(x112)))
+	x136, x135 := bits.mul_u64(x3, arg2[3])
+	x138, x137 := bits.mul_u64(x3, arg2[2])
+	x140, x139 := bits.mul_u64(x3, arg2[1])
+	x142, x141 := bits.mul_u64(x3, arg2[0])
+	x143, x144 := bits.add_u64(x142, x139, u64(0x0))
+	x145, x146 := bits.add_u64(x140, x137, u64(fiat.u1(x144)))
+	x147, x148 := bits.add_u64(x138, x135, u64(fiat.u1(x146)))
+	x149 := (u64(fiat.u1(x148)) + x136)
+	x150, x151 := bits.add_u64(x126, x141, u64(0x0))
+	x152, x153 := bits.add_u64(x128, x143, u64(fiat.u1(x151)))
+	x154, x155 := bits.add_u64(x130, x145, u64(fiat.u1(x153)))
+	x156, x157 := bits.add_u64(x132, x147, u64(fiat.u1(x155)))
+	x158, x159 := bits.add_u64(x134, x149, u64(fiat.u1(x157)))
+	_, x160 := bits.mul_u64(x150, 0xd2b51da312547e1b)
+	x163, x162 := bits.mul_u64(x160, 0x1000000000000000)
+	x165, x164 := bits.mul_u64(x160, 0x14def9dea2f79cd6)
+	x167, x166 := bits.mul_u64(x160, 0x5812631a5cf5d3ed)
+	x168, x169 := bits.add_u64(x167, x164, u64(0x0))
+	x170 := (u64(fiat.u1(x169)) + x165)
+	_, x172 := bits.add_u64(x150, x166, u64(0x0))
+	x173, x174 := bits.add_u64(x152, x168, u64(fiat.u1(x172)))
+	x175, x176 := bits.add_u64(x154, x170, u64(fiat.u1(x174)))
+	x177, x178 := bits.add_u64(x156, x162, u64(fiat.u1(x176)))
+	x179, x180 := bits.add_u64(x158, x163, u64(fiat.u1(x178)))
+	x181 := (u64(fiat.u1(x180)) + u64(fiat.u1(x159)))
+	x182, x183 := bits.sub_u64(x173, 0x5812631a5cf5d3ed, u64(0x0))
+	x184, x185 := bits.sub_u64(x175, 0x14def9dea2f79cd6, u64(fiat.u1(x183)))
+	x186, x187 := bits.sub_u64(x177, u64(0x0), u64(fiat.u1(x185)))
+	x188, x189 := bits.sub_u64(x179, 0x1000000000000000, u64(fiat.u1(x187)))
+	_, x191 := bits.sub_u64(x181, u64(0x0), u64(fiat.u1(x189)))
+	x192 := fiat.cmovznz_u64(fiat.u1(x191), x182, x173)
+	x193 := fiat.cmovznz_u64(fiat.u1(x191), x184, x175)
+	x194 := fiat.cmovznz_u64(fiat.u1(x191), x186, x177)
+	x195 := fiat.cmovznz_u64(fiat.u1(x191), x188, x179)
+	out1[0] = x192
+	out1[1] = x193
+	out1[2] = x194
+	out1[3] = x195
+}
+
+fe_square :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) {
+	x1 := arg1[1]
+	x2 := arg1[2]
+	x3 := arg1[3]
+	x4 := arg1[0]
+	x6, x5 := bits.mul_u64(x4, arg1[3])
+	x8, x7 := bits.mul_u64(x4, arg1[2])
+	x10, x9 := bits.mul_u64(x4, arg1[1])
+	x12, x11 := bits.mul_u64(x4, arg1[0])
+	x13, x14 := bits.add_u64(x12, x9, u64(0x0))
+	x15, x16 := bits.add_u64(x10, x7, u64(fiat.u1(x14)))
+	x17, x18 := bits.add_u64(x8, x5, u64(fiat.u1(x16)))
+	x19 := (u64(fiat.u1(x18)) + x6)
+	_, x20 := bits.mul_u64(x11, 0xd2b51da312547e1b)
+	x23, x22 := bits.mul_u64(x20, 0x1000000000000000)
+	x25, x24 := bits.mul_u64(x20, 0x14def9dea2f79cd6)
+	x27, x26 := bits.mul_u64(x20, 0x5812631a5cf5d3ed)
+	x28, x29 := bits.add_u64(x27, x24, u64(0x0))
+	x30 := (u64(fiat.u1(x29)) + x25)
+	_, x32 := bits.add_u64(x11, x26, u64(0x0))
+	x33, x34 := bits.add_u64(x13, x28, u64(fiat.u1(x32)))
+	x35, x36 := bits.add_u64(x15, x30, u64(fiat.u1(x34)))
+	x37, x38 := bits.add_u64(x17, x22, u64(fiat.u1(x36)))
+	x39, x40 := bits.add_u64(x19, x23, u64(fiat.u1(x38)))
+	x42, x41 := bits.mul_u64(x1, arg1[3])
+	x44, x43 := bits.mul_u64(x1, arg1[2])
+	x46, x45 := bits.mul_u64(x1, arg1[1])
+	x48, x47 := bits.mul_u64(x1, arg1[0])
+	x49, x50 := bits.add_u64(x48, x45, u64(0x0))
+	x51, x52 := bits.add_u64(x46, x43, u64(fiat.u1(x50)))
+	x53, x54 := bits.add_u64(x44, x41, u64(fiat.u1(x52)))
+	x55 := (u64(fiat.u1(x54)) + x42)
+	x56, x57 := bits.add_u64(x33, x47, u64(0x0))
+	x58, x59 := bits.add_u64(x35, x49, u64(fiat.u1(x57)))
+	x60, x61 := bits.add_u64(x37, x51, u64(fiat.u1(x59)))
+	x62, x63 := bits.add_u64(x39, x53, u64(fiat.u1(x61)))
+	x64, x65 := bits.add_u64(u64(fiat.u1(x40)), x55, u64(fiat.u1(x63)))
+	_, x66 := bits.mul_u64(x56, 0xd2b51da312547e1b)
+	x69, x68 := bits.mul_u64(x66, 0x1000000000000000)
+	x71, x70 := bits.mul_u64(x66, 0x14def9dea2f79cd6)
+	x73, x72 := bits.mul_u64(x66, 0x5812631a5cf5d3ed)
+	x74, x75 := bits.add_u64(x73, x70, u64(0x0))
+	x76 := (u64(fiat.u1(x75)) + x71)
+	_, x78 := bits.add_u64(x56, x72, u64(0x0))
+	x79, x80 := bits.add_u64(x58, x74, u64(fiat.u1(x78)))
+	x81, x82 := bits.add_u64(x60, x76, u64(fiat.u1(x80)))
+	x83, x84 := bits.add_u64(x62, x68, u64(fiat.u1(x82)))
+	x85, x86 := bits.add_u64(x64, x69, u64(fiat.u1(x84)))
+	x87 := (u64(fiat.u1(x86)) + u64(fiat.u1(x65)))
+	x89, x88 := bits.mul_u64(x2, arg1[3])
+	x91, x90 := bits.mul_u64(x2, arg1[2])
+	x93, x92 := bits.mul_u64(x2, arg1[1])
+	x95, x94 := bits.mul_u64(x2, arg1[0])
+	x96, x97 := bits.add_u64(x95, x92, u64(0x0))
+	x98, x99 := bits.add_u64(x93, x90, u64(fiat.u1(x97)))
+	x100, x101 := bits.add_u64(x91, x88, u64(fiat.u1(x99)))
+	x102 := (u64(fiat.u1(x101)) + x89)
+	x103, x104 := bits.add_u64(x79, x94, u64(0x0))
+	x105, x106 := bits.add_u64(x81, x96, u64(fiat.u1(x104)))
+	x107, x108 := bits.add_u64(x83, x98, u64(fiat.u1(x106)))
+	x109, x110 := bits.add_u64(x85, x100, u64(fiat.u1(x108)))
+	x111, x112 := bits.add_u64(x87, x102, u64(fiat.u1(x110)))
+	_, x113 := bits.mul_u64(x103, 0xd2b51da312547e1b)
+	x116, x115 := bits.mul_u64(x113, 0x1000000000000000)
+	x118, x117 := bits.mul_u64(x113, 0x14def9dea2f79cd6)
+	x120, x119 := bits.mul_u64(x113, 0x5812631a5cf5d3ed)
+	x121, x122 := bits.add_u64(x120, x117, u64(0x0))
+	x123 := (u64(fiat.u1(x122)) + x118)
+	_, x125 := bits.add_u64(x103, x119, u64(0x0))
+	x126, x127 := bits.add_u64(x105, x121, u64(fiat.u1(x125)))
+	x128, x129 := bits.add_u64(x107, x123, u64(fiat.u1(x127)))
+	x130, x131 := bits.add_u64(x109, x115, u64(fiat.u1(x129)))
+	x132, x133 := bits.add_u64(x111, x116, u64(fiat.u1(x131)))
+	x134 := (u64(fiat.u1(x133)) + u64(fiat.u1(x112)))
+	x136, x135 := bits.mul_u64(x3, arg1[3])
+	x138, x137 := bits.mul_u64(x3, arg1[2])
+	x140, x139 := bits.mul_u64(x3, arg1[1])
+	x142, x141 := bits.mul_u64(x3, arg1[0])
+	x143, x144 := bits.add_u64(x142, x139, u64(0x0))
+	x145, x146 := bits.add_u64(x140, x137, u64(fiat.u1(x144)))
+	x147, x148 := bits.add_u64(x138, x135, u64(fiat.u1(x146)))
+	x149 := (u64(fiat.u1(x148)) + x136)
+	x150, x151 := bits.add_u64(x126, x141, u64(0x0))
+	x152, x153 := bits.add_u64(x128, x143, u64(fiat.u1(x151)))
+	x154, x155 := bits.add_u64(x130, x145, u64(fiat.u1(x153)))
+	x156, x157 := bits.add_u64(x132, x147, u64(fiat.u1(x155)))
+	x158, x159 := bits.add_u64(x134, x149, u64(fiat.u1(x157)))
+	_, x160 := bits.mul_u64(x150, 0xd2b51da312547e1b)
+	x163, x162 := bits.mul_u64(x160, 0x1000000000000000)
+	x165, x164 := bits.mul_u64(x160, 0x14def9dea2f79cd6)
+	x167, x166 := bits.mul_u64(x160, 0x5812631a5cf5d3ed)
+	x168, x169 := bits.add_u64(x167, x164, u64(0x0))
+	x170 := (u64(fiat.u1(x169)) + x165)
+	_, x172 := bits.add_u64(x150, x166, u64(0x0))
+	x173, x174 := bits.add_u64(x152, x168, u64(fiat.u1(x172)))
+	x175, x176 := bits.add_u64(x154, x170, u64(fiat.u1(x174)))
+	x177, x178 := bits.add_u64(x156, x162, u64(fiat.u1(x176)))
+	x179, x180 := bits.add_u64(x158, x163, u64(fiat.u1(x178)))
+	x181 := (u64(fiat.u1(x180)) + u64(fiat.u1(x159)))
+	x182, x183 := bits.sub_u64(x173, 0x5812631a5cf5d3ed, u64(0x0))
+	x184, x185 := bits.sub_u64(x175, 0x14def9dea2f79cd6, u64(fiat.u1(x183)))
+	x186, x187 := bits.sub_u64(x177, u64(0x0), u64(fiat.u1(x185)))
+	x188, x189 := bits.sub_u64(x179, 0x1000000000000000, u64(fiat.u1(x187)))
+	_, x191 := bits.sub_u64(x181, u64(0x0), u64(fiat.u1(x189)))
+	x192 := fiat.cmovznz_u64(fiat.u1(x191), x182, x173)
+	x193 := fiat.cmovznz_u64(fiat.u1(x191), x184, x175)
+	x194 := fiat.cmovznz_u64(fiat.u1(x191), x186, x177)
+	x195 := fiat.cmovznz_u64(fiat.u1(x191), x188, x179)
+	out1[0] = x192
+	out1[1] = x193
+	out1[2] = x194
+	out1[3] = x195
+}
+
+fe_add :: proc "contextless" (out1, arg1, arg2: ^Montgomery_Domain_Field_Element) {
+	x1, x2 := bits.add_u64(arg1[0], arg2[0], u64(0x0))
+	x3, x4 := bits.add_u64(arg1[1], arg2[1], u64(fiat.u1(x2)))
+	x5, x6 := bits.add_u64(arg1[2], arg2[2], u64(fiat.u1(x4)))
+	x7, x8 := bits.add_u64(arg1[3], arg2[3], u64(fiat.u1(x6)))
+	x9, x10 := bits.sub_u64(x1, 0x5812631a5cf5d3ed, u64(0x0))
+	x11, x12 := bits.sub_u64(x3, 0x14def9dea2f79cd6, u64(fiat.u1(x10)))
+	x13, x14 := bits.sub_u64(x5, u64(0x0), u64(fiat.u1(x12)))
+	x15, x16 := bits.sub_u64(x7, 0x1000000000000000, u64(fiat.u1(x14)))
+	_, x18 := bits.sub_u64(u64(fiat.u1(x8)), u64(0x0), u64(fiat.u1(x16)))
+	x19 := fiat.cmovznz_u64(fiat.u1(x18), x9, x1)
+	x20 := fiat.cmovznz_u64(fiat.u1(x18), x11, x3)
+	x21 := fiat.cmovznz_u64(fiat.u1(x18), x13, x5)
+	x22 := fiat.cmovznz_u64(fiat.u1(x18), x15, x7)
+	out1[0] = x19
+	out1[1] = x20
+	out1[2] = x21
+	out1[3] = x22
+}
+
+fe_sub :: proc "contextless" (out1, arg1, arg2: ^Montgomery_Domain_Field_Element) {
+	x1, x2 := bits.sub_u64(arg1[0], arg2[0], u64(0x0))
+	x3, x4 := bits.sub_u64(arg1[1], arg2[1], u64(fiat.u1(x2)))
+	x5, x6 := bits.sub_u64(arg1[2], arg2[2], u64(fiat.u1(x4)))
+	x7, x8 := bits.sub_u64(arg1[3], arg2[3], u64(fiat.u1(x6)))
+	x9 := fiat.cmovznz_u64(fiat.u1(x8), u64(0x0), 0xffffffffffffffff)
+	x10, x11 := bits.add_u64(x1, (x9 & 0x5812631a5cf5d3ed), u64(0x0))
+	x12, x13 := bits.add_u64(x3, (x9 & 0x14def9dea2f79cd6), u64(fiat.u1(x11)))
+	x14, x15 := bits.add_u64(x5, u64(0x0), u64(fiat.u1(x13)))
+	x16, _ := bits.add_u64(x7, (x9 & 0x1000000000000000), u64(fiat.u1(x15)))
+	out1[0] = x10
+	out1[1] = x12
+	out1[2] = x14
+	out1[3] = x16
+}
+
+fe_opp :: proc "contextless" (out1, arg1: ^Montgomery_Domain_Field_Element) {
+	x1, x2 := bits.sub_u64(u64(0x0), arg1[0], u64(0x0))
+	x3, x4 := bits.sub_u64(u64(0x0), arg1[1], u64(fiat.u1(x2)))
+	x5, x6 := bits.sub_u64(u64(0x0), arg1[2], u64(fiat.u1(x4)))
+	x7, x8 := bits.sub_u64(u64(0x0), arg1[3], u64(fiat.u1(x6)))
+	x9 := fiat.cmovznz_u64(fiat.u1(x8), u64(0x0), 0xffffffffffffffff)
+	x10, x11 := bits.add_u64(x1, (x9 & 0x5812631a5cf5d3ed), u64(0x0))
+	x12, x13 := bits.add_u64(x3, (x9 & 0x14def9dea2f79cd6), u64(fiat.u1(x11)))
+	x14, x15 := bits.add_u64(x5, u64(0x0), u64(fiat.u1(x13)))
+	x16, _ := bits.add_u64(x7, (x9 & 0x1000000000000000), u64(fiat.u1(x15)))
+	out1[0] = x10
+	out1[1] = x12
+	out1[2] = x14
+	out1[3] = x16
+}
+
+fe_one :: proc "contextless" (out1: ^Montgomery_Domain_Field_Element) {
+	out1[0] = 0xd6ec31748d98951d
+	out1[1] = 0xc6ef5bf4737dcf70
+	out1[2] = 0xfffffffffffffffe
+	out1[3] = 0xfffffffffffffff
+}
+
+fe_non_zero :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) -> u64 {
+	return arg1[0] | (arg1[1] | (arg1[2] | arg1[3]))
+}
+
+@(optimization_mode = "none")
+fe_cond_assign :: #force_no_inline proc "contextless" (
+	out1, arg1: ^Montgomery_Domain_Field_Element,
+	arg2: int,
+) {
+	x1 := fiat.cmovznz_u64(fiat.u1(arg2), out1[0], arg1[0])
+	x2 := fiat.cmovznz_u64(fiat.u1(arg2), out1[1], arg1[1])
+	x3 := fiat.cmovznz_u64(fiat.u1(arg2), out1[2], arg1[2])
+	x4 := fiat.cmovznz_u64(fiat.u1(arg2), out1[3], arg1[3])
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+}
+
+fe_from_montgomery :: proc "contextless" (
+	out1: ^Non_Montgomery_Domain_Field_Element,
+	arg1: ^Montgomery_Domain_Field_Element,
+) {
+	x1 := arg1[0]
+	_, x2 := bits.mul_u64(x1, 0xd2b51da312547e1b)
+	x5, x4 := bits.mul_u64(x2, 0x1000000000000000)
+	x7, x6 := bits.mul_u64(x2, 0x14def9dea2f79cd6)
+	x9, x8 := bits.mul_u64(x2, 0x5812631a5cf5d3ed)
+	x10, x11 := bits.add_u64(x9, x6, u64(0x0))
+	_, x13 := bits.add_u64(x1, x8, u64(0x0))
+	x14, x15 := bits.add_u64(u64(0x0), x10, u64(fiat.u1(x13)))
+	x16, x17 := bits.add_u64(x14, arg1[1], u64(0x0))
+	_, x18 := bits.mul_u64(x16, 0xd2b51da312547e1b)
+	x21, x20 := bits.mul_u64(x18, 0x1000000000000000)
+	x23, x22 := bits.mul_u64(x18, 0x14def9dea2f79cd6)
+	x25, x24 := bits.mul_u64(x18, 0x5812631a5cf5d3ed)
+	x26, x27 := bits.add_u64(x25, x22, u64(0x0))
+	_, x29 := bits.add_u64(x16, x24, u64(0x0))
+	x30, x31 := bits.add_u64(
+		(u64(fiat.u1(x17)) + (u64(fiat.u1(x15)) + (u64(fiat.u1(x11)) + x7))),
+		x26,
+		u64(fiat.u1(x29)),
+	)
+	x32, x33 := bits.add_u64(x4, (u64(fiat.u1(x27)) + x23), u64(fiat.u1(x31)))
+	x34, x35 := bits.add_u64(x5, x20, u64(fiat.u1(x33)))
+	x36, x37 := bits.add_u64(x30, arg1[2], u64(0x0))
+	x38, x39 := bits.add_u64(x32, u64(0x0), u64(fiat.u1(x37)))
+	x40, x41 := bits.add_u64(x34, u64(0x0), u64(fiat.u1(x39)))
+	_, x42 := bits.mul_u64(x36, 0xd2b51da312547e1b)
+	x45, x44 := bits.mul_u64(x42, 0x1000000000000000)
+	x47, x46 := bits.mul_u64(x42, 0x14def9dea2f79cd6)
+	x49, x48 := bits.mul_u64(x42, 0x5812631a5cf5d3ed)
+	x50, x51 := bits.add_u64(x49, x46, u64(0x0))
+	_, x53 := bits.add_u64(x36, x48, u64(0x0))
+	x54, x55 := bits.add_u64(x38, x50, u64(fiat.u1(x53)))
+	x56, x57 := bits.add_u64(x40, (u64(fiat.u1(x51)) + x47), u64(fiat.u1(x55)))
+	x58, x59 := bits.add_u64(
+		(u64(fiat.u1(x41)) + (u64(fiat.u1(x35)) + x21)),
+		x44,
+		u64(fiat.u1(x57)),
+	)
+	x60, x61 := bits.add_u64(x54, arg1[3], u64(0x0))
+	x62, x63 := bits.add_u64(x56, u64(0x0), u64(fiat.u1(x61)))
+	x64, x65 := bits.add_u64(x58, u64(0x0), u64(fiat.u1(x63)))
+	_, x66 := bits.mul_u64(x60, 0xd2b51da312547e1b)
+	x69, x68 := bits.mul_u64(x66, 0x1000000000000000)
+	x71, x70 := bits.mul_u64(x66, 0x14def9dea2f79cd6)
+	x73, x72 := bits.mul_u64(x66, 0x5812631a5cf5d3ed)
+	x74, x75 := bits.add_u64(x73, x70, u64(0x0))
+	_, x77 := bits.add_u64(x60, x72, u64(0x0))
+	x78, x79 := bits.add_u64(x62, x74, u64(fiat.u1(x77)))
+	x80, x81 := bits.add_u64(x64, (u64(fiat.u1(x75)) + x71), u64(fiat.u1(x79)))
+	x82, x83 := bits.add_u64(
+		(u64(fiat.u1(x65)) + (u64(fiat.u1(x59)) + x45)),
+		x68,
+		u64(fiat.u1(x81)),
+	)
+	x84 := (u64(fiat.u1(x83)) + x69)
+	x85, x86 := bits.sub_u64(x78, 0x5812631a5cf5d3ed, u64(0x0))
+	x87, x88 := bits.sub_u64(x80, 0x14def9dea2f79cd6, u64(fiat.u1(x86)))
+	x89, x90 := bits.sub_u64(x82, u64(0x0), u64(fiat.u1(x88)))
+	x91, x92 := bits.sub_u64(x84, 0x1000000000000000, u64(fiat.u1(x90)))
+	_, x94 := bits.sub_u64(u64(0x0), u64(0x0), u64(fiat.u1(x92)))
+	x95 := fiat.cmovznz_u64(fiat.u1(x94), x85, x78)
+	x96 := fiat.cmovznz_u64(fiat.u1(x94), x87, x80)
+	x97 := fiat.cmovznz_u64(fiat.u1(x94), x89, x82)
+	x98 := fiat.cmovznz_u64(fiat.u1(x94), x91, x84)
+	out1[0] = x95
+	out1[1] = x96
+	out1[2] = x97
+	out1[3] = x98
+}
+
+fe_to_montgomery :: proc "contextless" (
+	out1: ^Montgomery_Domain_Field_Element,
+	arg1: ^Non_Montgomery_Domain_Field_Element,
+) {
+	x1 := arg1[1]
+	x2 := arg1[2]
+	x3 := arg1[3]
+	x4 := arg1[0]
+	x6, x5 := bits.mul_u64(x4, 0x399411b7c309a3d)
+	x8, x7 := bits.mul_u64(x4, 0xceec73d217f5be65)
+	x10, x9 := bits.mul_u64(x4, 0xd00e1ba768859347)
+	x12, x11 := bits.mul_u64(x4, 0xa40611e3449c0f01)
+	x13, x14 := bits.add_u64(x12, x9, u64(0x0))
+	x15, x16 := bits.add_u64(x10, x7, u64(fiat.u1(x14)))
+	x17, x18 := bits.add_u64(x8, x5, u64(fiat.u1(x16)))
+	_, x19 := bits.mul_u64(x11, 0xd2b51da312547e1b)
+	x22, x21 := bits.mul_u64(x19, 0x1000000000000000)
+	x24, x23 := bits.mul_u64(x19, 0x14def9dea2f79cd6)
+	x26, x25 := bits.mul_u64(x19, 0x5812631a5cf5d3ed)
+	x27, x28 := bits.add_u64(x26, x23, u64(0x0))
+	_, x30 := bits.add_u64(x11, x25, u64(0x0))
+	x31, x32 := bits.add_u64(x13, x27, u64(fiat.u1(x30)))
+	x33, x34 := bits.add_u64(x15, (u64(fiat.u1(x28)) + x24), u64(fiat.u1(x32)))
+	x35, x36 := bits.add_u64(x17, x21, u64(fiat.u1(x34)))
+	x38, x37 := bits.mul_u64(x1, 0x399411b7c309a3d)
+	x40, x39 := bits.mul_u64(x1, 0xceec73d217f5be65)
+	x42, x41 := bits.mul_u64(x1, 0xd00e1ba768859347)
+	x44, x43 := bits.mul_u64(x1, 0xa40611e3449c0f01)
+	x45, x46 := bits.add_u64(x44, x41, u64(0x0))
+	x47, x48 := bits.add_u64(x42, x39, u64(fiat.u1(x46)))
+	x49, x50 := bits.add_u64(x40, x37, u64(fiat.u1(x48)))
+	x51, x52 := bits.add_u64(x31, x43, u64(0x0))
+	x53, x54 := bits.add_u64(x33, x45, u64(fiat.u1(x52)))
+	x55, x56 := bits.add_u64(x35, x47, u64(fiat.u1(x54)))
+	x57, x58 := bits.add_u64(
+		((u64(fiat.u1(x36)) + (u64(fiat.u1(x18)) + x6)) + x22),
+		x49,
+		u64(fiat.u1(x56)),
+	)
+	_, x59 := bits.mul_u64(x51, 0xd2b51da312547e1b)
+	x62, x61 := bits.mul_u64(x59, 0x1000000000000000)
+	x64, x63 := bits.mul_u64(x59, 0x14def9dea2f79cd6)
+	x66, x65 := bits.mul_u64(x59, 0x5812631a5cf5d3ed)
+	x67, x68 := bits.add_u64(x66, x63, u64(0x0))
+	_, x70 := bits.add_u64(x51, x65, u64(0x0))
+	x71, x72 := bits.add_u64(x53, x67, u64(fiat.u1(x70)))
+	x73, x74 := bits.add_u64(x55, (u64(fiat.u1(x68)) + x64), u64(fiat.u1(x72)))
+	x75, x76 := bits.add_u64(x57, x61, u64(fiat.u1(x74)))
+	x78, x77 := bits.mul_u64(x2, 0x399411b7c309a3d)
+	x80, x79 := bits.mul_u64(x2, 0xceec73d217f5be65)
+	x82, x81 := bits.mul_u64(x2, 0xd00e1ba768859347)
+	x84, x83 := bits.mul_u64(x2, 0xa40611e3449c0f01)
+	x85, x86 := bits.add_u64(x84, x81, u64(0x0))
+	x87, x88 := bits.add_u64(x82, x79, u64(fiat.u1(x86)))
+	x89, x90 := bits.add_u64(x80, x77, u64(fiat.u1(x88)))
+	x91, x92 := bits.add_u64(x71, x83, u64(0x0))
+	x93, x94 := bits.add_u64(x73, x85, u64(fiat.u1(x92)))
+	x95, x96 := bits.add_u64(x75, x87, u64(fiat.u1(x94)))
+	x97, x98 := bits.add_u64(
+		((u64(fiat.u1(x76)) + (u64(fiat.u1(x58)) + (u64(fiat.u1(x50)) + x38))) + x62),
+		x89,
+		u64(fiat.u1(x96)),
+	)
+	_, x99 := bits.mul_u64(x91, 0xd2b51da312547e1b)
+	x102, x101 := bits.mul_u64(x99, 0x1000000000000000)
+	x104, x103 := bits.mul_u64(x99, 0x14def9dea2f79cd6)
+	x106, x105 := bits.mul_u64(x99, 0x5812631a5cf5d3ed)
+	x107, x108 := bits.add_u64(x106, x103, u64(0x0))
+	_, x110 := bits.add_u64(x91, x105, u64(0x0))
+	x111, x112 := bits.add_u64(x93, x107, u64(fiat.u1(x110)))
+	x113, x114 := bits.add_u64(x95, (u64(fiat.u1(x108)) + x104), u64(fiat.u1(x112)))
+	x115, x116 := bits.add_u64(x97, x101, u64(fiat.u1(x114)))
+	x118, x117 := bits.mul_u64(x3, 0x399411b7c309a3d)
+	x120, x119 := bits.mul_u64(x3, 0xceec73d217f5be65)
+	x122, x121 := bits.mul_u64(x3, 0xd00e1ba768859347)
+	x124, x123 := bits.mul_u64(x3, 0xa40611e3449c0f01)
+	x125, x126 := bits.add_u64(x124, x121, u64(0x0))
+	x127, x128 := bits.add_u64(x122, x119, u64(fiat.u1(x126)))
+	x129, x130 := bits.add_u64(x120, x117, u64(fiat.u1(x128)))
+	x131, x132 := bits.add_u64(x111, x123, u64(0x0))
+	x133, x134 := bits.add_u64(x113, x125, u64(fiat.u1(x132)))
+	x135, x136 := bits.add_u64(x115, x127, u64(fiat.u1(x134)))
+	x137, x138 := bits.add_u64(
+		((u64(fiat.u1(x116)) + (u64(fiat.u1(x98)) + (u64(fiat.u1(x90)) + x78))) + x102),
+		x129,
+		u64(fiat.u1(x136)),
+	)
+	_, x139 := bits.mul_u64(x131, 0xd2b51da312547e1b)
+	x142, x141 := bits.mul_u64(x139, 0x1000000000000000)
+	x144, x143 := bits.mul_u64(x139, 0x14def9dea2f79cd6)
+	x146, x145 := bits.mul_u64(x139, 0x5812631a5cf5d3ed)
+	x147, x148 := bits.add_u64(x146, x143, u64(0x0))
+	_, x150 := bits.add_u64(x131, x145, u64(0x0))
+	x151, x152 := bits.add_u64(x133, x147, u64(fiat.u1(x150)))
+	x153, x154 := bits.add_u64(x135, (u64(fiat.u1(x148)) + x144), u64(fiat.u1(x152)))
+	x155, x156 := bits.add_u64(x137, x141, u64(fiat.u1(x154)))
+	x157 := ((u64(fiat.u1(x156)) + (u64(fiat.u1(x138)) + (u64(fiat.u1(x130)) + x118))) + x142)
+	x158, x159 := bits.sub_u64(x151, 0x5812631a5cf5d3ed, u64(0x0))
+	x160, x161 := bits.sub_u64(x153, 0x14def9dea2f79cd6, u64(fiat.u1(x159)))
+	x162, x163 := bits.sub_u64(x155, u64(0x0), u64(fiat.u1(x161)))
+	x164, x165 := bits.sub_u64(x157, 0x1000000000000000, u64(fiat.u1(x163)))
+	_, x167 := bits.sub_u64(u64(0x0), u64(0x0), u64(fiat.u1(x165)))
+	x168 := fiat.cmovznz_u64(fiat.u1(x167), x158, x151)
+	x169 := fiat.cmovznz_u64(fiat.u1(x167), x160, x153)
+	x170 := fiat.cmovznz_u64(fiat.u1(x167), x162, x155)
+	x171 := fiat.cmovznz_u64(fiat.u1(x167), x164, x157)
+	out1[0] = x168
+	out1[1] = x169
+	out1[2] = x170
+	out1[3] = x171
+}

From 563c52741903d3a930fd4c4f8128c275fefc1399 Mon Sep 17 00:00:00 2001
From: Yawning Angel <yawning@schwanenlied.me>
Date: Thu, 21 Mar 2024 02:17:59 +0900
Subject: [PATCH 11/14] core/crypto/_edwards25519: Initial import

---
 core/crypto/_edwards25519/edwards25519.odin   | 428 ++++++++++++++++++
 .../_edwards25519/edwards25519_scalar.odin    |  61 +++
 .../edwards25519_scalar_mul.odin              | 288 ++++++++++++
 core/crypto/_fiat/field_curve25519/field.odin | 134 ++++--
 .../_fiat/field_curve25519/field51.odin       |   5 +-
 .../crypto/_fiat/field_scalar25519/field.odin |   8 +-
 core/crypto/x25519/x25519.odin                |  10 +-
 7 files changed, 885 insertions(+), 49 deletions(-)
 create mode 100644 core/crypto/_edwards25519/edwards25519.odin
 create mode 100644 core/crypto/_edwards25519/edwards25519_scalar.odin
 create mode 100644 core/crypto/_edwards25519/edwards25519_scalar_mul.odin

diff --git a/core/crypto/_edwards25519/edwards25519.odin b/core/crypto/_edwards25519/edwards25519.odin
new file mode 100644
index 000000000..952bb9ef8
--- /dev/null
+++ b/core/crypto/_edwards25519/edwards25519.odin
@@ -0,0 +1,428 @@
+package _edwards25519
+
+/*
+This implements the edwards25519 composite-order group, primarily for
+the purpose of implementing X25519, Ed25519, and ristretto255.  Use of
+this package for other purposes is NOT RECOMMENDED.
+
+See:
+- https://eprint.iacr.org/2011/368.pdf
+- https://datatracker.ietf.org/doc/html/rfc8032
+- https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html
+*/
+
+import "base:intrinsics"
+import "core:crypto"
+import field "core:crypto/_fiat/field_curve25519"
+import "core:mem"
+
+// Group_Element is an edwards25519 group element, as extended homogenous
+// coordinates, which represents the affine point `(x, y)` as `(X, Y, Z, T)`,
+// with the relations `x = X/Z`, `y = Y/Z`, and `x * y = T/Z`.
+//
+// d = -121665/121666 = 37095705934669439343138083508754565189542113879843219016388785533085940283555
+// a = -1
+//
+// Notes:
+// - There is considerable scope for optimization, however that
+//   will not change the external API, and this is simple and reasonably
+//   performant.
+// - The API delibarately makes it hard to create arbitrary group
+//   elements that are not on the curve.
+// - The group element decoding routine takes the opinionated stance of
+//   rejecting non-canonical encodings.
+
+FE_D := field.Tight_Field_Element {
+	929955233495203,
+	466365720129213,
+	1662059464998953,
+	2033849074728123,
+	1442794654840575,
+}
+@(private)
+FE_A := field.Tight_Field_Element {
+	2251799813685228,
+	2251799813685247,
+	2251799813685247,
+	2251799813685247,
+	2251799813685247,
+}
+@(private)
+FE_D2 := field.Tight_Field_Element {
+	1859910466990425,
+	932731440258426,
+	1072319116312658,
+	1815898335770999,
+	633789495995903,
+}
+@(private)
+GE_BASEPOINT := Group_Element {
+	field.Tight_Field_Element {
+		1738742601995546,
+		1146398526822698,
+		2070867633025821,
+		562264141797630,
+		587772402128613,
+	},
+	field.Tight_Field_Element {
+		1801439850948184,
+		1351079888211148,
+		450359962737049,
+		900719925474099,
+		1801439850948198,
+	},
+	field.Tight_Field_Element{1, 0, 0, 0, 0},
+	field.Tight_Field_Element {
+		1841354044333475,
+		16398895984059,
+		755974180946558,
+		900171276175154,
+		1821297809914039,
+	},
+}
+GE_IDENTITY := Group_Element {
+	field.Tight_Field_Element{0, 0, 0, 0, 0},
+	field.Tight_Field_Element{1, 0, 0, 0, 0},
+	field.Tight_Field_Element{1, 0, 0, 0, 0},
+	field.Tight_Field_Element{0, 0, 0, 0, 0},
+}
+
+Group_Element :: struct {
+	x: field.Tight_Field_Element,
+	y: field.Tight_Field_Element,
+	z: field.Tight_Field_Element,
+	t: field.Tight_Field_Element,
+}
+
+ge_clear :: proc "contextless" (ge: ^Group_Element) {
+	mem.zero_explicit(ge, size_of(Group_Element))
+}
+
+ge_set :: proc "contextless" (ge, a: ^Group_Element) {
+	field.fe_set(&ge.x, &a.x)
+	field.fe_set(&ge.y, &a.y)
+	field.fe_set(&ge.z, &a.z)
+	field.fe_set(&ge.t, &a.t)
+}
+
+@(require_results)
+ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
+	if len(b) != 32 {
+		intrinsics.trap()
+	}
+	b_ := transmute(^[32]byte)(raw_data(b))
+
+	// Do the work in a scratch element, so that ge is unchanged on
+	// failure.
+	tmp: Group_Element = ---
+	defer ge_clear(&tmp)
+	field.fe_one(&tmp.z) // Z = 1
+
+	// The encoding is the y-coordinate, with the x-coordinate polarity
+	// (odd/even) encoded in the MSB.
+	field.fe_from_bytes(&tmp.y, b_) // ignores high bit
+
+	// Recover the candidate x-coordinate via the curve equation:
+	// x^2 = (y^2 - 1) / (d * y^2 + 1) (mod p)
+
+	fe_tmp := &tmp.t // Use this to store intermediaries.
+	fe_one := &tmp.z
+
+	// x = num = y^2 - 1
+	field.fe_carry_square(fe_tmp, field.fe_relax_cast(&tmp.y)) // fe_tmp = y^2
+	field.fe_carry_sub(&tmp.x, fe_tmp, fe_one)
+
+	// den = d * y^2 + 1
+	field.fe_carry_mul(fe_tmp, field.fe_relax_cast(fe_tmp), field.fe_relax_cast(&FE_D))
+	field.fe_carry_add(fe_tmp, fe_tmp, fe_one)
+
+	// x = invsqrt(den/num)
+	is_square := field.fe_carry_sqrt_ratio_m1(
+		&tmp.x,
+		field.fe_relax_cast(&tmp.x),
+		field.fe_relax_cast(fe_tmp),
+	)
+	if is_square == 0 {
+		return false
+	}
+
+	// Pick the right x-coordinate.
+	field.fe_cond_negate(&tmp.x, &tmp.x, int(b[31] >> 7))
+
+	// t = x * y
+	field.fe_carry_mul(&tmp.t, field.fe_relax_cast(&tmp.x), field.fe_relax_cast(&tmp.y))
+
+	// Reject non-canonical encodings of ge.
+	buf: [32]byte = ---
+	field.fe_to_bytes(&buf, &tmp.y)
+	buf[31] |= byte(field.fe_is_negative(&tmp.x)) << 7
+	is_canonical := crypto.compare_constant_time(b, buf[:])
+
+	ge_cond_assign(ge, &tmp, is_canonical)
+
+	mem.zero_explicit(&buf, size_of(buf))
+
+	return is_canonical == 1
+}
+
+ge_bytes :: proc "contextless" (ge: ^Group_Element, dst: []byte) {
+	if len(dst) != 32 {
+		intrinsics.trap()
+	}
+	dst_ := transmute(^[32]byte)(raw_data(dst))
+
+	// Convert the element to affine (x, y) representation.
+	x, y, z_inv: field.Tight_Field_Element = ---, ---, ---
+	field.fe_carry_inv(&z_inv, field.fe_relax_cast(&ge.z))
+	field.fe_carry_mul(&x, field.fe_relax_cast(&ge.x), field.fe_relax_cast(&z_inv))
+	field.fe_carry_mul(&y, field.fe_relax_cast(&ge.y), field.fe_relax_cast(&z_inv))
+
+	// Encode the y-coordinate.
+	field.fe_to_bytes(dst_, &y)
+
+	// Copy the least significant bit of the x-coordinate to the most
+	// significant bit of the encoded y-coordinate.
+	dst_[31] |= byte((x[0] & 1) << 7)
+
+	field.fe_clear_vec([]^field.Tight_Field_Element{&x, &y, &z_inv})
+}
+
+ge_identity :: proc "contextless" (ge: ^Group_Element) {
+	field.fe_zero(&ge.x)
+	field.fe_one(&ge.y)
+	field.fe_one(&ge.z)
+	field.fe_zero(&ge.t)
+}
+
+ge_generator :: proc "contextless" (ge: ^Group_Element) {
+	ge_set(ge, &GE_BASEPOINT)
+}
+
+@(private)
+Addend_Group_Element :: struct {
+	y2_minus_x2:  field.Loose_Field_Element, // t1
+	y2_plus_x2:   field.Loose_Field_Element, // t3
+	k_times_t2:   field.Tight_Field_Element, // t4
+	two_times_z2: field.Loose_Field_Element, // t5
+}
+
+@(private)
+ge_addend_set :: proc "contextless" (ge_a: ^Addend_Group_Element, ge: ^Group_Element) {
+	field.fe_sub(&ge_a.y2_minus_x2, &ge.y, &ge.x)
+	field.fe_add(&ge_a.y2_plus_x2, &ge.y, &ge.x)
+	field.fe_carry_mul(&ge_a.k_times_t2, field.fe_relax_cast(&FE_D2), field.fe_relax_cast(&ge.t))
+	field.fe_add(&ge_a.two_times_z2, &ge.z, &ge.z)
+}
+
+@(private)
+ge_addend_conditional_assign :: proc "contextless" (ge_a, a: ^Addend_Group_Element, ctrl: int) {
+	field.fe_cond_select(&ge_a.y2_minus_x2, &ge_a.y2_minus_x2, &a.y2_minus_x2, ctrl)
+	field.fe_cond_select(&ge_a.y2_plus_x2, &ge_a.y2_plus_x2, &a.y2_plus_x2, ctrl)
+	field.fe_cond_select(&ge_a.k_times_t2, &ge_a.k_times_t2, &a.k_times_t2, ctrl)
+	field.fe_cond_select(&ge_a.two_times_z2, &ge_a.two_times_z2, &a.two_times_z2, ctrl)
+}
+
+@(private)
+Add_Scratch :: struct {
+	A, B, C, D: field.Tight_Field_Element,
+	E, F, G, H: field.Loose_Field_Element,
+	t0, t2:     field.Loose_Field_Element,
+}
+
+ge_add :: proc "contextless" (ge, a, b: ^Group_Element) {
+	b_: Addend_Group_Element = ---
+	ge_addend_set(&b_, b)
+
+	scratch: Add_Scratch = ---
+	ge_add_addend(ge, a, &b_, &scratch)
+
+	mem.zero_explicit(&b_, size_of(Addend_Group_Element))
+	mem.zero_explicit(&scratch, size_of(Add_Scratch))
+}
+
+@(private)
+ge_add_addend :: proc "contextless" (
+	ge, a: ^Group_Element,
+	b: ^Addend_Group_Element,
+	scratch: ^Add_Scratch,
+) {
+	// https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#addition-add-2008-hwcd-3
+	// Assumptions: k=2*d.
+	//
+	// t0 = Y1-X1
+	// t1 = Y2-X2
+	// A = t0*t1
+	// t2 = Y1+X1
+	// t3 = Y2+X2
+	// B = t2*t3
+	// t4 = k*T2
+	// C = T1*t4
+	// t5 = 2*Z2
+	// D = Z1*t5
+	// E = B-A
+	// F = D-C
+	// G = D+C
+	// H = B+A
+	// X3 = E*F
+	// Y3 = G*H
+	// T3 = E*H
+	// Z3 = F*G
+	//
+	// In order to make the scalar multiply faster, the addend is provided
+	// as a `Addend_Group_Element` with t1, t3, t4, and t5 precomputed, as
+	// it is trivially obvious that those are the only values used by the
+	// formula that are directly dependent on `b`, and are only dependent
+	// on `b` and constants.  This saves 1 sub, 2 adds, and 1 multiply,
+	// each time the intermediate representation can be reused.
+
+	A, B, C, D := &scratch.A, &scratch.B, &scratch.C, &scratch.D
+	E, F, G, H := &scratch.E, &scratch.F, &scratch.G, &scratch.H
+	t0, t2 := &scratch.t0, &scratch.t2
+
+	field.fe_sub(t0, &a.y, &a.x)
+	t1 := &b.y2_minus_x2
+	field.fe_carry_mul(A, t0, t1)
+	field.fe_add(t2, &a.y, &a.x)
+	t3 := &b.y2_plus_x2
+	field.fe_carry_mul(B, t2, t3)
+	t4 := &b.k_times_t2
+	field.fe_carry_mul(C, field.fe_relax_cast(&a.t), field.fe_relax_cast(t4))
+	t5 := &b.two_times_z2
+	field.fe_carry_mul(D, field.fe_relax_cast(&a.z), t5)
+	field.fe_sub(E, B, A)
+	field.fe_sub(F, D, C)
+	field.fe_add(G, D, C)
+	field.fe_add(H, B, A)
+	field.fe_carry_mul(&ge.x, E, F)
+	field.fe_carry_mul(&ge.y, G, H)
+	field.fe_carry_mul(&ge.t, E, H)
+	field.fe_carry_mul(&ge.z, F, G)
+}
+
+@(private)
+Double_Scratch :: struct {
+	A, B, C, D, G: field.Tight_Field_Element,
+	t0, t2, t3:    field.Tight_Field_Element,
+	E, F, H:       field.Loose_Field_Element,
+	t1:            field.Loose_Field_Element,
+}
+
+ge_double :: proc "contextless" (ge, a: ^Group_Element, scratch: ^Double_Scratch = nil) {
+	// https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-dbl-2008-hwcd
+	//
+	// A = X1^2
+	// B = Y1^2
+	// t0 = Z1^2
+	// C = 2*t0
+	// D = a*A
+	// t1 = X1+Y1
+	// t2 = t1^2
+	// t3 = t2-A
+	// E = t3-B
+	// G = D+B
+	// F = G-C
+	// H = D-B
+	// X3 = E*F
+	// Y3 = G*H
+	// T3 = E*H
+	// Z3 = F*G
+
+	sanitize, scratch := scratch == nil, scratch
+	if sanitize {
+		tmp: Double_Scratch = ---
+		scratch = &tmp
+	}
+
+	A, B, C, D, G := &scratch.A, &scratch.B, &scratch.C, &scratch.D, &scratch.G
+	t0, t2, t3 := &scratch.t0, &scratch.t2, &scratch.t3
+	E, F, H := &scratch.E, &scratch.F, &scratch.H
+	t1 := &scratch.t1
+
+	field.fe_carry_square(A, field.fe_relax_cast(&a.x))
+	field.fe_carry_square(B, field.fe_relax_cast(&a.y))
+	field.fe_carry_square(t0, field.fe_relax_cast(&a.z))
+	field.fe_carry_add(C, t0, t0)
+	field.fe_carry_mul(D, field.fe_relax_cast(&FE_A), field.fe_relax_cast(A))
+	field.fe_add(t1, &a.x, &a.y)
+	field.fe_carry_square(t2, t1)
+	field.fe_carry_sub(t3, t2, A)
+	field.fe_sub(E, t3, B)
+	field.fe_carry_add(G, D, B)
+	field.fe_sub(F, G, C)
+	field.fe_sub(H, D, B)
+	G_ := field.fe_relax_cast(G)
+	field.fe_carry_mul(&ge.x, E, F)
+	field.fe_carry_mul(&ge.y, G_, H)
+	field.fe_carry_mul(&ge.t, E, H)
+	field.fe_carry_mul(&ge.z, F, G_)
+
+	if sanitize {
+		mem.zero_explicit(scratch, size_of(Double_Scratch))
+	}
+}
+
+ge_negate :: proc "contextless" (ge, a: ^Group_Element) {
+	field.fe_carry_opp(&ge.x, &a.x)
+	field.fe_set(&ge.y, &a.y)
+	field.fe_set(&ge.z, &a.z)
+	field.fe_carry_opp(&ge.t, &a.t)
+}
+
+ge_cond_negate :: proc "contextless" (ge, a: ^Group_Element, ctrl: int) {
+	tmp: Group_Element = ---
+	ge_negate(&tmp, a)
+	ge_cond_assign(ge, &tmp, ctrl)
+
+	ge_clear(&tmp)
+}
+
+ge_cond_assign :: proc "contextless" (ge, a: ^Group_Element, ctrl: int) {
+	field.fe_cond_assign(&ge.x, &a.x, ctrl)
+	field.fe_cond_assign(&ge.y, &a.y, ctrl)
+	field.fe_cond_assign(&ge.z, &a.z, ctrl)
+	field.fe_cond_assign(&ge.t, &a.t, ctrl)
+}
+
+ge_cond_select :: proc "contextless" (ge, a, b: ^Group_Element, ctrl: int) {
+	field.fe_cond_select(&ge.x, &a.x, &b.x, ctrl)
+	field.fe_cond_select(&ge.y, &a.y, &b.y, ctrl)
+	field.fe_cond_select(&ge.z, &a.z, &b.z, ctrl)
+	field.fe_cond_select(&ge.t, &a.t, &b.t, ctrl)
+}
+
+@(require_results)
+ge_equal :: proc "contextless" (a, b: ^Group_Element) -> int {
+	// (x, y) ?= (x', y') -> (X/Z, Y/Z) ?= (X'/Z', Y'/Z')
+	// X/Z ?= X'/Z', Y/Z ?= Y'/Z' -> X*Z' ?= X'*Z, Y*Z' ?= Y'*Z
+	ax_bz, bx_az, ay_bz, by_az: field.Tight_Field_Element = ---, ---, ---, ---
+	field.fe_carry_mul(&ax_bz, field.fe_relax_cast(&a.x), field.fe_relax_cast(&b.z))
+	field.fe_carry_mul(&bx_az, field.fe_relax_cast(&b.x), field.fe_relax_cast(&a.z))
+	field.fe_carry_mul(&ay_bz, field.fe_relax_cast(&a.y), field.fe_relax_cast(&b.z))
+	field.fe_carry_mul(&by_az, field.fe_relax_cast(&b.y), field.fe_relax_cast(&a.z))
+
+	ret := field.fe_equal(&ax_bz, &bx_az) & field.fe_equal(&ay_bz, &by_az)
+
+	field.fe_clear_vec([]^field.Tight_Field_Element{&ax_bz, &ay_bz, &bx_az, &by_az})
+
+	return ret
+}
+
+@(require_results)
+ge_is_small_order :: proc "contextless" (ge: ^Group_Element) -> bool {
+	tmp: Group_Element = ---
+	ge_double(&tmp, ge)
+	ge_double(&tmp, &tmp)
+	ge_double(&tmp, &tmp)
+	return ge_equal(&tmp, &GE_IDENTITY) == 1
+}
+
+@(require_results)
+ge_in_prime_order_subgroup_vartime :: proc "contextless" (ge: ^Group_Element) -> bool {
+	// This is currently *very* expensive.  The faster method would be
+	// something like (https://eprint.iacr.org/2022/1164.pdf), however
+	// that is a ~50% speedup, and a lot of added complexity for something
+	// that is better solved by "just use ristretto255".
+	tmp: Group_Element = ---
+	_ge_scalarmult(&tmp, ge, &SC_ELL, true)
+	return ge_equal(&tmp, &GE_IDENTITY) == 1
+}
diff --git a/core/crypto/_edwards25519/edwards25519_scalar.odin b/core/crypto/_edwards25519/edwards25519_scalar.odin
new file mode 100644
index 000000000..2644fe5f7
--- /dev/null
+++ b/core/crypto/_edwards25519/edwards25519_scalar.odin
@@ -0,0 +1,61 @@
+package _edwards25519
+
+import "base:intrinsics"
+import field "core:crypto/_fiat/field_scalar25519"
+import "core:mem"
+
+Scalar :: field.Montgomery_Domain_Field_Element
+
+// WARNING: This is non-canonical and only to be used when checking if
+// a group element is on the prime-order subgroup.
+@(private)
+SC_ELL := field.Non_Montgomery_Domain_Field_Element {
+	field.ELL[0],
+	field.ELL[1],
+	field.ELL[2],
+	field.ELL[3],
+}
+
+sc_set_u64 :: proc "contextless" (sc: ^Scalar, i: u64) {
+	tmp := field.Non_Montgomery_Domain_Field_Element{i, 0, 0, 0}
+	field.fe_to_montgomery(sc, &tmp)
+
+	mem.zero_explicit(&tmp, size_of(tmp))
+}
+
+@(require_results)
+sc_set_bytes :: proc "contextless" (sc: ^Scalar, b: []byte) -> bool {
+	if len(b) != 32 {
+		intrinsics.trap()
+	}
+	b_ := transmute(^[32]byte)(raw_data(b))
+	return field.fe_from_bytes(sc, b_)
+}
+
+sc_set_bytes_rfc8032 :: proc "contextless" (sc: ^Scalar, b: []byte) {
+	if len(b) != 32 {
+		intrinsics.trap()
+	}
+	b_ := transmute(^[32]byte)(raw_data(b))
+	field.fe_from_bytes_rfc8032(sc, b_)
+}
+
+sc_clear :: proc "contextless" (sc: ^Scalar) {
+	mem.zero_explicit(sc, size_of(Scalar))
+}
+
+sc_set :: field.fe_set
+sc_set_bytes_wide :: field.fe_from_bytes_wide
+sc_bytes :: field.fe_to_bytes
+
+sc_zero :: field.fe_zero
+sc_one :: field.fe_one
+
+sc_add :: field.fe_add
+sc_sub :: field.fe_sub
+sc_negate :: field.fe_opp
+sc_mul :: field.fe_mul
+sc_square :: field.fe_square
+
+sc_cond_assign :: field.fe_cond_assign
+sc_equal :: field.fe_equal
diff --git a/core/crypto/_edwards25519/edwards25519_scalar_mul.odin b/core/crypto/_edwards25519/edwards25519_scalar_mul.odin
new file mode 100644
index 000000000..757a51257
--- /dev/null
+++ b/core/crypto/_edwards25519/edwards25519_scalar_mul.odin
@@ -0,0 +1,288 @@
+package _edwards25519
+
+import field "core:crypto/_fiat/field_scalar25519"
+import "core:math/bits"
+import "core:mem"
+
+// GE_BASEPOINT_TABLE is 1 * G, ... 15 * G, in precomputed format.
+//
+// Note: When generating, the values were reduced to Tight_Field_Element
+// ranges, even though that is not required.
+@(private)
+GE_BASEPOINT_TABLE := Multiply_Table {
+	{
+		{62697248952638, 204681361388450, 631292143396476, 338455783676468, 1213667448819585},
+		{1288382639258501, 245678601348599, 269427782077623, 1462984067271730, 137412439391563},
+		{301289933810280, 1259582250014073, 1422107436869536, 796239922652654, 1953934009299142},
+		{2, 0, 0, 0, 0},
+	},
+	{
+		{1519297034332653, 1098796920435767, 1823476547744119, 808144629470969, 2110930855619772},
+		{338005982828284, 1667856962156925, 100399270107451, 1604566703601691, 1950338038771369},
+		{1920505767731247, 1443759578976892, 1659852098357048, 1484431291070208, 275018744912646},
+		{763163817085987, 2195095074806923, 2167883174351839, 1868059999999762, 911071066608705},
+	},
+	{
+		{960627541894068, 1314966688943942, 1126875971034044, 2059608312958945, 605975666152586},
+		{1714478358025626, 2209607666607510, 1600912834284834, 496072478982142, 481970031861896},
+		{851735079403194, 1088965826757164, 141569479297499, 602804610059257, 2004026468601520},
+		{197585529552380, 324719066578543, 564481854250498, 1173818332764578, 35452976395676},
+	},
+	{
+		{1152980410747203, 2196804280851952, 25745194962557, 1915167295473129, 1266299690309224},
+		{809905889679060, 979732230071345, 1509972345538142, 188492426534402, 818965583123815},
+		{997685409185036, 1451818320876327, 2126681166774509, 2000509606057528, 235432372486854},
+		{887734189279642, 1460338685162044, 877378220074262, 102436391401299, 153369156847490},
+	},
+	{
+		{2056621900836770, 1821657694132497, 1627986892909426, 1163363868678833, 1108873376459226},
+		{1187697490593623, 1066539945237335, 885654531892000, 1357534489491782, 359370291392448},
+		{1509033452137525, 1305318174298508, 613642471748944, 1987256352550234, 1044283663101541},
+		{220105720697037, 387661783287620, 328296827867762, 360035589590664, 795213236824054},
+	},
+	{
+		{1820794733038396, 1612235121681074, 757405923441402, 1094031020892801, 231025333128907},
+		{1639067873254194, 1484176557946322, 300800382144789, 1329915446659183, 1211704578730455},
+		{641900794791527, 1711751746971612, 179044712319955, 576455585963824, 1852617592509865},
+		{743549047192397, 685091042550147, 1952415336873496, 1965124675654685, 513364998442917},
+	},
+	{
+		{1004557076870448, 1762911374844520, 1330807633622723, 384072910939787, 953849032243810},
+		{2178275058221458, 257933183722891, 376684351537894, 2010189102001786, 1981824297484148},
+		{1332915663881114, 1286540505502549, 1741691283561518, 977214932156314, 1764059494778091},
+		{429702949064027, 1368332611650677, 2019867176450999, 2212258376161746, 526160996742554},
+	},
+	{
+		{2098932988258576, 2203688382075948, 2120400160059479, 1748488020948146, 1203264167282624},
+		{677131386735829, 1850249298025188, 672782146532031, 2144145693078904, 2088656272813787},
+		{1065622343976192, 1573853211848116, 223560413590068, 333846833073379, 27832122205830},
+		{1781008836504573, 917619542051793, 544322748939913, 882577394308384, 1720521246471195},
+	},
+	{
+		{660120928379860, 2081944024858618, 1878411111349191, 424587356517195, 2111317439894005},
+		{1834193977811532, 1864164086863319, 797334633289424, 150410812403062, 2085177078466389},
+		{1438117271371866, 783915531014482, 388731514584658, 292113935417795, 1945855002546714},
+		{1678140823166658, 679103239148744, 614102761596238, 1052962498997885, 1863983323810390},
+	},
+	{
+		{1690309392496233, 1116333140326275, 1377242323631039, 717196888780674, 82724646713353},
+		{1722370213432106, 74265192976253, 264239578448472, 1714909985012994, 2216984958602173},
+		{2010482366920922, 1294036471886319, 566466395005815, 1631955803657320, 1751698647538458},
+		{1073230604155753, 1159087041338551, 1664057985455483, 127472702826203, 1339591128522371},
+	},
+	{
+		{478053307175577, 2179515791720985, 21146535423512, 1831683844029536, 462805561553981},
+		{1945267486565588, 1298536818409655, 2214511796262989, 1904981051429012, 252904800782086},
+		{268945954671210, 222740425595395, 1208025911856230, 1080418823003555, 75929831922483},
+		{1884784014268948, 643868448202966, 978736549726821, 46385971089796, 1296884812292320},
+	},
+	{
+		{1861159462859103, 7077532564710, 963010365896826, 1938780006785270, 766241051941647},
+		{1778966986051906, 1713995999765361, 1394565822271816, 1366699246468722, 1213407027149475},
+		{1978989286560907, 2135084162045594, 1951565508865477, 671788336314416, 293123929458176},
+		{902608944504080, 2167765718046481, 1285718473078022, 1222562171329269, 492109027844479},
+	},
+	{
+		{1820807832746213, 1029220580458586, 1101997555432203, 1039081975563572, 202477981158221},
+		{1866134980680205, 2222325502763386, 1830284629571201, 1046966214478970, 418381946936795},
+		{1783460633291322, 1719505443254998, 1810489639976220, 877049370713018, 2187801198742619},
+		{197118243000763, 305493867565736, 518814410156522, 1656246186645170, 901894734874934},
+	},
+	{
+		{225454942125915, 478410476654509, 600524586037746, 643450007230715, 1018615928259319},
+		{1733330584845708, 881092297970296, 507039890129464, 496397090721598, 2230888519577628},
+		{690155664737246, 1010454785646677, 753170144375012, 1651277613844874, 1622648796364156},
+		{1321310321891618, 1089655277873603, 235891750867089, 815878279563688, 1709264240047556},
+	},
+	{
+		{805027036551342, 1387174275567452, 1156538511461704, 1465897486692171, 1208567094120903},
+		{2228417017817483, 202885584970535, 2182114782271881, 2077405042592934, 1029684358182774},
+		{460447547653983, 627817697755692, 524899434670834, 1228019344939427, 740684787777653},
+		{849757462467675, 447476306919899, 422618957298818, 302134659227815, 675831828440895},
+	},
+}
+
+ge_scalarmult :: proc "contextless" (ge, p: ^Group_Element, sc: ^Scalar) {
+	tmp: field.Non_Montgomery_Domain_Field_Element
+	field.fe_from_montgomery(&tmp, sc)
+
+	_ge_scalarmult(ge, p, &tmp)
+
+	mem.zero_explicit(&tmp, size_of(tmp))
+}
+
+ge_scalarmult_basepoint :: proc "contextless" (ge: ^Group_Element, sc: ^Scalar) {
+	// Something like the comb method from "Fast and compact elliptic-curve
+	// cryptography" Section 3.3, would be more performant, but more
+	// complex.
+	//
+	// - https://eprint.iacr.org/2012/309
+	ge_scalarmult(ge, &GE_BASEPOINT, sc)
+}
+
+ge_scalarmult_vartime :: proc "contextless" (ge, p: ^Group_Element, sc: ^Scalar) {
+	tmp: field.Non_Montgomery_Domain_Field_Element
+	field.fe_from_montgomery(&tmp, sc)
+
+	_ge_scalarmult(ge, p, &tmp, true)
+}
+
+ge_double_scalarmult_basepoint_vartime :: proc "contextless" (
+	ge: ^Group_Element,
+	a: ^Scalar,
+	A: ^Group_Element,
+	b: ^Scalar,
+) {
+	// Strauss-Shamir, commonly referred to as the "Shamir trick",
+	// saves half the doublings, relative to doing this the naive way.
+	//
+	// ABGLSV-Pornin (https://eprint.iacr.org/2020/454) is faster,
+	// but significantly more complex, and has incompatibilities with
+	// mixed-order group elements.
+
+	tmp_add: Add_Scratch = ---
+	tmp_addend: Addend_Group_Element = ---
+	tmp_dbl: Double_Scratch = ---
+	tmp: Group_Element = ---
+
+	A_tbl: Multiply_Table = ---
+	mul_tbl_set(&A_tbl, A, &tmp_add)
+
+	sc_a, sc_b: field.Non_Montgomery_Domain_Field_Element
+	field.fe_from_montgomery(&sc_a, a)
+	field.fe_from_montgomery(&sc_b, b)
+
+	ge_identity(&tmp)
+	for i := 31; i >= 0; i = i - 1 {
+		limb := i / 8
+		shift := uint(i & 7) * 8
+
+		limb_byte_a := sc_a[limb] >> shift
+		limb_byte_b := sc_b[limb] >> shift
+
+		hi_a, lo_a := (limb_byte_a >> 4) & 0x0f, limb_byte_a & 0x0f
+		hi_b, lo_b := (limb_byte_b >> 4) & 0x0f, limb_byte_b & 0x0f
+
+		if i != 31 {
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+		}
+		mul_tbl_add(&tmp, &A_tbl, hi_a, &tmp_add, &tmp_addend, true)
+		mul_tbl_add(&tmp, &GE_BASEPOINT_TABLE, hi_b, &tmp_add, &tmp_addend, true)
+
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		mul_tbl_add(&tmp, &A_tbl, lo_a, &tmp_add, &tmp_addend, true)
+		mul_tbl_add(&tmp, &GE_BASEPOINT_TABLE, lo_b, &tmp_add, &tmp_addend, true)
+	}
+
+	ge_set(ge, &tmp)
+}
+
+@(private)
+_ge_scalarmult :: proc "contextless" (
+	ge, p: ^Group_Element,
+	sc: ^field.Non_Montgomery_Domain_Field_Element,
+	unsafe_is_vartime := false,
+) {
+	// Do the simplest possible thing that works and provides adequate,
+	// performance, which is windowed add-then-multiply.
+
+	tmp_add: Add_Scratch = ---
+	tmp_addend: Addend_Group_Element = ---
+	tmp_dbl: Double_Scratch = ---
+	tmp: Group_Element = ---
+
+	p_tbl: Multiply_Table = ---
+	mul_tbl_set(&p_tbl, p, &tmp_add)
+
+	ge_identity(&tmp)
+	for i := 31; i >= 0; i = i - 1 {
+		limb := i / 8
+		shift := uint(i & 7) * 8
+		limb_byte := sc[limb] >> shift
+
+		hi, lo := (limb_byte >> 4) & 0x0f, limb_byte & 0x0f
+
+		if i != 31 {
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+			ge_double(&tmp, &tmp, &tmp_dbl)
+		}
+		mul_tbl_add(&tmp, &p_tbl, hi, &tmp_add, &tmp_addend, unsafe_is_vartime)
+
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		ge_double(&tmp, &tmp, &tmp_dbl)
+		mul_tbl_add(&tmp, &p_tbl, lo, &tmp_add, &tmp_addend, unsafe_is_vartime)
+	}
+
+	ge_set(ge, &tmp)
+
+	if !unsafe_is_vartime {
+		ge_clear(&tmp)
+		mem.zero_explicit(&tmp_add, size_of(Add_Scratch))
+		mem.zero_explicit(&tmp_addend, size_of(Addend_Group_Element))
+		mem.zero_explicit(&tmp_dbl, size_of(Double_Scratch))
+	}
+}
+
+@(private)
+Multiply_Table :: [15]Addend_Group_Element // 0 = inf, which is implicit.
+
+@(private)
+mul_tbl_set :: proc "contextless" (
+	tbl: ^Multiply_Table,
+	ge: ^Group_Element,
+	tmp_add: ^Add_Scratch,
+) {
+	tmp: Group_Element = ---
+	ge_set(&tmp, ge)
+
+	ge_addend_set(&tbl[0], ge)
+	for i := 1; i < 15; i = i + 1 {
+		ge_add_addend(&tmp, &tmp, &tbl[0], tmp_add)
+		ge_addend_set(&tbl[i], &tmp)
+	}
+
+	ge_clear(&tmp)
+}
+
+@(private)
+mul_tbl_add :: proc "contextless" (
+	ge: ^Group_Element,
+	tbl: ^Multiply_Table,
+	idx: u64,
+	tmp_add: ^Add_Scratch,
+	tmp_addend: ^Addend_Group_Element,
+	unsafe_is_vartime: bool,
+) {
+	// Variable time lookup, with the addition omitted entirely if idx == 0.
+	if unsafe_is_vartime {
+		// Skip adding the point at infinity.
+		if idx != 0 {
+			ge_add_addend(ge, ge, &tbl[idx - 1], tmp_add)
+		}
+		return
+	}
+
+	// Constant time lookup.
+	tmp_addend^ = {
+		// Point at infinity (0, 1, 1, 0) in precomputed form
+		{1, 0, 0, 0, 0}, // y - x
+		{1, 0, 0, 0, 0}, // y + x
+		{0, 0, 0, 0, 0}, // t * 2d
+		{2, 0, 0, 0, 0}, // z * 2
+	}
+	for i := u64(1); i < 16; i = i + 1 {
+		_, ctrl := bits.sub_u64(0, (i ~ idx), 0)
+		ge_addend_conditional_assign(tmp_addend, &tbl[i - 1], int(~ctrl) & 1)
+	}
+	ge_add_addend(ge, ge, tmp_addend, tmp_add)
+}
diff --git a/core/crypto/_fiat/field_curve25519/field.odin b/core/crypto/_fiat/field_curve25519/field.odin
index 64f9f8a1f..6b2d3b595 100644
--- a/core/crypto/_fiat/field_curve25519/field.odin
+++ b/core/crypto/_fiat/field_curve25519/field.odin
@@ -15,6 +15,20 @@ fe_tighten_cast :: #force_inline proc "contextless" (
 	return transmute(^Tight_Field_Element)(arg1)
 }
 
+fe_clear :: proc "contextless" (
+	arg1: $T,
+) where T == ^Tight_Field_Element || T == ^Loose_Field_Element {
+	mem.zero_explicit(arg1, size_of(arg1^))
+}
+
+fe_clear_vec :: proc "contextless" (
+	arg1: $T,
+) where T == []^Tight_Field_Element || T == []^Loose_Field_Element {
+	for fe in arg1 {
+		fe_clear(fe)
+	}
+}
+
 fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) {
 	// Ignore the unused bit by copying the input and masking the bit off
 	// prior to deserialization.
@@ -27,12 +41,25 @@ fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte
 	mem.zero_explicit(&tmp1, size_of(tmp1))
 }
 
+fe_is_negative :: proc "contextless" (arg1: ^Tight_Field_Element) -> int {
+	tmp1: [32]byte = ---
+
+	fe_to_bytes(&tmp1, arg1)
+	ret := tmp1[0] & 1
+
+	mem.zero_explicit(&tmp1, size_of(tmp1))
+
+	return int(ret)
+}
+
 fe_equal :: proc "contextless" (arg1, arg2: ^Tight_Field_Element) -> int {
-	tmp2: [32]byte = ---
+	tmp1, tmp2: [32]byte = ---, ---
 
+	fe_to_bytes(&tmp1, arg1)
 	fe_to_bytes(&tmp2, arg2)
-	ret := fe_equal_bytes(arg1, &tmp2)
+	ret := crypto.compare_constant_time(tmp1[:], tmp2[:])
 
+	mem.zero_explicit(&tmp1, size_of(tmp1))
 	mem.zero_explicit(&tmp2, size_of(tmp2))
 
 	return ret
@@ -67,25 +94,37 @@ fe_carry_pow2k :: proc "contextless" (
 	}
 }
 
+fe_carry_add :: #force_inline proc "contextless" (out1, arg1, arg2: ^Tight_Field_Element) {
+	fe_add(fe_relax_cast(out1), arg1, arg2)
+	fe_carry(out1, fe_relax_cast(out1))
+}
+
+fe_carry_sub :: #force_inline proc "contextless" (out1, arg1, arg2: ^Tight_Field_Element) {
+	fe_sub(fe_relax_cast(out1), arg1, arg2)
+	fe_carry(out1, fe_relax_cast(out1))
+}
+
 fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
 	fe_opp(fe_relax_cast(out1), arg1)
 	fe_carry(out1, fe_relax_cast(out1))
 }
 
-fe_carry_invsqrt :: proc "contextless" (
+fe_carry_sqrt_ratio_m1 :: proc "contextless" (
 	out1: ^Tight_Field_Element,
-	arg1: ^Loose_Field_Element,
+	arg1: ^Loose_Field_Element, // u
+	arg2: ^Loose_Field_Element, // v
 ) -> int {
-	// Inverse square root taken from Monocypher.
+	// SQRT_RATIO_M1(u, v) from RFC 9496 - 4.2, based on the inverse
+	// square root from Monocypher.
 
+	w: Tight_Field_Element = ---
+	fe_carry_mul(&w, arg1, arg2) // u * v
+
+	// r = tmp1 = u * w^((p-5)/8)
 	tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, ---
-
-	// t0 = x^((p-5)/8)
-	// Can be achieved with a simple double & add ladder,
-	// but it would be slower.
-	fe_carry_pow2k(&tmp1, arg1, 1)
+	fe_carry_pow2k(&tmp1, fe_relax_cast(&w), 1)
 	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 2)
-	fe_carry_mul(&tmp2, arg1, fe_relax_cast(&tmp2))
+	fe_carry_mul(&tmp2, fe_relax_cast(&w), fe_relax_cast(&tmp2))
 	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&tmp2))
 	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 1)
 	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
@@ -104,48 +143,49 @@ fe_carry_invsqrt :: proc "contextless" (
 	fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 50)
 	fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
 	fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 2)
-	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1)
+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&w)) // w^((p-5)/8)
 
-	// quartic = x^((p-1)/4)
-	quartic := &tmp2
-	fe_carry_square(quartic, fe_relax_cast(&tmp1))
-	fe_carry_mul(quartic, fe_relax_cast(quartic), arg1)
+	fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1) // u * w^((p-5)/8)
 
-	// Serialize quartic once to save on repeated serialization/sanitization.
-	quartic_buf: [32]byte = ---
-	fe_to_bytes(&quartic_buf, quartic)
-	check := &tmp3
+	// Serialize `check` once to save on repeated serialization.
+	r, check := &tmp1, &tmp2
+	b: [32]byte = ---
+	fe_carry_square(check, fe_relax_cast(r))
+	fe_carry_mul(check, fe_relax_cast(check), arg2) // check * v
+	fe_to_bytes(&b, check)
 
-	fe_one(check)
-	p1 := fe_equal_bytes(check, &quartic_buf)
-	fe_carry_opp(check, check)
-	m1 := fe_equal_bytes(check, &quartic_buf)
-	fe_carry_opp(check, &SQRT_M1)
-	ms := fe_equal_bytes(check, &quartic_buf)
+	u, neg_u, neg_u_i := &tmp3, &w, check
+	fe_carry(u, arg1)
+	fe_carry_opp(neg_u, u)
+	fe_carry_mul(neg_u_i, fe_relax_cast(neg_u), fe_relax_cast(&FE_SQRT_M1))
 
-	// if quartic == -1 or sqrt(-1)
-	// then  isr = x^((p-1)/4) * sqrt(-1)
-	// else  isr = x^((p-1)/4)
-	fe_carry_mul(out1, fe_relax_cast(&tmp1), fe_relax_cast(&SQRT_M1))
-	fe_cond_assign(out1, &tmp1, (m1 | ms) ~ 1)
+	correct_sign_sqrt := fe_equal_bytes(u, &b)
+	flipped_sign_sqrt := fe_equal_bytes(neg_u, &b)
+	flipped_sign_sqrt_i := fe_equal_bytes(neg_u_i, &b)
 
-	mem.zero_explicit(&tmp1, size_of(tmp1))
-	mem.zero_explicit(&tmp2, size_of(tmp2))
-	mem.zero_explicit(&tmp3, size_of(tmp3))
-	mem.zero_explicit(&quartic_buf, size_of(quartic_buf))
+	r_prime := check
+	fe_carry_mul(r_prime, fe_relax_cast(r), fe_relax_cast(&FE_SQRT_M1))
+	fe_cond_assign(r, r_prime, flipped_sign_sqrt | flipped_sign_sqrt_i)
 
-	return p1 | m1
+	// Pick the non-negative square root.
+	fe_carry_opp(r_prime, r)
+	fe_cond_select(out1, r, r_prime, fe_is_negative(r))
+
+	fe_clear_vec([]^Tight_Field_Element{&w, &tmp1, &tmp2, &tmp3})
+	mem.zero_explicit(&b, size_of(b))
+
+	return correct_sign_sqrt | flipped_sign_sqrt
 }
 
 fe_carry_inv :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 	tmp1: Tight_Field_Element
 
 	fe_carry_square(&tmp1, arg1)
-	_ = fe_carry_invsqrt(&tmp1, fe_relax_cast(&tmp1))
+	_ = fe_carry_sqrt_ratio_m1(&tmp1, fe_relax_cast(&FE_ONE), fe_relax_cast(&tmp1))
 	fe_carry_square(&tmp1, fe_relax_cast(&tmp1))
 	fe_carry_mul(out1, fe_relax_cast(&tmp1), arg1)
 
-	mem.zero_explicit(&tmp1, size_of(tmp1))
+	fe_clear(&tmp1)
 }
 
 fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
@@ -196,3 +236,21 @@ fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_El
 	out1[3], out2[3] = x4, y4
 	out1[4], out2[4] = x5, y5
 }
+
+@(optimization_mode = "none")
+fe_cond_select :: #force_no_inline proc "contextless" (
+	out1, arg1, arg2: $T,
+	arg3: int,
+) where T == ^Tight_Field_Element || T == ^Loose_Field_Element {
+	mask := (u64(arg3) * 0xffffffffffffffff)
+	x1 := ((mask & arg2[0]) | ((~mask) & arg1[0]))
+	x2 := ((mask & arg2[1]) | ((~mask) & arg1[1]))
+	x3 := ((mask & arg2[2]) | ((~mask) & arg1[2]))
+	x4 := ((mask & arg2[3]) | ((~mask) & arg1[3]))
+	x5 := ((mask & arg2[4]) | ((~mask) & arg1[4]))
+	out1[0] = x1
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+}
diff --git a/core/crypto/_fiat/field_curve25519/field51.odin b/core/crypto/_fiat/field_curve25519/field51.odin
index 81dca19e2..d039bd411 100644
--- a/core/crypto/_fiat/field_curve25519/field51.odin
+++ b/core/crypto/_fiat/field_curve25519/field51.odin
@@ -42,7 +42,10 @@ import "core:math/bits"
 Loose_Field_Element :: distinct [5]u64
 Tight_Field_Element :: distinct [5]u64
 
-SQRT_M1 := Tight_Field_Element {
+FE_ZERO := Tight_Field_Element{0, 0, 0, 0, 0}
+FE_ONE := Tight_Field_Element{1, 0, 0, 0, 0}
+
+FE_SQRT_M1 := Tight_Field_Element {
 	1718705420411056,
 	234908883556509,
 	2233514472574048,
diff --git a/core/crypto/_fiat/field_scalar25519/field.odin b/core/crypto/_fiat/field_scalar25519/field.odin
index c741d30cf..9b40661b7 100644
--- a/core/crypto/_fiat/field_scalar25519/field.odin
+++ b/core/crypto/_fiat/field_scalar25519/field.odin
@@ -20,6 +20,10 @@ _TWO_336 := Montgomery_Domain_Field_Element {
 	0x3d217f5be65cb5c,
 }
 
+fe_clear :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) {
+	mem.zero_explicit(arg1, size_of(Montgomery_Domain_Field_Element))
+}
+
 fe_from_bytes :: proc "contextless" (
 	out1: ^Montgomery_Domain_Field_Element,
 	arg1: ^[32]byte,
@@ -85,7 +89,7 @@ fe_from_bytes_wide :: proc "contextless" (
 	fe_mul(&tmp, &tmp, &_TWO_336) // c * 2^336
 	fe_add(out1, out1, &tmp) // a + b * 2^168 + c * 2^336
 
-	mem.zero_explicit(&tmp, size_of(tmp))
+	fe_clear(&tmp)
 }
 
 @(private)
@@ -125,7 +129,7 @@ fe_equal :: proc "contextless" (arg1, arg2: ^Montgomery_Domain_Field_Element) ->
 	// which will be 1.
 	_, borrow := bits.sub_u64(fe_non_zero(&tmp), 1, 0)
 
-	mem.zero_explicit(&tmp, size_of(tmp))
+	fe_clear(&tmp)
 
 	return int(borrow)
 }
diff --git a/core/crypto/x25519/x25519.odin b/core/crypto/x25519/x25519.odin
index 3cd247cf8..f8a301810 100644
--- a/core/crypto/x25519/x25519.odin
+++ b/core/crypto/x25519/x25519.odin
@@ -94,13 +94,8 @@ _scalarmult :: proc "contextless" (out, scalar, point: ^[32]byte) {
 	field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2))
 	field.fe_to_bytes(out, &x2)
 
-	mem.zero_explicit(&x1, size_of(x1))
-	mem.zero_explicit(&x2, size_of(x2))
-	mem.zero_explicit(&x3, size_of(x3))
-	mem.zero_explicit(&z2, size_of(z2))
-	mem.zero_explicit(&z3, size_of(z3))
-	mem.zero_explicit(&t0, size_of(t0))
-	mem.zero_explicit(&t1, size_of(t1))
+	field.fe_clear_vec([]^field.Tight_Field_Element{&x1, &x2, &x3, &z2, &z3})
+	field.fe_clear_vec([]^field.Loose_Field_Element{&t0, &t1})
 }
 
 // scalarmult "multiplies" the provided scalar and point, and writes the
@@ -137,6 +132,5 @@ scalarmult :: proc(dst, scalar, point: []byte) {
 // scalarmult_basepoint "multiplies" the provided scalar with the X25519
 // base point and writes the resulting point to dst.
 scalarmult_basepoint :: proc(dst, scalar: []byte) {
-	// TODO/perf: Switch to using a precomputed table.
 	scalarmult(dst, scalar, _BASE_POINT[:])
 }

From d96f8bb5c1f5f7b24a23383f88c1b9a637b586b2 Mon Sep 17 00:00:00 2001
From: Yawning Angel <yawning@schwanenlied.me>
Date: Tue, 26 Mar 2024 13:05:50 +0900
Subject: [PATCH 12/14] core/crypto/ristretto255: Initial import

---
 core/crypto/_fiat/field_curve25519/field.odin |  15 +-
 core/crypto/ristretto255/ristretto255.odin    | 510 ++++++++++++++++++
 .../ristretto255/ristretto255_scalar.odin     |  97 ++++
 examples/all/all_main.odin                    |   2 +
 4 files changed, 622 insertions(+), 2 deletions(-)
 create mode 100644 core/crypto/ristretto255/ristretto255.odin
 create mode 100644 core/crypto/ristretto255/ristretto255_scalar.odin

diff --git a/core/crypto/_fiat/field_curve25519/field.odin b/core/crypto/_fiat/field_curve25519/field.odin
index 6b2d3b595..8a8202ac4 100644
--- a/core/crypto/_fiat/field_curve25519/field.odin
+++ b/core/crypto/_fiat/field_curve25519/field.odin
@@ -109,6 +109,10 @@ fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Eleme
 	fe_carry(out1, fe_relax_cast(out1))
 }
 
+fe_carry_abs :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
+	fe_cond_negate(out1, arg1, fe_is_negative(arg1))
+}
+
 fe_carry_sqrt_ratio_m1 :: proc "contextless" (
 	out1: ^Tight_Field_Element,
 	arg1: ^Loose_Field_Element, // u
@@ -168,8 +172,7 @@ fe_carry_sqrt_ratio_m1 :: proc "contextless" (
 	fe_cond_assign(r, r_prime, flipped_sign_sqrt | flipped_sign_sqrt_i)
 
 	// Pick the non-negative square root.
-	fe_carry_opp(r_prime, r)
-	fe_cond_select(out1, r, r_prime, fe_is_negative(r))
+	fe_carry_abs(out1, r)
 
 	fe_clear_vec([]^Tight_Field_Element{&w, &tmp1, &tmp2, &tmp3})
 	mem.zero_explicit(&b, size_of(b))
@@ -254,3 +257,11 @@ fe_cond_select :: #force_no_inline proc "contextless" (
 	out1[3] = x4
 	out1[4] = x5
 }
+
+fe_cond_negate :: proc "contextless" (out1, arg1: ^Tight_Field_Element, ctrl: int) {
+	tmp1: Tight_Field_Element = ---
+	fe_carry_opp(&tmp1, arg1)
+	fe_cond_select(out1, arg1, &tmp1, ctrl)
+
+	fe_clear(&tmp1)
+}
diff --git a/core/crypto/ristretto255/ristretto255.odin b/core/crypto/ristretto255/ristretto255.odin
new file mode 100644
index 000000000..d1f2b6ee5
--- /dev/null
+++ b/core/crypto/ristretto255/ristretto255.odin
@@ -0,0 +1,510 @@
+/*
+package ristretto255 implement the ristretto255 prime-order group.
+
+See:
+- https://www.rfc-editor.org/rfc/rfc9496
+*/
+package ristretto255
+
+import grp "core:crypto/_edwards25519"
+import field "core:crypto/_fiat/field_curve25519"
+import "core:mem"
+
+// ELEMENT_SIZE is the size of a byte-encoded ristretto255 group element.
+ELEMENT_SIZE :: 32
+// WIDE_ELEMENT_SIZE is the side of a wide byte-encoded ristretto255
+// group element.
+WIDE_ELEMENT_SIZE :: 64
+
+@(private)
+FE_NEG_ONE := field.Tight_Field_Element {
+	2251799813685228,
+	2251799813685247,
+	2251799813685247,
+	2251799813685247,
+	2251799813685247,
+}
+@(private)
+FE_INVSQRT_A_MINUS_D := field.Tight_Field_Element {
+	278908739862762,
+	821645201101625,
+	8113234426968,
+	1777959178193151,
+	2118520810568447,
+}
+@(private)
+FE_ONE_MINUS_D_SQ := field.Tight_Field_Element {
+	1136626929484150,
+	1998550399581263,
+	496427632559748,
+	118527312129759,
+	45110755273534,
+}
+@(private)
+FE_D_MINUS_ONE_SQUARED := field.Tight_Field_Element {
+	1507062230895904,
+	1572317787530805,
+	683053064812840,
+	317374165784489,
+	1572899562415810,
+}
+@(private)
+FE_SQRT_AD_MINUS_ONE := field.Tight_Field_Element {
+	2241493124984347,
+	425987919032274,
+	2207028919301688,
+	1220490630685848,
+	974799131293748,
+}
+@(private)
+GE_IDENTITY := Group_Element{grp.GE_IDENTITY, true}
+
+// Group_Element is a ristretto255 group element.  The zero-initialized
+// value is invalid.
+Group_Element :: struct {
+	// WARNING: While the internal representation is an Edwards25519
+	// group element, this is not guaranteed to always be the case,
+	// and your code *WILL* break if you mess with `_p`.
+	_p:              grp.Group_Element,
+	_is_initialized: bool,
+}
+
+// ge_clear clears ge to the uninitialized state.
+ge_clear :: proc "contextless" (ge: ^Group_Element) {
+	mem.zero_explicit(ge, size_of(Group_Element))
+}
+
+// ge_set sets `ge = a`.
+ge_set :: proc(ge, a: ^Group_Element) {
+	_ge_assert_initialized([]^Group_Element{a})
+
+	grp.ge_set(&ge._p, &a._p)
+	ge._is_initialized = true
+}
+
+// ge_identity sets ge to the identity (neutral) element.
+ge_identity :: proc "contextless" (ge: ^Group_Element) {
+	grp.ge_identity(&ge._p)
+	ge._is_initialized = true
+}
+
+// ge_generator sets ge to the group generator.
+ge_generator :: proc "contextless" (ge: ^Group_Element) {
+	grp.ge_generator(&ge._p)
+	ge._is_initialized = true
+}
+
+// ge_set_bytes sets ge to the result of decoding b as a ristretto255
+// group element, and returns true on success.
+@(require_results)
+ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
+	// 1.  Interpret the string as an unsigned integer s in little-endian
+	//     representation.  If the length of the string is not 32 bytes or
+	//     if the resulting value is >= p, decoding fails.
+	//
+	// 2.  If IS_NEGATIVE(s) returns TRUE, decoding fails.
+
+	if len(b) != ELEMENT_SIZE {
+		return false
+	}
+	if b[31] & 128 != 0 || b[0] & 1 != 0 {
+		// Fail early if b is clearly > p, or negative.
+		return false
+	}
+
+	b_ := transmute(^[32]byte)(raw_data(b))
+
+	s: field.Tight_Field_Element = ---
+	defer field.fe_clear(&s)
+
+	field.fe_from_bytes(&s, b_)
+	if field.fe_equal_bytes(&s, b_) != 1 {
+		// Reject non-canonical encodings of s.
+		return false
+	}
+
+	// 3.  Process s as follows:
+	v, u1, u2: field.Loose_Field_Element = ---, ---, ---
+	tmp, u2_sqr: field.Tight_Field_Element = ---, ---
+
+	// ss = s^2
+	// u1 = 1 - ss
+	// u2 = 1 + ss
+	// u2_sqr = u2^2
+	field.fe_carry_square(&tmp, field.fe_relax_cast(&s))
+	field.fe_sub(&u1, &field.FE_ONE, &tmp)
+	field.fe_add(&u2, &field.FE_ONE, &tmp)
+	field.fe_carry_square(&u2_sqr, &u2)
+
+	// v = -(D * u1^2) - u2_sqr
+	field.fe_carry_square(&tmp, &u1)
+	field.fe_carry_mul(&tmp, field.fe_relax_cast(&grp.FE_D), field.fe_relax_cast(&tmp))
+	field.fe_carry_add(&tmp, &tmp, &u2_sqr)
+	field.fe_opp(&v, &tmp)
+
+	// (was_square, invsqrt) = SQRT_RATIO_M1(1, v * u2_sqr)
+	field.fe_carry_mul(&tmp, &v, field.fe_relax_cast(&u2_sqr))
+	was_square := field.fe_carry_sqrt_ratio_m1(
+		&tmp,
+		field.fe_relax_cast(&field.FE_ONE),
+		field.fe_relax_cast(&tmp),
+	)
+
+	// den_x = invsqrt * u2
+	// den_y = invsqrt * den_x * v
+	x, y, t: field.Tight_Field_Element = ---, ---, ---
+	field.fe_carry_mul(&x, field.fe_relax_cast(&tmp), &u2)
+	field.fe_carry_mul(&y, field.fe_relax_cast(&tmp), field.fe_relax_cast(&x))
+	field.fe_carry_mul(&y, field.fe_relax_cast(&y), &v)
+
+	// x = CT_ABS(2 * s * den_x)
+	field.fe_carry_mul(&x, field.fe_relax_cast(&s), field.fe_relax_cast(&x))
+	field.fe_carry_add(&x, &x, &x)
+	field.fe_carry_abs(&x, &x)
+
+	// y = u1 * den_y
+	field.fe_carry_mul(&y, &u1, field.fe_relax_cast(&y))
+
+	// t = x * y
+	field.fe_carry_mul(&t, field.fe_relax_cast(&x), field.fe_relax_cast(&y))
+
+	field.fe_clear_vec([]^field.Loose_Field_Element{&v, &u1, &u2})
+	field.fe_clear_vec([]^field.Tight_Field_Element{&tmp, &u2_sqr})
+	defer field.fe_clear_vec([]^field.Tight_Field_Element{&x, &y, &t})
+
+	// 4.  If was_square is FALSE, IS_NEGATIVE(t) returns TRUE, or y = 0,
+	// decoding fails.  Otherwise, return the group element represented
+	// by the internal representation (x, y, 1, t) as the result of
+	// decoding.
+
+	switch {
+	case was_square == 0:
+		// Not sure why the RFC doesn't have this just fail early.
+		return false
+	case field.fe_is_negative(&t) != 0:
+		return false
+	case field.fe_equal(&y, &field.FE_ZERO) != 0:
+		return false
+	}
+
+	field.fe_set(&ge._p.x, &x)
+	field.fe_set(&ge._p.y, &y)
+	field.fe_one(&ge._p.z)
+	field.fe_set(&ge._p.t, &t)
+	ge._is_initialized = true
+
+	return true
+}
+
+// ge_set_wide_bytes sets ge to the result of deriving a ristretto255
+// group element, from a wide (512-bit) byte string.
+ge_set_wide_bytes :: proc(ge: ^Group_Element, b: []byte) {
+	if len(b) != WIDE_ELEMENT_SIZE {
+		panic("crypto/ristretto255: invalid wide input size")
+	}
+
+	// The element derivation function on an input string b proceeds as
+	// follows:
+	//
+	// 1.  Compute P1 as MAP(b[0:32]).
+	// 2.  Compute P2 as MAP(b[32:64]).
+	// 3.  Return P1 + P2.
+
+	p1, p2: Group_Element = ---, ---
+	ge_map(&p1, b[0:32])
+	ge_map(&p2, b[32:64])
+
+	ge_add(ge, &p1, &p2)
+
+	ge_clear(&p1)
+	ge_clear(&p2)
+}
+
+// ge_bytes sets dst to the canonical encoding of ge.
+ge_bytes :: proc(ge: ^Group_Element, dst: []byte) {
+	_ge_assert_initialized([]^Group_Element{ge})
+	if len(dst) != ELEMENT_SIZE {
+		panic("crypto/ristretto255: invalid destination size")
+	}
+
+	x0, y0, z0, t0 := &ge._p.x, &ge._p.y, &ge._p.z, &ge._p.t
+
+	// 1.  Process the internal representation into a field element s as
+	// follows:
+
+	// u1 = (z0 + y0) * (z0 - y0)
+	// u2 = x0 * y0
+	u1, u2: field.Tight_Field_Element = ---, ---
+	tmp1, tmp2: field.Loose_Field_Element = ---, ---
+	field.fe_add(&tmp1, z0, y0)
+	field.fe_sub(&tmp2, z0, y0)
+	field.fe_carry_mul(&u1, &tmp1, &tmp2)
+	field.fe_carry_mul(&u2, field.fe_relax_cast(x0), field.fe_relax_cast(y0))
+
+	// Ignore was_square since this is always square.
+	// (_, invsqrt) = SQRT_RATIO_M1(1, u1 * u2^2)
+	tmp: field.Tight_Field_Element = ---
+	field.fe_carry_square(&tmp, field.fe_relax_cast(&u2))
+	field.fe_carry_mul(&tmp, field.fe_relax_cast(&u1), field.fe_relax_cast(&tmp))
+	_ = field.fe_carry_sqrt_ratio_m1(
+		&tmp,
+		field.fe_relax_cast(&field.FE_ONE),
+		field.fe_relax_cast(&tmp),
+	)
+
+	// den1 = invsqrt * u1
+	// den2 = invsqrt * u2
+	// z_inv = den1 * den2 * t0
+	den1, den2 := &u1, &u2
+	z_inv: field.Tight_Field_Element = ---
+	field.fe_carry_mul(den1, field.fe_relax_cast(&tmp), field.fe_relax_cast(&u1))
+	field.fe_carry_mul(den2, field.fe_relax_cast(&tmp), field.fe_relax_cast(&u2))
+	field.fe_carry_mul(&z_inv, field.fe_relax_cast(den1), field.fe_relax_cast(den2))
+	field.fe_carry_mul(&z_inv, field.fe_relax_cast(&z_inv), field.fe_relax_cast(t0))
+
+	// rotate = IS_NEGATIVE(t0 * z_inv)
+	// Note: Reordered from the RFC because invsqrt is no longer needed.
+	field.fe_carry_mul(&tmp, field.fe_relax_cast(t0), field.fe_relax_cast(&z_inv))
+	rotate := field.fe_is_negative(&tmp)
+
+	// ix0 = x0 * SQRT_M1
+	// iy0 = y0 * SQRT_M1
+	// enchanted_denominator = den1 * INVSQRT_A_MINUS_D
+	ix0, iy0: field.Tight_Field_Element = ---, ---
+	field.fe_carry_mul(&ix0, field.fe_relax_cast(x0), field.fe_relax_cast(&field.FE_SQRT_M1))
+	field.fe_carry_mul(&iy0, field.fe_relax_cast(y0), field.fe_relax_cast(&field.FE_SQRT_M1))
+	field.fe_carry_mul(&tmp, field.fe_relax_cast(den1), field.fe_relax_cast(&FE_INVSQRT_A_MINUS_D))
+
+	// Conditionally rotate x and y.
+	// x = CT_SELECT(iy0 IF rotate ELSE x0)
+	// y = CT_SELECT(ix0 IF rotate ELSE y0)
+	// z = z0
+	// den_inv = CT_SELECT(enchanted_denominator IF rotate ELSE den2)
+	x, y: field.Tight_Field_Element = ---, ---
+	field.fe_cond_select(&x, x0, &iy0, rotate)
+	field.fe_cond_select(&y, y0, &ix0, rotate)
+	field.fe_cond_select(&tmp, den2, &tmp, rotate)
+
+	// y = CT_SELECT(-y IF IS_NEGATIVE(x * z_inv) ELSE y)
+	field.fe_carry_mul(&x, field.fe_relax_cast(&x), field.fe_relax_cast(&z_inv))
+	field.fe_cond_negate(&y, &y, field.fe_is_negative(&x))
+
+	// s = CT_ABS(den_inv * (z - y))
+	field.fe_sub(&tmp1, z0, &y)
+	field.fe_carry_mul(&tmp, field.fe_relax_cast(&tmp), &tmp1)
+	field.fe_carry_abs(&tmp, &tmp)
+
+	// 2.  Return the 32-byte little-endian encoding of s.  More
+	// specifically, this is the encoding of the canonical
+	// representation of s as an integer between 0 and p-1, inclusive.
+	dst_ := transmute(^[32]byte)(raw_data(dst))
+	field.fe_to_bytes(dst_, &tmp)
+
+	field.fe_clear_vec([]^field.Tight_Field_Element{&u1, &u2, &tmp, &z_inv, &ix0, &iy0, &x, &y})
+	field.fe_clear_vec([]^field.Loose_Field_Element{&tmp1, &tmp2})
+}
+
+// ge_add sets `ge = a + b`.
+ge_add :: proc(ge, a, b: ^Group_Element) {
+	_ge_assert_initialized([]^Group_Element{a, b})
+
+	grp.ge_add(&ge._p, &a._p, &b._p)
+	ge._is_initialized = true
+}
+
+// ge_double sets `ge = a + a`.
+ge_double :: proc(ge, a: ^Group_Element) {
+	_ge_assert_initialized([]^Group_Element{a})
+
+	grp.ge_double(&ge._p, &a._p)
+	ge._is_initialized = true
+}
+
+// ge_negate sets `ge = -a`.
+ge_negate :: proc(ge, a: ^Group_Element) {
+	_ge_assert_initialized([]^Group_Element{a})
+
+	grp.ge_negate(&ge._p, &a._p)
+	ge._is_initialized = true
+}
+
+// ge_scalarmult sets `ge = A * sc`.
+ge_scalarmult :: proc(ge, A: ^Group_Element, sc: ^Scalar) {
+	_ge_assert_initialized([]^Group_Element{A})
+
+	grp.ge_scalarmult(&ge._p, &A._p, sc)
+	ge._is_initialized = true
+}
+
+// ge_scalarmult_generator sets `ge = G * sc`
+ge_scalarmult_generator :: proc "contextless" (ge: ^Group_Element, sc: ^Scalar) {
+	grp.ge_scalarmult_basepoint(&ge._p, sc)
+	ge._is_initialized = true
+}
+
+// ge_scalarmult_vartime sets `ge = A * sc` in variable time.
+ge_scalarmult_vartime :: proc(ge, A: ^Group_Element, sc: ^Scalar) {
+	_ge_assert_initialized([]^Group_Element{A})
+
+	grp.ge_scalarmult_vartime(&ge._p, &A._p, sc)
+	ge._is_initialized = true
+}
+
+// ge_double_scalarmult_generator_vartime sets `ge = A * a + G * b` in variable
+// time.
+ge_double_scalarmult_generator_vartime :: proc(
+	ge: ^Group_Element,
+	a: ^Scalar,
+	A: ^Group_Element,
+	b: ^Scalar,
+) {
+	_ge_assert_initialized([]^Group_Element{A})
+
+	grp.ge_double_scalarmult_basepoint_vartime(&ge._p, a, &A._p, b)
+	ge._is_initialized = true
+}
+
+// ge_cond_negate sets `ge = a` iff `ctrl == 0` and `ge = -a` iff `ctrl == 1`.
+// Behavior for all other values of ctrl are undefined,
+ge_cond_negate :: proc(ge, a: ^Group_Element, ctrl: int) {
+	_ge_assert_initialized([]^Group_Element{a})
+
+	grp.ge_cond_negate(&ge._p, &a._p, ctrl)
+	ge._is_initialized = true
+}
+
+// ge_cond_assign sets `ge = ge` iff `ctrl == 0` and `ge = a` iff `ctrl == 1`.
+// Behavior for all other values of ctrl are undefined,
+ge_cond_assign :: proc(ge, a: ^Group_Element, ctrl: int) {
+	_ge_assert_initialized([]^Group_Element{ge, a})
+
+	grp.ge_cond_assign(&ge._p, &a._p, ctrl)
+}
+
+// ge_cond_select sets `ge = a` iff `ctrl == 0` and `ge = b` iff `ctrl == 1`.
+// Behavior for all other values of ctrl are undefined,
+ge_cond_select :: proc(ge, a, b: ^Group_Element, ctrl: int) {
+	_ge_assert_initialized([]^Group_Element{a, b})
+
+	grp.ge_cond_select(&ge._p, &a._p, &b._p, ctrl)
+	ge._is_initialized = true
+}
+
+// ge_equal returns 1 iff `a == b`, and 0 otherwise.
+@(require_results)
+ge_equal :: proc(a, b: ^Group_Element) -> int {
+	_ge_assert_initialized([]^Group_Element{a, b})
+
+	// CT_EQ(x1 * y2, y1 * x2) | CT_EQ(y1 * y2, x1 * x2)
+	ax_by, ay_bx, ay_by, ax_bx: field.Tight_Field_Element = ---, ---, ---, ---
+	field.fe_carry_mul(&ax_by, field.fe_relax_cast(&a._p.x), field.fe_relax_cast(&b._p.y))
+	field.fe_carry_mul(&ay_bx, field.fe_relax_cast(&a._p.y), field.fe_relax_cast(&b._p.x))
+	field.fe_carry_mul(&ay_by, field.fe_relax_cast(&a._p.y), field.fe_relax_cast(&b._p.y))
+	field.fe_carry_mul(&ax_bx, field.fe_relax_cast(&a._p.x), field.fe_relax_cast(&b._p.x))
+
+	ret := field.fe_equal(&ax_by, &ay_bx) | field.fe_equal(&ay_by, &ax_bx)
+
+	field.fe_clear_vec([]^field.Tight_Field_Element{&ax_by, &ay_bx, &ay_by, &ax_bx})
+
+	return ret
+}
+
+// ge_is_identity returns 1 iff `ge` is the identity element, and 0 otherwise.
+@(require_results)
+ge_is_identity :: proc(ge: ^Group_Element) -> int {
+	return ge_equal(ge, &GE_IDENTITY)
+}
+
+@(private)
+ge_map :: proc "contextless" (ge: ^Group_Element, b: []byte) {
+	b_ := transmute(^[32]byte)(raw_data(b))
+
+	// The MAP function is defined on 32-byte strings as:
+	//
+	// 1.  Mask the most significant bit in the final byte of the string,
+	// and interpret the string as an unsigned integer r in little-
+	// endian representation.  Reduce r modulo p to obtain a field
+	// element t.
+	// *  Masking the most significant bit is equivalent to interpreting
+	// the whole string as an unsigned integer in little-endian
+	// representation and then reducing it modulo 2^255.
+	t: field.Tight_Field_Element = ---
+	field.fe_from_bytes(&t, b_)
+
+	// 2.  Process t as follows:
+	//
+	// r = SQRT_M1 * t^2
+	// u = (r + 1) * ONE_MINUS_D_SQ
+	// v = (-1 - r*D) * (r + D)
+	tmp1: field.Loose_Field_Element = ---
+	r, u, v: field.Tight_Field_Element = ---, ---, ---
+
+	field.fe_carry_square(&r, field.fe_relax_cast(&t))
+	field.fe_carry_mul(&r, field.fe_relax_cast(&field.FE_SQRT_M1), field.fe_relax_cast(&r))
+
+	field.fe_add(&tmp1, &field.FE_ONE, &r)
+	field.fe_carry_mul(&u, &tmp1, field.fe_relax_cast(&FE_ONE_MINUS_D_SQ))
+
+	field.fe_carry_mul(&v, field.fe_relax_cast(&r), field.fe_relax_cast(&grp.FE_D))
+	field.fe_carry_add(&v, &field.FE_ONE, &v)
+	field.fe_carry_opp(&v, &v)
+	field.fe_add(&tmp1, &r, &grp.FE_D)
+	field.fe_carry_mul(&v, field.fe_relax_cast(&v), &tmp1)
+
+	// (was_square, s) = SQRT_RATIO_M1(u, v)
+	// s_prime = -CT_ABS(s*t)
+	// s = CT_SELECT(s IF was_square ELSE s_prime)
+	// c = CT_SELECT(-1 IF was_square ELSE r)
+	s, s_prime, c: field.Tight_Field_Element = ---, ---, ---
+	was_square := field.fe_carry_sqrt_ratio_m1(
+		&s,
+		field.fe_relax_cast(&u),
+		field.fe_relax_cast(&v),
+	)
+	field.fe_carry_mul(&s_prime, field.fe_relax_cast(&s), field.fe_relax_cast(&t))
+	field.fe_carry_abs(&s_prime, &s_prime)
+	field.fe_carry_opp(&s_prime, &s_prime)
+	field.fe_cond_select(&s, &s_prime, &s, was_square)
+	field.fe_cond_select(&c, &r, &FE_NEG_ONE, was_square)
+
+	// N = c * (r - 1) * D_MINUS_ONE_SQ - v
+	N: field.Tight_Field_Element = ---
+	field.fe_sub(&tmp1, &r, &field.FE_ONE)
+	field.fe_carry_mul(&N, field.fe_relax_cast(&c), &tmp1)
+	field.fe_carry_mul(&N, field.fe_relax_cast(&N), field.fe_relax_cast(&FE_D_MINUS_ONE_SQUARED))
+	field.fe_carry_sub(&N, &N, &v)
+
+	// w0 = 2 * s * v
+	// w1 = N * SQRT_AD_MINUS_ONE
+	// w2 = 1 - s^2
+	// w3 = 1 + s^2
+	w0, w1: field.Tight_Field_Element = ---, ---
+	w2, w3: field.Loose_Field_Element = ---, ---
+	field.fe_carry_mul(&w0, field.fe_relax_cast(&s), field.fe_relax_cast(&v))
+	field.fe_carry_add(&w0, &w0, &w0)
+	field.fe_carry_mul(&w1, field.fe_relax_cast(&N), field.fe_relax_cast(&FE_SQRT_AD_MINUS_ONE))
+	field.fe_carry_square(&s, field.fe_relax_cast(&s))
+	field.fe_sub(&w2, &field.FE_ONE, &s)
+	field.fe_add(&w3, &field.FE_ONE, &s)
+
+	// 3.  Return the group element represented by the internal
+	// representation (w0*w3, w2*w1, w1*w3, w0*w2).
+
+	field.fe_carry_mul(&ge._p.x, field.fe_relax_cast(&w0), &w3)
+	field.fe_carry_mul(&ge._p.y, &w2, field.fe_relax_cast(&w1))
+	field.fe_carry_mul(&ge._p.z, field.fe_relax_cast(&w1), &w3)
+	field.fe_carry_mul(&ge._p.t, field.fe_relax_cast(&w0), &w2)
+	ge._is_initialized = true
+
+	field.fe_clear_vec([]^field.Tight_Field_Element{&r, &u, &v, &s, &s_prime, &c, &N, &w0, &w1})
+	field.fe_clear_vec([]^field.Loose_Field_Element{&tmp1, &w2, &w3})
+}
+
+@(private)
+_ge_assert_initialized :: proc(ges: []^Group_Element) {
+	for ge in ges {
+		if !ge._is_initialized {
+			panic("crypto/ristretto255: uninitialized group element")
+		}
+	}
+}
diff --git a/core/crypto/ristretto255/ristretto255_scalar.odin b/core/crypto/ristretto255/ristretto255_scalar.odin
new file mode 100644
index 000000000..f581e5963
--- /dev/null
+++ b/core/crypto/ristretto255/ristretto255_scalar.odin
@@ -0,0 +1,97 @@
+package ristretto255
+
+import grp "core:crypto/_edwards25519"
+
+// SCALAR_SIZE is the size of a byte-encoded ristretto255 scalar.
+SCALAR_SIZE :: 32
+// WIDE_SCALAR_SIZE is the size of a wide byte-encoded ristretto255
+// scalar.
+WIDE_SCALAR_SIZE :: 64
+
+// Scalar is a ristretto255 scalar.  The zero-initialized value is valid,
+// and represents `0`.
+Scalar :: grp.Scalar
+
+// sc_clear clears sc to the uninitialized state.
+sc_clear :: proc "contextless" (sc: ^Scalar) {
+	grp.sc_clear(sc)
+}
+
+// sc_set sets `sc = a`.
+sc_set :: proc "contextless" (sc, a: ^Scalar) {
+	grp.sc_set(sc, a)
+}
+
+// sc_set_u64 sets `sc = i`.
+sc_set_u64 :: proc "contextless" (sc: ^Scalar, i: u64) {
+	grp.sc_set_u64(sc, i)
+}
+
+// sc_set_bytes sets sc to the result of decoding b as a ristretto255
+// scalar, and returns true on success.
+@(require_results)
+sc_set_bytes :: proc(sc: ^Scalar, b: []byte) -> bool {
+	if len(b) != SCALAR_SIZE {
+		return false
+	}
+
+	return grp.sc_set_bytes(sc, b)
+}
+
+// sc_set_wide_bytes sets sc to the result of deriving a ristretto255
+// scalar, from a wide (512-bit) byte string by interpreting b as a
+// little-endian value, and reducing it mod the group order.
+sc_set_bytes_wide :: proc(sc: ^Scalar, b: []byte) {
+	if len(b) != WIDE_SCALAR_SIZE {
+		panic("crypto/ristretto255: invalid wide input size")
+	}
+
+	b_ := transmute(^[WIDE_SCALAR_SIZE]byte)(raw_data(b))
+	grp.sc_set_bytes_wide(sc, b_)
+}
+
+// sc_bytes sets dst to the canonical encoding of sc.
+sc_bytes :: proc(sc: ^Scalar, dst: []byte) {
+	if len(dst) != SCALAR_SIZE {
+		panic("crypto/ristretto255: invalid destination size")
+	}
+
+	grp.sc_bytes(dst, sc)
+}
+
+// sc_add sets `sc = a + b`.
+sc_add :: proc "contextless" (sc, a, b: ^Scalar) {
+	grp.sc_add(sc, a, b)
+}
+
+// sc_sub sets `sc = a - b`.
+sc_sub :: proc "contextless" (sc, a, b: ^Scalar) {
+	grp.sc_sub(sc, a, b)
+}
+
+// sc_negate sets `sc = -a`.
+sc_negate :: proc "contextless" (sc, a: ^Scalar) {
+	grp.sc_negate(sc, a)
+}
+
+// sc_mul sets `sc = a * b`.
+sc_mul :: proc "contextless" (sc, a, b: ^Scalar) {
+	grp.sc_mul(sc, a, b)
+}
+
+// sc_square sets `sc = a^2`.
+sc_square :: proc "contextless" (sc, a: ^Scalar) {
+	grp.sc_square(sc, a)
+}
+
+// sc_cond_assign sets `sc = sc` iff `ctrl == 0` and `sc = a` iff `ctrl == 1`.
+// Behavior for all other values of ctrl are undefined,
+sc_cond_assign :: proc(sc, a: ^Scalar, ctrl: int) {
+	grp.sc_cond_assign(sc, a, ctrl)
+}
+
+// sc_equal returns 1 iff `a == b`, and 0 otherwise.
+@(require_results)
+sc_equal :: proc(a, b: ^Scalar) -> int {
+	return grp.sc_equal(a, b)
+}
diff --git a/examples/all/all_main.odin b/examples/all/all_main.odin
index bc1aff607..cc0005840 100644
--- a/examples/all/all_main.odin
+++ b/examples/all/all_main.odin
@@ -37,6 +37,7 @@ import md5              "core:crypto/legacy/md5"
 import sha1             "core:crypto/legacy/sha1"
 import pbkdf2           "core:crypto/pbkdf2"
 import poly1305         "core:crypto/poly1305"
+import ristretto255     "core:crypto/ristretto255"
 import sha2             "core:crypto/sha2"
 import sha3             "core:crypto/sha3"
 import shake            "core:crypto/shake"
@@ -158,6 +159,7 @@ _ :: keccak
 _ :: md5
 _ :: pbkdf2
 _ :: poly1305
+_ :: ristretto255
 _ :: sha1
 _ :: sha2
 _ :: sha3

From 893c3bef9a45fd58da38a11daa8ec9b0c6c323fe Mon Sep 17 00:00:00 2001
From: Yawning Angel <yawning@schwanenlied.me>
Date: Fri, 5 Apr 2024 20:03:46 +0900
Subject: [PATCH 13/14] core/crypto/ed25519: Initial import

---
 core/crypto/ed25519/ed25519.odin | 314 +++++++++++++++++++++++++++++++
 examples/all/all_main.odin       |   2 +
 2 files changed, 316 insertions(+)
 create mode 100644 core/crypto/ed25519/ed25519.odin

diff --git a/core/crypto/ed25519/ed25519.odin b/core/crypto/ed25519/ed25519.odin
new file mode 100644
index 000000000..86da35669
--- /dev/null
+++ b/core/crypto/ed25519/ed25519.odin
@@ -0,0 +1,314 @@
+/*
+package ed25519 implements the Ed25519 EdDSA signature algorithm.
+
+See:
+- https://datatracker.ietf.org/doc/html/rfc8032
+- https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.186-5.pdf
+- https://eprint.iacr.org/2020/1244.pdf
+*/
+package ed25519
+
+import "core:crypto"
+import grp "core:crypto/_edwards25519"
+import "core:crypto/sha2"
+import "core:mem"
+
+// PRIVATE_KEY_SIZE is the byte-encoded private key size.
+PRIVATE_KEY_SIZE :: 32
+// PUBLIC_KEY_SIZE is the byte-encoded public key size.
+PUBLIC_KEY_SIZE :: 32
+// SIGNATURE_SIZE is the byte-encoded signature size.
+SIGNATURE_SIZE :: 64
+
+@(private)
+NONCE_SIZE :: 32
+
+// Private_Key is an Ed25519 private key.
+Private_Key :: struct {
+	// WARNING: All of the members are to be treated as internal (ie:
+	// the Private_Key structure is intended to be opaque).  There are
+	// subtle vulnerabilities that can be introduced if the internal
+	// values are allowed to be altered.
+	//
+	// See: https://github.com/MystenLabs/ed25519-unsafe-libs
+	_b:              [PRIVATE_KEY_SIZE]byte,
+	_s:              grp.Scalar,
+	_nonce:          [NONCE_SIZE]byte,
+	_pub_key:        Public_Key,
+	_is_initialized: bool,
+}
+
+// Public_Key is an Ed25519 public key.
+Public_Key :: struct {
+	// WARNING: All of the members are to be treated as internal (ie:
+	// the Public_Key structure is intended to be opaque).
+	_b:              [PUBLIC_KEY_SIZE]byte,
+	_neg_A:          grp.Group_Element,
+	_is_valid:       bool,
+	_is_initialized: bool,
+}
+
+// private_key_set_bytes decodes a byte-encoded private key, and returns
+// true iff the operation was successful.
+private_key_set_bytes :: proc(priv_key: ^Private_Key, b: []byte) -> bool {
+	if len(b) != PRIVATE_KEY_SIZE {
+		return false
+	}
+
+	// Derive the private key.
+	ctx: sha2.Context_512 = ---
+	h_bytes: [sha2.DIGEST_SIZE_512]byte = ---
+	sha2.init_512(&ctx)
+	sha2.update(&ctx, b)
+	sha2.final(&ctx, h_bytes[:])
+
+	copy(priv_key._b[:], b)
+	copy(priv_key._nonce[:], h_bytes[32:])
+	grp.sc_set_bytes_rfc8032(&priv_key._s, h_bytes[:32])
+
+	// Derive the corresponding public key.
+	A: grp.Group_Element = ---
+	grp.ge_scalarmult_basepoint(&A, &priv_key._s)
+	grp.ge_bytes(&A, priv_key._pub_key._b[:])
+	grp.ge_negate(&priv_key._pub_key._neg_A, &A)
+	priv_key._pub_key._is_valid = !grp.ge_is_small_order(&A)
+	priv_key._pub_key._is_initialized = true
+
+	priv_key._is_initialized = true
+
+	return true
+}
+
+// private_key_bytes sets dst to byte-encoding of priv_key.
+private_key_bytes :: proc(priv_key: ^Private_Key, dst: []byte) {
+	if !priv_key._is_initialized {
+		panic("crypto/ed25519: uninitialized private key")
+	}
+	if len(dst) != PRIVATE_KEY_SIZE {
+		panic("crypto/ed25519: invalid destination size")
+	}
+
+	copy(dst, priv_key._b[:])
+}
+
+// private_key_clear clears priv_key to the uninitialized state.
+private_key_clear :: proc "contextless" (priv_key: ^Private_Key) {
+	mem.zero_explicit(priv_key, size_of(Private_Key))
+}
+
+// sign writes the signature by priv_key over msg to sig.
+sign :: proc(priv_key: ^Private_Key, msg, sig: []byte) {
+	if !priv_key._is_initialized {
+		panic("crypto/ed25519: uninitialized private key")
+	}
+	if len(sig) != SIGNATURE_SIZE {
+		panic("crypto/ed25519: invalid destination size")
+	}
+
+	// 1. Compute the hash of the private key d, H(d) = (h_0, h_1, ..., h_2b-1)
+	// using SHA-512 for Ed25519.  H(d) may be precomputed.
+	//
+	// 2. Using the second half of the digest hdigest2 = hb || ... || h2b-1,
+	// define:
+	//
+	// 2.1 For Ed25519, r = SHA-512(hdigest2 || M); Interpret r as a
+	// 64-octet little-endian integer.
+	ctx: sha2.Context_512 = ---
+	digest_bytes: [sha2.DIGEST_SIZE_512]byte = ---
+	sha2.init_512(&ctx)
+	sha2.update(&ctx, priv_key._nonce[:])
+	sha2.update(&ctx, msg)
+	sha2.final(&ctx, digest_bytes[:])
+
+	r: grp.Scalar = ---
+	grp.sc_set_bytes_wide(&r, &digest_bytes)
+
+	// 3. Compute the point [r]G. The octet string R is the encoding of
+	// the point [r]G.
+	R: grp.Group_Element = ---
+	R_bytes := sig[:32]
+	grp.ge_scalarmult_basepoint(&R, &r)
+	grp.ge_bytes(&R, R_bytes)
+
+	// 4. Derive s from H(d) as in the key pair generation algorithm.
+	// Use octet strings R, Q, and M to define:
+	//
+	// 4.1 For Ed25519, digest = SHA-512(R || Q || M).
+	// Interpret digest as a little-endian integer.
+	sha2.init_512(&ctx)
+	sha2.update(&ctx, R_bytes)
+	sha2.update(&ctx, priv_key._pub_key._b[:]) // Q in NIST terminology.
+	sha2.update(&ctx, msg)
+	sha2.final(&ctx, digest_bytes[:])
+
+	sc: grp.Scalar = --- // `digest` in NIST terminology.
+	grp.sc_set_bytes_wide(&sc, &digest_bytes)
+
+	// 5. Compute S = (r + digest × s) mod n. The octet string S is the
+	// encoding of the resultant integer.
+	grp.sc_mul(&sc, &sc, &priv_key._s)
+	grp.sc_add(&sc, &sc, &r)
+
+	// 6. Form the signature as the concatenation of the octet strings
+	// R and S.
+	grp.sc_bytes(sig[32:], &sc)
+
+	grp.sc_clear(&r)
+}
+
+// public_key_set_bytes decodes a byte-encoded public key, and returns
+// true iff the operation was successful.
+public_key_set_bytes :: proc "contextless" (pub_key: ^Public_Key, b: []byte) -> bool {
+	if len(b) != PUBLIC_KEY_SIZE {
+		return false
+	}
+
+	A: grp.Group_Element = ---
+	if !grp.ge_set_bytes(&A, b) {
+		return false
+	}
+
+	copy(pub_key._b[:], b)
+	grp.ge_negate(&pub_key._neg_A, &A)
+	pub_key._is_valid = !grp.ge_is_small_order(&A)
+	pub_key._is_initialized = true
+
+	return true
+}
+
+// public_key_set_priv sets pub_key to the public component of priv_key.
+public_key_set_priv :: proc(pub_key: ^Public_Key, priv_key: ^Private_Key) {
+	if !priv_key._is_initialized {
+		panic("crypto/ed25519: uninitialized public key")
+	}
+
+	src := &priv_key._pub_key
+	copy(pub_key._b[:], src._b[:])
+	grp.ge_set(&pub_key._neg_A, &src._neg_A)
+	pub_key._is_valid = src._is_valid
+	pub_key._is_initialized = src._is_initialized
+}
+
+// public_key_bytes sets dst to byte-encoding of pub_key.
+public_key_bytes :: proc(pub_key: ^Public_Key, dst: []byte) {
+	if !pub_key._is_initialized {
+		panic("crypto/ed25519: uninitialized public key")
+	}
+	if len(dst) != PUBLIC_KEY_SIZE {
+		panic("crypto/ed25519: invalid destination size")
+	}
+
+	copy(dst, pub_key._b[:])
+}
+
+// public_key_equal returns true iff pub_key is equal to other.
+public_key_equal :: proc(pub_key, other: ^Public_Key) -> bool {
+	if !pub_key._is_initialized || !other._is_initialized {
+		panic("crypto/ed25519: uninitialized public key")
+	}
+
+	return crypto.compare_constant_time(pub_key._b[:], other._b[:]) == 1
+}
+
+// verify returns true iff sig is a valid signature by pub_key over msg.
+//
+// The optional `allow_small_order_A` parameter will make this
+// implementation strictly compatible with FIPS 186-5, at the expense of
+// SBS-security.  Doing so is NOT recommended, and the disallowed
+// public keys all have a known discrete-log.
+verify :: proc(pub_key: ^Public_Key, msg, sig: []byte, allow_small_order_A := false) -> bool {
+	switch {
+	case !pub_key._is_initialized:
+		return false
+	case len(sig) != SIGNATURE_SIZE:
+		return false
+	}
+
+	// TLDR: Just use ristretto255.
+	//
+	// While there are two "standards" for EdDSA, existing implementations
+	// diverge (sometimes dramatically).  This implementation opts for
+	// "Algorithm 2" from "Taming the Many EdDSAs", which provides the
+	// strongest notion of security (SUF-CMA + SBS).
+	//
+	// The relevant properties are:
+	// - Reject non-canonical S.
+	// - Reject non-canonical A/R.
+	// - Reject small-order A (Extra non-standard check).
+	// - Cofactored verification equation.
+	//
+	// There are 19 possible non-canonical group element encodings of
+	// which:
+	// - 2 are small order
+	// - 10 are mixed order
+	// - 7 are not on the curve
+	//
+	// While historical implementations have been lax about enforcing
+	// that A/R are canonically encoded, that behavior is mandated by
+	// both the RFC and FIPS specification.  No valid key generation
+	// or sign implementation will ever produce non-canonically encoded
+	// public keys or signatures.
+	//
+	// There are 8 small-order group elements, 1 which is in the
+	// prime-order sub-group, and thus the probability that a properly
+	// generated A is small-order is cryptographically insignificant.
+	//
+	// While both the RFC and FIPS standard allow for either the
+	// cofactored or non-cofactored equation.  It is possible to
+	// artificially produce signatures that are valid for the former
+	// but not the latter.  This will NEVER occur with a valid sign
+	// implementation.  The choice of the latter is to be compatible
+	// with ABGLSV-Pornin, batch verification, and FROST (among other
+	// things).
+
+	s_bytes, r_bytes := sig[32:], sig[:32]
+
+	// 1. Reject the signature if S is not in the range [0, L).
+	s: grp.Scalar = ---
+	if !grp.sc_set_bytes(&s, s_bytes) {
+		return false
+	}
+
+	// 2. Reject the signature if the public key A is one of 8 small
+	// order points.
+	//
+	// As this check is optional and not part of the standard, we allow
+	// the caller to bypass it if desired.  Disabling the check makes
+	// the scheme NOT SBS-secure.
+	if !pub_key._is_valid && !allow_small_order_A {
+		return false
+	}
+
+	// 3. Reject the signature if A or R are non-canonical.
+	//
+	// Note: All initialized public keys are guaranteed to be canonical.
+	neg_R: grp.Group_Element = ---
+	if !grp.ge_set_bytes(&neg_R, r_bytes) {
+		return false
+	}
+	grp.ge_negate(&neg_R, &neg_R)
+
+	// 4. Compute the hash SHA512(R||A||M) and reduce it mod L to get a
+	// scalar h.
+	ctx: sha2.Context_512 = ---
+	h_bytes: [sha2.DIGEST_SIZE_512]byte = ---
+	sha2.init_512(&ctx)
+	sha2.update(&ctx, r_bytes)
+	sha2.update(&ctx, pub_key._b[:])
+	sha2.update(&ctx, msg)
+	sha2.final(&ctx, h_bytes[:])
+
+	h: grp.Scalar = ---
+	grp.sc_set_bytes_wide(&h, &h_bytes)
+
+	// 5. Accept if 8(s * G) - 8R - 8(h * A) = 0
+	//
+	// > first compute V = SB − R − hA and then accept if V is one of
+	// > 8 small order points (or alternatively compute 8V with 3
+	// > doublings and check against the neutral element)
+	V: grp.Group_Element = ---
+	grp.ge_double_scalarmult_basepoint_vartime(&V, &h, &pub_key._neg_A, &s)
+	grp.ge_add(&V, &V, &neg_R)
+
+	return grp.ge_is_small_order(&V)
+}
diff --git a/examples/all/all_main.odin b/examples/all/all_main.odin
index cc0005840..f60088823 100644
--- a/examples/all/all_main.odin
+++ b/examples/all/all_main.odin
@@ -29,6 +29,7 @@ import blake2s          "core:crypto/blake2s"
 import chacha20         "core:crypto/chacha20"
 import chacha20poly1305 "core:crypto/chacha20poly1305"
 import crypto_hash      "core:crypto/hash"
+import ed25519          "core:crypto/ed25519"
 import hkdf             "core:crypto/hkdf"
 import hmac             "core:crypto/hmac"
 import kmac             "core:crypto/kmac"
@@ -152,6 +153,7 @@ _ :: blake2b
 _ :: blake2s
 _ :: chacha20
 _ :: chacha20poly1305
+_ :: ed25519
 _ :: hmac
 _ :: hkdf
 _ :: kmac

From fa1d681e65c3a22c8f4fa45bad42c6de8b028c66 Mon Sep 17 00:00:00 2001
From: Yawning Angel <yawning@schwanenlied.me>
Date: Fri, 29 Mar 2024 19:05:13 +0900
Subject: [PATCH 14/14] tests/core/crypto: Start adding comprehensive
 curve25519 tests

---
 tests/core/crypto/test_core_crypto.odin       |  72 +-
 .../crypto/test_core_crypto_ecc25519.odin     | 766 ++++++++++++++++++
 tests/core/crypto/test_core_crypto_hash.odin  |   3 +
 tests/core/crypto/test_core_crypto_kdf.odin   |   3 +
 tests/core/crypto/test_core_crypto_mac.odin   |   3 +
 .../test_core_crypto_sha3_variants.odin       |   3 +
 tests/core/crypto/test_crypto_benchmark.odin  |  63 ++
 7 files changed, 842 insertions(+), 71 deletions(-)
 create mode 100644 tests/core/crypto/test_core_crypto_ecc25519.odin

diff --git a/tests/core/crypto/test_core_crypto.odin b/tests/core/crypto/test_core_crypto.odin
index 742e3cc04..72d8e7c78 100644
--- a/tests/core/crypto/test_core_crypto.odin
+++ b/tests/core/crypto/test_core_crypto.odin
@@ -20,7 +20,6 @@ import "core:testing"
 import "core:crypto"
 import "core:crypto/chacha20"
 import "core:crypto/chacha20poly1305"
-import "core:crypto/x25519"
 
 import tc "tests:common"
 
@@ -32,10 +31,10 @@ main :: proc() {
 	test_hash(&t)
 	test_mac(&t)
 	test_kdf(&t) // After hash/mac tests because those should pass first.
+	test_ecc25519(&t)
 
 	test_chacha20(&t)
 	test_chacha20poly1305(&t)
-	test_x25519(&t)
 	test_sha3_variants(&t)
 
 	bench_crypto(&t)
@@ -274,75 +273,6 @@ test_chacha20poly1305 :: proc(t: ^testing.T) {
 	tc.expect(t, !ok, "Expected false for decrypt(tag, corrupted_aad, ciphertext)")
 }
 
-@(test)
-test_x25519 :: proc(t: ^testing.T) {
-	tc.log(t, "Testing X25519")
-
-	// Local copy of this so that the base point doesn't need to be exported.
-	_BASE_POINT: [32]byte =  {
-		9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	}
-
-	test_vectors := []struct{
-		scalar:  string,
-		point:   string,
-		product: string,
-	} {
-		// Test vectors from RFC 7748
-		{
-			"a546e36bf0527c9d3b16154b82465edd62144c0ac1fc5a18506a2244ba449ac4",
-			"e6db6867583030db3594c1a424b15f7c726624ec26b3353b10a903a6d0ab1c4c",
-			"c3da55379de9c6908e94ea4df28d084f32eccf03491c71f754b4075577a28552",
-		},
-		{
-			"4b66e9d4d1b4673c5ad22691957d6af5c11b6421e0ea01d42ca4169e7918ba0d",
-			"e5210f12786811d3f4b7959d0538ae2c31dbe7106fc03c3efc4cd549c715a493",
-			"95cbde9476e8907d7aade45cb4b873f88b595a68799fa152e6f8f7647aac7957",
-		},
-	}
-	for v, _ in test_vectors {
-		scalar, _ := hex.decode(transmute([]byte)(v.scalar), context.temp_allocator)
-		point, _ := hex.decode(transmute([]byte)(v.point), context.temp_allocator)
-
-		derived_point: [x25519.POINT_SIZE]byte
-		x25519.scalarmult(derived_point[:], scalar[:], point[:])
-		derived_point_str := string(hex.encode(derived_point[:], context.temp_allocator))
-
-		tc.expect(
-			t,
-			derived_point_str == v.product,
-			fmt.tprintf(
-				"Expected %s for %s * %s, but got %s instead",
-				v.product,
-				v.scalar,
-				v.point,
-				derived_point_str,
-			),
-		)
-
-		// Abuse the test vectors to sanity-check the scalar-basepoint multiply.
-		p1, p2: [x25519.POINT_SIZE]byte
-		x25519.scalarmult_basepoint(p1[:], scalar[:])
-		x25519.scalarmult(p2[:], scalar[:], _BASE_POINT[:])
-		p1_str := string(hex.encode(p1[:], context.temp_allocator))
-		p2_str := string(hex.encode(p2[:], context.temp_allocator))
-		tc.expect(
-			t,
-			p1_str == p2_str,
-			fmt.tprintf(
-				"Expected %s for %s * basepoint, but got %s instead",
-				p2_str,
-				v.scalar,
-				p1_str,
-			),
-		)
-	}
-
-	// TODO/tests: Run the wycheproof test vectors, once I figure out
-	// how to work with JSON.
-}
-
 @(test)
 test_rand_bytes :: proc(t: ^testing.T) {
 	tc.log(t, "Testing rand_bytes")
diff --git a/tests/core/crypto/test_core_crypto_ecc25519.odin b/tests/core/crypto/test_core_crypto_ecc25519.odin
new file mode 100644
index 000000000..5ea008f90
--- /dev/null
+++ b/tests/core/crypto/test_core_crypto_ecc25519.odin
@@ -0,0 +1,766 @@
+package test_core_crypto
+
+import "base:runtime"
+import "core:encoding/hex"
+import "core:fmt"
+import "core:testing"
+
+import field "core:crypto/_fiat/field_curve25519"
+import "core:crypto/ed25519"
+import "core:crypto/ristretto255"
+import "core:crypto/x25519"
+
+import tc "tests:common"
+
+@(test)
+test_ecc25519 :: proc(t: ^testing.T) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+
+	tc.log(t, "Testing curve25519 ECC")
+
+	test_sqrt_ratio_m1(t)
+	test_ristretto255(t)
+
+	test_ed25519(t)
+	test_x25519(t)
+}
+
+@(test)
+test_sqrt_ratio_m1 :: proc(t: ^testing.T) {
+	tc.log(t, "Testing sqrt_ratio_m1")
+
+	test_vectors := []struct {
+		u: string,
+		v: string,
+		r: string,
+		was_square: bool,
+	} {
+		{
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			true,
+		},
+		{
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			"0100000000000000000000000000000000000000000000000000000000000000",
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			true,
+		},
+		{
+			"0100000000000000000000000000000000000000000000000000000000000000",
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			"0000000000000000000000000000000000000000000000000000000000000000",
+			false,
+		},
+		{
+			"0200000000000000000000000000000000000000000000000000000000000000",
+			"0100000000000000000000000000000000000000000000000000000000000000",
+			"3c5ff1b5d8e4113b871bd052f9e7bcd0582804c266ffb2d4f4203eb07fdb7c54",
+			false,
+		},
+		{
+			"0400000000000000000000000000000000000000000000000000000000000000",
+			"0100000000000000000000000000000000000000000000000000000000000000",
+			"0200000000000000000000000000000000000000000000000000000000000000",
+			true,
+		},
+		{
+			"0100000000000000000000000000000000000000000000000000000000000000",
+			"0400000000000000000000000000000000000000000000000000000000000000",
+			"f6ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff3f",
+			true,
+		},
+	}
+	for v, _ in test_vectors {
+		u_bytes, _ := hex.decode(transmute([]byte)(v.u), context.temp_allocator)
+		v_bytes, _ := hex.decode(transmute([]byte)(v.v), context.temp_allocator)
+		r_bytes, _ := hex.decode(transmute([]byte)(v.r), context.temp_allocator)
+
+		u_ := transmute(^[32]byte)(raw_data(u_bytes))
+		v_ := transmute(^[32]byte)(raw_data(v_bytes))
+		r_ := transmute(^[32]byte)(raw_data(r_bytes))
+
+		u, vee, r: field.Tight_Field_Element
+		field.fe_from_bytes(&u, u_)
+		field.fe_from_bytes(&vee, v_)
+		was_square := field.fe_carry_sqrt_ratio_m1(
+			&r,
+			field.fe_relax_cast(&u),
+			field.fe_relax_cast(&vee),
+		)
+
+		tc.expect(
+			t,
+			(was_square == 1) == v.was_square && field.fe_equal_bytes(&r, r_) == 1,
+			fmt.tprintf(
+				"Expected (%v, %s) for SQRT_RATIO_M1(%s, %s), got %s",
+				v.was_square,
+				v.r,
+				v.u,
+				v.v,
+				fe_str(&r),
+			),
+		)
+	}
+}
+
+@(test)
+test_ristretto255 :: proc(t: ^testing.T) {
+	tc.log(t, "Testing ristretto255")
+
+	ge_gen: ristretto255.Group_Element
+	ristretto255.ge_generator(&ge_gen)
+
+	// Invalid encodings.
+	bad_encodings := []string {
+		// Non-canonical field encodings.
+		"00ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+		"ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f",
+		"f3ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f",
+		"edffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f",
+
+		// Negative field elements.
+		"0100000000000000000000000000000000000000000000000000000000000000",
+		"01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f",
+		"ed57ffd8c914fb201471d1c3d245ce3c746fcbe63a3679d51b6a516ebebe0e20",
+		"c34c4e1826e5d403b78e246e88aa051c36ccf0aafebffe137d148a2bf9104562",
+		"c940e5a4404157cfb1628b108db051a8d439e1a421394ec4ebccb9ec92a8ac78",
+		"47cfc5497c53dc8e61c91d17fd626ffb1c49e2bca94eed052281b510b1117a24",
+		"f1c6165d33367351b0da8f6e4511010c68174a03b6581212c71c0e1d026c3c72",
+		"87260f7a2f12495118360f02c26a470f450dadf34a413d21042b43b9d93e1309",
+
+		// Non-square x^2.
+		"26948d35ca62e643e26a83177332e6b6afeb9d08e4268b650f1f5bbd8d81d371",
+		"4eac077a713c57b4f4397629a4145982c661f48044dd3f96427d40b147d9742f",
+		"de6a7b00deadc788eb6b6c8d20c0ae96c2f2019078fa604fee5b87d6e989ad7b",
+		"bcab477be20861e01e4a0e295284146a510150d9817763caf1a6f4b422d67042",
+		"2a292df7e32cababbd9de088d1d1abec9fc0440f637ed2fba145094dc14bea08",
+		"f4a9e534fc0d216c44b218fa0c42d99635a0127ee2e53c712f70609649fdff22",
+		"8268436f8c4126196cf64b3c7ddbda90746a378625f9813dd9b8457077256731",
+		"2810e5cbc2cc4d4eece54f61c6f69758e289aa7ab440b3cbeaa21995c2f4232b",
+
+		// Negative x * y value.
+		"3eb858e78f5a7254d8c9731174a94f76755fd3941c0ac93735c07ba14579630e",
+		"a45fdc55c76448c049a1ab33f17023edfb2be3581e9c7aade8a6125215e04220",
+		"d483fe813c6ba647ebbfd3ec41adca1c6130c2beeee9d9bf065c8d151c5f396e",
+		"8a2e1d30050198c65a54483123960ccc38aef6848e1ec8f5f780e8523769ba32",
+		"32888462f8b486c68ad7dd9610be5192bbeaf3b443951ac1a8118419d9fa097b",
+		"227142501b9d4355ccba290404bde41575b037693cef1f438c47f8fbf35d1165",
+		"5c37cc491da847cfeb9281d407efc41e15144c876e0170b499a96a22ed31e01e",
+		"445425117cb8c90edcbc7c1cc0e74f747f2c1efa5630a967c64f287792a48a4b",
+
+		// s = -1, which causes y = 0.
+		"ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f",
+	}
+	for x, _ in bad_encodings {
+		b, _ := hex.decode(transmute([]byte)(x), context.temp_allocator)
+
+		ge: ristretto255.Group_Element
+		ok := ristretto255.ge_set_bytes(&ge, b)
+		tc.expect(t, !ok, fmt.tprintf("Expected false for %s", x))
+	}
+
+	generator_multiples := []string {
+		"0000000000000000000000000000000000000000000000000000000000000000",
+		"e2f2ae0a6abc4e71a884a961c500515f58e30b6aa582dd8db6a65945e08d2d76",
+		"6a493210f7499cd17fecb510ae0cea23a110e8d5b901f8acadd3095c73a3b919",
+		"94741f5d5d52755ece4f23f044ee27d5d1ea1e2bd196b462166b16152a9d0259",
+		"da80862773358b466ffadfe0b3293ab3d9fd53c5ea6c955358f568322daf6a57",
+		"e882b131016b52c1d3337080187cf768423efccbb517bb495ab812c4160ff44e",
+		"f64746d3c92b13050ed8d80236a7f0007c3b3f962f5ba793d19a601ebb1df403",
+		"44f53520926ec81fbd5a387845beb7df85a96a24ece18738bdcfa6a7822a176d",
+		"903293d8f2287ebe10e2374dc1a53e0bc887e592699f02d077d5263cdd55601c",
+		"02622ace8f7303a31cafc63f8fc48fdc16e1c8c8d234b2f0d6685282a9076031",
+		"20706fd788b2720a1ed2a5dad4952b01f413bcf0e7564de8cdc816689e2db95f",
+		"bce83f8ba5dd2fa572864c24ba1810f9522bc6004afe95877ac73241cafdab42",
+		"e4549ee16b9aa03099ca208c67adafcafa4c3f3e4e5303de6026e3ca8ff84460",
+		"aa52e000df2e16f55fb1032fc33bc42742dad6bd5a8fc0be0167436c5948501f",
+		"46376b80f409b29dc2b5f6f0c52591990896e5716f41477cd30085ab7f10301e",
+		"e0c418f7c8d9c4cdd7395b93ea124f3ad99021bb681dfc3302a9d99a2e53e64e",
+	}
+	ges: [16]ristretto255.Group_Element
+	for x, i in generator_multiples {
+		b, _ := hex.decode(transmute([]byte)(x), context.temp_allocator)
+
+		ge := &ges[i]
+		ok := ristretto255.ge_set_bytes(ge, b)
+		tc.expect(t, ok, fmt.tprintf("Expected true for %s", x))
+
+		x_check := ge_str(ge)
+
+		tc.expect(
+			t,
+			x == x_check,
+			fmt.tprintf(
+				"Expected %s (round-trip) but got %s instead",
+				x,
+				x_check,
+			),
+		)
+
+		if i == 1 {
+			tc.expect(
+				t,
+				ristretto255.ge_equal(ge, &ge_gen) == 1,
+				"Expected element 1 to be the generator",
+			)
+		}
+	}
+
+	// Addition/Multiplication.
+	for _, i in ges {
+		sc: ristretto255.Scalar
+		ristretto255.sc_set_u64(&sc, u64(i))
+
+		ge_check: ristretto255.Group_Element
+
+		ristretto255.ge_scalarmult_generator(&ge_check, &sc)
+		x_check := ge_str(&ge_check)
+		tc.expect(
+			t,
+			x_check == generator_multiples[i],
+			fmt.tprintf(
+				"Expected %s for G * %d (specialized), got %s",
+				generator_multiples[i],
+				i,
+				x_check,
+			),
+		)
+
+		ristretto255.ge_scalarmult(&ge_check, &ges[1], &sc)
+		x_check = ge_str(&ge_check)
+		tc.expect(
+			t,
+			x_check == generator_multiples[i],
+			fmt.tprintf(
+				"Expected %s for G * %d (generic), got %s (slow compare)",
+				generator_multiples[i],
+				i,
+				x_check,
+			),
+		)
+
+		ristretto255.ge_scalarmult_vartime(&ge_check, &ges[1], &sc)
+		x_check = ge_str(&ge_check)
+		tc.expect(
+			t,
+			x_check == generator_multiples[i],
+			fmt.tprintf(
+				"Expected %s for G * %d (generic vartime), got %s (slow compare)",
+				generator_multiples[i],
+				i,
+				x_check,
+			),
+		)
+
+		switch i {
+		case 0:
+		case:
+			ge_prev := &ges[i-1]
+			ristretto255.ge_add(&ge_check, ge_prev, &ge_gen)
+
+			x_check = ge_str(&ge_check)
+			tc.expect(
+				t,
+				x_check == generator_multiples[i],
+				fmt.tprintf(
+					"Expected %s for ges[%d] + ges[%d], got %s (slow compare)",
+					generator_multiples[i],
+					i-1,
+					1,
+					x_check,
+				),
+			)
+
+			tc.expect(
+				t,
+				ristretto255.ge_equal(&ges[i], &ge_check) == 1,
+				fmt.tprintf(
+					"Expected %s for ges[%d] + ges[%d], got %s (fast compare)",
+					generator_multiples[i],
+					i-1,
+					1,
+					x_check,
+				),
+			)
+		}
+	}
+
+	wide_test_vectors := []struct {
+		input: string,
+		output: string,
+	} {
+		{
+			"5d1be09e3d0c82fc538112490e35701979d99e06ca3e2b5b54bffe8b4dc772c14d98b696a1bbfb5ca32c436cc61c16563790306c79eaca7705668b47dffe5bb6",
+			"3066f82a1a747d45120d1740f14358531a8f04bbffe6a819f86dfe50f44a0a46",
+		},
+		{
+			"f116b34b8f17ceb56e8732a60d913dd10cce47a6d53bee9204be8b44f6678b270102a56902e2488c46120e9276cfe54638286b9e4b3cdb470b542d46c2068d38",
+			"f26e5b6f7d362d2d2a94c5d0e7602cb4773c95a2e5c31a64f133189fa76ed61b",
+		},
+		{
+			"8422e1bbdaab52938b81fd602effb6f89110e1e57208ad12d9ad767e2e25510c27140775f9337088b982d83d7fcf0b2fa1edffe51952cbe7365e95c86eaf325c",
+			"006ccd2a9e6867e6a2c5cea83d3302cc9de128dd2a9a57dd8ee7b9d7ffe02826",
+		},
+		{
+			"ac22415129b61427bf464e17baee8db65940c233b98afce8d17c57beeb7876c2150d15af1cb1fb824bbd14955f2b57d08d388aab431a391cfc33d5bafb5dbbaf",
+			"f8f0c87cf237953c5890aec3998169005dae3eca1fbb04548c635953c817f92a",
+		},
+		{
+			"165d697a1ef3d5cf3c38565beefcf88c0f282b8e7dbd28544c483432f1cec7675debea8ebb4e5fe7d6f6e5db15f15587ac4d4d4a1de7191e0c1ca6664abcc413",
+			"ae81e7dedf20a497e10c304a765c1767a42d6e06029758d2d7e8ef7cc4c41179",
+		},
+		{
+			"a836e6c9a9ca9f1e8d486273ad56a78c70cf18f0ce10abb1c7172ddd605d7fd2979854f47ae1ccf204a33102095b4200e5befc0465accc263175485f0e17ea5c",
+			"e2705652ff9f5e44d3e841bf1c251cf7dddb77d140870d1ab2ed64f1a9ce8628",
+		},
+		{
+			"2cdc11eaeb95daf01189417cdddbf95952993aa9cb9c640eb5058d09702c74622c9965a697a3b345ec24ee56335b556e677b30e6f90ac77d781064f866a3c982",
+			"80bd07262511cdde4863f8a7434cef696750681cb9510eea557088f76d9e5065",
+		},
+		// These all produce the same output.
+		{
+			"edffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff1200000000000000000000000000000000000000000000000000000000000000",
+			"304282791023b73128d277bdcb5c7746ef2eac08dde9f2983379cb8e5ef0517f",
+		},
+		{
+			"edffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+			"304282791023b73128d277bdcb5c7746ef2eac08dde9f2983379cb8e5ef0517f",
+		},
+		{
+			"0000000000000000000000000000000000000000000000000000000000000080ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f",
+			"304282791023b73128d277bdcb5c7746ef2eac08dde9f2983379cb8e5ef0517f",
+		},
+		{
+			"00000000000000000000000000000000000000000000000000000000000000001200000000000000000000000000000000000000000000000000000000000080",
+			"304282791023b73128d277bdcb5c7746ef2eac08dde9f2983379cb8e5ef0517f",
+		},
+	}
+	for v, _ in wide_test_vectors {
+		in_bytes, _ := hex.decode(transmute([]byte)(v.input), context.temp_allocator)
+
+		ge: ristretto255.Group_Element
+		ristretto255.ge_set_wide_bytes(&ge, in_bytes)
+
+		ge_check := ge_str(&ge)
+		tc.expect(
+			t,
+			ge_check == v.output,
+			fmt.tprintf(
+				"Expected %s for %s, got %s",
+				v.output,
+				ge_check,
+			),
+		)
+	}
+}
+
+@(test)
+test_ed25519 :: proc(t: ^testing.T) {
+	tc.log(t, "Testing ed25519")
+
+	test_vectors_rfc := []struct {
+		priv_key: string,
+		pub_key:  string,
+		msg:      string,
+		sig:      string,
+	} {
+		// Test vectors from RFC 8032
+		{
+			"9d61b19deffd5a60ba844af492ec2cc44449c5697b326919703bac031cae7f60",
+			"d75a980182b10ab7d54bfed3c964073a0ee172f3daa62325af021a68f707511a",
+			"",
+			"e5564300c360ac729086e2cc806e828a84877f1eb8e5d974d873e065224901555fb8821590a33bacc61e39701cf9b46bd25bf5f0595bbe24655141438e7a100b",
+		},
+		{
+			"4ccd089b28ff96da9db6c346ec114e0f5b8a319f35aba624da8cf6ed4fb8a6fb",
+			"3d4017c3e843895a92b70aa74d1b7ebc9c982ccf2ec4968cc0cd55f12af4660c",
+			"72",
+			"92a009a9f0d4cab8720e820b5f642540a2b27b5416503f8fb3762223ebdb69da085ac1e43e15996e458f3613d0f11d8c387b2eaeb4302aeeb00d291612bb0c00",
+		},
+		{
+			"c5aa8df43f9f837bedb7442f31dcb7b166d38535076f094b85ce3a2e0b4458f7",
+			"fc51cd8e6218a1a38da47ed00230f0580816ed13ba3303ac5deb911548908025",
+			"af82",
+			"6291d657deec24024827e69c3abe01a30ce548a284743a445e3680d7db5ac3ac18ff9b538d16f290ae67f760984dc6594a7c15e9716ed28dc027beceea1ec40a",
+		},
+		// TEST 1024 omitted for brevity, because all that does is add more to SHA-512
+		{
+			"833fe62409237b9d62ec77587520911e9a759cec1d19755b7da901b96dca3d42",
+			"ec172b93ad5e563bf4932c70e1245034c35467ef2efd4d64ebf819683467e2bf",
+			"ddaf35a193617abacc417349ae20413112e6fa4e89a97ea20a9eeee64b55d39a2192992a274fc1a836ba3c23a3feebbd454d4423643ce80e2a9ac94fa54ca49f",
+			"dc2a4459e7369633a52b1bf277839a00201009a3efbf3ecb69bea2186c26b58909351fc9ac90b3ecfdfbc7c66431e0303dca179c138ac17ad9bef1177331a704",
+		},
+	}
+	for v, _ in test_vectors_rfc {
+		priv_bytes, _ := hex.decode(transmute([]byte)(v.priv_key), context.temp_allocator)
+		pub_bytes, _ := hex.decode(transmute([]byte)(v.pub_key), context.temp_allocator)
+		msg_bytes, _ := hex.decode(transmute([]byte)(v.msg), context.temp_allocator)
+		sig_bytes, _ := hex.decode(transmute([]byte)(v.sig), context.temp_allocator)
+
+		priv_key: ed25519.Private_Key
+		ok := ed25519.private_key_set_bytes(&priv_key, priv_bytes)
+		tc.expect(
+			t,
+			ok,
+			fmt.tprintf(
+				"Expected %s to be a valid private key",
+				v.priv_key,
+			),
+		)
+
+		key_bytes: [32]byte
+		ed25519.private_key_bytes(&priv_key, key_bytes[:])
+		tc.expect(
+			t,
+			ok,
+			fmt.tprintf(
+				"Expected private key %s round-trip, got %s",
+				v.priv_key,
+				string(hex.encode(key_bytes[:], context.temp_allocator)),
+			),
+		)
+
+		pub_key: ed25519.Public_Key
+		ok = ed25519.public_key_set_bytes(&pub_key, pub_bytes)
+		tc.expect(
+			t,
+			ok,
+			fmt.tprintf(
+				"Expected %s to be a valid public key (priv->pub: %s)",
+				v.pub_key,
+				string(hex.encode(priv_key._pub_key._b[:], context.temp_allocator)),
+			),
+		)
+
+		ed25519.public_key_bytes(&pub_key, key_bytes[:])
+		tc.expect(
+			t,
+			ok,
+			fmt.tprintf(
+				"Expected public key %s round-trip, got %s",
+				v.pub_key,
+				string(hex.encode(key_bytes[:], context.temp_allocator)),
+			),
+		)
+
+		sig: [ed25519.SIGNATURE_SIZE]byte
+		ed25519.sign(&priv_key, msg_bytes, sig[:])
+		x := string(hex.encode(sig[:], context.temp_allocator))
+		tc.expect(
+			t,
+			x == v.sig,
+			fmt.tprintf(
+				"Expected %s for sign(%s, %s), got %s",
+				v.sig,
+				v.priv_key,
+				v.msg,
+				x,
+			),
+		)
+
+		ok = ed25519.verify(&pub_key, msg_bytes, sig_bytes)
+		tc.expect(
+			t,
+			ok,
+			fmt.tprintf(
+				"Expected true for verify(%s, %s, %s)",
+				v.pub_key,
+				v.msg,
+				v.sig,
+			),
+		)
+
+		ok = ed25519.verify(&priv_key._pub_key, msg_bytes, sig_bytes)
+		tc.expect(
+			t,
+			ok,
+			fmt.tprintf(
+				"Expected true for verify(pub(%s), %s %s)",
+				v.priv_key,
+				v.msg,
+				v.sig,
+			),
+		)
+
+		// Corrupt the message and make sure verification fails.
+		switch len(msg_bytes) {
+		case 0:
+			tmp_msg := []byte{69}
+			msg_bytes = tmp_msg[:]
+		case:
+			msg_bytes[0] = msg_bytes[0] ~ 69
+		}
+		ok = ed25519.verify(&pub_key, msg_bytes, sig_bytes)
+		tc.expect(
+			t,
+			ok == false,
+			fmt.tprintf(
+				"Expected false for verify(%s, %s (corrupted), %s)",
+				v.pub_key,
+				v.msg,
+				v.sig,
+			),
+		)
+	}
+
+	// Test cases from "Taming the many EdDSAs", which aim to exercise
+	// all of the ed25519 edge cases/implementation differences.
+	//
+	// - https://eprint.iacr.org/2020/1244
+	// - https://github.com/novifinancial/ed25519-speccheck
+	test_vectors_speccheck := []struct {
+		pub_key:        string,
+		msg:            string,
+		sig:            string,
+		pub_key_ok:     bool,
+		sig_ok:         bool,
+		sig_ok_relaxed: bool, // Ok if the small-order A check is relaxed.
+	} {
+		// S = 0, small-order A, small-order R
+		{
+			"c7176a703d4dd84fba3c0b760d10670f2a2053fa2c39ccc64ec7fd7792ac03fa",
+			"8c93255d71dcab10e8f379c26200f3c7bd5f09d9bc3068d3ef4edeb4853022b6",
+			"c7176a703d4dd84fba3c0b760d10670f2a2053fa2c39ccc64ec7fd7792ac037a0000000000000000000000000000000000000000000000000000000000000000",
+			true,
+			false,
+			true,
+		},
+		// 0 < S < L, small-order A, mixed-order R
+		{
+			"c7176a703d4dd84fba3c0b760d10670f2a2053fa2c39ccc64ec7fd7792ac03fa",
+			"9bd9f44f4dcc75bd531b56b2cd280b0bb38fc1cd6d1230e14861d861de092e79",
+			"f7badec5b8abeaf699583992219b7b223f1df3fbbea919844e3f7c554a43dd43a5bb704786be79fc476f91d3f3f89b03984d8068dcf1bb7dfc6637b45450ac04",
+			true,
+			false,
+			true,
+		},
+		// 0 < S < L, mixed-order A, small-order R
+		{
+			"f7badec5b8abeaf699583992219b7b223f1df3fbbea919844e3f7c554a43dd43",
+			"aebf3f2601a0c8c5d39cc7d8911642f740b78168218da8471772b35f9d35b9ab",
+			"c7176a703d4dd84fba3c0b760d10670f2a2053fa2c39ccc64ec7fd7792ac03fa8c4bd45aecaca5b24fb97bc10ac27ac8751a7dfe1baff8b953ec9f5833ca260e",
+			true,
+			true,
+			true,
+		},
+		// 0 < S < L, mixed-order A, mixed-order R
+		{
+			"cdb267ce40c5cd45306fa5d2f29731459387dbf9eb933b7bd5aed9a765b88d4d",
+			"9bd9f44f4dcc75bd531b56b2cd280b0bb38fc1cd6d1230e14861d861de092e79",
+			"9046a64750444938de19f227bb80485e92b83fdb4b6506c160484c016cc1852f87909e14428a7a1d62e9f22f3d3ad7802db02eb2e688b6c52fcd6648a98bd009",
+			true,
+			true,
+			true,
+		},
+		// 0 < S < L, mixed-order A, mixed-order R
+		{
+			"cdb267ce40c5cd45306fa5d2f29731459387dbf9eb933b7bd5aed9a765b88d4d",
+			"e47d62c63f830dc7a6851a0b1f33ae4bb2f507fb6cffec4011eaccd55b53f56c",
+			"160a1cb0dc9c0258cd0a7d23e94d8fa878bcb1925f2c64246b2dee1796bed5125ec6bc982a269b723e0668e540911a9a6a58921d6925e434ab10aa7940551a09",
+			true,
+			true, // cofactored-only
+			true,
+		},
+		// 0 < S < L, mixed-order A, L-order R
+		{
+			"cdb267ce40c5cd45306fa5d2f29731459387dbf9eb933b7bd5aed9a765b88d4d",
+			"e47d62c63f830dc7a6851a0b1f33ae4bb2f507fb6cffec4011eaccd55b53f56c",
+			"21122a84e0b5fca4052f5b1235c80a537878b38f3142356b2c2384ebad4668b7e40bc836dac0f71076f9abe3a53f9c03c1ceeeddb658d0030494ace586687405",
+			true,
+			true, // cofactored only, (fail if 8h is pre-reduced)
+			true,
+		},
+		// S > L, L-order A, L-order R
+		{
+			"442aad9f089ad9e14647b1ef9099a1ff4798d78589e66f28eca69c11f582a623",
+			"85e241a07d148b41e47d62c63f830dc7a6851a0b1f33ae4bb2f507fb6cffec40",
+			"e96f66be976d82e60150baecff9906684aebb1ef181f67a7189ac78ea23b6c0e547f7690a0e2ddcd04d87dbc3490dc19b3b3052f7ff0538cb68afb369ba3a514",
+			true,
+			false,
+			false,
+		},
+		// S >> L, L-order A, L-order R
+		{
+			"442aad9f089ad9e14647b1ef9099a1ff4798d78589e66f28eca69c11f582a623",
+			"85e241a07d148b41e47d62c63f830dc7a6851a0b1f33ae4bb2f507fb6cffec40",
+			"8ce5b96c8f26d0ab6c47958c9e68b937104cd36e13c33566acd2fe8d38aa19427e71f98a473474f2f13f06f97c20d58cc3f54b8bd0d272f42b695dd7e89a8c22",
+			true,
+			false,
+			false,
+		},
+		// 0 < S < L, mixed-order A, small-order R (non-canonical R, reduced for hash)
+		{
+			"f7badec5b8abeaf699583992219b7b223f1df3fbbea919844e3f7c554a43dd43",
+			"9bedc267423725d473888631ebf45988bad3db83851ee85c85e241a07d148b41",
+			"ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff03be9678ac102edcd92b0210bb34d7428d12ffc5df5f37e359941266a4e35f0f",
+			true,
+			false,
+			false,
+		},
+		// 0 < S < L, mixed-order A, small-order R (non-canonical R, not reduced for hash)
+		{
+			"f7badec5b8abeaf699583992219b7b223f1df3fbbea919844e3f7c554a43dd43",
+			"9bedc267423725d473888631ebf45988bad3db83851ee85c85e241a07d148b41",
+			"ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffca8c5b64cd208982aa38d4936621a4775aa233aa0505711d8fdcfdaa943d4908",
+			true,
+			false,
+			false,
+		},
+		// 0 < S < L, small-order A, mixed-order R (non-canonical A, reduced for hash)
+		{
+			"ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+			"e96b7021eb39c1a163b6da4e3093dcd3f21387da4cc4572be588fafae23c155b",
+			"a9d55260f765261eb9b84e106f665e00b867287a761990d7135963ee0a7d59dca5bb704786be79fc476f91d3f3f89b03984d8068dcf1bb7dfc6637b45450ac04",
+			false,
+			false,
+			false,
+		},
+		// 0 < S < L, small-order A, mixed-order R (non-canonical A, not reduced for hash)
+		{
+			"ecffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+			"39a591f5321bbe07fd5a23dc2f39d025d74526615746727ceefd6e82ae65c06f",
+			"a9d55260f765261eb9b84e106f665e00b867287a761990d7135963ee0a7d59dca5bb704786be79fc476f91d3f3f89b03984d8068dcf1bb7dfc6637b45450ac04",
+			false,
+			false,
+			false,
+		},
+	}
+	for v, i in test_vectors_speccheck {
+		pub_bytes, _ := hex.decode(transmute([]byte)(v.pub_key), context.temp_allocator)
+		msg_bytes, _ := hex.decode(transmute([]byte)(v.msg), context.temp_allocator)
+		sig_bytes, _ := hex.decode(transmute([]byte)(v.sig), context.temp_allocator)
+
+		pub_key: ed25519.Public_Key
+		ok := ed25519.public_key_set_bytes(&pub_key, pub_bytes)
+		tc.expect(
+			t,
+			ok == v.pub_key_ok,
+			fmt.tprintf(
+				"speccheck/%d: Expected %s to be a (in)valid public key, got %v",
+				i,
+				v.pub_key,
+				ok,
+			),
+		)
+
+		// If A is rejected for being non-canonical, skip signature check.
+		if !v.pub_key_ok {
+			continue
+		}
+
+		ok = ed25519.verify(&pub_key, msg_bytes, sig_bytes)
+		tc.expect(
+			t,
+			ok == v.sig_ok,
+			fmt.tprintf(
+				"speccheck/%d Expected %v for verify(%s, %s, %s)",
+				i,
+				v.sig_ok,
+				v.pub_key,
+				v.msg,
+				v.sig,
+			),
+		)
+
+		// If the signature is accepted, skip the relaxed signature check.
+		if v.sig_ok {
+			continue
+		}
+
+		ok = ed25519.verify(&pub_key, msg_bytes, sig_bytes, true)
+		tc.expect(
+			t,
+			ok == v.sig_ok_relaxed,
+			fmt.tprintf(
+				"speccheck/%d Expected %v for verify(%s, %s, %s, true)",
+				i,
+				v.sig_ok_relaxed,
+				v.pub_key,
+				v.msg,
+				v.sig,
+			),
+		)
+	}
+}
+
+@(test)
+test_x25519 :: proc(t: ^testing.T) {
+	tc.log(t, "Testing X25519")
+
+	// Local copy of this so that the base point doesn't need to be exported.
+	_BASE_POINT: [32]byte = {
+		9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	}
+
+	test_vectors := []struct {
+		scalar:  string,
+		point:   string,
+		product: string,
+	} {
+		// Test vectors from RFC 7748
+		{
+			"a546e36bf0527c9d3b16154b82465edd62144c0ac1fc5a18506a2244ba449ac4",
+			"e6db6867583030db3594c1a424b15f7c726624ec26b3353b10a903a6d0ab1c4c",
+			"c3da55379de9c6908e94ea4df28d084f32eccf03491c71f754b4075577a28552",
+		},
+		{
+			"4b66e9d4d1b4673c5ad22691957d6af5c11b6421e0ea01d42ca4169e7918ba0d",
+			"e5210f12786811d3f4b7959d0538ae2c31dbe7106fc03c3efc4cd549c715a493",
+			"95cbde9476e8907d7aade45cb4b873f88b595a68799fa152e6f8f7647aac7957",
+		},
+	}
+	for v, _ in test_vectors {
+		scalar, _ := hex.decode(transmute([]byte)(v.scalar), context.temp_allocator)
+		point, _ := hex.decode(transmute([]byte)(v.point), context.temp_allocator)
+
+		derived_point: [x25519.POINT_SIZE]byte
+		x25519.scalarmult(derived_point[:], scalar[:], point[:])
+		derived_point_str := string(hex.encode(derived_point[:], context.temp_allocator))
+
+		tc.expect(
+			t,
+			derived_point_str == v.product,
+			fmt.tprintf(
+				"Expected %s for %s * %s, but got %s instead",
+				v.product,
+				v.scalar,
+				v.point,
+				derived_point_str,
+			),
+		)
+
+		// Abuse the test vectors to sanity-check the scalar-basepoint multiply.
+		p1, p2: [x25519.POINT_SIZE]byte
+		x25519.scalarmult_basepoint(p1[:], scalar[:])
+		x25519.scalarmult(p2[:], scalar[:], _BASE_POINT[:])
+		p1_str := string(hex.encode(p1[:], context.temp_allocator))
+		p2_str := string(hex.encode(p2[:], context.temp_allocator))
+		tc.expect(
+			t,
+			p1_str == p2_str,
+			fmt.tprintf(
+				"Expected %s for %s * basepoint, but got %s instead",
+				p2_str,
+				v.scalar,
+				p1_str,
+			),
+		)
+	}
+}
+
+@(private)
+ge_str :: proc(ge: ^ristretto255.Group_Element) -> string {
+	b: [ristretto255.ELEMENT_SIZE]byte
+	ristretto255.ge_bytes(ge, b[:])
+	return string(hex.encode(b[:], context.temp_allocator))
+}
+
+@(private)
+fe_str :: proc(fe: ^field.Tight_Field_Element) -> string {
+	b: [32]byte
+	field.fe_to_bytes(&b, fe)
+	return string(hex.encode(b[:], context.temp_allocator))
+}
diff --git a/tests/core/crypto/test_core_crypto_hash.odin b/tests/core/crypto/test_core_crypto_hash.odin
index bd40a9b23..c4e8e8dd7 100644
--- a/tests/core/crypto/test_core_crypto_hash.odin
+++ b/tests/core/crypto/test_core_crypto_hash.odin
@@ -1,5 +1,6 @@
 package test_core_crypto
 
+import "base:runtime"
 import "core:bytes"
 import "core:encoding/hex"
 import "core:fmt"
@@ -12,6 +13,8 @@ import tc "tests:common"
 
 @(test)
 test_hash :: proc(t: ^testing.T) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+
 	tc.log(t, "Testing Hashes")
 
 	// TODO:
diff --git a/tests/core/crypto/test_core_crypto_kdf.odin b/tests/core/crypto/test_core_crypto_kdf.odin
index d9e9a8501..73177d8be 100644
--- a/tests/core/crypto/test_core_crypto_kdf.odin
+++ b/tests/core/crypto/test_core_crypto_kdf.odin
@@ -1,5 +1,6 @@
 package test_core_crypto
 
+import "base:runtime"
 import "core:encoding/hex"
 import "core:fmt"
 import "core:testing"
@@ -12,6 +13,8 @@ import tc "tests:common"
 
 @(test)
 test_kdf :: proc(t: ^testing.T) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+
 	tc.log(t, "Testing KDFs")
 
 	test_hkdf(t)
diff --git a/tests/core/crypto/test_core_crypto_mac.odin b/tests/core/crypto/test_core_crypto_mac.odin
index f0e6fa1bf..f2eeacb19 100644
--- a/tests/core/crypto/test_core_crypto_mac.odin
+++ b/tests/core/crypto/test_core_crypto_mac.odin
@@ -1,5 +1,6 @@
 package test_core_crypto
 
+import "base:runtime"
 import "core:encoding/hex"
 import "core:fmt"
 import "core:mem"
@@ -14,6 +15,8 @@ import tc "tests:common"
 
 @(test)
 test_mac :: proc(t: ^testing.T) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+
 	tc.log(t, "Testing MACs")
 
 	test_hmac(t)
diff --git a/tests/core/crypto/test_core_crypto_sha3_variants.odin b/tests/core/crypto/test_core_crypto_sha3_variants.odin
index ec2d24331..8e44996bc 100644
--- a/tests/core/crypto/test_core_crypto_sha3_variants.odin
+++ b/tests/core/crypto/test_core_crypto_sha3_variants.odin
@@ -1,5 +1,6 @@
 package test_core_crypto
 
+import "base:runtime"
 import "core:encoding/hex"
 import "core:fmt"
 import "core:testing"
@@ -12,6 +13,8 @@ import tc "tests:common"
 
 @(test)
 test_sha3_variants :: proc(t: ^testing.T) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+
 	tc.log(t, "Testing SHA3 derived functions")
 
 	test_shake(t)
diff --git a/tests/core/crypto/test_crypto_benchmark.odin b/tests/core/crypto/test_crypto_benchmark.odin
index 494913b6b..cc69cb16d 100644
--- a/tests/core/crypto/test_crypto_benchmark.odin
+++ b/tests/core/crypto/test_crypto_benchmark.odin
@@ -1,5 +1,6 @@
 package test_core_crypto
 
+import "base:runtime"
 import "core:encoding/hex"
 import "core:fmt"
 import "core:testing"
@@ -7,6 +8,7 @@ import "core:time"
 
 import "core:crypto/chacha20"
 import "core:crypto/chacha20poly1305"
+import "core:crypto/ed25519"
 import "core:crypto/poly1305"
 import "core:crypto/x25519"
 
@@ -16,11 +18,14 @@ import tc "tests:common"
 
 @(test)
 bench_crypto :: proc(t: ^testing.T) {
+	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+
 	fmt.println("Starting benchmarks:")
 
 	bench_chacha20(t)
 	bench_poly1305(t)
 	bench_chacha20poly1305(t)
+	bench_ed25519(t)
 	bench_x25519(t)
 }
 
@@ -216,6 +221,64 @@ bench_chacha20poly1305 :: proc(t: ^testing.T) {
 	benchmark_print(name, options)
 }
 
+bench_ed25519 :: proc(t: ^testing.T) {
+	iters :: 10000
+
+	priv_str := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe"
+	priv_bytes, _ := hex.decode(transmute([]byte)(priv_str), context.temp_allocator)
+	priv_key: ed25519.Private_Key
+	start := time.now()
+	for i := 0; i < iters; i = i + 1 {
+		ok := ed25519.private_key_set_bytes(&priv_key, priv_bytes)
+		assert(ok, "private key should deserialize")
+	}
+	elapsed := time.since(start)
+	tc.log(
+		t,
+		fmt.tprintf(
+			"ed25519.private_key_set_bytes: ~%f us/op",
+			time.duration_microseconds(elapsed) / iters,
+		),
+	)
+
+	pub_bytes := priv_key._pub_key._b[:] // "I know what I am doing"
+	pub_key: ed25519.Public_Key
+	start = time.now()
+	for i := 0; i < iters; i = i + 1 {
+		ok := ed25519.public_key_set_bytes(&pub_key, pub_bytes[:])
+		assert(ok, "public key should deserialize")
+	}
+	elapsed = time.since(start)
+	tc.log(
+		t,
+		fmt.tprintf(
+			"ed25519.public_key_set_bytes: ~%f us/op",
+			time.duration_microseconds(elapsed) / iters,
+		),
+	)
+
+	msg := "Got a job for you, 621."
+	sig_bytes: [ed25519.SIGNATURE_SIZE]byte
+	msg_bytes := transmute([]byte)(msg)
+	start = time.now()
+	for i := 0; i < iters; i = i + 1 {
+		ed25519.sign(&priv_key, msg_bytes, sig_bytes[:])
+	}
+	elapsed = time.since(start)
+	tc.log(t, fmt.tprintf("ed25519.sign: ~%f us/op", time.duration_microseconds(elapsed) / iters))
+
+	start = time.now()
+	for i := 0; i < iters; i = i + 1 {
+		ok := ed25519.verify(&pub_key, msg_bytes, sig_bytes[:])
+		assert(ok, "signature should validate")
+	}
+	elapsed = time.since(start)
+	tc.log(
+		t,
+		fmt.tprintf("ed25519.verify: ~%f us/op", time.duration_microseconds(elapsed) / iters),
+	)
+}
+
 bench_x25519 :: proc(t: ^testing.T) {
 	point_str := "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef"
 	scalar_str := "cafebabecafebabecafebabecafebabecafebabecafebabecafebabecafebabe"