core/crypto/_edwards25519: Initial import

This commit is contained in:
Yawning Angel
2024-03-21 02:17:59 +09:00
parent fec42a6d74
commit 563c527419
7 changed files with 885 additions and 49 deletions

View File

@@ -0,0 +1,428 @@
package _edwards25519
/*
This implements the edwards25519 composite-order group, primarily for
the purpose of implementing X25519, Ed25519, and ristretto255. Use of
this package for other purposes is NOT RECOMMENDED.
See:
- https://eprint.iacr.org/2011/368.pdf
- https://datatracker.ietf.org/doc/html/rfc8032
- https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html
*/
import "base:intrinsics"
import "core:crypto"
import field "core:crypto/_fiat/field_curve25519"
import "core:mem"
// Group_Element is an edwards25519 group element, as extended homogenous
// coordinates, which represents the affine point `(x, y)` as `(X, Y, Z, T)`,
// with the relations `x = X/Z`, `y = Y/Z`, and `x * y = T/Z`.
//
// d = -121665/121666 = 37095705934669439343138083508754565189542113879843219016388785533085940283555
// a = -1
//
// Notes:
// - There is considerable scope for optimization, however that
// will not change the external API, and this is simple and reasonably
// performant.
// - The API delibarately makes it hard to create arbitrary group
// elements that are not on the curve.
// - The group element decoding routine takes the opinionated stance of
// rejecting non-canonical encodings.
FE_D := field.Tight_Field_Element {
929955233495203,
466365720129213,
1662059464998953,
2033849074728123,
1442794654840575,
}
@(private)
FE_A := field.Tight_Field_Element {
2251799813685228,
2251799813685247,
2251799813685247,
2251799813685247,
2251799813685247,
}
@(private)
FE_D2 := field.Tight_Field_Element {
1859910466990425,
932731440258426,
1072319116312658,
1815898335770999,
633789495995903,
}
@(private)
GE_BASEPOINT := Group_Element {
field.Tight_Field_Element {
1738742601995546,
1146398526822698,
2070867633025821,
562264141797630,
587772402128613,
},
field.Tight_Field_Element {
1801439850948184,
1351079888211148,
450359962737049,
900719925474099,
1801439850948198,
},
field.Tight_Field_Element{1, 0, 0, 0, 0},
field.Tight_Field_Element {
1841354044333475,
16398895984059,
755974180946558,
900171276175154,
1821297809914039,
},
}
GE_IDENTITY := Group_Element {
field.Tight_Field_Element{0, 0, 0, 0, 0},
field.Tight_Field_Element{1, 0, 0, 0, 0},
field.Tight_Field_Element{1, 0, 0, 0, 0},
field.Tight_Field_Element{0, 0, 0, 0, 0},
}
Group_Element :: struct {
x: field.Tight_Field_Element,
y: field.Tight_Field_Element,
z: field.Tight_Field_Element,
t: field.Tight_Field_Element,
}
ge_clear :: proc "contextless" (ge: ^Group_Element) {
mem.zero_explicit(ge, size_of(Group_Element))
}
ge_set :: proc "contextless" (ge, a: ^Group_Element) {
field.fe_set(&ge.x, &a.x)
field.fe_set(&ge.y, &a.y)
field.fe_set(&ge.z, &a.z)
field.fe_set(&ge.t, &a.t)
}
@(require_results)
ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool {
if len(b) != 32 {
intrinsics.trap()
}
b_ := transmute(^[32]byte)(raw_data(b))
// Do the work in a scratch element, so that ge is unchanged on
// failure.
tmp: Group_Element = ---
defer ge_clear(&tmp)
field.fe_one(&tmp.z) // Z = 1
// The encoding is the y-coordinate, with the x-coordinate polarity
// (odd/even) encoded in the MSB.
field.fe_from_bytes(&tmp.y, b_) // ignores high bit
// Recover the candidate x-coordinate via the curve equation:
// x^2 = (y^2 - 1) / (d * y^2 + 1) (mod p)
fe_tmp := &tmp.t // Use this to store intermediaries.
fe_one := &tmp.z
// x = num = y^2 - 1
field.fe_carry_square(fe_tmp, field.fe_relax_cast(&tmp.y)) // fe_tmp = y^2
field.fe_carry_sub(&tmp.x, fe_tmp, fe_one)
// den = d * y^2 + 1
field.fe_carry_mul(fe_tmp, field.fe_relax_cast(fe_tmp), field.fe_relax_cast(&FE_D))
field.fe_carry_add(fe_tmp, fe_tmp, fe_one)
// x = invsqrt(den/num)
is_square := field.fe_carry_sqrt_ratio_m1(
&tmp.x,
field.fe_relax_cast(&tmp.x),
field.fe_relax_cast(fe_tmp),
)
if is_square == 0 {
return false
}
// Pick the right x-coordinate.
field.fe_cond_negate(&tmp.x, &tmp.x, int(b[31] >> 7))
// t = x * y
field.fe_carry_mul(&tmp.t, field.fe_relax_cast(&tmp.x), field.fe_relax_cast(&tmp.y))
// Reject non-canonical encodings of ge.
buf: [32]byte = ---
field.fe_to_bytes(&buf, &tmp.y)
buf[31] |= byte(field.fe_is_negative(&tmp.x)) << 7
is_canonical := crypto.compare_constant_time(b, buf[:])
ge_cond_assign(ge, &tmp, is_canonical)
mem.zero_explicit(&buf, size_of(buf))
return is_canonical == 1
}
ge_bytes :: proc "contextless" (ge: ^Group_Element, dst: []byte) {
if len(dst) != 32 {
intrinsics.trap()
}
dst_ := transmute(^[32]byte)(raw_data(dst))
// Convert the element to affine (x, y) representation.
x, y, z_inv: field.Tight_Field_Element = ---, ---, ---
field.fe_carry_inv(&z_inv, field.fe_relax_cast(&ge.z))
field.fe_carry_mul(&x, field.fe_relax_cast(&ge.x), field.fe_relax_cast(&z_inv))
field.fe_carry_mul(&y, field.fe_relax_cast(&ge.y), field.fe_relax_cast(&z_inv))
// Encode the y-coordinate.
field.fe_to_bytes(dst_, &y)
// Copy the least significant bit of the x-coordinate to the most
// significant bit of the encoded y-coordinate.
dst_[31] |= byte((x[0] & 1) << 7)
field.fe_clear_vec([]^field.Tight_Field_Element{&x, &y, &z_inv})
}
ge_identity :: proc "contextless" (ge: ^Group_Element) {
field.fe_zero(&ge.x)
field.fe_one(&ge.y)
field.fe_one(&ge.z)
field.fe_zero(&ge.t)
}
ge_generator :: proc "contextless" (ge: ^Group_Element) {
ge_set(ge, &GE_BASEPOINT)
}
@(private)
Addend_Group_Element :: struct {
y2_minus_x2: field.Loose_Field_Element, // t1
y2_plus_x2: field.Loose_Field_Element, // t3
k_times_t2: field.Tight_Field_Element, // t4
two_times_z2: field.Loose_Field_Element, // t5
}
@(private)
ge_addend_set :: proc "contextless" (ge_a: ^Addend_Group_Element, ge: ^Group_Element) {
field.fe_sub(&ge_a.y2_minus_x2, &ge.y, &ge.x)
field.fe_add(&ge_a.y2_plus_x2, &ge.y, &ge.x)
field.fe_carry_mul(&ge_a.k_times_t2, field.fe_relax_cast(&FE_D2), field.fe_relax_cast(&ge.t))
field.fe_add(&ge_a.two_times_z2, &ge.z, &ge.z)
}
@(private)
ge_addend_conditional_assign :: proc "contextless" (ge_a, a: ^Addend_Group_Element, ctrl: int) {
field.fe_cond_select(&ge_a.y2_minus_x2, &ge_a.y2_minus_x2, &a.y2_minus_x2, ctrl)
field.fe_cond_select(&ge_a.y2_plus_x2, &ge_a.y2_plus_x2, &a.y2_plus_x2, ctrl)
field.fe_cond_select(&ge_a.k_times_t2, &ge_a.k_times_t2, &a.k_times_t2, ctrl)
field.fe_cond_select(&ge_a.two_times_z2, &ge_a.two_times_z2, &a.two_times_z2, ctrl)
}
@(private)
Add_Scratch :: struct {
A, B, C, D: field.Tight_Field_Element,
E, F, G, H: field.Loose_Field_Element,
t0, t2: field.Loose_Field_Element,
}
ge_add :: proc "contextless" (ge, a, b: ^Group_Element) {
b_: Addend_Group_Element = ---
ge_addend_set(&b_, b)
scratch: Add_Scratch = ---
ge_add_addend(ge, a, &b_, &scratch)
mem.zero_explicit(&b_, size_of(Addend_Group_Element))
mem.zero_explicit(&scratch, size_of(Add_Scratch))
}
@(private)
ge_add_addend :: proc "contextless" (
ge, a: ^Group_Element,
b: ^Addend_Group_Element,
scratch: ^Add_Scratch,
) {
// https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#addition-add-2008-hwcd-3
// Assumptions: k=2*d.
//
// t0 = Y1-X1
// t1 = Y2-X2
// A = t0*t1
// t2 = Y1+X1
// t3 = Y2+X2
// B = t2*t3
// t4 = k*T2
// C = T1*t4
// t5 = 2*Z2
// D = Z1*t5
// E = B-A
// F = D-C
// G = D+C
// H = B+A
// X3 = E*F
// Y3 = G*H
// T3 = E*H
// Z3 = F*G
//
// In order to make the scalar multiply faster, the addend is provided
// as a `Addend_Group_Element` with t1, t3, t4, and t5 precomputed, as
// it is trivially obvious that those are the only values used by the
// formula that are directly dependent on `b`, and are only dependent
// on `b` and constants. This saves 1 sub, 2 adds, and 1 multiply,
// each time the intermediate representation can be reused.
A, B, C, D := &scratch.A, &scratch.B, &scratch.C, &scratch.D
E, F, G, H := &scratch.E, &scratch.F, &scratch.G, &scratch.H
t0, t2 := &scratch.t0, &scratch.t2
field.fe_sub(t0, &a.y, &a.x)
t1 := &b.y2_minus_x2
field.fe_carry_mul(A, t0, t1)
field.fe_add(t2, &a.y, &a.x)
t3 := &b.y2_plus_x2
field.fe_carry_mul(B, t2, t3)
t4 := &b.k_times_t2
field.fe_carry_mul(C, field.fe_relax_cast(&a.t), field.fe_relax_cast(t4))
t5 := &b.two_times_z2
field.fe_carry_mul(D, field.fe_relax_cast(&a.z), t5)
field.fe_sub(E, B, A)
field.fe_sub(F, D, C)
field.fe_add(G, D, C)
field.fe_add(H, B, A)
field.fe_carry_mul(&ge.x, E, F)
field.fe_carry_mul(&ge.y, G, H)
field.fe_carry_mul(&ge.t, E, H)
field.fe_carry_mul(&ge.z, F, G)
}
@(private)
Double_Scratch :: struct {
A, B, C, D, G: field.Tight_Field_Element,
t0, t2, t3: field.Tight_Field_Element,
E, F, H: field.Loose_Field_Element,
t1: field.Loose_Field_Element,
}
ge_double :: proc "contextless" (ge, a: ^Group_Element, scratch: ^Double_Scratch = nil) {
// https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-dbl-2008-hwcd
//
// A = X1^2
// B = Y1^2
// t0 = Z1^2
// C = 2*t0
// D = a*A
// t1 = X1+Y1
// t2 = t1^2
// t3 = t2-A
// E = t3-B
// G = D+B
// F = G-C
// H = D-B
// X3 = E*F
// Y3 = G*H
// T3 = E*H
// Z3 = F*G
sanitize, scratch := scratch == nil, scratch
if sanitize {
tmp: Double_Scratch = ---
scratch = &tmp
}
A, B, C, D, G := &scratch.A, &scratch.B, &scratch.C, &scratch.D, &scratch.G
t0, t2, t3 := &scratch.t0, &scratch.t2, &scratch.t3
E, F, H := &scratch.E, &scratch.F, &scratch.H
t1 := &scratch.t1
field.fe_carry_square(A, field.fe_relax_cast(&a.x))
field.fe_carry_square(B, field.fe_relax_cast(&a.y))
field.fe_carry_square(t0, field.fe_relax_cast(&a.z))
field.fe_carry_add(C, t0, t0)
field.fe_carry_mul(D, field.fe_relax_cast(&FE_A), field.fe_relax_cast(A))
field.fe_add(t1, &a.x, &a.y)
field.fe_carry_square(t2, t1)
field.fe_carry_sub(t3, t2, A)
field.fe_sub(E, t3, B)
field.fe_carry_add(G, D, B)
field.fe_sub(F, G, C)
field.fe_sub(H, D, B)
G_ := field.fe_relax_cast(G)
field.fe_carry_mul(&ge.x, E, F)
field.fe_carry_mul(&ge.y, G_, H)
field.fe_carry_mul(&ge.t, E, H)
field.fe_carry_mul(&ge.z, F, G_)
if sanitize {
mem.zero_explicit(scratch, size_of(Double_Scratch))
}
}
ge_negate :: proc "contextless" (ge, a: ^Group_Element) {
field.fe_carry_opp(&ge.x, &a.x)
field.fe_set(&ge.y, &a.y)
field.fe_set(&ge.z, &a.z)
field.fe_carry_opp(&ge.t, &a.t)
}
ge_cond_negate :: proc "contextless" (ge, a: ^Group_Element, ctrl: int) {
tmp: Group_Element = ---
ge_negate(&tmp, a)
ge_cond_assign(ge, &tmp, ctrl)
ge_clear(&tmp)
}
ge_cond_assign :: proc "contextless" (ge, a: ^Group_Element, ctrl: int) {
field.fe_cond_assign(&ge.x, &a.x, ctrl)
field.fe_cond_assign(&ge.y, &a.y, ctrl)
field.fe_cond_assign(&ge.z, &a.z, ctrl)
field.fe_cond_assign(&ge.t, &a.t, ctrl)
}
ge_cond_select :: proc "contextless" (ge, a, b: ^Group_Element, ctrl: int) {
field.fe_cond_select(&ge.x, &a.x, &b.x, ctrl)
field.fe_cond_select(&ge.y, &a.y, &b.y, ctrl)
field.fe_cond_select(&ge.z, &a.z, &b.z, ctrl)
field.fe_cond_select(&ge.t, &a.t, &b.t, ctrl)
}
@(require_results)
ge_equal :: proc "contextless" (a, b: ^Group_Element) -> int {
// (x, y) ?= (x', y') -> (X/Z, Y/Z) ?= (X'/Z', Y'/Z')
// X/Z ?= X'/Z', Y/Z ?= Y'/Z' -> X*Z' ?= X'*Z, Y*Z' ?= Y'*Z
ax_bz, bx_az, ay_bz, by_az: field.Tight_Field_Element = ---, ---, ---, ---
field.fe_carry_mul(&ax_bz, field.fe_relax_cast(&a.x), field.fe_relax_cast(&b.z))
field.fe_carry_mul(&bx_az, field.fe_relax_cast(&b.x), field.fe_relax_cast(&a.z))
field.fe_carry_mul(&ay_bz, field.fe_relax_cast(&a.y), field.fe_relax_cast(&b.z))
field.fe_carry_mul(&by_az, field.fe_relax_cast(&b.y), field.fe_relax_cast(&a.z))
ret := field.fe_equal(&ax_bz, &bx_az) & field.fe_equal(&ay_bz, &by_az)
field.fe_clear_vec([]^field.Tight_Field_Element{&ax_bz, &ay_bz, &bx_az, &by_az})
return ret
}
@(require_results)
ge_is_small_order :: proc "contextless" (ge: ^Group_Element) -> bool {
tmp: Group_Element = ---
ge_double(&tmp, ge)
ge_double(&tmp, &tmp)
ge_double(&tmp, &tmp)
return ge_equal(&tmp, &GE_IDENTITY) == 1
}
@(require_results)
ge_in_prime_order_subgroup_vartime :: proc "contextless" (ge: ^Group_Element) -> bool {
// This is currently *very* expensive. The faster method would be
// something like (https://eprint.iacr.org/2022/1164.pdf), however
// that is a ~50% speedup, and a lot of added complexity for something
// that is better solved by "just use ristretto255".
tmp: Group_Element = ---
_ge_scalarmult(&tmp, ge, &SC_ELL, true)
return ge_equal(&tmp, &GE_IDENTITY) == 1
}

View File

@@ -0,0 +1,61 @@
package _edwards25519
import "base:intrinsics"
import field "core:crypto/_fiat/field_scalar25519"
import "core:mem"
Scalar :: field.Montgomery_Domain_Field_Element
// WARNING: This is non-canonical and only to be used when checking if
// a group element is on the prime-order subgroup.
@(private)
SC_ELL := field.Non_Montgomery_Domain_Field_Element {
field.ELL[0],
field.ELL[1],
field.ELL[2],
field.ELL[3],
}
sc_set_u64 :: proc "contextless" (sc: ^Scalar, i: u64) {
tmp := field.Non_Montgomery_Domain_Field_Element{i, 0, 0, 0}
field.fe_to_montgomery(sc, &tmp)
mem.zero_explicit(&tmp, size_of(tmp))
}
@(require_results)
sc_set_bytes :: proc "contextless" (sc: ^Scalar, b: []byte) -> bool {
if len(b) != 32 {
intrinsics.trap()
}
b_ := transmute(^[32]byte)(raw_data(b))
return field.fe_from_bytes(sc, b_)
}
sc_set_bytes_rfc8032 :: proc "contextless" (sc: ^Scalar, b: []byte) {
if len(b) != 32 {
intrinsics.trap()
}
b_ := transmute(^[32]byte)(raw_data(b))
field.fe_from_bytes_rfc8032(sc, b_)
}
sc_clear :: proc "contextless" (sc: ^Scalar) {
mem.zero_explicit(sc, size_of(Scalar))
}
sc_set :: field.fe_set
sc_set_bytes_wide :: field.fe_from_bytes_wide
sc_bytes :: field.fe_to_bytes
sc_zero :: field.fe_zero
sc_one :: field.fe_one
sc_add :: field.fe_add
sc_sub :: field.fe_sub
sc_negate :: field.fe_opp
sc_mul :: field.fe_mul
sc_square :: field.fe_square
sc_cond_assign :: field.fe_cond_assign
sc_equal :: field.fe_equal

View File

@@ -0,0 +1,288 @@
package _edwards25519
import field "core:crypto/_fiat/field_scalar25519"
import "core:math/bits"
import "core:mem"
// GE_BASEPOINT_TABLE is 1 * G, ... 15 * G, in precomputed format.
//
// Note: When generating, the values were reduced to Tight_Field_Element
// ranges, even though that is not required.
@(private)
GE_BASEPOINT_TABLE := Multiply_Table {
{
{62697248952638, 204681361388450, 631292143396476, 338455783676468, 1213667448819585},
{1288382639258501, 245678601348599, 269427782077623, 1462984067271730, 137412439391563},
{301289933810280, 1259582250014073, 1422107436869536, 796239922652654, 1953934009299142},
{2, 0, 0, 0, 0},
},
{
{1519297034332653, 1098796920435767, 1823476547744119, 808144629470969, 2110930855619772},
{338005982828284, 1667856962156925, 100399270107451, 1604566703601691, 1950338038771369},
{1920505767731247, 1443759578976892, 1659852098357048, 1484431291070208, 275018744912646},
{763163817085987, 2195095074806923, 2167883174351839, 1868059999999762, 911071066608705},
},
{
{960627541894068, 1314966688943942, 1126875971034044, 2059608312958945, 605975666152586},
{1714478358025626, 2209607666607510, 1600912834284834, 496072478982142, 481970031861896},
{851735079403194, 1088965826757164, 141569479297499, 602804610059257, 2004026468601520},
{197585529552380, 324719066578543, 564481854250498, 1173818332764578, 35452976395676},
},
{
{1152980410747203, 2196804280851952, 25745194962557, 1915167295473129, 1266299690309224},
{809905889679060, 979732230071345, 1509972345538142, 188492426534402, 818965583123815},
{997685409185036, 1451818320876327, 2126681166774509, 2000509606057528, 235432372486854},
{887734189279642, 1460338685162044, 877378220074262, 102436391401299, 153369156847490},
},
{
{2056621900836770, 1821657694132497, 1627986892909426, 1163363868678833, 1108873376459226},
{1187697490593623, 1066539945237335, 885654531892000, 1357534489491782, 359370291392448},
{1509033452137525, 1305318174298508, 613642471748944, 1987256352550234, 1044283663101541},
{220105720697037, 387661783287620, 328296827867762, 360035589590664, 795213236824054},
},
{
{1820794733038396, 1612235121681074, 757405923441402, 1094031020892801, 231025333128907},
{1639067873254194, 1484176557946322, 300800382144789, 1329915446659183, 1211704578730455},
{641900794791527, 1711751746971612, 179044712319955, 576455585963824, 1852617592509865},
{743549047192397, 685091042550147, 1952415336873496, 1965124675654685, 513364998442917},
},
{
{1004557076870448, 1762911374844520, 1330807633622723, 384072910939787, 953849032243810},
{2178275058221458, 257933183722891, 376684351537894, 2010189102001786, 1981824297484148},
{1332915663881114, 1286540505502549, 1741691283561518, 977214932156314, 1764059494778091},
{429702949064027, 1368332611650677, 2019867176450999, 2212258376161746, 526160996742554},
},
{
{2098932988258576, 2203688382075948, 2120400160059479, 1748488020948146, 1203264167282624},
{677131386735829, 1850249298025188, 672782146532031, 2144145693078904, 2088656272813787},
{1065622343976192, 1573853211848116, 223560413590068, 333846833073379, 27832122205830},
{1781008836504573, 917619542051793, 544322748939913, 882577394308384, 1720521246471195},
},
{
{660120928379860, 2081944024858618, 1878411111349191, 424587356517195, 2111317439894005},
{1834193977811532, 1864164086863319, 797334633289424, 150410812403062, 2085177078466389},
{1438117271371866, 783915531014482, 388731514584658, 292113935417795, 1945855002546714},
{1678140823166658, 679103239148744, 614102761596238, 1052962498997885, 1863983323810390},
},
{
{1690309392496233, 1116333140326275, 1377242323631039, 717196888780674, 82724646713353},
{1722370213432106, 74265192976253, 264239578448472, 1714909985012994, 2216984958602173},
{2010482366920922, 1294036471886319, 566466395005815, 1631955803657320, 1751698647538458},
{1073230604155753, 1159087041338551, 1664057985455483, 127472702826203, 1339591128522371},
},
{
{478053307175577, 2179515791720985, 21146535423512, 1831683844029536, 462805561553981},
{1945267486565588, 1298536818409655, 2214511796262989, 1904981051429012, 252904800782086},
{268945954671210, 222740425595395, 1208025911856230, 1080418823003555, 75929831922483},
{1884784014268948, 643868448202966, 978736549726821, 46385971089796, 1296884812292320},
},
{
{1861159462859103, 7077532564710, 963010365896826, 1938780006785270, 766241051941647},
{1778966986051906, 1713995999765361, 1394565822271816, 1366699246468722, 1213407027149475},
{1978989286560907, 2135084162045594, 1951565508865477, 671788336314416, 293123929458176},
{902608944504080, 2167765718046481, 1285718473078022, 1222562171329269, 492109027844479},
},
{
{1820807832746213, 1029220580458586, 1101997555432203, 1039081975563572, 202477981158221},
{1866134980680205, 2222325502763386, 1830284629571201, 1046966214478970, 418381946936795},
{1783460633291322, 1719505443254998, 1810489639976220, 877049370713018, 2187801198742619},
{197118243000763, 305493867565736, 518814410156522, 1656246186645170, 901894734874934},
},
{
{225454942125915, 478410476654509, 600524586037746, 643450007230715, 1018615928259319},
{1733330584845708, 881092297970296, 507039890129464, 496397090721598, 2230888519577628},
{690155664737246, 1010454785646677, 753170144375012, 1651277613844874, 1622648796364156},
{1321310321891618, 1089655277873603, 235891750867089, 815878279563688, 1709264240047556},
},
{
{805027036551342, 1387174275567452, 1156538511461704, 1465897486692171, 1208567094120903},
{2228417017817483, 202885584970535, 2182114782271881, 2077405042592934, 1029684358182774},
{460447547653983, 627817697755692, 524899434670834, 1228019344939427, 740684787777653},
{849757462467675, 447476306919899, 422618957298818, 302134659227815, 675831828440895},
},
}
ge_scalarmult :: proc "contextless" (ge, p: ^Group_Element, sc: ^Scalar) {
tmp: field.Non_Montgomery_Domain_Field_Element
field.fe_from_montgomery(&tmp, sc)
_ge_scalarmult(ge, p, &tmp)
mem.zero_explicit(&tmp, size_of(tmp))
}
ge_scalarmult_basepoint :: proc "contextless" (ge: ^Group_Element, sc: ^Scalar) {
// Something like the comb method from "Fast and compact elliptic-curve
// cryptography" Section 3.3, would be more performant, but more
// complex.
//
// - https://eprint.iacr.org/2012/309
ge_scalarmult(ge, &GE_BASEPOINT, sc)
}
ge_scalarmult_vartime :: proc "contextless" (ge, p: ^Group_Element, sc: ^Scalar) {
tmp: field.Non_Montgomery_Domain_Field_Element
field.fe_from_montgomery(&tmp, sc)
_ge_scalarmult(ge, p, &tmp, true)
}
ge_double_scalarmult_basepoint_vartime :: proc "contextless" (
ge: ^Group_Element,
a: ^Scalar,
A: ^Group_Element,
b: ^Scalar,
) {
// Strauss-Shamir, commonly referred to as the "Shamir trick",
// saves half the doublings, relative to doing this the naive way.
//
// ABGLSV-Pornin (https://eprint.iacr.org/2020/454) is faster,
// but significantly more complex, and has incompatibilities with
// mixed-order group elements.
tmp_add: Add_Scratch = ---
tmp_addend: Addend_Group_Element = ---
tmp_dbl: Double_Scratch = ---
tmp: Group_Element = ---
A_tbl: Multiply_Table = ---
mul_tbl_set(&A_tbl, A, &tmp_add)
sc_a, sc_b: field.Non_Montgomery_Domain_Field_Element
field.fe_from_montgomery(&sc_a, a)
field.fe_from_montgomery(&sc_b, b)
ge_identity(&tmp)
for i := 31; i >= 0; i = i - 1 {
limb := i / 8
shift := uint(i & 7) * 8
limb_byte_a := sc_a[limb] >> shift
limb_byte_b := sc_b[limb] >> shift
hi_a, lo_a := (limb_byte_a >> 4) & 0x0f, limb_byte_a & 0x0f
hi_b, lo_b := (limb_byte_b >> 4) & 0x0f, limb_byte_b & 0x0f
if i != 31 {
ge_double(&tmp, &tmp, &tmp_dbl)
ge_double(&tmp, &tmp, &tmp_dbl)
ge_double(&tmp, &tmp, &tmp_dbl)
ge_double(&tmp, &tmp, &tmp_dbl)
}
mul_tbl_add(&tmp, &A_tbl, hi_a, &tmp_add, &tmp_addend, true)
mul_tbl_add(&tmp, &GE_BASEPOINT_TABLE, hi_b, &tmp_add, &tmp_addend, true)
ge_double(&tmp, &tmp, &tmp_dbl)
ge_double(&tmp, &tmp, &tmp_dbl)
ge_double(&tmp, &tmp, &tmp_dbl)
ge_double(&tmp, &tmp, &tmp_dbl)
mul_tbl_add(&tmp, &A_tbl, lo_a, &tmp_add, &tmp_addend, true)
mul_tbl_add(&tmp, &GE_BASEPOINT_TABLE, lo_b, &tmp_add, &tmp_addend, true)
}
ge_set(ge, &tmp)
}
@(private)
_ge_scalarmult :: proc "contextless" (
ge, p: ^Group_Element,
sc: ^field.Non_Montgomery_Domain_Field_Element,
unsafe_is_vartime := false,
) {
// Do the simplest possible thing that works and provides adequate,
// performance, which is windowed add-then-multiply.
tmp_add: Add_Scratch = ---
tmp_addend: Addend_Group_Element = ---
tmp_dbl: Double_Scratch = ---
tmp: Group_Element = ---
p_tbl: Multiply_Table = ---
mul_tbl_set(&p_tbl, p, &tmp_add)
ge_identity(&tmp)
for i := 31; i >= 0; i = i - 1 {
limb := i / 8
shift := uint(i & 7) * 8
limb_byte := sc[limb] >> shift
hi, lo := (limb_byte >> 4) & 0x0f, limb_byte & 0x0f
if i != 31 {
ge_double(&tmp, &tmp, &tmp_dbl)
ge_double(&tmp, &tmp, &tmp_dbl)
ge_double(&tmp, &tmp, &tmp_dbl)
ge_double(&tmp, &tmp, &tmp_dbl)
}
mul_tbl_add(&tmp, &p_tbl, hi, &tmp_add, &tmp_addend, unsafe_is_vartime)
ge_double(&tmp, &tmp, &tmp_dbl)
ge_double(&tmp, &tmp, &tmp_dbl)
ge_double(&tmp, &tmp, &tmp_dbl)
ge_double(&tmp, &tmp, &tmp_dbl)
mul_tbl_add(&tmp, &p_tbl, lo, &tmp_add, &tmp_addend, unsafe_is_vartime)
}
ge_set(ge, &tmp)
if !unsafe_is_vartime {
ge_clear(&tmp)
mem.zero_explicit(&tmp_add, size_of(Add_Scratch))
mem.zero_explicit(&tmp_addend, size_of(Addend_Group_Element))
mem.zero_explicit(&tmp_dbl, size_of(Double_Scratch))
}
}
@(private)
Multiply_Table :: [15]Addend_Group_Element // 0 = inf, which is implicit.
@(private)
mul_tbl_set :: proc "contextless" (
tbl: ^Multiply_Table,
ge: ^Group_Element,
tmp_add: ^Add_Scratch,
) {
tmp: Group_Element = ---
ge_set(&tmp, ge)
ge_addend_set(&tbl[0], ge)
for i := 1; i < 15; i = i + 1 {
ge_add_addend(&tmp, &tmp, &tbl[0], tmp_add)
ge_addend_set(&tbl[i], &tmp)
}
ge_clear(&tmp)
}
@(private)
mul_tbl_add :: proc "contextless" (
ge: ^Group_Element,
tbl: ^Multiply_Table,
idx: u64,
tmp_add: ^Add_Scratch,
tmp_addend: ^Addend_Group_Element,
unsafe_is_vartime: bool,
) {
// Variable time lookup, with the addition omitted entirely if idx == 0.
if unsafe_is_vartime {
// Skip adding the point at infinity.
if idx != 0 {
ge_add_addend(ge, ge, &tbl[idx - 1], tmp_add)
}
return
}
// Constant time lookup.
tmp_addend^ = {
// Point at infinity (0, 1, 1, 0) in precomputed form
{1, 0, 0, 0, 0}, // y - x
{1, 0, 0, 0, 0}, // y + x
{0, 0, 0, 0, 0}, // t * 2d
{2, 0, 0, 0, 0}, // z * 2
}
for i := u64(1); i < 16; i = i + 1 {
_, ctrl := bits.sub_u64(0, (i ~ idx), 0)
ge_addend_conditional_assign(tmp_addend, &tbl[i - 1], int(~ctrl) & 1)
}
ge_add_addend(ge, ge, tmp_addend, tmp_add)
}

View File

@@ -15,6 +15,20 @@ fe_tighten_cast :: #force_inline proc "contextless" (
return transmute(^Tight_Field_Element)(arg1)
}
fe_clear :: proc "contextless" (
arg1: $T,
) where T == ^Tight_Field_Element || T == ^Loose_Field_Element {
mem.zero_explicit(arg1, size_of(arg1^))
}
fe_clear_vec :: proc "contextless" (
arg1: $T,
) where T == []^Tight_Field_Element || T == []^Loose_Field_Element {
for fe in arg1 {
fe_clear(fe)
}
}
fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) {
// Ignore the unused bit by copying the input and masking the bit off
// prior to deserialization.
@@ -27,12 +41,25 @@ fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte
mem.zero_explicit(&tmp1, size_of(tmp1))
}
fe_is_negative :: proc "contextless" (arg1: ^Tight_Field_Element) -> int {
tmp1: [32]byte = ---
fe_to_bytes(&tmp1, arg1)
ret := tmp1[0] & 1
mem.zero_explicit(&tmp1, size_of(tmp1))
return int(ret)
}
fe_equal :: proc "contextless" (arg1, arg2: ^Tight_Field_Element) -> int {
tmp2: [32]byte = ---
tmp1, tmp2: [32]byte = ---, ---
fe_to_bytes(&tmp1, arg1)
fe_to_bytes(&tmp2, arg2)
ret := fe_equal_bytes(arg1, &tmp2)
ret := crypto.compare_constant_time(tmp1[:], tmp2[:])
mem.zero_explicit(&tmp1, size_of(tmp1))
mem.zero_explicit(&tmp2, size_of(tmp2))
return ret
@@ -67,25 +94,37 @@ fe_carry_pow2k :: proc "contextless" (
}
}
fe_carry_add :: #force_inline proc "contextless" (out1, arg1, arg2: ^Tight_Field_Element) {
fe_add(fe_relax_cast(out1), arg1, arg2)
fe_carry(out1, fe_relax_cast(out1))
}
fe_carry_sub :: #force_inline proc "contextless" (out1, arg1, arg2: ^Tight_Field_Element) {
fe_sub(fe_relax_cast(out1), arg1, arg2)
fe_carry(out1, fe_relax_cast(out1))
}
fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) {
fe_opp(fe_relax_cast(out1), arg1)
fe_carry(out1, fe_relax_cast(out1))
}
fe_carry_invsqrt :: proc "contextless" (
fe_carry_sqrt_ratio_m1 :: proc "contextless" (
out1: ^Tight_Field_Element,
arg1: ^Loose_Field_Element,
arg1: ^Loose_Field_Element, // u
arg2: ^Loose_Field_Element, // v
) -> int {
// Inverse square root taken from Monocypher.
// SQRT_RATIO_M1(u, v) from RFC 9496 - 4.2, based on the inverse
// square root from Monocypher.
w: Tight_Field_Element = ---
fe_carry_mul(&w, arg1, arg2) // u * v
// r = tmp1 = u * w^((p-5)/8)
tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, ---
// t0 = x^((p-5)/8)
// Can be achieved with a simple double & add ladder,
// but it would be slower.
fe_carry_pow2k(&tmp1, arg1, 1)
fe_carry_pow2k(&tmp1, fe_relax_cast(&w), 1)
fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 2)
fe_carry_mul(&tmp2, arg1, fe_relax_cast(&tmp2))
fe_carry_mul(&tmp2, fe_relax_cast(&w), fe_relax_cast(&tmp2))
fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&tmp2))
fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 1)
fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
@@ -104,48 +143,49 @@ fe_carry_invsqrt :: proc "contextless" (
fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 50)
fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1))
fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 2)
fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1)
fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&w)) // w^((p-5)/8)
// quartic = x^((p-1)/4)
quartic := &tmp2
fe_carry_square(quartic, fe_relax_cast(&tmp1))
fe_carry_mul(quartic, fe_relax_cast(quartic), arg1)
fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1) // u * w^((p-5)/8)
// Serialize quartic once to save on repeated serialization/sanitization.
quartic_buf: [32]byte = ---
fe_to_bytes(&quartic_buf, quartic)
check := &tmp3
// Serialize `check` once to save on repeated serialization.
r, check := &tmp1, &tmp2
b: [32]byte = ---
fe_carry_square(check, fe_relax_cast(r))
fe_carry_mul(check, fe_relax_cast(check), arg2) // check * v
fe_to_bytes(&b, check)
fe_one(check)
p1 := fe_equal_bytes(check, &quartic_buf)
fe_carry_opp(check, check)
m1 := fe_equal_bytes(check, &quartic_buf)
fe_carry_opp(check, &SQRT_M1)
ms := fe_equal_bytes(check, &quartic_buf)
u, neg_u, neg_u_i := &tmp3, &w, check
fe_carry(u, arg1)
fe_carry_opp(neg_u, u)
fe_carry_mul(neg_u_i, fe_relax_cast(neg_u), fe_relax_cast(&FE_SQRT_M1))
// if quartic == -1 or sqrt(-1)
// then isr = x^((p-1)/4) * sqrt(-1)
// else isr = x^((p-1)/4)
fe_carry_mul(out1, fe_relax_cast(&tmp1), fe_relax_cast(&SQRT_M1))
fe_cond_assign(out1, &tmp1, (m1 | ms) ~ 1)
correct_sign_sqrt := fe_equal_bytes(u, &b)
flipped_sign_sqrt := fe_equal_bytes(neg_u, &b)
flipped_sign_sqrt_i := fe_equal_bytes(neg_u_i, &b)
mem.zero_explicit(&tmp1, size_of(tmp1))
mem.zero_explicit(&tmp2, size_of(tmp2))
mem.zero_explicit(&tmp3, size_of(tmp3))
mem.zero_explicit(&quartic_buf, size_of(quartic_buf))
r_prime := check
fe_carry_mul(r_prime, fe_relax_cast(r), fe_relax_cast(&FE_SQRT_M1))
fe_cond_assign(r, r_prime, flipped_sign_sqrt | flipped_sign_sqrt_i)
return p1 | m1
// Pick the non-negative square root.
fe_carry_opp(r_prime, r)
fe_cond_select(out1, r, r_prime, fe_is_negative(r))
fe_clear_vec([]^Tight_Field_Element{&w, &tmp1, &tmp2, &tmp3})
mem.zero_explicit(&b, size_of(b))
return correct_sign_sqrt | flipped_sign_sqrt
}
fe_carry_inv :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
tmp1: Tight_Field_Element
fe_carry_square(&tmp1, arg1)
_ = fe_carry_invsqrt(&tmp1, fe_relax_cast(&tmp1))
_ = fe_carry_sqrt_ratio_m1(&tmp1, fe_relax_cast(&FE_ONE), fe_relax_cast(&tmp1))
fe_carry_square(&tmp1, fe_relax_cast(&tmp1))
fe_carry_mul(out1, fe_relax_cast(&tmp1), arg1)
mem.zero_explicit(&tmp1, size_of(tmp1))
fe_clear(&tmp1)
}
fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
@@ -196,3 +236,21 @@ fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_El
out1[3], out2[3] = x4, y4
out1[4], out2[4] = x5, y5
}
@(optimization_mode = "none")
fe_cond_select :: #force_no_inline proc "contextless" (
out1, arg1, arg2: $T,
arg3: int,
) where T == ^Tight_Field_Element || T == ^Loose_Field_Element {
mask := (u64(arg3) * 0xffffffffffffffff)
x1 := ((mask & arg2[0]) | ((~mask) & arg1[0]))
x2 := ((mask & arg2[1]) | ((~mask) & arg1[1]))
x3 := ((mask & arg2[2]) | ((~mask) & arg1[2]))
x4 := ((mask & arg2[3]) | ((~mask) & arg1[3]))
x5 := ((mask & arg2[4]) | ((~mask) & arg1[4]))
out1[0] = x1
out1[1] = x2
out1[2] = x3
out1[3] = x4
out1[4] = x5
}

View File

@@ -42,7 +42,10 @@ import "core:math/bits"
Loose_Field_Element :: distinct [5]u64
Tight_Field_Element :: distinct [5]u64
SQRT_M1 := Tight_Field_Element {
FE_ZERO := Tight_Field_Element{0, 0, 0, 0, 0}
FE_ONE := Tight_Field_Element{1, 0, 0, 0, 0}
FE_SQRT_M1 := Tight_Field_Element {
1718705420411056,
234908883556509,
2233514472574048,

View File

@@ -20,6 +20,10 @@ _TWO_336 := Montgomery_Domain_Field_Element {
0x3d217f5be65cb5c,
}
fe_clear :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) {
mem.zero_explicit(arg1, size_of(Montgomery_Domain_Field_Element))
}
fe_from_bytes :: proc "contextless" (
out1: ^Montgomery_Domain_Field_Element,
arg1: ^[32]byte,
@@ -85,7 +89,7 @@ fe_from_bytes_wide :: proc "contextless" (
fe_mul(&tmp, &tmp, &_TWO_336) // c * 2^336
fe_add(out1, out1, &tmp) // a + b * 2^168 + c * 2^336
mem.zero_explicit(&tmp, size_of(tmp))
fe_clear(&tmp)
}
@(private)
@@ -125,7 +129,7 @@ fe_equal :: proc "contextless" (arg1, arg2: ^Montgomery_Domain_Field_Element) ->
// which will be 1.
_, borrow := bits.sub_u64(fe_non_zero(&tmp), 1, 0)
mem.zero_explicit(&tmp, size_of(tmp))
fe_clear(&tmp)
return int(borrow)
}

View File

@@ -94,13 +94,8 @@ _scalarmult :: proc "contextless" (out, scalar, point: ^[32]byte) {
field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2))
field.fe_to_bytes(out, &x2)
mem.zero_explicit(&x1, size_of(x1))
mem.zero_explicit(&x2, size_of(x2))
mem.zero_explicit(&x3, size_of(x3))
mem.zero_explicit(&z2, size_of(z2))
mem.zero_explicit(&z3, size_of(z3))
mem.zero_explicit(&t0, size_of(t0))
mem.zero_explicit(&t1, size_of(t1))
field.fe_clear_vec([]^field.Tight_Field_Element{&x1, &x2, &x3, &z2, &z3})
field.fe_clear_vec([]^field.Loose_Field_Element{&t0, &t1})
}
// scalarmult "multiplies" the provided scalar and point, and writes the
@@ -137,6 +132,5 @@ scalarmult :: proc(dst, scalar, point: []byte) {
// scalarmult_basepoint "multiplies" the provided scalar with the X25519
// base point and writes the resulting point to dst.
scalarmult_basepoint :: proc(dst, scalar: []byte) {
// TODO/perf: Switch to using a precomputed table.
scalarmult(dst, scalar, _BASE_POINT[:])
}