From 563c52741903d3a930fd4c4f8128c275fefc1399 Mon Sep 17 00:00:00 2001 From: Yawning Angel Date: Thu, 21 Mar 2024 02:17:59 +0900 Subject: [PATCH] core/crypto/_edwards25519: Initial import --- core/crypto/_edwards25519/edwards25519.odin | 428 ++++++++++++++++++ .../_edwards25519/edwards25519_scalar.odin | 61 +++ .../edwards25519_scalar_mul.odin | 288 ++++++++++++ core/crypto/_fiat/field_curve25519/field.odin | 134 ++++-- .../_fiat/field_curve25519/field51.odin | 5 +- .../crypto/_fiat/field_scalar25519/field.odin | 8 +- core/crypto/x25519/x25519.odin | 10 +- 7 files changed, 885 insertions(+), 49 deletions(-) create mode 100644 core/crypto/_edwards25519/edwards25519.odin create mode 100644 core/crypto/_edwards25519/edwards25519_scalar.odin create mode 100644 core/crypto/_edwards25519/edwards25519_scalar_mul.odin diff --git a/core/crypto/_edwards25519/edwards25519.odin b/core/crypto/_edwards25519/edwards25519.odin new file mode 100644 index 000000000..952bb9ef8 --- /dev/null +++ b/core/crypto/_edwards25519/edwards25519.odin @@ -0,0 +1,428 @@ +package _edwards25519 + +/* +This implements the edwards25519 composite-order group, primarily for +the purpose of implementing X25519, Ed25519, and ristretto255. Use of +this package for other purposes is NOT RECOMMENDED. + +See: +- https://eprint.iacr.org/2011/368.pdf +- https://datatracker.ietf.org/doc/html/rfc8032 +- https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html +*/ + +import "base:intrinsics" +import "core:crypto" +import field "core:crypto/_fiat/field_curve25519" +import "core:mem" + +// Group_Element is an edwards25519 group element, as extended homogeneous +// coordinates, which represents the affine point `(x, y)` as `(X, Y, Z, T)`, +// with the relations `x = X/Z`, `y = Y/Z`, and `x * y = T/Z`. 
+// // d = -121665/121666 = 37095705934669439343138083508754565189542113879843219016388785533085940283555 // a = -1 // // Notes: // - There is considerable scope for optimization, however that // will not change the external API, and this is simple and reasonably // performant. // - The API deliberately makes it hard to create arbitrary group // elements that are not on the curve. // - The group element decoding routine takes the opinionated stance of // rejecting non-canonical encodings. + +FE_D := field.Tight_Field_Element { + 929955233495203, + 466365720129213, + 1662059464998953, + 2033849074728123, + 1442794654840575, +} +@(private) +FE_A := field.Tight_Field_Element { + 2251799813685228, + 2251799813685247, + 2251799813685247, + 2251799813685247, + 2251799813685247, +} +@(private) +FE_D2 := field.Tight_Field_Element { + 1859910466990425, + 932731440258426, + 1072319116312658, + 1815898335770999, + 633789495995903, +} +@(private) +GE_BASEPOINT := Group_Element { + field.Tight_Field_Element { + 1738742601995546, + 1146398526822698, + 2070867633025821, + 562264141797630, + 587772402128613, + }, + field.Tight_Field_Element { + 1801439850948184, + 1351079888211148, + 450359962737049, + 900719925474099, + 1801439850948198, + }, + field.Tight_Field_Element{1, 0, 0, 0, 0}, + field.Tight_Field_Element { + 1841354044333475, + 16398895984059, + 755974180946558, + 900171276175154, + 1821297809914039, + }, +} +GE_IDENTITY := Group_Element { + field.Tight_Field_Element{0, 0, 0, 0, 0}, + field.Tight_Field_Element{1, 0, 0, 0, 0}, + field.Tight_Field_Element{1, 0, 0, 0, 0}, + field.Tight_Field_Element{0, 0, 0, 0, 0}, +} + +Group_Element :: struct { + x: field.Tight_Field_Element, + y: field.Tight_Field_Element, + z: field.Tight_Field_Element, + t: field.Tight_Field_Element, +} + +ge_clear :: proc "contextless" (ge: ^Group_Element) { + mem.zero_explicit(ge, size_of(Group_Element)) +} + +ge_set :: proc "contextless" (ge, a: ^Group_Element) { + field.fe_set(&ge.x, 
&a.x) + field.fe_set(&ge.y, &a.y) + field.fe_set(&ge.z, &a.z) + field.fe_set(&ge.t, &a.t) +} + +@(require_results) +ge_set_bytes :: proc "contextless" (ge: ^Group_Element, b: []byte) -> bool { + if len(b) != 32 { + intrinsics.trap() + } + b_ := transmute(^[32]byte)(raw_data(b)) + + // Do the work in a scratch element, so that ge is unchanged on + // failure. + tmp: Group_Element = --- + defer ge_clear(&tmp) + field.fe_one(&tmp.z) // Z = 1 + + // The encoding is the y-coordinate, with the x-coordinate polarity + // (odd/even) encoded in the MSB. + field.fe_from_bytes(&tmp.y, b_) // ignores high bit + + // Recover the candidate x-coordinate via the curve equation: + // x^2 = (y^2 - 1) / (d * y^2 + 1) (mod p) + + fe_tmp := &tmp.t // Use this to store intermediaries. + fe_one := &tmp.z + + // x = num = y^2 - 1 + field.fe_carry_square(fe_tmp, field.fe_relax_cast(&tmp.y)) // fe_tmp = y^2 + field.fe_carry_sub(&tmp.x, fe_tmp, fe_one) + + // den = d * y^2 + 1 + field.fe_carry_mul(fe_tmp, field.fe_relax_cast(fe_tmp), field.fe_relax_cast(&FE_D)) + field.fe_carry_add(fe_tmp, fe_tmp, fe_one) + + // x = sqrt(num/den) + is_square := field.fe_carry_sqrt_ratio_m1( + &tmp.x, + field.fe_relax_cast(&tmp.x), + field.fe_relax_cast(fe_tmp), + ) + if is_square == 0 { + return false + } + + // Pick the right x-coordinate. + field.fe_cond_negate(&tmp.x, &tmp.x, int(b[31] >> 7)) + + // t = x * y + field.fe_carry_mul(&tmp.t, field.fe_relax_cast(&tmp.x), field.fe_relax_cast(&tmp.y)) + + // Reject non-canonical encodings of ge. 
+ buf: [32]byte = --- + field.fe_to_bytes(&buf, &tmp.y) + buf[31] |= byte(field.fe_is_negative(&tmp.x)) << 7 + is_canonical := crypto.compare_constant_time(b, buf[:]) + + ge_cond_assign(ge, &tmp, is_canonical) + + mem.zero_explicit(&buf, size_of(buf)) + + return is_canonical == 1 +} + +ge_bytes :: proc "contextless" (ge: ^Group_Element, dst: []byte) { + if len(dst) != 32 { + intrinsics.trap() + } + dst_ := transmute(^[32]byte)(raw_data(dst)) + + // Convert the element to affine (x, y) representation. + x, y, z_inv: field.Tight_Field_Element = ---, ---, --- + field.fe_carry_inv(&z_inv, field.fe_relax_cast(&ge.z)) + field.fe_carry_mul(&x, field.fe_relax_cast(&ge.x), field.fe_relax_cast(&z_inv)) + field.fe_carry_mul(&y, field.fe_relax_cast(&ge.y), field.fe_relax_cast(&z_inv)) + + // Encode the y-coordinate. + field.fe_to_bytes(dst_, &y) + + // Copy the sign (low bit of the canonical encoding) of the x-coordinate + // to the most significant bit of the encoded y-coordinate. + dst_[31] |= byte(field.fe_is_negative(&x)) << 7 + + field.fe_clear_vec([]^field.Tight_Field_Element{&x, &y, &z_inv}) +} + +ge_identity :: proc "contextless" (ge: ^Group_Element) { + field.fe_zero(&ge.x) + field.fe_one(&ge.y) + field.fe_one(&ge.z) + field.fe_zero(&ge.t) +} + +ge_generator :: proc "contextless" (ge: ^Group_Element) { + ge_set(ge, &GE_BASEPOINT) +} + +@(private) +Addend_Group_Element :: struct { + y2_minus_x2: field.Loose_Field_Element, // t1 + y2_plus_x2: field.Loose_Field_Element, // t3 + k_times_t2: field.Tight_Field_Element, // t4 + two_times_z2: field.Loose_Field_Element, // t5 +} + +@(private) +ge_addend_set :: proc "contextless" (ge_a: ^Addend_Group_Element, ge: ^Group_Element) { + field.fe_sub(&ge_a.y2_minus_x2, &ge.y, &ge.x) + field.fe_add(&ge_a.y2_plus_x2, &ge.y, &ge.x) + field.fe_carry_mul(&ge_a.k_times_t2, field.fe_relax_cast(&FE_D2), field.fe_relax_cast(&ge.t)) + field.fe_add(&ge_a.two_times_z2, &ge.z, &ge.z) +} + +@(private) +ge_addend_conditional_assign :: proc "contextless" (ge_a, a: 
^Addend_Group_Element, ctrl: int) { + field.fe_cond_select(&ge_a.y2_minus_x2, &ge_a.y2_minus_x2, &a.y2_minus_x2, ctrl) + field.fe_cond_select(&ge_a.y2_plus_x2, &ge_a.y2_plus_x2, &a.y2_plus_x2, ctrl) + field.fe_cond_select(&ge_a.k_times_t2, &ge_a.k_times_t2, &a.k_times_t2, ctrl) + field.fe_cond_select(&ge_a.two_times_z2, &ge_a.two_times_z2, &a.two_times_z2, ctrl) +} + +@(private) +Add_Scratch :: struct { + A, B, C, D: field.Tight_Field_Element, + E, F, G, H: field.Loose_Field_Element, + t0, t2: field.Loose_Field_Element, +} + +ge_add :: proc "contextless" (ge, a, b: ^Group_Element) { + b_: Addend_Group_Element = --- + ge_addend_set(&b_, b) + + scratch: Add_Scratch = --- + ge_add_addend(ge, a, &b_, &scratch) + + mem.zero_explicit(&b_, size_of(Addend_Group_Element)) + mem.zero_explicit(&scratch, size_of(Add_Scratch)) +} + +@(private) +ge_add_addend :: proc "contextless" ( + ge, a: ^Group_Element, + b: ^Addend_Group_Element, + scratch: ^Add_Scratch, +) { + // https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#addition-add-2008-hwcd-3 + // Assumptions: k=2*d. + // + // t0 = Y1-X1 + // t1 = Y2-X2 + // A = t0*t1 + // t2 = Y1+X1 + // t3 = Y2+X2 + // B = t2*t3 + // t4 = k*T2 + // C = T1*t4 + // t5 = 2*Z2 + // D = Z1*t5 + // E = B-A + // F = D-C + // G = D+C + // H = B+A + // X3 = E*F + // Y3 = G*H + // T3 = E*H + // Z3 = F*G + // + // In order to make the scalar multiply faster, the addend is provided + // as an `Addend_Group_Element` with t1, t3, t4, and t5 precomputed, as + // it is trivially obvious that those are the only values used by the + // formula that are directly dependent on `b`, and are only dependent + // on `b` and constants. This saves 1 sub, 2 adds, and 1 multiply, + // each time the intermediate representation can be reused. 
+ + A, B, C, D := &scratch.A, &scratch.B, &scratch.C, &scratch.D + E, F, G, H := &scratch.E, &scratch.F, &scratch.G, &scratch.H + t0, t2 := &scratch.t0, &scratch.t2 + + field.fe_sub(t0, &a.y, &a.x) + t1 := &b.y2_minus_x2 + field.fe_carry_mul(A, t0, t1) + field.fe_add(t2, &a.y, &a.x) + t3 := &b.y2_plus_x2 + field.fe_carry_mul(B, t2, t3) + t4 := &b.k_times_t2 + field.fe_carry_mul(C, field.fe_relax_cast(&a.t), field.fe_relax_cast(t4)) + t5 := &b.two_times_z2 + field.fe_carry_mul(D, field.fe_relax_cast(&a.z), t5) + field.fe_sub(E, B, A) + field.fe_sub(F, D, C) + field.fe_add(G, D, C) + field.fe_add(H, B, A) + field.fe_carry_mul(&ge.x, E, F) + field.fe_carry_mul(&ge.y, G, H) + field.fe_carry_mul(&ge.t, E, H) + field.fe_carry_mul(&ge.z, F, G) +} + +@(private) +Double_Scratch :: struct { + A, B, C, D, G: field.Tight_Field_Element, + t0, t2, t3: field.Tight_Field_Element, + E, F, H: field.Loose_Field_Element, + t1: field.Loose_Field_Element, +} + +ge_double :: proc "contextless" (ge, a: ^Group_Element, scratch: ^Double_Scratch = nil) { + // https://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-dbl-2008-hwcd + // + // A = X1^2 + // B = Y1^2 + // t0 = Z1^2 + // C = 2*t0 + // D = a*A + // t1 = X1+Y1 + // t2 = t1^2 + // t3 = t2-A + // E = t3-B + // G = D+B + // F = G-C + // H = D-B + // X3 = E*F + // Y3 = G*H + // T3 = E*H + // Z3 = F*G + + sanitize, scratch := scratch == nil, scratch + if sanitize { + tmp: Double_Scratch = --- + scratch = &tmp + } + + A, B, C, D, G := &scratch.A, &scratch.B, &scratch.C, &scratch.D, &scratch.G + t0, t2, t3 := &scratch.t0, &scratch.t2, &scratch.t3 + E, F, H := &scratch.E, &scratch.F, &scratch.H + t1 := &scratch.t1 + + field.fe_carry_square(A, field.fe_relax_cast(&a.x)) + field.fe_carry_square(B, field.fe_relax_cast(&a.y)) + field.fe_carry_square(t0, field.fe_relax_cast(&a.z)) + field.fe_carry_add(C, t0, t0) + field.fe_carry_mul(D, field.fe_relax_cast(&FE_A), field.fe_relax_cast(A)) + field.fe_add(t1, &a.x, &a.y) + 
field.fe_carry_square(t2, t1) + field.fe_carry_sub(t3, t2, A) + field.fe_sub(E, t3, B) + field.fe_carry_add(G, D, B) + field.fe_sub(F, G, C) + field.fe_sub(H, D, B) + G_ := field.fe_relax_cast(G) + field.fe_carry_mul(&ge.x, E, F) + field.fe_carry_mul(&ge.y, G_, H) + field.fe_carry_mul(&ge.t, E, H) + field.fe_carry_mul(&ge.z, F, G_) + + if sanitize { + mem.zero_explicit(scratch, size_of(Double_Scratch)) + } +} + +ge_negate :: proc "contextless" (ge, a: ^Group_Element) { + field.fe_carry_opp(&ge.x, &a.x) + field.fe_set(&ge.y, &a.y) + field.fe_set(&ge.z, &a.z) + field.fe_carry_opp(&ge.t, &a.t) +} + +ge_cond_negate :: proc "contextless" (ge, a: ^Group_Element, ctrl: int) { + tmp: Group_Element = --- + ge_negate(&tmp, a) + ge_cond_assign(ge, &tmp, ctrl) + + ge_clear(&tmp) +} + +ge_cond_assign :: proc "contextless" (ge, a: ^Group_Element, ctrl: int) { + field.fe_cond_assign(&ge.x, &a.x, ctrl) + field.fe_cond_assign(&ge.y, &a.y, ctrl) + field.fe_cond_assign(&ge.z, &a.z, ctrl) + field.fe_cond_assign(&ge.t, &a.t, ctrl) +} + +ge_cond_select :: proc "contextless" (ge, a, b: ^Group_Element, ctrl: int) { + field.fe_cond_select(&ge.x, &a.x, &b.x, ctrl) + field.fe_cond_select(&ge.y, &a.y, &b.y, ctrl) + field.fe_cond_select(&ge.z, &a.z, &b.z, ctrl) + field.fe_cond_select(&ge.t, &a.t, &b.t, ctrl) +} + +@(require_results) +ge_equal :: proc "contextless" (a, b: ^Group_Element) -> int { + // (x, y) ?= (x', y') -> (X/Z, Y/Z) ?= (X'/Z', Y'/Z') + // X/Z ?= X'/Z', Y/Z ?= Y'/Z' -> X*Z' ?= X'*Z, Y*Z' ?= Y'*Z + ax_bz, bx_az, ay_bz, by_az: field.Tight_Field_Element = ---, ---, ---, --- + field.fe_carry_mul(&ax_bz, field.fe_relax_cast(&a.x), field.fe_relax_cast(&b.z)) + field.fe_carry_mul(&bx_az, field.fe_relax_cast(&b.x), field.fe_relax_cast(&a.z)) + field.fe_carry_mul(&ay_bz, field.fe_relax_cast(&a.y), field.fe_relax_cast(&b.z)) + field.fe_carry_mul(&by_az, field.fe_relax_cast(&b.y), field.fe_relax_cast(&a.z)) + + ret := field.fe_equal(&ax_bz, &bx_az) & field.fe_equal(&ay_bz, &by_az) + + 
field.fe_clear_vec([]^field.Tight_Field_Element{&ax_bz, &ay_bz, &bx_az, &by_az}) + + return ret +} + +@(require_results) +ge_is_small_order :: proc "contextless" (ge: ^Group_Element) -> bool { + tmp: Group_Element = --- + ge_double(&tmp, ge) + ge_double(&tmp, &tmp) + ge_double(&tmp, &tmp) + return ge_equal(&tmp, &GE_IDENTITY) == 1 +} + +@(require_results) +ge_in_prime_order_subgroup_vartime :: proc "contextless" (ge: ^Group_Element) -> bool { + // This is currently *very* expensive. The faster method would be + // something like (https://eprint.iacr.org/2022/1164.pdf), however + // that is a ~50% speedup, and a lot of added complexity for something + // that is better solved by "just use ristretto255". + tmp: Group_Element = --- + _ge_scalarmult(&tmp, ge, &SC_ELL, true) + return ge_equal(&tmp, &GE_IDENTITY) == 1 +} diff --git a/core/crypto/_edwards25519/edwards25519_scalar.odin b/core/crypto/_edwards25519/edwards25519_scalar.odin new file mode 100644 index 000000000..2644fe5f7 --- /dev/null +++ b/core/crypto/_edwards25519/edwards25519_scalar.odin @@ -0,0 +1,61 @@ +package _edwards25519 + +import "base:intrinsics" +import field "core:crypto/_fiat/field_scalar25519" +import "core:mem" + +Scalar :: field.Montgomery_Domain_Field_Element + +// WARNING: This is non-canonical and only to be used when checking if +// a group element is on the prime-order subgroup. 
+@(private) +SC_ELL := field.Non_Montgomery_Domain_Field_Element { + field.ELL[0], + field.ELL[1], + field.ELL[2], + field.ELL[3], +} + +sc_set_u64 :: proc "contextless" (sc: ^Scalar, i: u64) { + tmp := field.Non_Montgomery_Domain_Field_Element{i, 0, 0, 0} + field.fe_to_montgomery(sc, &tmp) + + mem.zero_explicit(&tmp, size_of(tmp)) +} + +@(require_results) +sc_set_bytes :: proc "contextless" (sc: ^Scalar, b: []byte) -> bool { + if len(b) != 32 { + intrinsics.trap() + } + b_ := transmute(^[32]byte)(raw_data(b)) + return field.fe_from_bytes(sc, b_) +} + +sc_set_bytes_rfc8032 :: proc "contextless" (sc: ^Scalar, b: []byte) { + if len(b) != 32 { + intrinsics.trap() + } + b_ := transmute(^[32]byte)(raw_data(b)) + field.fe_from_bytes_rfc8032(sc, b_) +} + +sc_clear :: proc "contextless" (sc: ^Scalar) { + mem.zero_explicit(sc, size_of(Scalar)) +} + +sc_set :: field.fe_set +sc_set_bytes_wide :: field.fe_from_bytes_wide +sc_bytes :: field.fe_to_bytes + +sc_zero :: field.fe_zero +sc_one :: field.fe_one + +sc_add :: field.fe_add +sc_sub :: field.fe_sub +sc_negate :: field.fe_opp +sc_mul :: field.fe_mul +sc_square :: field.fe_square + +sc_cond_assign :: field.fe_cond_assign +sc_equal :: field.fe_equal diff --git a/core/crypto/_edwards25519/edwards25519_scalar_mul.odin b/core/crypto/_edwards25519/edwards25519_scalar_mul.odin new file mode 100644 index 000000000..757a51257 --- /dev/null +++ b/core/crypto/_edwards25519/edwards25519_scalar_mul.odin @@ -0,0 +1,288 @@ +package _edwards25519 + +import field "core:crypto/_fiat/field_scalar25519" +import "core:math/bits" +import "core:mem" + +// GE_BASEPOINT_TABLE is 1 * G, ... 15 * G, in precomputed format. +// +// Note: When generating, the values were reduced to Tight_Field_Element +// ranges, even though that is not required. 
+@(private) +GE_BASEPOINT_TABLE := Multiply_Table { + { + {62697248952638, 204681361388450, 631292143396476, 338455783676468, 1213667448819585}, + {1288382639258501, 245678601348599, 269427782077623, 1462984067271730, 137412439391563}, + {301289933810280, 1259582250014073, 1422107436869536, 796239922652654, 1953934009299142}, + {2, 0, 0, 0, 0}, + }, + { + {1519297034332653, 1098796920435767, 1823476547744119, 808144629470969, 2110930855619772}, + {338005982828284, 1667856962156925, 100399270107451, 1604566703601691, 1950338038771369}, + {1920505767731247, 1443759578976892, 1659852098357048, 1484431291070208, 275018744912646}, + {763163817085987, 2195095074806923, 2167883174351839, 1868059999999762, 911071066608705}, + }, + { + {960627541894068, 1314966688943942, 1126875971034044, 2059608312958945, 605975666152586}, + {1714478358025626, 2209607666607510, 1600912834284834, 496072478982142, 481970031861896}, + {851735079403194, 1088965826757164, 141569479297499, 602804610059257, 2004026468601520}, + {197585529552380, 324719066578543, 564481854250498, 1173818332764578, 35452976395676}, + }, + { + {1152980410747203, 2196804280851952, 25745194962557, 1915167295473129, 1266299690309224}, + {809905889679060, 979732230071345, 1509972345538142, 188492426534402, 818965583123815}, + {997685409185036, 1451818320876327, 2126681166774509, 2000509606057528, 235432372486854}, + {887734189279642, 1460338685162044, 877378220074262, 102436391401299, 153369156847490}, + }, + { + {2056621900836770, 1821657694132497, 1627986892909426, 1163363868678833, 1108873376459226}, + {1187697490593623, 1066539945237335, 885654531892000, 1357534489491782, 359370291392448}, + {1509033452137525, 1305318174298508, 613642471748944, 1987256352550234, 1044283663101541}, + {220105720697037, 387661783287620, 328296827867762, 360035589590664, 795213236824054}, + }, + { + {1820794733038396, 1612235121681074, 757405923441402, 1094031020892801, 231025333128907}, + {1639067873254194, 1484176557946322, 
300800382144789, 1329915446659183, 1211704578730455}, + {641900794791527, 1711751746971612, 179044712319955, 576455585963824, 1852617592509865}, + {743549047192397, 685091042550147, 1952415336873496, 1965124675654685, 513364998442917}, + }, + { + {1004557076870448, 1762911374844520, 1330807633622723, 384072910939787, 953849032243810}, + {2178275058221458, 257933183722891, 376684351537894, 2010189102001786, 1981824297484148}, + {1332915663881114, 1286540505502549, 1741691283561518, 977214932156314, 1764059494778091}, + {429702949064027, 1368332611650677, 2019867176450999, 2212258376161746, 526160996742554}, + }, + { + {2098932988258576, 2203688382075948, 2120400160059479, 1748488020948146, 1203264167282624}, + {677131386735829, 1850249298025188, 672782146532031, 2144145693078904, 2088656272813787}, + {1065622343976192, 1573853211848116, 223560413590068, 333846833073379, 27832122205830}, + {1781008836504573, 917619542051793, 544322748939913, 882577394308384, 1720521246471195}, + }, + { + {660120928379860, 2081944024858618, 1878411111349191, 424587356517195, 2111317439894005}, + {1834193977811532, 1864164086863319, 797334633289424, 150410812403062, 2085177078466389}, + {1438117271371866, 783915531014482, 388731514584658, 292113935417795, 1945855002546714}, + {1678140823166658, 679103239148744, 614102761596238, 1052962498997885, 1863983323810390}, + }, + { + {1690309392496233, 1116333140326275, 1377242323631039, 717196888780674, 82724646713353}, + {1722370213432106, 74265192976253, 264239578448472, 1714909985012994, 2216984958602173}, + {2010482366920922, 1294036471886319, 566466395005815, 1631955803657320, 1751698647538458}, + {1073230604155753, 1159087041338551, 1664057985455483, 127472702826203, 1339591128522371}, + }, + { + {478053307175577, 2179515791720985, 21146535423512, 1831683844029536, 462805561553981}, + {1945267486565588, 1298536818409655, 2214511796262989, 1904981051429012, 252904800782086}, + {268945954671210, 222740425595395, 1208025911856230, 
1080418823003555, 75929831922483}, + {1884784014268948, 643868448202966, 978736549726821, 46385971089796, 1296884812292320}, + }, + { + {1861159462859103, 7077532564710, 963010365896826, 1938780006785270, 766241051941647}, + {1778966986051906, 1713995999765361, 1394565822271816, 1366699246468722, 1213407027149475}, + {1978989286560907, 2135084162045594, 1951565508865477, 671788336314416, 293123929458176}, + {902608944504080, 2167765718046481, 1285718473078022, 1222562171329269, 492109027844479}, + }, + { + {1820807832746213, 1029220580458586, 1101997555432203, 1039081975563572, 202477981158221}, + {1866134980680205, 2222325502763386, 1830284629571201, 1046966214478970, 418381946936795}, + {1783460633291322, 1719505443254998, 1810489639976220, 877049370713018, 2187801198742619}, + {197118243000763, 305493867565736, 518814410156522, 1656246186645170, 901894734874934}, + }, + { + {225454942125915, 478410476654509, 600524586037746, 643450007230715, 1018615928259319}, + {1733330584845708, 881092297970296, 507039890129464, 496397090721598, 2230888519577628}, + {690155664737246, 1010454785646677, 753170144375012, 1651277613844874, 1622648796364156}, + {1321310321891618, 1089655277873603, 235891750867089, 815878279563688, 1709264240047556}, + }, + { + {805027036551342, 1387174275567452, 1156538511461704, 1465897486692171, 1208567094120903}, + {2228417017817483, 202885584970535, 2182114782271881, 2077405042592934, 1029684358182774}, + {460447547653983, 627817697755692, 524899434670834, 1228019344939427, 740684787777653}, + {849757462467675, 447476306919899, 422618957298818, 302134659227815, 675831828440895}, + }, +} + +ge_scalarmult :: proc "contextless" (ge, p: ^Group_Element, sc: ^Scalar) { + tmp: field.Non_Montgomery_Domain_Field_Element + field.fe_from_montgomery(&tmp, sc) + + _ge_scalarmult(ge, p, &tmp) + + mem.zero_explicit(&tmp, size_of(tmp)) +} + +ge_scalarmult_basepoint :: proc "contextless" (ge: ^Group_Element, sc: ^Scalar) { + // Something like the comb method 
from "Fast and compact elliptic-curve + // cryptography" Section 3.3, would be more performant, but more + // complex. + // + // - https://eprint.iacr.org/2012/309 + ge_scalarmult(ge, &GE_BASEPOINT, sc) +} + +ge_scalarmult_vartime :: proc "contextless" (ge, p: ^Group_Element, sc: ^Scalar) { + tmp: field.Non_Montgomery_Domain_Field_Element + field.fe_from_montgomery(&tmp, sc) + + _ge_scalarmult(ge, p, &tmp, true) +} + +ge_double_scalarmult_basepoint_vartime :: proc "contextless" ( + ge: ^Group_Element, + a: ^Scalar, + A: ^Group_Element, + b: ^Scalar, +) { + // Strauss-Shamir, commonly referred to as the "Shamir trick", + // saves half the doublings, relative to doing this the naive way. + // + // ABGLSV-Pornin (https://eprint.iacr.org/2020/454) is faster, + // but significantly more complex, and has incompatibilities with + // mixed-order group elements. + + tmp_add: Add_Scratch = --- + tmp_addend: Addend_Group_Element = --- + tmp_dbl: Double_Scratch = --- + tmp: Group_Element = --- + + A_tbl: Multiply_Table = --- + mul_tbl_set(&A_tbl, A, &tmp_add) + + sc_a, sc_b: field.Non_Montgomery_Domain_Field_Element + field.fe_from_montgomery(&sc_a, a) + field.fe_from_montgomery(&sc_b, b) + + ge_identity(&tmp) + for i := 31; i >= 0; i = i - 1 { + limb := i / 8 + shift := uint(i & 7) * 8 + + limb_byte_a := sc_a[limb] >> shift + limb_byte_b := sc_b[limb] >> shift + + hi_a, lo_a := (limb_byte_a >> 4) & 0x0f, limb_byte_a & 0x0f + hi_b, lo_b := (limb_byte_b >> 4) & 0x0f, limb_byte_b & 0x0f + + if i != 31 { + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + } + mul_tbl_add(&tmp, &A_tbl, hi_a, &tmp_add, &tmp_addend, true) + mul_tbl_add(&tmp, &GE_BASEPOINT_TABLE, hi_b, &tmp_add, &tmp_addend, true) + + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + mul_tbl_add(&tmp, &A_tbl, lo_a, &tmp_add, &tmp_addend, true) + 
+ mul_tbl_add(&tmp, &GE_BASEPOINT_TABLE, lo_b, &tmp_add, &tmp_addend, true) + } + + ge_set(ge, &tmp) +} + +@(private) +_ge_scalarmult :: proc "contextless" ( + ge, p: ^Group_Element, + sc: ^field.Non_Montgomery_Domain_Field_Element, + unsafe_is_vartime := false, +) { + // Do the simplest possible thing that works and provides adequate + // performance, which is 4-bit windowed double-and-add. + + tmp_add: Add_Scratch = --- + tmp_addend: Addend_Group_Element = --- + tmp_dbl: Double_Scratch = --- + tmp: Group_Element = --- + + p_tbl: Multiply_Table = --- + mul_tbl_set(&p_tbl, p, &tmp_add) + + ge_identity(&tmp) + for i := 31; i >= 0; i = i - 1 { + limb := i / 8 + shift := uint(i & 7) * 8 + limb_byte := sc[limb] >> shift + + hi, lo := (limb_byte >> 4) & 0x0f, limb_byte & 0x0f + + if i != 31 { + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + } + mul_tbl_add(&tmp, &p_tbl, hi, &tmp_add, &tmp_addend, unsafe_is_vartime) + + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + ge_double(&tmp, &tmp, &tmp_dbl) + mul_tbl_add(&tmp, &p_tbl, lo, &tmp_add, &tmp_addend, unsafe_is_vartime) + } + + ge_set(ge, &tmp) + + if !unsafe_is_vartime { + ge_clear(&tmp) + mem.zero_explicit(&tmp_add, size_of(Add_Scratch)) + mem.zero_explicit(&tmp_addend, size_of(Addend_Group_Element)) + mem.zero_explicit(&tmp_dbl, size_of(Double_Scratch)) + } +} + +@(private) +Multiply_Table :: [15]Addend_Group_Element // 0 = inf, which is implicit. 
+ +@(private) +mul_tbl_set :: proc "contextless" ( + tbl: ^Multiply_Table, + ge: ^Group_Element, + tmp_add: ^Add_Scratch, +) { + tmp: Group_Element = --- + ge_set(&tmp, ge) + + ge_addend_set(&tbl[0], ge) + for i := 1; i < 15; i = i + 1 { + ge_add_addend(&tmp, &tmp, &tbl[0], tmp_add) + ge_addend_set(&tbl[i], &tmp) + } + + ge_clear(&tmp) +} + +@(private) +mul_tbl_add :: proc "contextless" ( + ge: ^Group_Element, + tbl: ^Multiply_Table, + idx: u64, + tmp_add: ^Add_Scratch, + tmp_addend: ^Addend_Group_Element, + unsafe_is_vartime: bool, +) { + // Variable time lookup, with the addition omitted entirely if idx == 0. + if unsafe_is_vartime { + // Skip adding the point at infinity. + if idx != 0 { + ge_add_addend(ge, ge, &tbl[idx - 1], tmp_add) + } + return + } + + // Constant time lookup. + tmp_addend^ = { + // Point at infinity (0, 1, 1, 0) in precomputed form + {1, 0, 0, 0, 0}, // y - x + {1, 0, 0, 0, 0}, // y + x + {0, 0, 0, 0, 0}, // t * 2d + {2, 0, 0, 0, 0}, // z * 2 + } + for i := u64(1); i < 16; i = i + 1 { + _, ctrl := bits.sub_u64(0, (i ~ idx), 0) + ge_addend_conditional_assign(tmp_addend, &tbl[i - 1], int(~ctrl) & 1) + } + ge_add_addend(ge, ge, tmp_addend, tmp_add) +} diff --git a/core/crypto/_fiat/field_curve25519/field.odin b/core/crypto/_fiat/field_curve25519/field.odin index 64f9f8a1f..6b2d3b595 100644 --- a/core/crypto/_fiat/field_curve25519/field.odin +++ b/core/crypto/_fiat/field_curve25519/field.odin @@ -15,6 +15,20 @@ fe_tighten_cast :: #force_inline proc "contextless" ( return transmute(^Tight_Field_Element)(arg1) } +fe_clear :: proc "contextless" ( + arg1: $T, +) where T == ^Tight_Field_Element || T == ^Loose_Field_Element { + mem.zero_explicit(arg1, size_of(arg1^)) +} + +fe_clear_vec :: proc "contextless" ( + arg1: $T, +) where T == []^Tight_Field_Element || T == []^Loose_Field_Element { + for fe in arg1 { + fe_clear(fe) + } +} + fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte) { // Ignore the unused bit by copying 
the input and masking the bit off // prior to deserialization. @@ -27,12 +41,25 @@ fe_from_bytes :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^[32]byte mem.zero_explicit(&tmp1, size_of(tmp1)) } +fe_is_negative :: proc "contextless" (arg1: ^Tight_Field_Element) -> int { + tmp1: [32]byte = --- + + fe_to_bytes(&tmp1, arg1) + ret := tmp1[0] & 1 + + mem.zero_explicit(&tmp1, size_of(tmp1)) + + return int(ret) +} + fe_equal :: proc "contextless" (arg1, arg2: ^Tight_Field_Element) -> int { - tmp2: [32]byte = --- + tmp1, tmp2: [32]byte = ---, --- + fe_to_bytes(&tmp1, arg1) fe_to_bytes(&tmp2, arg2) - ret := fe_equal_bytes(arg1, &tmp2) + ret := crypto.compare_constant_time(tmp1[:], tmp2[:]) + mem.zero_explicit(&tmp1, size_of(tmp1)) mem.zero_explicit(&tmp2, size_of(tmp2)) return ret @@ -67,25 +94,37 @@ fe_carry_pow2k :: proc "contextless" ( } } +fe_carry_add :: #force_inline proc "contextless" (out1, arg1, arg2: ^Tight_Field_Element) { + fe_add(fe_relax_cast(out1), arg1, arg2) + fe_carry(out1, fe_relax_cast(out1)) +} + +fe_carry_sub :: #force_inline proc "contextless" (out1, arg1, arg2: ^Tight_Field_Element) { + fe_sub(fe_relax_cast(out1), arg1, arg2) + fe_carry(out1, fe_relax_cast(out1)) +} + fe_carry_opp :: #force_inline proc "contextless" (out1, arg1: ^Tight_Field_Element) { fe_opp(fe_relax_cast(out1), arg1) fe_carry(out1, fe_relax_cast(out1)) } -fe_carry_invsqrt :: proc "contextless" ( +fe_carry_sqrt_ratio_m1 :: proc "contextless" ( out1: ^Tight_Field_Element, - arg1: ^Loose_Field_Element, + arg1: ^Loose_Field_Element, // u + arg2: ^Loose_Field_Element, // v ) -> int { - // Inverse square root taken from Monocypher. + // SQRT_RATIO_M1(u, v) from RFC 9496 - 4.2, based on the inverse + // square root from Monocypher. 
+ w: Tight_Field_Element = --- + fe_carry_mul(&w, arg1, arg2) // u * v + + // r = tmp1 = u * w^((p-5)/8) tmp1, tmp2, tmp3: Tight_Field_Element = ---, ---, --- - - // t0 = x^((p-5)/8) - // Can be achieved with a simple double & add ladder, - // but it would be slower. - fe_carry_pow2k(&tmp1, arg1, 1) + fe_carry_pow2k(&tmp1, fe_relax_cast(&w), 1) fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp1), 2) - fe_carry_mul(&tmp2, arg1, fe_relax_cast(&tmp2)) + fe_carry_mul(&tmp2, fe_relax_cast(&w), fe_relax_cast(&tmp2)) fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&tmp2)) fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 1) fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1)) @@ -104,48 +143,49 @@ fe_carry_invsqrt :: proc "contextless" ( fe_carry_pow2k(&tmp2, fe_relax_cast(&tmp2), 50) fe_carry_mul(&tmp1, fe_relax_cast(&tmp2), fe_relax_cast(&tmp1)) fe_carry_pow2k(&tmp1, fe_relax_cast(&tmp1), 2) - fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1) + fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), fe_relax_cast(&w)) // w^((p-5)/8) - // quartic = x^((p-1)/4) - quartic := &tmp2 - fe_carry_square(quartic, fe_relax_cast(&tmp1)) - fe_carry_mul(quartic, fe_relax_cast(quartic), arg1) + fe_carry_mul(&tmp1, fe_relax_cast(&tmp1), arg1) // u * w^((p-5)/8) - // Serialize quartic once to save on repeated serialization/sanitization. - quartic_buf: [32]byte = --- - fe_to_bytes(&quartic_buf, quartic) - check := &tmp3 + // Serialize `check` once to save on repeated serialization. 
+ r, check := &tmp1, &tmp2 + b: [32]byte = --- + fe_carry_square(check, fe_relax_cast(r)) + fe_carry_mul(check, fe_relax_cast(check), arg2) // check * v + fe_to_bytes(&b, check) - fe_one(check) - p1 := fe_equal_bytes(check, &quartic_buf) - fe_carry_opp(check, check) - m1 := fe_equal_bytes(check, &quartic_buf) - fe_carry_opp(check, &SQRT_M1) - ms := fe_equal_bytes(check, &quartic_buf) + u, neg_u, neg_u_i := &tmp3, &w, check + fe_carry(u, arg1) + fe_carry_opp(neg_u, u) + fe_carry_mul(neg_u_i, fe_relax_cast(neg_u), fe_relax_cast(&FE_SQRT_M1)) - // if quartic == -1 or sqrt(-1) - // then isr = x^((p-1)/4) * sqrt(-1) - // else isr = x^((p-1)/4) - fe_carry_mul(out1, fe_relax_cast(&tmp1), fe_relax_cast(&SQRT_M1)) - fe_cond_assign(out1, &tmp1, (m1 | ms) ~ 1) + correct_sign_sqrt := fe_equal_bytes(u, &b) + flipped_sign_sqrt := fe_equal_bytes(neg_u, &b) + flipped_sign_sqrt_i := fe_equal_bytes(neg_u_i, &b) - mem.zero_explicit(&tmp1, size_of(tmp1)) - mem.zero_explicit(&tmp2, size_of(tmp2)) - mem.zero_explicit(&tmp3, size_of(tmp3)) - mem.zero_explicit(&quartic_buf, size_of(quartic_buf)) + r_prime := check + fe_carry_mul(r_prime, fe_relax_cast(r), fe_relax_cast(&FE_SQRT_M1)) + fe_cond_assign(r, r_prime, flipped_sign_sqrt | flipped_sign_sqrt_i) - return p1 | m1 + // Pick the non-negative square root. 
+	fe_carry_opp(r_prime, r)
+	fe_cond_select(out1, r, r_prime, fe_is_negative(r))
+
+	fe_clear_vec([]^Tight_Field_Element{&w, &tmp1, &tmp2, &tmp3})
+	mem.zero_explicit(&b, size_of(b))
+
+	return correct_sign_sqrt | flipped_sign_sqrt
 }
 
 fe_carry_inv :: proc "contextless" (out1: ^Tight_Field_Element, arg1: ^Loose_Field_Element) {
 	tmp1: Tight_Field_Element
 
 	fe_carry_square(&tmp1, arg1)
-	_ = fe_carry_invsqrt(&tmp1, fe_relax_cast(&tmp1))
+	_ = fe_carry_sqrt_ratio_m1(&tmp1, fe_relax_cast(&FE_ONE), fe_relax_cast(&tmp1)) // operands are FE_ONE and arg1^2 (squared on the line above); the returned flag is discarded
 	fe_carry_square(&tmp1, fe_relax_cast(&tmp1))
 	fe_carry_mul(out1, fe_relax_cast(&tmp1), arg1)
 
-	mem.zero_explicit(&tmp1, size_of(tmp1))
+	fe_clear(&tmp1) // takes over from the direct mem.zero_explicit call on tmp1
 }
 
 fe_zero :: proc "contextless" (out1: ^Tight_Field_Element) {
@@ -196,3 +236,21 @@ fe_cond_swap :: #force_no_inline proc "contextless" (out1, out2: ^Tight_Field_El
 	out1[3], out2[3] = x4, y4
 	out1[4], out2[4] = x5, y5
 }
+
+@(optimization_mode = "none") // NOTE(review): presumably stops the optimizer from turning the mask arithmetic into a branch - confirm
+fe_cond_select :: #force_no_inline proc "contextless" ( // out1 = arg2 when arg3 == 1, arg1 when arg3 == 0, limb by limb with no branch
+	out1, arg1, arg2: $T,
+	arg3: int, // selector; the mask below is only all-ones / all-zeros for the values 1 / 0
+) where T == ^Tight_Field_Element || T == ^Loose_Field_Element {
+	mask := (u64(arg3) * 0xffffffffffffffff) // 0xffffffffffffffff when arg3 == 1, 0 when arg3 == 0
+	x1 := ((mask & arg2[0]) | ((~mask) & arg1[0])) // arg2's bits where mask is set, arg1's bits elsewhere
+	x2 := ((mask & arg2[1]) | ((~mask) & arg1[1]))
+	x3 := ((mask & arg2[2]) | ((~mask) & arg1[2]))
+	x4 := ((mask & arg2[3]) | ((~mask) & arg1[3]))
+	x5 := ((mask & arg2[4]) | ((~mask) & arg1[4]))
+	out1[0] = x1 // all five limbs are read before the first store, so out1 may alias arg1 or arg2
+	out1[1] = x2
+	out1[2] = x3
+	out1[3] = x4
+	out1[4] = x5
+}
diff --git a/core/crypto/_fiat/field_curve25519/field51.odin b/core/crypto/_fiat/field_curve25519/field51.odin
index 81dca19e2..d039bd411 100644
--- a/core/crypto/_fiat/field_curve25519/field51.odin
+++ b/core/crypto/_fiat/field_curve25519/field51.odin
@@ -42,7 +42,10 @@ import "core:math/bits"
 Loose_Field_Element :: distinct [5]u64
 Tight_Field_Element :: distinct [5]u64
 
-SQRT_M1 := Tight_Field_Element {
+FE_ZERO := Tight_Field_Element{0, 0, 0, 0, 0}
+FE_ONE := Tight_Field_Element{1, 0, 0, 0, 0}
+
+FE_SQRT_M1 := Tight_Field_Element {
1718705420411056,
 	234908883556509,
 	2233514472574048,
diff --git a/core/crypto/_fiat/field_scalar25519/field.odin b/core/crypto/_fiat/field_scalar25519/field.odin
index c741d30cf..9b40661b7 100644
--- a/core/crypto/_fiat/field_scalar25519/field.odin
+++ b/core/crypto/_fiat/field_scalar25519/field.odin
@@ -20,6 +20,10 @@ _TWO_336 := Montgomery_Domain_Field_Element {
 	0x3d217f5be65cb5c,
 }
 
+fe_clear :: proc "contextless" (arg1: ^Montgomery_Domain_Field_Element) { // overwrites the element that arg1 points to, in place
+	mem.zero_explicit(arg1, size_of(Montgomery_Domain_Field_Element)) // NOTE(review): presumably a zeroing write the optimizer may not elide - confirm against core:mem
+}
+
 fe_from_bytes :: proc "contextless" (
 	out1: ^Montgomery_Domain_Field_Element,
 	arg1: ^[32]byte,
@@ -85,7 +89,7 @@ fe_from_bytes_wide :: proc "contextless" (
 	fe_mul(&tmp, &tmp, &_TWO_336) // c * 2^336
 	fe_add(out1, out1, &tmp) // a + b * 2^168 + c * 2^336
 
-	mem.zero_explicit(&tmp, size_of(tmp))
+	fe_clear(&tmp) // wipe the intermediate value via the shared helper
 }
 
 @(private)
@@ -125,7 +129,7 @@ fe_equal :: proc "contextless" (arg1, arg2: ^Montgomery_Domain_Field_Element) ->
 	// which will be 1.
 	_, borrow := bits.sub_u64(fe_non_zero(&tmp), 1, 0)
 
-	mem.zero_explicit(&tmp, size_of(tmp))
+	fe_clear(&tmp) // wipe the intermediate value; borrow already holds the result
 	return int(borrow)
 }
 
diff --git a/core/crypto/x25519/x25519.odin b/core/crypto/x25519/x25519.odin
index 3cd247cf8..f8a301810 100644
--- a/core/crypto/x25519/x25519.odin
+++ b/core/crypto/x25519/x25519.odin
@@ -94,13 +94,8 @@ _scalarmult :: proc "contextless" (out, scalar, point: ^[32]byte) {
 	field.fe_carry_mul(&x2, field.fe_relax_cast(&x2), field.fe_relax_cast(&z2))
 	field.fe_to_bytes(out, &x2)
 
-	mem.zero_explicit(&x1, size_of(x1))
-	mem.zero_explicit(&x2, size_of(x2))
-	mem.zero_explicit(&x3, size_of(x3))
-	mem.zero_explicit(&z2, size_of(z2))
-	mem.zero_explicit(&z3, size_of(z3))
-	mem.zero_explicit(&t0, size_of(t0))
-	mem.zero_explicit(&t1, size_of(t1))
+	field.fe_clear_vec([]^field.Tight_Field_Element{&x1, &x2, &x3, &z2, &z3}) // NOTE(review): presumably clears each listed element; takes over from the per-variable zero_explicit calls
+	field.fe_clear_vec([]^field.Loose_Field_Element{&t0, &t1}) // t0 and t1 are passed as Loose elements, hence the separate call
 }
 
 // scalarmult "multiplies" the provided scalar and point, and writes the
@@ -137,6 +132,5 @@ scalarmult :: 
proc(dst, scalar, point: []byte) {
 // scalarmult_basepoint "multiplies" the provided scalar with the X25519
 // base point and writes the resulting point to dst.
 scalarmult_basepoint :: proc(dst, scalar: []byte) {
-	// TODO/perf: Switch to using a precomputed table.
 	scalarmult(dst, scalar, _BASE_POINT[:])
 }