// -*- Mode: C -*- // // Use field-element code generated by fiat-crypto to implement ecdsa with curve secp256r1. // #include #include #include // generated with `fiat-crypto/src/ExtractOCaml/word_by_word_montgomery` // [see files for details of the invocation] #include "p256_64.c" #include "n256_64.c" // four uint64_t limbs, LITTLE endian. typedef fiat_p256_montgomery_domain_field_element fe; // elliptic curve point in homogeneous coordinates. [i.e. (X : Y : Z) form] typedef fe ecph[3]; #define FE_ADD fiat_p256_add #define FE_SUB fiat_p256_sub #define FE_MUL fiat_p256_mul // these are in the montgomery domain, limbs in little-endian order. // see irken-compiler/demo/ec/redc.scm static const fe c_a = {0xfffffffffffffffc, 0x00000003ffffffff, 0x0000000000000000, 0xfffffffc00000004}; static const fe c_b3 = {0x89d69e267d4e399f, 0x06d01166698c91b2, 0xb0e66203e5638c84, 0x949012590d95d89c}; // b * 3 static const fe c_n = {0x9ad169483335f568, 0x89b1054463fe9931, 0x9c0166ce652e96b7, 0x98648c200fe30feb}; // n // the point at infinity, homogeneous coords, montgomery domain. static const ecph p_inf = { {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}, // (0 {0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe}, // :1 {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000} // :0) }; // the generator, homogeneous coords, montgomery domain. static const ecph p_g = { {0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6}, // g.x {0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85}, // g.y {0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe} // 1 }; // copy a field element static void fe_copy (fe out1, const fe arg1) { for (size_t i = 0; i < 4; i++) { out1[i] = arg1[i]; } } // copy a curve point static void ecph_copy (ecph out1, const ecph arg1) { for (size_t i = 0; i < 3; i++) { fe_copy (out1[i], arg1[i]); } } // from montgomery static void p256_fm (fe out, const fe arg) { fe one = {1,0,0,0}; fiat_p256_mul (out, arg, one); } #if 0 // not needed. // to montgomery static void p256_tm (fe out, const fe arg) { // fm(fm(1)) fe r2n = {0x3, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x4fffffffd}; fiat_p256_mul (out, arg, r2n); } #endif // from montgomery static void n256_fm (fe out, const fe arg) { fe one = {1,0,0,0}; fiat_n256_mul (out, arg, one); } // to montgomery static void n256_tm (fe out, const fe arg) { // fm(fm(1)) fe r2n = {0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}; fiat_n256_mul (out, arg, r2n); } // input: u8[32], BIG endian // output: fe, NOT montgomery form static void fe_from_bytes (fe out, const uint8_t s[32]) { for (size_t i = 0; i < 4; i++) { uint64_t limb = 0; for (size_t j = 0; j < 8; j++) { uint8_t b = s[i * 8 + j]; limb <<= 8; limb |= b; } out[3-i] = limb; } } // input: fe, NOT montgomery form, little-endian // output: u8[32], BIG endian. static void fe_to_bytes (uint8_t out[32], const fe arg1) { for (size_t i = 0; i < 4; i++) { uint64_t limb = arg1[3-i]; for (size_t j = 0; j < 8; j++) { out[i * 8 + j] = 0xff & (limb >> ((7-j)*8)); } } } // 2015 Renes–Costello–Batina "Complete addition formulas for prime order elliptic curves" // https://www.hyperelliptic.org/EFD/g1p/auto-shortw-projective.html#doubling-dbl-2015-rcb static void dbl_2015_rcb (ecph ro, const ecph ri) // ro = ri + ri { fe t0,t1,t2,t3; #define X1 ri[0] #define Y1 ri[1] #define Z1 ri[2] #define X3 ro[0] #define Y3 ro[1] #define Z3 ro[2] FE_MUL (t0, X1, X1); FE_MUL (t1, Y1, Y1); FE_MUL (t2, Z1, Z1); FE_MUL (t3, X1, Y1); FE_ADD (t3, t3, t3); FE_MUL (Z3, X1, Z1); FE_ADD (Z3, Z3, Z3); FE_MUL (X3, c_a, Z3); FE_MUL (Y3, c_b3, t2); FE_ADD (Y3, X3, Y3); FE_SUB (X3, t1, Y3); FE_ADD (Y3, t1, Y3); FE_MUL (Y3, X3, Y3); FE_MUL (X3, t3, X3); FE_MUL (Z3, c_b3, Z3); FE_MUL (t2, c_a, t2); FE_SUB (t3, t0, t2); FE_MUL (t3, c_a, t3); FE_ADD (t3, t3, Z3); FE_ADD (Z3, t0, t0); FE_ADD (t0, Z3, t0); FE_ADD (t0, t0, t2); FE_MUL (t0, t0, t3); FE_ADD (Y3, Y3, t0); FE_MUL (t2, Y1, Z1); FE_ADD (t2, t2, t2); FE_MUL (t0, t2, t3); FE_SUB (X3, X3, t0); FE_MUL (Z3, t2, t1); FE_ADD (Z3, Z3, Z3); FE_ADD (Z3, Z3, Z3); #undef X1 #undef Y1 #undef Z1 #undef X3 #undef Y3 #undef Z3 } // https://www.hyperelliptic.org/EFD/g1p/auto-shortw-projective.html#addition-add-2015-rcb static void add_2015_rcb (ecph ro, const ecph ra, const ecph rb) // ro = ra + rb { fe t0,t1,t2,t3,t4,t5; #define X1 ra[0] #define Y1 ra[1] #define Z1 ra[2] #define X2 rb[0] #define Y2 rb[1] #define Z2 rb[2] #define X3 ro[0] #define Y3 ro[1] #define Z3 ro[2] FE_MUL (t0, X1, X2); FE_MUL (t1, Y1, Y2); FE_MUL (t2, Z1, Z2); FE_ADD (t3, X1, Y1); FE_ADD (t4, X2, Y2); FE_MUL (t3, t3, t4); FE_ADD (t4, t0, t1); FE_SUB (t3, t3, t4); FE_ADD (t4, X1, Z1); FE_ADD (t5, X2, Z2); FE_MUL (t4, t4, t5); FE_ADD (t5, t0, t2); FE_SUB (t4, t4, t5); FE_ADD (t5, Y1, Z1); FE_ADD (X3, Y2, Z2); FE_MUL (t5, t5, X3); FE_ADD (X3, t1, t2); FE_SUB (t5, t5, X3); FE_MUL (Z3, c_a, t4); FE_MUL (X3, c_b3, t2); FE_ADD (Z3, X3, Z3); FE_SUB (X3, t1, Z3); FE_ADD (Z3, t1, Z3); FE_MUL (Y3, X3, Z3); FE_ADD (t1, t0, t0); FE_ADD (t1, t1, t0); FE_MUL (t2, c_a, t2); FE_MUL (t4, c_b3, t4); FE_ADD (t1, t1, t2); FE_SUB (t2, t0, t2); FE_MUL (t2, c_a, t2); FE_ADD (t4, t4, t2); FE_MUL (t0, t1, t4); FE_ADD (Y3, Y3, t0); FE_MUL (t0, t5, t4); FE_MUL (X3, t3, X3); FE_SUB (X3, X3, t0); FE_MUL (t0, t3, t1); FE_MUL (Z3, t5, Z3); FE_ADD (Z3, Z3, t0); #undef X1 #undef Y1 #undef Z1 #undef X2 #undef Y2 #undef Z2 #undef X3 #undef Y3 #undef Z3 }; static void fe_sat_sign_bit (uint8_t * out1, const uint64_t arg1[5]) { uint8_t x1; x1 = (uint8_t)((arg1[4]) >> 63); *out1 = x1; } // compute kG using the left-to-right montgomery ladder. // input: k[32] is in big-endian form, NOT montgomery form. // P is a homogenous elliptic curve point, coordinates in montgomery form. // output: x coordinate in the montgomery domain. static void p256_scalarmult (ecph out, const uint8_t k[32], const ecph P) { ecph r0, r1, r2, r3; uint8_t flag = 0; // note: the top of the loop moves r0, r1 = r2, r3 ecph_copy (r2, p_inf); // r0 = (0 : 1 : 0) ecph_copy (r3, P); // r1 = (x : y : 1) for (size_t i = 0; i < 32; i++) { uint8_t di = k[i]; for (size_t j = 0; j < 8; j++) { uint8_t bit = (di>>(7-j)) & 1; // XXX use cmov here? if (!flag && bit) { // start the *actual* computation when we see the first high bit. ecph_copy (r0, p_inf); ecph_copy (r1, P); flag = 1; } else { ecph_copy (r0, r2); ecph_copy (r1, r3); flag = 1; } if (bit) { add_2015_rcb (r2, r0, r1); dbl_2015_rcb (r3, r1); } else { add_2015_rcb (r3, r0, r1); dbl_2015_rcb (r2, r0); } } } ecph_copy (out, r2); } // Bernstein-Yang Inversion. divstep is generated by fiat-crypto. // "High-assurance field inversion for curve-based cryptography" // https://eprint.iacr.org/2021/549.pdf // https://github.com/bshvass/fiat-crypto/ // based on `inversion/c/montgomery_template.c`, static void inverse (uint64_t out[4], uint64_t g[5]) { // NOTE: this is the precomp value from the bshvass fork, which _differs_ from the one generated by the original! uint64_t h[4], precomp[4] = {0x8000000, 0, 0, 0}; // this is the prime, little-endian form uint64_t f1[5], g1[5], f[5] = {0xffffffffffffffff, 0xffffffff, 0x0, 0xffffffff00000001, 0x0}; uint64_t v1[4], v[4] = {0, 0, 0, 0}; uint64_t r1[4], r[4] = {1, 0, 0, 0}; uint64_t d, d1; uint8_t s; d = 1; for (int i=0; i < 740; i += 2) { // iterations = 741 fiat_p256_divstep (&d1, f1, g1, v1, r1, d, f, g, v, r); fiat_p256_divstep (&d, f, g, v, r, d1, f1, g1, v1, r1); } // the iteration count is odd, so this last round isn't optional. fiat_p256_divstep (&d1, f1, g1, v1, r1, d, f, g, v, r); fe_sat_sign_bit (&s, f1); fiat_p256_mul (out, v1, precomp); fiat_p256_opp (h, out); fiat_p256_selectznz (out, s, out, h); } // same, but for the prime secp256r1.n (the group order). static void inverse_n (uint64_t out[4], uint64_t g[5]) { // NOTE: this is the precomp value from the bshvass fork, which _differs_ from the one generated by the original! uint64_t h[4], precomp[4] = {0x8000000, 0, 0, 0}; // this is the prime, little-endian form uint64_t f1[5], g1[5], f[5] = {0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000, 0x0}; uint64_t v1[4], v[4] = {0, 0, 0, 0}; uint64_t r1[4], r[4] = {1, 0, 0, 0}; uint64_t d, d1; uint8_t s; d = 1; for (int i=0; i < 740; i += 2) { // iterations = 741 fiat_n256_divstep (&d1, f1, g1, v1, r1, d, f, g, v, r); fiat_n256_divstep (&d, f, g, v, r, d1, f1, g1, v1, r1); } // the iteration count is odd, so this last round isn't optional. fiat_n256_divstep (&d1, f1, g1, v1, r1, d, f, g, v, r); fe_sat_sign_bit (&s, f1); fiat_n256_mul (out, v1, precomp); fiat_n256_opp (h, out); fiat_p256_selectznz (out, s, out, h); } // input: P = homogeneous point in montgomery form. // output: (out_x, out_y) = standard elliptic curve point, coords in montgomery form. static void from_homogenous (fe out_x, fe out_y, const ecph P) { // compute x = X/Z = X * Z^-1 and y = Y/Z = Y * Z^-1 // extra limb need by `inverse` uint64_t Zsat[5] = {P[2][0], P[2][1], P[2][2], P[2][3], 0}; fe Zinv; inverse (Zinv, Zsat); fiat_p256_mul (out_x, P[0], Zinv); fiat_p256_mul (out_y, P[1], Zinv); } static void be_bytes_to_mont_n (fe out, const uint8_t in[32]) { fe out0; fe_from_bytes (out0, in); n256_tm (out, out0); } // inputs: arg1, arg2, NOT montgomery form. // out = (arg1 < arg2) void u256_greater_than (uint8_t *out, const fe arg1, const fe arg2) { uint64_t x1,x3,x5,x7; uint8_t x2,x4,x6,x8; fiat_p256_subborrowx_u64(&x1, &x2, 0x0, (arg2[0]), (arg1[0])); fiat_p256_subborrowx_u64(&x3, &x4, x2, (arg2[1]), (arg1[1])); fiat_p256_subborrowx_u64(&x5, &x6, x4, (arg2[2]), (arg1[2])); fiat_p256_subborrowx_u64(&x7, &x8, x6, (arg2[3]), (arg1[3])); *out = (x8 != 0); } // input: p256 montgomery in 0 <= x < p // output: either x or x-n static void sub_mod_n (fe out, const fe x) { fe x0, x1; uint8_t gt; static const fe N = {0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000}; p256_fm (x0, x); u256_greater_than (>, x0, N); fiat_p256_sub (x1, x, c_n); fiat_p256_selectznz (out, gt, x, x1); } // inputs: all are in BIG endian form. // sk (secret key) // k (nonce/rfc6979) // h (hashed msg) // outputs: in BIG endian form. // out_r, out_s void p256_ecdsa_sign (uint8_t out_r[32], uint8_t out_s[32], const uint8_t sk[32], const uint8_t k[32], const uint8_t h[32]) { ecph kG; fe kG_x, kG_y; p256_scalarmult (kG, k, p_g); from_homogenous (kG_x, kG_y, kG); fe xm_n, xm_s; p256_fm (xm_s, kG_x); n256_tm (xm_n, xm_s); fe km; fe kinv; fe sk_m; fe zm, rm, sm; fe t0, t1, t2; fe r0, s0; sub_mod_n (rm, xm_n); be_bytes_to_mont_n (km, k); uint64_t ksat[5] = {km[0], km[1], km[2], km[3], 0}; inverse_n (kinv, ksat); be_bytes_to_mont_n (sk_m, sk); be_bytes_to_mont_n (zm, h); fiat_n256_mul (t0, rm, sk_m); fiat_n256_add (t1, zm, t0); fiat_n256_mul (t2, t1, kinv); sub_mod_n (sm, t2); n256_fm (r0, rm); n256_fm (s0, sm); fe_to_bytes (out_r, r0); fe_to_bytes (out_s, s0); }