/* ecmult_const_impl.h */
1 /*********************************************************************** 2 * Copyright (c) 2015, 2022 Pieter Wuille, Andrew Poelstra * 3 * Distributed under the MIT software license, see the accompanying * 4 * file COPYING or https://www.opensource.org/licenses/mit-license.php.* 5 ***********************************************************************/ 6 7 #ifndef SECP256K1_ECMULT_CONST_IMPL_H 8 #define SECP256K1_ECMULT_CONST_IMPL_H 9 10 #include "scalar.h" 11 #include "group.h" 12 #include "ecmult_const.h" 13 #include "ecmult_impl.h" 14 15 #if defined(EXHAUSTIVE_TEST_ORDER) 16 /* We need 2^ECMULT_CONST_GROUP_SIZE - 1 to be less than EXHAUSTIVE_TEST_ORDER, because 17 * the tables cannot have infinities in them (this breaks the effective-affine technique's 18 * z-ratio tracking) */ 19 # if EXHAUSTIVE_TEST_ORDER == 199 20 # define ECMULT_CONST_GROUP_SIZE 4 21 # elif EXHAUSTIVE_TEST_ORDER == 13 22 # define ECMULT_CONST_GROUP_SIZE 3 23 # elif EXHAUSTIVE_TEST_ORDER == 7 24 # define ECMULT_CONST_GROUP_SIZE 2 25 # else 26 # error "Unknown EXHAUSTIVE_TEST_ORDER" 27 # endif 28 #else 29 /* Group size 4 or 5 appears optimal. */ 30 # define ECMULT_CONST_GROUP_SIZE 5 31 #endif 32 33 #define ECMULT_CONST_TABLE_SIZE (1L << (ECMULT_CONST_GROUP_SIZE - 1)) 34 #define ECMULT_CONST_GROUPS ((129 + ECMULT_CONST_GROUP_SIZE - 1) / ECMULT_CONST_GROUP_SIZE) 35 #define ECMULT_CONST_BITS (ECMULT_CONST_GROUPS * ECMULT_CONST_GROUP_SIZE) 36 37 /** Fill a table 'pre' with precomputed odd multiples of a. 38 * 39 * The resulting point set is brought to a single constant Z denominator, stores the X and Y 40 * coordinates as ge points in pre, and stores the global Z in globalz. 41 * 42 * 'pre' must be an array of size ECMULT_CONST_TABLE_SIZE. 
43 */ 44 static void secp256k1_ecmult_const_odd_multiples_table_globalz(secp256k1_ge *pre, secp256k1_fe *globalz, const secp256k1_gej *a) { 45 secp256k1_fe zr[ECMULT_CONST_TABLE_SIZE]; 46 47 secp256k1_ecmult_odd_multiples_table(ECMULT_CONST_TABLE_SIZE, pre, zr, globalz, a); 48 secp256k1_ge_table_set_globalz(ECMULT_CONST_TABLE_SIZE, pre, zr); 49 } 50 51 /* Given a table 'pre' with odd multiples of a point, put in r the signed-bit multiplication of n with that point. 52 * 53 * For example, if ECMULT_CONST_GROUP_SIZE is 4, then pre is expected to contain 8 entries: 54 * [1*P, 3*P, 5*P, 7*P, 9*P, 11*P, 13*P, 15*P]. n is then expected to be a 4-bit integer (range 0-15), and its 55 * bits are interpreted as signs of powers of two to look up. 56 * 57 * For example, if n=4, which is 0100 in binary, which is interpreted as [- + - -], so the looked up value is 58 * [ -(2^3) + (2^2) - (2^1) - (2^0) ]*P = -7*P. Every valid n translates to an odd number in range [-15,15], 59 * which means we just need to look up one of the precomputed values, and optionally negate it. 60 */ 61 #define ECMULT_CONST_TABLE_GET_GE(r,pre,n) do { \ 62 unsigned int m = 0; \ 63 /* If the top bit of n is 0, we want the negation. */ \ 64 volatile unsigned int negative = ((n) >> (ECMULT_CONST_GROUP_SIZE - 1)) ^ 1; \ 65 /* Let n[i] be the i-th bit of n, then the index is 66 * sum(cnot(n[i]) * 2^i, i=0..l-2) 67 * where cnot(b) = b if n[l-1] = 1 and 1 - b otherwise. 68 * For example, if n = 4, in binary 0100, the index is 3, in binary 011. 69 * 70 * Proof: 71 * Let 72 * x = sum((2*n[i] - 1)*2^i, i=0..l-1) 73 * = 2*sum(n[i] * 2^i, i=0..l-1) - 2^l + 1 74 * be the value represented by n. 75 * The index is (x - 1)/2 if x > 0 and -(x + 1)/2 otherwise. 
76 * Case x > 0: 77 * n[l-1] = 1 78 * index = sum(n[i] * 2^i, i=0..l-1) - 2^(l-1) 79 * = sum(n[i] * 2^i, i=0..l-2) 80 * Case x <= 0: 81 * n[l-1] = 0 82 * index = -(2*sum(n[i] * 2^i, i=0..l-1) - 2^l + 2)/2 83 * = 2^(l-1) - 1 - sum(n[i] * 2^i, i=0..l-1) 84 * = sum((1 - n[i]) * 2^i, i=0..l-2) 85 */ \ 86 unsigned int index = ((unsigned int)(-negative) ^ n) & ((1U << (ECMULT_CONST_GROUP_SIZE - 1)) - 1U); \ 87 secp256k1_fe neg_y; \ 88 VERIFY_CHECK((n) < (1U << ECMULT_CONST_GROUP_SIZE)); \ 89 VERIFY_CHECK(index < (1U << (ECMULT_CONST_GROUP_SIZE - 1))); \ 90 /* Unconditionally set r->x = (pre)[m].x and r->y = (pre)[m].y because it's either the correct one 91 * or will get replaced in the later iterations, this is needed to make sure `r` is initialized. */ \ 92 secp256k1_ge_set_xy((r), &(pre)[m].x, &(pre)[m].y); \ 93 for (m = 1; m < ECMULT_CONST_TABLE_SIZE; m++) { \ 94 /* This loop is used to avoid secret data in array indices. See 95 * the comment in ecmult_gen_impl.h for rationale. */ \ 96 secp256k1_fe_cmov(&(r)->x, &(pre)[m].x, m == index); \ 97 secp256k1_fe_cmov(&(r)->y, &(pre)[m].y, m == index); \ 98 } \ 99 secp256k1_fe_negate(&neg_y, &(r)->y, 1); \ 100 secp256k1_fe_cmov(&(r)->y, &neg_y, negative); \ 101 } while(0) 102 103 /* For K as defined in the comment of secp256k1_ecmult_const, we have several precomputed 104 * formulas/constants. 105 * - in exhaustive test mode, we give an explicit expression to compute it at compile time: */ 106 #ifdef EXHAUSTIVE_TEST_ORDER 107 static const secp256k1_scalar secp256k1_ecmult_const_K = ((SECP256K1_SCALAR_CONST(0, 0, 0, (1U << (ECMULT_CONST_BITS - 128)) - 2U, 0, 0, 0, 0) + EXHAUSTIVE_TEST_ORDER - 1U) * (1U + EXHAUSTIVE_TEST_LAMBDA)) % EXHAUSTIVE_TEST_ORDER; 108 /* - for the real secp256k1 group we have constants for various ECMULT_CONST_BITS values. */ 109 #elif ECMULT_CONST_BITS == 129 110 /* For GROUP_SIZE = 1,3. 
*/ 111 static const secp256k1_scalar secp256k1_ecmult_const_K = SECP256K1_SCALAR_CONST(0xac9c52b3ul, 0x3fa3cf1ful, 0x5ad9e3fdul, 0x77ed9ba4ul, 0xa880b9fcul, 0x8ec739c2ul, 0xe0cfc810ul, 0xb51283ceul); 112 #elif ECMULT_CONST_BITS == 130 113 /* For GROUP_SIZE = 2,5. */ 114 static const secp256k1_scalar secp256k1_ecmult_const_K = SECP256K1_SCALAR_CONST(0xa4e88a7dul, 0xcb13034eul, 0xc2bdd6bful, 0x7c118d6bul, 0x589ae848ul, 0x26ba29e4ul, 0xb5c2c1dcul, 0xde9798d9ul); 115 #elif ECMULT_CONST_BITS == 132 116 /* For GROUP_SIZE = 4,6 */ 117 static const secp256k1_scalar secp256k1_ecmult_const_K = SECP256K1_SCALAR_CONST(0x76b1d93dul, 0x0fae3c6bul, 0x3215874bul, 0x94e93813ul, 0x7937fe0dul, 0xb66bcaaful, 0xb3749ca5ul, 0xd7b6171bul); 118 #else 119 # error "Unknown ECMULT_CONST_BITS" 120 #endif 121 122 static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, const secp256k1_scalar *q) { 123 /* The approach below combines the signed-digit logic from Mike Hamburg's 124 * "Fast and compact elliptic-curve cryptography" (https://eprint.iacr.org/2012/309) 125 * Section 3.3, with the GLV endomorphism. 126 * 127 * The idea there is to interpret the bits of a scalar as signs (1 = +, 0 = -), and compute a 128 * point multiplication in that fashion. Let v be an n-bit non-negative integer (0 <= v < 2^n), 129 * and v[i] its i'th bit (so v = sum(v[i] * 2^i, i=0..n-1)). Then define: 130 * 131 * C_l(v, A) = sum((2*v[i] - 1) * 2^i*A, i=0..l-1) 132 * 133 * Then it holds that C_l(v, A) = sum((2*v[i] - 1) * 2^i*A, i=0..l-1) 134 * = (2*sum(v[i] * 2^i, i=0..l-1) + 1 - 2^l) * A 135 * = (2*v + 1 - 2^l) * A 136 * 137 * Thus, one can compute q*A as C_256((q + 2^256 - 1) / 2, A). This is the basis for the 138 * paper's signed-digit multi-comb algorithm for multiplication using a precomputed table. 
139 * 140 * It is appealing to try to combine this with the GLV optimization: the idea that a scalar 141 * s can be written as s1 + lambda*s2, where lambda is a curve-specific constant such that 142 * lambda*A is easy to compute, and where s1 and s2 are small. In particular we have the 143 * secp256k1_scalar_split_lambda function which performs such a split with the resulting s1 144 * and s2 in range (-2^128, 2^128) mod n. This does work, but is uninteresting: 145 * 146 * To compute q*A: 147 * - Let s1, s2 = split_lambda(q) 148 * - Let R1 = C_256((s1 + 2^256 - 1) / 2, A) 149 * - Let R2 = C_256((s2 + 2^256 - 1) / 2, lambda*A) 150 * - Return R1 + R2 151 * 152 * The issue is that while s1 and s2 are small-range numbers, (s1 + 2^256 - 1) / 2 (mod n) 153 * and (s2 + 2^256 - 1) / 2 (mod n) are not, undoing the benefit of the splitting. 154 * 155 * To make it work, we want to modify the input scalar q first, before splitting, and then only 156 * add a 2^128 offset of the split results (so that they end up in the single 129-bit range 157 * [0,2^129]). A slightly smaller offset would work due to the bounds on the split, but we pick 158 * 2^128 for simplicity. Let s be the scalar fed to split_lambda, and f(q) the function to 159 * compute it from q: 160 * 161 * To compute q*A: 162 * - Compute s = f(q) 163 * - Let s1, s2 = split_lambda(s) 164 * - Let v1 = s1 + 2^128 (mod n) 165 * - Let v2 = s2 + 2^128 (mod n) 166 * - Let R1 = C_l(v1, A) 167 * - Let R2 = C_l(v2, lambda*A) 168 * - Return R1 + R2 169 * 170 * l will thus need to be at least 129, but we may overshoot by a few bits (see 171 * further), so keep it as a variable. 
172 * 173 * To solve for s, we reason: 174 * q*A = R1 + R2 175 * <=> q*A = C_l(s1 + 2^128, A) + C_l(s2 + 2^128, lambda*A) 176 * <=> q*A = (2*(s1 + 2^128) + 1 - 2^l) * A + (2*(s2 + 2^128) + 1 - 2^l) * lambda*A 177 * <=> q*A = (2*(s1 + s2*lambda) + (2^129 + 1 - 2^l) * (1 + lambda)) * A 178 * <=> q = 2*(s1 + s2*lambda) + (2^129 + 1 - 2^l) * (1 + lambda) (mod n) 179 * <=> q = 2*s + (2^129 + 1 - 2^l) * (1 + lambda) (mod n) 180 * <=> s = (q + (2^l - 2^129 - 1) * (1 + lambda)) / 2 (mod n) 181 * <=> f(q) = (q + K) / 2 (mod n) 182 * where K = (2^l - 2^129 - 1)*(1 + lambda) (mod n) 183 * 184 * We will process the computation of C_l(v1, A) and C_l(v2, lambda*A) in groups of 185 * ECMULT_CONST_GROUP_SIZE, so we set l to the smallest multiple of ECMULT_CONST_GROUP_SIZE 186 * that is not less than 129; this equals ECMULT_CONST_BITS. 187 */ 188 189 /* The offset to add to s1 and s2 to make them non-negative. Equal to 2^128. */ 190 static const secp256k1_scalar S_OFFSET = SECP256K1_SCALAR_CONST(0, 0, 0, 1, 0, 0, 0, 0); 191 secp256k1_scalar s, v1, v2; 192 secp256k1_ge pre_a[ECMULT_CONST_TABLE_SIZE]; 193 secp256k1_ge pre_a_lam[ECMULT_CONST_TABLE_SIZE]; 194 secp256k1_fe global_z; 195 int group, i; 196 197 /* We're allowed to be non-constant time in the point, and the code below (in particular, 198 * secp256k1_ecmult_const_odd_multiples_table_globalz) cannot deal with infinity in a 199 * constant-time manner anyway. */ 200 if (secp256k1_ge_is_infinity(a)) { 201 secp256k1_gej_set_infinity(r); 202 return; 203 } 204 205 /* Compute v1 and v2. */ 206 secp256k1_scalar_add(&s, q, &secp256k1_ecmult_const_K); 207 secp256k1_scalar_half(&s, &s); 208 secp256k1_scalar_split_lambda(&v1, &v2, &s); 209 secp256k1_scalar_add(&v1, &v1, &S_OFFSET); 210 secp256k1_scalar_add(&v2, &v2, &S_OFFSET); 211 212 #ifdef VERIFY 213 /* Verify that v1 and v2 are in range [0, 2^129-1]. 
*/ 214 for (i = 129; i < 256; ++i) { 215 VERIFY_CHECK(secp256k1_scalar_get_bits_limb32(&v1, i, 1) == 0); 216 VERIFY_CHECK(secp256k1_scalar_get_bits_limb32(&v2, i, 1) == 0); 217 } 218 #endif 219 220 /* Calculate odd multiples of A and A*lambda. 221 * All multiples are brought to the same Z 'denominator', which is stored 222 * in global_z. Due to secp256k1' isomorphism we can do all operations pretending 223 * that the Z coordinate was 1, use affine addition formulae, and correct 224 * the Z coordinate of the result once at the end. 225 */ 226 secp256k1_gej_set_ge(r, a); 227 secp256k1_ecmult_const_odd_multiples_table_globalz(pre_a, &global_z, r); 228 for (i = 0; i < ECMULT_CONST_TABLE_SIZE; i++) { 229 secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]); 230 } 231 232 /* Next, we compute r = C_l(v1, A) + C_l(v2, lambda*A). 233 * 234 * We proceed in groups of ECMULT_CONST_GROUP_SIZE bits, operating on that many bits 235 * at a time, from high in v1, v2 to low. Call these bits1 (from v1) and bits2 (from v2). 236 * 237 * Now note that ECMULT_CONST_TABLE_GET_GE(&t, pre_a, bits1) loads into t a point equal 238 * to C_{ECMULT_CONST_GROUP_SIZE}(bits1, A), and analogously for pre_lam_a / bits2. 239 * This means that all we need to do is add these looked up values together, multiplied 240 * by 2^(ECMULT_GROUP_SIZE * group). 241 */ 242 for (group = ECMULT_CONST_GROUPS - 1; group >= 0; --group) { 243 /* Using the _var get_bits function is ok here, since it's only variable in offset and count, not in the scalar. */ 244 unsigned int bits1 = secp256k1_scalar_get_bits_var(&v1, group * ECMULT_CONST_GROUP_SIZE, ECMULT_CONST_GROUP_SIZE); 245 unsigned int bits2 = secp256k1_scalar_get_bits_var(&v2, group * ECMULT_CONST_GROUP_SIZE, ECMULT_CONST_GROUP_SIZE); 246 secp256k1_ge t; 247 int j; 248 249 ECMULT_CONST_TABLE_GET_GE(&t, pre_a, bits1); 250 if (group == ECMULT_CONST_GROUPS - 1) { 251 /* Directly set r in the first iteration. 
*/ 252 secp256k1_gej_set_ge(r, &t); 253 } else { 254 /* Shift the result so far up. */ 255 for (j = 0; j < ECMULT_CONST_GROUP_SIZE; ++j) { 256 secp256k1_gej_double(r, r); 257 } 258 secp256k1_gej_add_ge(r, r, &t); 259 } 260 ECMULT_CONST_TABLE_GET_GE(&t, pre_a_lam, bits2); 261 secp256k1_gej_add_ge(r, r, &t); 262 } 263 264 /* Map the result back to the secp256k1 curve from the isomorphic curve. */ 265 secp256k1_fe_mul(&r->z, &r->z, &global_z); 266 } 267 268 static int secp256k1_ecmult_const_xonly(secp256k1_fe* r, const secp256k1_fe *n, const secp256k1_fe *d, const secp256k1_scalar *q, int known_on_curve) { 269 270 /* This algorithm is a generalization of Peter Dettman's technique for 271 * avoiding the square root in a random-basepoint x-only multiplication 272 * on a Weierstrass curve: 273 * https://mailarchive.ietf.org/arch/msg/cfrg/7DyYY6gg32wDgHAhgSb6XxMDlJA/ 274 * 275 * 276 * === Background: the effective affine technique === 277 * 278 * Let phi_u be the isomorphism that maps (x, y) on secp256k1 curve y^2 = x^3 + 7 to 279 * x' = u^2*x, y' = u^3*y on curve y'^2 = x'^3 + u^6*7. This new curve has the same order as 280 * the original (it is isomorphic), but moreover, has the same addition/doubling formulas, as 281 * the curve b=7 coefficient does not appear in those formulas (or at least does not appear in 282 * the formulas implemented in this codebase, both affine and Jacobian). See also Example 9.5.2 283 * in https://www.math.auckland.ac.nz/~sgal018/crypto-book/ch9.pdf. 284 * 285 * This means any linear combination of secp256k1 points can be computed by applying phi_u 286 * (with non-zero u) on all input points (including the generator, if used), computing the 287 * linear combination on the isomorphic curve (using the same group laws), and then applying 288 * phi_u^{-1} to get back to secp256k1. 289 * 290 * Switching to Jacobian coordinates, note that phi_u applied to (X, Y, Z) is simply 291 * (X, Y, Z/u). 
Thus, if we want to compute (X1, Y1, Z) + (X2, Y2, Z), with identical Z 292 * coordinates, we can use phi_Z to transform it to (X1, Y1, 1) + (X2, Y2, 1) on an isomorphic 293 * curve where the affine addition formula can be used instead. 294 * If (X3, Y3, Z3) = (X1, Y1) + (X2, Y2) on that curve, then our answer on secp256k1 is 295 * (X3, Y3, Z3*Z). 296 * 297 * This is the effective affine technique: if we have a linear combination of group elements 298 * to compute, and all those group elements have the same Z coordinate, we can simply pretend 299 * that all those Z coordinates are 1, perform the computation that way, and then multiply the 300 * original Z coordinate back in. 301 * 302 * The technique works on any a=0 short Weierstrass curve. It is possible to generalize it to 303 * other curves too, but there the isomorphic curves will have different 'a' coefficients, 304 * which typically does affect the group laws. 305 * 306 * 307 * === Avoiding the square root for x-only point multiplication === 308 * 309 * In this function, we want to compute the X coordinate of q*(n/d, y), for 310 * y = sqrt((n/d)^3 + 7). Its negation would also be a valid Y coordinate, but by convention 311 * we pick whatever sqrt returns (which we assume to be a deterministic function). 312 * 313 * Let g = y^2*d^3 = n^3 + 7*d^3. This also means y = sqrt(g/d^3). 314 * Further let v = sqrt(d*g), which must exist as d*g = y^2*d^4 = (y*d^2)^2. 315 * 316 * The input point (n/d, y) also has Jacobian coordinates: 317 * 318 * (n/d, y, 1) 319 * = (n/d * v^2, y * v^3, v) 320 * = (n/d * d*g, y * sqrt(d^3*g^3), v) 321 * = (n/d * d*g, sqrt(y^2 * d^3*g^3), v) 322 * = (n*g, sqrt(g/d^3 * d^3*g^3), v) 323 * = (n*g, sqrt(g^4), v) 324 * = (n*g, g^2, v) 325 * 326 * It is easy to verify that both (n*g, g^2, v) and its negation (n*g, -g^2, v) have affine X 327 * coordinate n/d, and this holds even when the square root function doesn't have a 328 * deterministic sign. We choose the (n*g, g^2, v) version. 
329 * 330 * Now switch to the effective affine curve using phi_v, where the input point has coordinates 331 * (n*g, g^2). Compute (X, Y, Z) = q * (n*g, g^2) there. 332 * 333 * Back on secp256k1, that means q * (n*g, g^2, v) = (X, Y, v*Z). This last point has affine X 334 * coordinate X / (v^2*Z^2) = X / (d*g*Z^2). Determining the affine Y coordinate would involve 335 * a square root, but as long as we only care about the resulting X coordinate, no square root 336 * is needed anywhere in this computation. 337 */ 338 339 secp256k1_fe g, i; 340 secp256k1_ge p; 341 secp256k1_gej rj; 342 343 /* Compute g = (n^3 + B*d^3). */ 344 secp256k1_fe_sqr(&g, n); 345 secp256k1_fe_mul(&g, &g, n); 346 if (d) { 347 secp256k1_fe b; 348 VERIFY_CHECK(!secp256k1_fe_normalizes_to_zero(d)); 349 secp256k1_fe_sqr(&b, d); 350 VERIFY_CHECK(SECP256K1_B <= 8); /* magnitude of b will be <= 8 after the next call */ 351 secp256k1_fe_mul_int(&b, SECP256K1_B); 352 secp256k1_fe_mul(&b, &b, d); 353 secp256k1_fe_add(&g, &b); 354 if (!known_on_curve) { 355 /* We need to determine whether (n/d)^3 + 7 is square. 356 * 357 * is_square((n/d)^3 + 7) 358 * <=> is_square(((n/d)^3 + 7) * d^4) 359 * <=> is_square((n^3 + 7*d^3) * d) 360 * <=> is_square(g * d) 361 */ 362 secp256k1_fe c; 363 secp256k1_fe_mul(&c, &g, d); 364 if (!secp256k1_fe_is_square_var(&c)) return 0; 365 } 366 } else { 367 secp256k1_fe_add_int(&g, SECP256K1_B); 368 if (!known_on_curve) { 369 /* g at this point equals x^3 + 7. Test if it is square. */ 370 if (!secp256k1_fe_is_square_var(&g)) return 0; 371 } 372 } 373 374 SECP256K1_FE_VERIFY_MAGNITUDE(&g, 2); 375 376 /* Compute base point P = (n*g, g^2), the effective affine version of 377 * (n*g, g^2, v), which has corresponding affine X coordinate n/d. */ 378 { 379 secp256k1_fe x, y; 380 secp256k1_fe_mul(&x, &g, n); 381 secp256k1_fe_sqr(&y, &g); 382 secp256k1_ge_set_xy(&p, &x, &y); 383 } 384 385 /* Perform x-only EC multiplication of P with q. 
*/ 386 VERIFY_CHECK(!secp256k1_scalar_is_zero(q)); 387 secp256k1_ecmult_const(&rj, &p, q); 388 VERIFY_CHECK(!secp256k1_gej_is_infinity(&rj)); 389 390 /* The resulting (X, Y, Z) point on the effective-affine isomorphic curve corresponds to 391 * (X, Y, Z*v) on the secp256k1 curve. The affine version of that has X coordinate 392 * (X / (Z^2*d*g)). */ 393 secp256k1_fe_sqr(&i, &rj.z); 394 secp256k1_fe_mul(&i, &i, &g); 395 if (d) secp256k1_fe_mul(&i, &i, d); 396 secp256k1_fe_inv(&i, &i); 397 secp256k1_fe_mul(r, &rj.x, &i); 398 399 return 1; 400 } 401 402 #endif /* SECP256K1_ECMULT_CONST_IMPL_H */