/ src / ec / ec_prime_i31.c
ec_prime_i31.c
  1  /*
  2   * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
  3   *
  4   * Permission is hereby granted, free of charge, to any person obtaining 
  5   * a copy of this software and associated documentation files (the
  6   * "Software"), to deal in the Software without restriction, including
  7   * without limitation the rights to use, copy, modify, merge, publish,
  8   * distribute, sublicense, and/or sell copies of the Software, and to
  9   * permit persons to whom the Software is furnished to do so, subject to
 10   * the following conditions:
 11   *
 12   * The above copyright notice and this permission notice shall be 
 13   * included in all copies or substantial portions of the Software.
 14   *
 15   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
 16   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 17   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
 18   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 19   * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 20   * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 21   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 22   * SOFTWARE.
 23   */
 24  
 25  #include "inner.h"
 26  
 27  /*
 28   * Parameters for supported curves (field modulus, and 'b' equation
 29   * parameter; both values use the 'i31' format, and 'b' is in Montgomery
 30   * representation).
 31   */
 32  
 33  static const uint32_t P256_P[] = {
 34  	0x00000108,
 35  	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000007,
 36  	0x00000000, 0x00000000, 0x00000040, 0x7FFFFF80,
 37  	0x000000FF
 38  };
 39  
 40  static const uint32_t P256_R2[] = {
 41  	0x00000108,
 42  	0x00014000, 0x00018000, 0x00000000, 0x7FF40000,
 43  	0x7FEFFFFF, 0x7FF7FFFF, 0x7FAFFFFF, 0x005FFFFF,
 44  	0x00000000
 45  };
 46  
 47  static const uint32_t P256_B[] = {
 48  	0x00000108,
 49  	0x6FEE1803, 0x6229C4BD, 0x21B139BE, 0x327150AA,
 50  	0x3567802E, 0x3F7212ED, 0x012E4355, 0x782DD38D,
 51  	0x0000000E
 52  };
 53  
 54  static const uint32_t P384_P[] = {
 55  	0x0000018C,
 56  	0x7FFFFFFF, 0x00000001, 0x00000000, 0x7FFFFFF8,
 57  	0x7FFFFFEF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
 58  	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
 59  	0x00000FFF
 60  };
 61  
 62  static const uint32_t P384_R2[] = {
 63  	0x0000018C,
 64  	0x00000000, 0x00000080, 0x7FFFFE00, 0x000001FF,
 65  	0x00000800, 0x00000000, 0x7FFFE000, 0x00001FFF,
 66  	0x00008000, 0x00008000, 0x00000000, 0x00000000,
 67  	0x00000000
 68  };
 69  
 70  static const uint32_t P384_B[] = {
 71  	0x0000018C,
 72  	0x6E666840, 0x070D0392, 0x5D810231, 0x7651D50C,
 73  	0x17E218D6, 0x1B192002, 0x44EFE441, 0x3A524E2B,
 74  	0x2719BA5F, 0x41F02209, 0x36C5643E, 0x5813EFFE,
 75  	0x000008A5
 76  };
 77  
 78  static const uint32_t P521_P[] = {
 79  	0x00000219,
 80  	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
 81  	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
 82  	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
 83  	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
 84  	0x01FFFFFF
 85  };
 86  
 87  static const uint32_t P521_R2[] = {
 88  	0x00000219,
 89  	0x00001000, 0x00000000, 0x00000000, 0x00000000,
 90  	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 91  	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 92  	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 93  	0x00000000
 94  };
 95  
 96  static const uint32_t P521_B[] = {
 97  	0x00000219,
 98  	0x540FC00A, 0x228FEA35, 0x2C34F1EF, 0x67BF107A,
 99  	0x46FC1CD5, 0x1605E9DD, 0x6937B165, 0x272A3D8F,
100  	0x42785586, 0x44C8C778, 0x15F3B8B4, 0x64B73366,
101  	0x03BA8B69, 0x0D05B42A, 0x21F929A2, 0x2C31C393,
102  	0x00654FAE
103  };
104  
105  typedef struct {
106  	const uint32_t *p;
107  	const uint32_t *b;
108  	const uint32_t *R2;
109  	uint32_t p0i;
110  	size_t point_len;
111  } curve_params;
112  
113  static inline const curve_params *
114  id_to_curve(int curve)
115  {
116  	static const curve_params pp[] = {
117  		{ P256_P, P256_B, P256_R2, 0x00000001,  65 },
118  		{ P384_P, P384_B, P384_R2, 0x00000001,  97 },
119  		{ P521_P, P521_B, P521_R2, 0x00000001, 133 }
120  	};
121  
122  	return &pp[curve - BR_EC_secp256r1];
123  }
124  
125  #define I31_LEN   ((BR_MAX_EC_SIZE + 61) / 31)
126  
127  /*
128   * Type for a point in Jacobian coordinates:
129   * -- three values, x, y and z, in Montgomery representation
130   * -- affine coordinates are X = x / z^2 and Y = y / z^3
131   * -- for the point at infinity, z = 0
132   */
133  typedef struct {
134  	uint32_t c[3][I31_LEN];
135  } jacobian;
136  
137  /*
138   * We use a custom interpreter that uses a dozen registers, and
139   * only six operations:
140   *    MSET(d, a)       copy a into d
141   *    MADD(d, a)       d = d+a (modular)
142   *    MSUB(d, a)       d = d-a (modular)
143   *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
144   *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
145   *    MTZ(d)           clear return value if d = 0
146   * Destination of MMUL (d) must be distinct from operands (a and b).
147   * There is no such constraint for MSUB and MADD.
148   *
149   * Registers include the operand coordinates, and temporaries.
150   */
151  #define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))
152  #define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))
153  #define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))
154  #define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))
155  #define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))
156  #define MTZ(d)          (0x5000 + ((d) << 8))
157  #define ENDCODE         0
158  
159  /*
160   * Registers for the input operands.
161   */
162  #define P1x    0
163  #define P1y    1
164  #define P1z    2
165  #define P2x    3
166  #define P2y    4
167  #define P2z    5
168  
169  /*
170   * Alternate names for the first input operand.
171   */
172  #define Px     0
173  #define Py     1
174  #define Pz     2
175  
176  /*
177   * Temporaries.
178   */
179  #define t1     6
180  #define t2     7
181  #define t3     8
182  #define t4     9
183  #define t5    10
184  #define t6    11
185  #define t7    12
186  
187  /*
188   * Extra scratch registers available when there is no second operand (e.g.
189   * for "double" and "affine").
190   */
191  #define t8     3
192  #define t9     4
193  #define t10    5
194  
195  /*
196   * Doubling formulas are:
197   *
198   *   s = 4*x*y^2
199   *   m = 3*(x + z^2)*(x - z^2)
200   *   x' = m^2 - 2*s
201   *   y' = m*(s - x') - 8*y^4
202   *   z' = 2*y*z
203   *
204   * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
205   * should. This case should not happen anyway, because our curves have
206   * prime order, and thus do not contain any point of order 2.
207   *
208   * If P is infinity (z = 0), then again the formulas yield infinity,
209   * which is correct. Thus, this code works for all points.
210   *
211   * Cost: 8 multiplications
212   */
213  static const uint16_t code_double[] = {
214  	/*
215  	 * Compute z^2 (in t1).
216  	 */
217  	MMUL(t1, Pz, Pz),
218  
219  	/*
220  	 * Compute x-z^2 (in t2) and then x+z^2 (in t1).
221  	 */
222  	MSET(t2, Px),
223  	MSUB(t2, t1),
224  	MADD(t1, Px),
225  
226  	/*
227  	 * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
228  	 */
229  	MMUL(t3, t1, t2),
230  	MSET(t1, t3),
231  	MADD(t1, t3),
232  	MADD(t1, t3),
233  
234  	/*
235  	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
236  	 */
237  	MMUL(t3, Py, Py),
238  	MADD(t3, t3),
239  	MMUL(t2, Px, t3),
240  	MADD(t2, t2),
241  
242  	/*
243  	 * Compute x' = m^2 - 2*s.
244  	 */
245  	MMUL(Px, t1, t1),
246  	MSUB(Px, t2),
247  	MSUB(Px, t2),
248  
249  	/*
250  	 * Compute z' = 2*y*z.
251  	 */
252  	MMUL(t4, Py, Pz),
253  	MSET(Pz, t4),
254  	MADD(Pz, t4),
255  
256  	/*
257  	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
258  	 * 2*y^2 in t3.
259  	 */
260  	MSUB(t2, Px),
261  	MMUL(Py, t1, t2),
262  	MMUL(t4, t3, t3),
263  	MSUB(Py, t4),
264  	MSUB(Py, t4),
265  
266  	ENDCODE
267  };
268  
269  /*
270   * Addtions formulas are:
271   *
272   *   u1 = x1 * z2^2
273   *   u2 = x2 * z1^2
274   *   s1 = y1 * z2^3
275   *   s2 = y2 * z1^3
276   *   h = u2 - u1
277   *   r = s2 - s1
278   *   x3 = r^2 - h^3 - 2 * u1 * h^2
279   *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
280   *   z3 = h * z1 * z2
281   *
282   * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
283   * z3 == 0, so the result is correct.
284   * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
285   * not correct.
286   * h == 0 only if u1 == u2; this happens in two cases:
287   * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
288   * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
289   *
290   * Thus, the following situations are not handled correctly:
291   * -- P1 = 0 and P2 != 0
292   * -- P1 != 0 and P2 = 0
293   * -- P1 = P2
294   * All other cases are properly computed. However, even in "incorrect"
295   * situations, the three coordinates still are properly formed field
296   * elements.
297   *
298   * The returned flag is cleared if r == 0. This happens in the following
299   * cases:
300   * -- Both points are on the same horizontal line (same Y coordinate).
301   * -- Both points are infinity.
302   * -- One point is infinity and the other is on line Y = 0.
303   * The third case cannot happen with our curves (there is no valid point
304   * on line Y = 0 since that would be a point of order 2). If the two
305   * source points are non-infinity, then remains only the case where the
306   * two points are on the same horizontal line.
307   *
308   * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
309   * P2 != 0:
310   * -- If the returned value is not the point at infinity, then it was properly
311   * computed.
312   * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
313   * is indeed the point at infinity.
314   * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
315   * use the 'double' code.
316   *
317   * Cost: 16 multiplications
318   */
319  static const uint16_t code_add[] = {
320  	/*
321  	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
322  	 */
323  	MMUL(t3, P2z, P2z),
324  	MMUL(t1, P1x, t3),
325  	MMUL(t4, P2z, t3),
326  	MMUL(t3, P1y, t4),
327  
328  	/*
329  	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
330  	 */
331  	MMUL(t4, P1z, P1z),
332  	MMUL(t2, P2x, t4),
333  	MMUL(t5, P1z, t4),
334  	MMUL(t4, P2y, t5),
335  
336  	/*
337  	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
338  	 */
339  	MSUB(t2, t1),
340  	MSUB(t4, t3),
341  
342  	/*
343  	 * Report cases where r = 0 through the returned flag.
344  	 */
345  	MTZ(t4),
346  
347  	/*
348  	 * Compute u1*h^2 (in t6) and h^3 (in t5).
349  	 */
350  	MMUL(t7, t2, t2),
351  	MMUL(t6, t1, t7),
352  	MMUL(t5, t7, t2),
353  
354  	/*
355  	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
356  	 * t1 and t7 can be used as scratch registers.
357  	 */
358  	MMUL(P1x, t4, t4),
359  	MSUB(P1x, t5),
360  	MSUB(P1x, t6),
361  	MSUB(P1x, t6),
362  
363  	/*
364  	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
365  	 */
366  	MSUB(t6, P1x),
367  	MMUL(P1y, t4, t6),
368  	MMUL(t1, t5, t3),
369  	MSUB(P1y, t1),
370  
371  	/*
372  	 * Compute z3 = h*z1*z2.
373  	 */
374  	MMUL(t1, P1z, P2z),
375  	MMUL(P1z, t1, t2),
376  
377  	ENDCODE
378  };
379  
380  /*
381   * Check that the point is on the curve. This code snippet assumes the
382   * following conventions:
383   * -- Coordinates x and y have been freshly decoded in P1 (but not
384   * converted to Montgomery coordinates yet).
385   * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
386   */
387  static const uint16_t code_check[] = {
388  
389  	/* Convert x and y to Montgomery representation. */
390  	MMUL(t1, P1x, P2x),
391  	MMUL(t2, P1y, P2x),
392  	MSET(P1x, t1),
393  	MSET(P1y, t2),
394  
395  	/* Compute x^3 in t1. */
396  	MMUL(t2, P1x, P1x),
397  	MMUL(t1, P1x, t2),
398  
399  	/* Subtract 3*x from t1. */
400  	MSUB(t1, P1x),
401  	MSUB(t1, P1x),
402  	MSUB(t1, P1x),
403  
404  	/* Add b. */
405  	MADD(t1, P2y),
406  
407  	/* Compute y^2 in t2. */
408  	MMUL(t2, P1y, P1y),
409  
410  	/* Compare y^2 with x^3 - 3*x + b; they must match. */
411  	MSUB(t1, t2),
412  	MTZ(t1),
413  
414  	/* Set z to 1 (in Montgomery representation). */
415  	MMUL(P1z, P2x, P2z),
416  
417  	ENDCODE
418  };
419  
420  /*
421   * Conversion back to affine coordinates. This code snippet assumes that
422   * the z coordinate of P2 is set to 1 (not in Montgomery representation).
423   */
424  static const uint16_t code_affine[] = {
425  
426  	/* Save z*R in t1. */
427  	MSET(t1, P1z),
428  
429  	/* Compute z^3 in t2. */
430  	MMUL(t2, P1z, P1z),
431  	MMUL(t3, P1z, t2),
432  	MMUL(t2, t3, P2z),
433  
434  	/* Invert to (1/z^3) in t2. */
435  	MINV(t2, t3, t4),
436  
437  	/* Compute y. */
438  	MSET(t3, P1y),
439  	MMUL(P1y, t2, t3),
440  
441  	/* Compute (1/z^2) in t3. */
442  	MMUL(t3, t2, t1),
443  
444  	/* Compute x. */
445  	MSET(t2, P1x),
446  	MMUL(P1x, t2, t3),
447  
448  	ENDCODE
449  };
450  
451  static uint32_t
452  run_code(jacobian *P1, const jacobian *P2,
453  	const curve_params *cc, const uint16_t *code)
454  {
455  	uint32_t r;
456  	uint32_t t[13][I31_LEN];
457  	size_t u;
458  
459  	r = 1;
460  
461  	/*
462  	 * Copy the two operands in the dedicated registers.
463  	 */
464  	memcpy(t[P1x], P1->c, 3 * I31_LEN * sizeof(uint32_t));
465  	memcpy(t[P2x], P2->c, 3 * I31_LEN * sizeof(uint32_t));
466  
467  	/*
468  	 * Run formulas.
469  	 */
470  	for (u = 0;; u ++) {
471  		unsigned op, d, a, b;
472  
473  		op = code[u];
474  		if (op == 0) {
475  			break;
476  		}
477  		d = (op >> 8) & 0x0F;
478  		a = (op >> 4) & 0x0F;
479  		b = op & 0x0F;
480  		op >>= 12;
481  		switch (op) {
482  			uint32_t ctl;
483  			size_t plen;
484  			unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
485  
486  		case 0:
487  			memcpy(t[d], t[a], I31_LEN * sizeof(uint32_t));
488  			break;
489  		case 1:
490  			ctl = br_i31_add(t[d], t[a], 1);
491  			ctl |= NOT(br_i31_sub(t[d], cc->p, 0));
492  			br_i31_sub(t[d], cc->p, ctl);
493  			break;
494  		case 2:
495  			br_i31_add(t[d], cc->p, br_i31_sub(t[d], t[a], 1));
496  			break;
497  		case 3:
498  			br_i31_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
499  			break;
500  		case 4:
501  			plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;
502  			br_i31_encode(tp, plen, cc->p);
503  			tp[plen - 1] -= 2;
504  			br_i31_modpow(t[d], tp, plen,
505  				cc->p, cc->p0i, t[a], t[b]);
506  			break;
507  		default:
508  			r &= ~br_i31_iszero(t[d]);
509  			break;
510  		}
511  	}
512  
513  	/*
514  	 * Copy back result.
515  	 */
516  	memcpy(P1->c, t[P1x], 3 * I31_LEN * sizeof(uint32_t));
517  	return r;
518  }
519  
520  static void
521  set_one(uint32_t *x, const uint32_t *p)
522  {
523  	size_t plen;
524  
525  	plen = (p[0] + 63) >> 5;
526  	memset(x, 0, plen * sizeof *x);
527  	x[0] = p[0];
528  	x[1] = 0x00000001;
529  }
530  
531  static void
532  point_zero(jacobian *P, const curve_params *cc)
533  {
534  	memset(P, 0, sizeof *P);
535  	P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0];
536  }
537  
538  static inline void
539  point_double(jacobian *P, const curve_params *cc)
540  {
541  	run_code(P, P, cc, code_double);
542  }
543  
544  static inline uint32_t
545  point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
546  {
547  	return run_code(P1, P2, cc, code_add);
548  }
549  
550  static void
551  point_mul(jacobian *P, const unsigned char *x, size_t xlen,
552  	const curve_params *cc)
553  {
554  	/*
555  	 * We do a simple double-and-add ladder with a 2-bit window
556  	 * to make only one add every two doublings. We thus first
557  	 * precompute 2P and 3P in some local buffers.
558  	 *
559  	 * We always perform two doublings and one addition; the
560  	 * addition is with P, 2P and 3P and is done in a temporary
561  	 * array.
562  	 *
563  	 * The addition code cannot handle cases where one of the
564  	 * operands is infinity, which is the case at the start of the
565  	 * ladder. We therefore need to maintain a flag that controls
566  	 * this situation.
567  	 */
568  	uint32_t qz;
569  	jacobian P2, P3, Q, T, U;
570  
571  	memcpy(&P2, P, sizeof P2);
572  	point_double(&P2, cc);
573  	memcpy(&P3, P, sizeof P3);
574  	point_add(&P3, &P2, cc);
575  
576  	point_zero(&Q, cc);
577  	qz = 1;
578  	while (xlen -- > 0) {
579  		int k;
580  
581  		for (k = 6; k >= 0; k -= 2) {
582  			uint32_t bits;
583  			uint32_t bnz;
584  
585  			point_double(&Q, cc);
586  			point_double(&Q, cc);
587  			memcpy(&T, P, sizeof T);
588  			memcpy(&U, &Q, sizeof U);
589  			bits = (*x >> k) & (uint32_t)3;
590  			bnz = NEQ(bits, 0);
591  			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
592  			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
593  			point_add(&U, &T, cc);
594  			CCOPY(bnz & qz, &Q, &T, sizeof Q);
595  			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
596  			qz &= ~bnz;
597  		}
598  		x ++;
599  	}
600  	memcpy(P, &Q, sizeof Q);
601  }
602  
603  /*
604   * Decode point into Jacobian coordinates. This function does not support
605   * the point at infinity. If the point is invalid then this returns 0, but
606   * the coordinates are still set to properly formed field elements.
607   */
608  static uint32_t
609  point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
610  {
611  	/*
612  	 * Points must use uncompressed format:
613  	 * -- first byte is 0x04;
614  	 * -- coordinates X and Y use unsigned big-endian, with the same
615  	 *    length as the field modulus.
616  	 *
617  	 * We don't support hybrid format (uncompressed, but first byte
618  	 * has value 0x06 or 0x07, depending on the least significant bit
619  	 * of Y) because it is rather useless, and explicitly forbidden
620  	 * by PKIX (RFC 5480, section 2.2).
621  	 *
622  	 * We don't support compressed format either, because it is not
623  	 * much used in practice (there are or were patent-related
624  	 * concerns about point compression, which explains the lack of
625  	 * generalised support). Also, point compression support would
626  	 * need a bit more code.
627  	 */
628  	const unsigned char *buf;
629  	size_t plen, zlen;
630  	uint32_t r;
631  	jacobian Q;
632  
633  	buf = src;
634  	point_zero(P, cc);
635  	plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;
636  	if (len != 1 + (plen << 1)) {
637  		return 0;
638  	}
639  	r = br_i31_decode_mod(P->c[0], buf + 1, plen, cc->p);
640  	r &= br_i31_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
641  
642  	/*
643  	 * Check first byte.
644  	 */
645  	r &= EQ(buf[0], 0x04);
646  	/* obsolete
647  	r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
648  		& ~(uint32_t)(buf[0] ^ buf[plen << 1]));
649  	*/
650  
651  	/*
652  	 * Convert coordinates and check that the point is valid.
653  	 */
654  	zlen = ((cc->p[0] + 63) >> 5) * sizeof(uint32_t);
655  	memcpy(Q.c[0], cc->R2, zlen);
656  	memcpy(Q.c[1], cc->b, zlen);
657  	set_one(Q.c[2], cc->p);
658  	r &= ~run_code(P, &Q, cc, code_check);
659  	return r;
660  }
661  
662  /*
663   * Encode a point. This method assumes that the point is correct and is
664   * not the point at infinity. Encoded size is always 1+2*plen, where
665   * plen is the field modulus length, in bytes.
666   */
667  static void
668  point_encode(void *dst, const jacobian *P, const curve_params *cc)
669  {
670  	unsigned char *buf;
671  	uint32_t xbl;
672  	size_t plen;
673  	jacobian Q, T;
674  
675  	buf = dst;
676  	xbl = cc->p[0];
677  	xbl -= (xbl >> 5);
678  	plen = (xbl + 7) >> 3;
679  	buf[0] = 0x04;
680  	memcpy(&Q, P, sizeof *P);
681  	set_one(T.c[2], cc->p);
682  	run_code(&Q, &T, cc, code_affine);
683  	br_i31_encode(buf + 1, plen, Q.c[0]);
684  	br_i31_encode(buf + 1 + plen, plen, Q.c[1]);
685  }
686  
687  static const br_ec_curve_def *
688  id_to_curve_def(int curve)
689  {
690  	switch (curve) {
691  	case BR_EC_secp256r1:
692  		return &br_secp256r1;
693  	case BR_EC_secp384r1:
694  		return &br_secp384r1;
695  	case BR_EC_secp521r1:
696  		return &br_secp521r1;
697  	}
698  	return NULL;
699  }
700  
701  static const unsigned char *
702  api_generator(int curve, size_t *len)
703  {
704  	const br_ec_curve_def *cd;
705  
706  	cd = id_to_curve_def(curve);
707  	*len = cd->generator_len;
708  	return cd->generator;
709  }
710  
711  static const unsigned char *
712  api_order(int curve, size_t *len)
713  {
714  	const br_ec_curve_def *cd;
715  
716  	cd = id_to_curve_def(curve);
717  	*len = cd->order_len;
718  	return cd->order;
719  }
720  
721  static size_t
722  api_xoff(int curve, size_t *len)
723  {
724  	api_generator(curve, len);
725  	*len >>= 1;
726  	return 1;
727  }
728  
729  static uint32_t
730  api_mul(unsigned char *G, size_t Glen,
731  	const unsigned char *x, size_t xlen, int curve)
732  {
733  	uint32_t r;
734  	const curve_params *cc;
735  	jacobian P;
736  
737  	cc = id_to_curve(curve);
738  	if (Glen != cc->point_len) {
739  		return 0;
740  	}
741  	r = point_decode(&P, G, Glen, cc);
742  	point_mul(&P, x, xlen, cc);
743  	point_encode(G, &P, cc);
744  	return r;
745  }
746  
747  static size_t
748  api_mulgen(unsigned char *R,
749  	const unsigned char *x, size_t xlen, int curve)
750  {
751  	const unsigned char *G;
752  	size_t Glen;
753  
754  	G = api_generator(curve, &Glen);
755  	memcpy(R, G, Glen);
756  	api_mul(R, Glen, x, xlen, curve);
757  	return Glen;
758  }
759  
760  static uint32_t
761  api_muladd(unsigned char *A, const unsigned char *B, size_t len,
762  	const unsigned char *x, size_t xlen,
763  	const unsigned char *y, size_t ylen, int curve)
764  {
765  	uint32_t r, t, z;
766  	const curve_params *cc;
767  	jacobian P, Q;
768  
769  	/*
770  	 * TODO: see about merging the two ladders. Right now, we do
771  	 * two independent point multiplications, which is a bit
772  	 * wasteful of CPU resources (but yields short code).
773  	 */
774  
775  	cc = id_to_curve(curve);
776  	if (len != cc->point_len) {
777  		return 0;
778  	}
779  	r = point_decode(&P, A, len, cc);
780  	if (B == NULL) {
781  		size_t Glen;
782  
783  		B = api_generator(curve, &Glen);
784  	}
785  	r &= point_decode(&Q, B, len, cc);
786  	point_mul(&P, x, xlen, cc);
787  	point_mul(&Q, y, ylen, cc);
788  
789  	/*
790  	 * We want to compute P+Q. Since the base points A and B are distinct
791  	 * from infinity, and the multipliers are non-zero and lower than the
792  	 * curve order, then we know that P and Q are non-infinity. This
793  	 * leaves two special situations to test for:
794  	 * -- If P = Q then we must use point_double().
795  	 * -- If P+Q = 0 then we must report an error.
796  	 */
797  	t = point_add(&P, &Q, cc);
798  	point_double(&Q, cc);
799  	z = br_i31_iszero(P.c[2]);
800  
801  	/*
802  	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
803  	 * have the following:
804  	 *
805  	 *   z = 0, t = 0   return P (normal addition)
806  	 *   z = 0, t = 1   return P (normal addition)
807  	 *   z = 1, t = 0   return Q (a 'double' case)
808  	 *   z = 1, t = 1   report an error (P+Q = 0)
809  	 */
810  	CCOPY(z & ~t, &P, &Q, sizeof Q);
811  	point_encode(A, &P, cc);
812  	r &= ~(z & t);
813  
814  	return r;
815  }
816  
817  /* see bearssl_ec.h */
818  const br_ec_impl br_ec_prime_i31 = {
819  	(uint32_t)0x03800000,
820  	&api_generator,
821  	&api_order,
822  	&api_xoff,
823  	&api_mul,
824  	&api_mulgen,
825  	&api_muladd
826  };