/ src / secp256k1 / src / bench_ecmult.c
bench_ecmult.c
  1  /***********************************************************************
  2   * Copyright (c) 2017 Pieter Wuille                                    *
  3   * Distributed under the MIT software license, see the accompanying    *
  4   * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
  5   ***********************************************************************/
  6  #include <stdio.h>
  7  #include <stdlib.h>
  8  
  9  #include "secp256k1.c"
 10  #include "../include/secp256k1.h"
 11  
 12  #include "util.h"
 13  #include "hash_impl.h"
 14  #include "field_impl.h"
 15  #include "group_impl.h"
 16  #include "scalar_impl.h"
 17  #include "ecmult_impl.h"
 18  #include "bench.h"
 19  
 20  #define POINTS 32768
 21  
 22  static void help(const char *executable_path, int default_iters) {
 23      printf("Benchmark EC multiplication algorithms\n");
 24      printf("\n");
 25      printf("The default number of iterations for each benchmark is %d. This can be\n", default_iters);
 26      printf("customized using the SECP256K1_BENCH_ITERS environment variable.\n");
 27      printf("\n");
 28      printf("Usage: %s [args]\n", executable_path);
 29      printf("The output shows the number of multiplied and summed points right after the\n");
 30      printf("function name. The letter 'g' indicates that one of the points is the generator.\n");
 31      printf("The benchmarks are divided by the number of points.\n");
 32      printf("\n");
 33      printf("default (ecmult_multi): picks pippenger_wnaf or strauss_wnaf depending on the\n");
 34      printf("                        batch size\n");
 35      printf("pippenger_wnaf:         for all batch sizes\n");
 36      printf("strauss_wnaf:           for all batch sizes\n");
 37      printf("simple:                 multiply and sum each point individually\n");
 38  }
 39  
 40  typedef struct {
 41      /* Setup once in advance */
 42      secp256k1_context* ctx;
 43      secp256k1_scratch_space* scratch;
 44      secp256k1_scalar* scalars;
 45      secp256k1_ge* pubkeys;
 46      secp256k1_gej* pubkeys_gej;
 47      secp256k1_scalar* seckeys;
 48      secp256k1_gej* expected_output;
 49      secp256k1_ecmult_multi_func ecmult_multi;
 50  
 51      /* Changes per benchmark */
 52      size_t count;
 53      int includes_g;
 54  
 55      /* Changes per benchmark iteration, used to pick different scalars and pubkeys
 56       * in each run. */
 57      size_t offset1;
 58      size_t offset2;
 59  
 60      /* Benchmark output. */
 61      secp256k1_gej* output;
 62      secp256k1_fe* output_xonly;
 63  } bench_data;
 64  
 65  /* Hashes x into [0, POINTS) twice and store the result in offset1 and offset2. */
 66  static void hash_into_offset(bench_data* data, size_t x) {
 67      data->offset1 = (x * 0x537b7f6f + 0x8f66a481) % POINTS;
 68      data->offset2 = (x * 0x7f6f537b + 0x6a1a8f49) % POINTS;
 69  }
 70  
 71  /* Check correctness of the benchmark by computing
 72   * sum(outputs) ?= (sum(scalars_gen) + sum(seckeys)*sum(scalars))*G */
 73  static void bench_ecmult_teardown_helper(bench_data* data, size_t* seckey_offset, size_t* scalar_offset, size_t* scalar_gen_offset, int iters) {
 74      int i;
 75      secp256k1_gej sum_output, tmp;
 76      secp256k1_scalar sum_scalars;
 77  
 78      secp256k1_gej_set_infinity(&sum_output);
 79      secp256k1_scalar_set_int(&sum_scalars, 0);
 80      for (i = 0; i < iters; ++i) {
 81          secp256k1_gej_add_var(&sum_output, &sum_output, &data->output[i], NULL);
 82          if (scalar_gen_offset != NULL) {
 83              secp256k1_scalar_add(&sum_scalars, &sum_scalars, &data->scalars[(*scalar_gen_offset+i) % POINTS]);
 84          }
 85          if (seckey_offset != NULL) {
 86              secp256k1_scalar s = data->seckeys[(*seckey_offset+i) % POINTS];
 87              secp256k1_scalar_mul(&s, &s, &data->scalars[(*scalar_offset+i) % POINTS]);
 88              secp256k1_scalar_add(&sum_scalars, &sum_scalars, &s);
 89          }
 90      }
 91      secp256k1_ecmult_gen(&data->ctx->ecmult_gen_ctx, &tmp, &sum_scalars);
 92      CHECK(secp256k1_gej_eq_var(&tmp, &sum_output));
 93  }
 94  
 95  static void bench_ecmult_setup(void* arg) {
 96      bench_data* data = (bench_data*)arg;
 97      /* Re-randomize offset to ensure that we're using different scalars and
 98       * group elements in each run. */
 99      hash_into_offset(data, data->offset1);
100  }
101  
102  static void bench_ecmult_gen(void* arg, int iters) {
103      bench_data* data = (bench_data*)arg;
104      int i;
105  
106      for (i = 0; i < iters; ++i) {
107          secp256k1_ecmult_gen(&data->ctx->ecmult_gen_ctx, &data->output[i], &data->scalars[(data->offset1+i) % POINTS]);
108      }
109  }
110  
111  static void bench_ecmult_gen_teardown(void* arg, int iters) {
112      bench_data* data = (bench_data*)arg;
113      bench_ecmult_teardown_helper(data, NULL, NULL, &data->offset1, iters);
114  }
115  
116  static void bench_ecmult_const(void* arg, int iters) {
117      bench_data* data = (bench_data*)arg;
118      int i;
119  
120      for (i = 0; i < iters; ++i) {
121          secp256k1_ecmult_const(&data->output[i], &data->pubkeys[(data->offset1+i) % POINTS], &data->scalars[(data->offset2+i) % POINTS]);
122      }
123  }
124  
125  static void bench_ecmult_const_teardown(void* arg, int iters) {
126      bench_data* data = (bench_data*)arg;
127      bench_ecmult_teardown_helper(data, &data->offset1, &data->offset2, NULL, iters);
128  }
129  
130  static void bench_ecmult_const_xonly(void* arg, int iters) {
131      bench_data* data = (bench_data*)arg;
132      int i;
133  
134      for (i = 0; i < iters; ++i) {
135          const secp256k1_ge* pubkey = &data->pubkeys[(data->offset1+i) % POINTS];
136          const secp256k1_scalar* scalar = &data->scalars[(data->offset2+i) % POINTS];
137          int known_on_curve = 1;
138          secp256k1_ecmult_const_xonly(&data->output_xonly[i], &pubkey->x, NULL, scalar, known_on_curve);
139      }
140  }
141  
142  static void bench_ecmult_const_xonly_teardown(void* arg, int iters) {
143      bench_data* data = (bench_data*)arg;
144      int i;
145  
146      /* verify by comparing with x coordinate of regular ecmult result */
147      for (i = 0; i < iters; ++i) {
148          const secp256k1_gej* pubkey_gej = &data->pubkeys_gej[(data->offset1+i) % POINTS];
149          const secp256k1_scalar* scalar = &data->scalars[(data->offset2+i) % POINTS];
150          secp256k1_gej expected_gej;
151          secp256k1_ecmult(&expected_gej, pubkey_gej, scalar, NULL);
152          CHECK(secp256k1_gej_eq_x_var(&data->output_xonly[i], &expected_gej));
153      }
154  }
155  
156  static void bench_ecmult_1p(void* arg, int iters) {
157      bench_data* data = (bench_data*)arg;
158      int i;
159  
160      for (i = 0; i < iters; ++i) {
161          secp256k1_ecmult(&data->output[i], &data->pubkeys_gej[(data->offset1+i) % POINTS], &data->scalars[(data->offset2+i) % POINTS], NULL);
162      }
163  }
164  
165  static void bench_ecmult_1p_teardown(void* arg, int iters) {
166      bench_data* data = (bench_data*)arg;
167      bench_ecmult_teardown_helper(data, &data->offset1, &data->offset2, NULL, iters);
168  }
169  
170  static void bench_ecmult_0p_g(void* arg, int iters) {
171      bench_data* data = (bench_data*)arg;
172      int i;
173  
174      for (i = 0; i < iters; ++i) {
175          secp256k1_ecmult(&data->output[i], NULL, &secp256k1_scalar_zero, &data->scalars[(data->offset1+i) % POINTS]);
176      }
177  }
178  
179  static void bench_ecmult_0p_g_teardown(void* arg, int iters) {
180      bench_data* data = (bench_data*)arg;
181      bench_ecmult_teardown_helper(data, NULL, NULL, &data->offset1, iters);
182  }
183  
184  static void bench_ecmult_1p_g(void* arg, int iters) {
185      bench_data* data = (bench_data*)arg;
186      int i;
187  
188      for (i = 0; i < iters/2; ++i) {
189          secp256k1_ecmult(&data->output[i], &data->pubkeys_gej[(data->offset1+i) % POINTS], &data->scalars[(data->offset2+i) % POINTS], &data->scalars[(data->offset1+i) % POINTS]);
190      }
191  }
192  
193  static void bench_ecmult_1p_g_teardown(void* arg, int iters) {
194      bench_data* data = (bench_data*)arg;
195      bench_ecmult_teardown_helper(data, &data->offset1, &data->offset2, &data->offset1, iters/2);
196  }
197  
198  static void run_ecmult_bench(bench_data* data, int iters) {
199      char str[32];
200      sprintf(str, "ecmult_gen");
201      run_benchmark(str, bench_ecmult_gen, bench_ecmult_setup, bench_ecmult_gen_teardown, data, 10, iters);
202      sprintf(str, "ecmult_const");
203      run_benchmark(str, bench_ecmult_const, bench_ecmult_setup, bench_ecmult_const_teardown, data, 10, iters);
204      sprintf(str, "ecmult_const_xonly");
205      run_benchmark(str, bench_ecmult_const_xonly, bench_ecmult_setup, bench_ecmult_const_xonly_teardown, data, 10, iters);
206      /* ecmult with non generator point */
207      sprintf(str, "ecmult_1p");
208      run_benchmark(str, bench_ecmult_1p, bench_ecmult_setup, bench_ecmult_1p_teardown, data, 10, iters);
209      /* ecmult with generator point */
210      sprintf(str, "ecmult_0p_g");
211      run_benchmark(str, bench_ecmult_0p_g, bench_ecmult_setup, bench_ecmult_0p_g_teardown, data, 10, iters);
212      /* ecmult with generator and non-generator point. The reported time is per point. */
213      sprintf(str, "ecmult_1p_g");
214      run_benchmark(str, bench_ecmult_1p_g, bench_ecmult_setup, bench_ecmult_1p_g_teardown, data, 10, 2*iters);
215  }
216  
217  static int bench_ecmult_multi_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, void* arg) {
218      bench_data* data = (bench_data*)arg;
219      if (data->includes_g) ++idx;
220      if (idx == 0) {
221          *sc = data->scalars[data->offset1];
222          *ge = secp256k1_ge_const_g;
223      } else {
224          *sc = data->scalars[(data->offset1 + idx) % POINTS];
225          *ge = data->pubkeys[(data->offset2 + idx - 1) % POINTS];
226      }
227      return 1;
228  }
229  
230  static void bench_ecmult_multi(void* arg, int iters) {
231      bench_data* data = (bench_data*)arg;
232  
233      int includes_g = data->includes_g;
234      int iter;
235      int count = data->count;
236      iters = iters / data->count;
237  
238      for (iter = 0; iter < iters; ++iter) {
239          data->ecmult_multi(&data->ctx->error_callback, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_ecmult_multi_callback, arg, count - includes_g);
240          data->offset1 = (data->offset1 + count) % POINTS;
241          data->offset2 = (data->offset2 + count - 1) % POINTS;
242      }
243  }
244  
245  static void bench_ecmult_multi_setup(void* arg) {
246      bench_data* data = (bench_data*)arg;
247      hash_into_offset(data, data->count);
248  }
249  
250  static void bench_ecmult_multi_teardown(void* arg, int iters) {
251      bench_data* data = (bench_data*)arg;
252      int iter;
253      iters = iters / data->count;
254      /* Verify the results in teardown, to avoid doing comparisons while benchmarking. */
255      for (iter = 0; iter < iters; ++iter) {
256          secp256k1_gej tmp;
257          secp256k1_gej_add_var(&tmp, &data->output[iter], &data->expected_output[iter], NULL);
258          CHECK(secp256k1_gej_is_infinity(&tmp));
259      }
260  }
261  
262  static void generate_scalar(const secp256k1_context *ctx, uint32_t num, secp256k1_scalar* scalar) {
263      secp256k1_sha256 sha256;
264      unsigned char c[10] = {'e', 'c', 'm', 'u', 'l', 't', 0, 0, 0, 0};
265      unsigned char buf[32];
266      int overflow = 0;
267      c[6] = num;
268      c[7] = num >> 8;
269      c[8] = num >> 16;
270      c[9] = num >> 24;
271      secp256k1_sha256_initialize(&sha256);
272      secp256k1_sha256_write(secp256k1_get_hash_context(ctx), &sha256, c, sizeof(c));
273      secp256k1_sha256_finalize(secp256k1_get_hash_context(ctx), &sha256, buf);
274      secp256k1_scalar_set_b32(scalar, buf, &overflow);
275      CHECK(!overflow);
276  }
277  
278  static void run_ecmult_multi_bench(bench_data* data, size_t count, int includes_g, int num_iters) {
279      char str[32];
280      size_t iters = 1 + num_iters / count;
281      size_t iter;
282  
283      data->count = count;
284      data->includes_g = includes_g;
285  
286      /* Compute (the negation of) the expected results directly. */
287      hash_into_offset(data, data->count);
288      for (iter = 0; iter < iters; ++iter) {
289          secp256k1_scalar tmp;
290          secp256k1_scalar total = data->scalars[(data->offset1++) % POINTS];
291          size_t i = 0;
292          for (i = 0; i + 1 < count; ++i) {
293              secp256k1_scalar_mul(&tmp, &data->seckeys[(data->offset2++) % POINTS], &data->scalars[(data->offset1++) % POINTS]);
294              secp256k1_scalar_add(&total, &total, &tmp);
295          }
296          secp256k1_scalar_negate(&total, &total);
297          secp256k1_ecmult(&data->expected_output[iter], NULL, &secp256k1_scalar_zero, &total);
298      }
299  
300      /* Run the benchmark. */
301      if (includes_g) {
302          sprintf(str, "ecmult_multi_%ip_g", (int)count - 1);
303      } else {
304          sprintf(str, "ecmult_multi_%ip", (int)count);
305      }
306      run_benchmark(str, bench_ecmult_multi, bench_ecmult_multi_setup, bench_ecmult_multi_teardown, data, 10, count * iters);
307  }
308  
309  int main(int argc, char **argv) {
310      bench_data data;
311      int i, p;
312      size_t scratch_size;
313  
314      int default_iters = 10000;
315      int iters = get_iters(default_iters);
316      if (iters == 0) {
317          help(argv[0], default_iters);
318          return EXIT_FAILURE;
319      }
320  
321      data.ecmult_multi = secp256k1_ecmult_multi_var;
322  
323      if (argc > 1) {
324          if(have_flag(argc, argv, "-h")
325             || have_flag(argc, argv, "--help")
326             || have_flag(argc, argv, "help")) {
327              help(argv[0], default_iters);
328              return EXIT_SUCCESS;
329          } else if(have_flag(argc, argv, "pippenger_wnaf")) {
330              printf("Using pippenger_wnaf:\n");
331              data.ecmult_multi = secp256k1_ecmult_pippenger_batch_single;
332          } else if(have_flag(argc, argv, "strauss_wnaf")) {
333              printf("Using strauss_wnaf:\n");
334              data.ecmult_multi = secp256k1_ecmult_strauss_batch_single;
335          } else if(have_flag(argc, argv, "simple")) {
336              printf("Using simple algorithm:\n");
337          } else {
338              fprintf(stderr, "%s: unrecognized argument '%s'.\n\n", argv[0], argv[1]);
339              help(argv[0], default_iters);
340              return EXIT_FAILURE;
341          }
342      }
343  
344      data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_NONE);
345      scratch_size = secp256k1_strauss_scratch_size(POINTS) + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT;
346      if (!have_flag(argc, argv, "simple")) {
347          data.scratch = secp256k1_scratch_space_create(data.ctx, scratch_size);
348      } else {
349          data.scratch = NULL;
350      }
351  
352      /* Allocate stuff */
353      data.scalars = malloc(sizeof(secp256k1_scalar) * POINTS);
354      data.seckeys = malloc(sizeof(secp256k1_scalar) * POINTS);
355      data.pubkeys = malloc(sizeof(secp256k1_ge) * POINTS);
356      data.pubkeys_gej = malloc(sizeof(secp256k1_gej) * POINTS);
357      data.expected_output = malloc(sizeof(secp256k1_gej) * (iters + 1));
358      data.output = malloc(sizeof(secp256k1_gej) * (iters + 1));
359      data.output_xonly = malloc(sizeof(secp256k1_fe) * (iters + 1));
360  
361      /* Generate a set of scalars, and private/public keypairs. */
362      secp256k1_gej_set_ge(&data.pubkeys_gej[0], &secp256k1_ge_const_g);
363      secp256k1_scalar_set_int(&data.seckeys[0], 1);
364      for (i = 0; i < POINTS; ++i) {
365          generate_scalar(data.ctx, i, &data.scalars[i]);
366          if (i) {
367              secp256k1_gej_double_var(&data.pubkeys_gej[i], &data.pubkeys_gej[i - 1], NULL);
368              secp256k1_scalar_add(&data.seckeys[i], &data.seckeys[i - 1], &data.seckeys[i - 1]);
369          }
370      }
371      secp256k1_ge_set_all_gej_var(data.pubkeys, data.pubkeys_gej, POINTS);
372  
373  
374      print_output_table_header_row();
375      /* Initialize offset1 and offset2 */
376      hash_into_offset(&data, 0);
377      run_ecmult_bench(&data, iters);
378  
379      for (i = 1; i <= 8; ++i) {
380          run_ecmult_multi_bench(&data, i, 1, iters);
381      }
382  
383      /* This is disabled with low count of iterations because the loop runs 77 times even with iters=1
384      * and the higher it goes the longer the computation takes(more points)
385      * So we don't run this benchmark with low iterations to prevent slow down */
386       if (iters > 2) {
387          for (p = 0; p <= 11; ++p) {
388              for (i = 9; i <= 16; ++i) {
389                  run_ecmult_multi_bench(&data, i << p, 1, iters);
390              }
391          }
392      } else {
393          printf("Skipping some benchmarks due to SECP256K1_BENCH_ITERS <= 2\n");
394      }
395  
396      if (data.scratch != NULL) {
397          secp256k1_scratch_space_destroy(data.ctx, data.scratch);
398      }
399      secp256k1_context_destroy(data.ctx);
400      free(data.scalars);
401      free(data.pubkeys);
402      free(data.pubkeys_gej);
403      free(data.seckeys);
404      free(data.output_xonly);
405      free(data.output);
406      free(data.expected_output);
407  
408      return EXIT_SUCCESS;
409  }