bench_ecmult.c
1 /*********************************************************************** 2 * Copyright (c) 2017 Pieter Wuille * 3 * Distributed under the MIT software license, see the accompanying * 4 * file COPYING or https://www.opensource.org/licenses/mit-license.php.* 5 ***********************************************************************/ 6 #include <stdio.h> 7 #include <stdlib.h> 8 9 #include "secp256k1.c" 10 #include "../include/secp256k1.h" 11 12 #include "util.h" 13 #include "hash_impl.h" 14 #include "field_impl.h" 15 #include "group_impl.h" 16 #include "scalar_impl.h" 17 #include "ecmult_impl.h" 18 #include "bench.h" 19 20 #define POINTS 32768 21 22 static void help(const char *executable_path, int default_iters) { 23 printf("Benchmark EC multiplication algorithms\n"); 24 printf("\n"); 25 printf("The default number of iterations for each benchmark is %d. This can be\n", default_iters); 26 printf("customized using the SECP256K1_BENCH_ITERS environment variable.\n"); 27 printf("\n"); 28 printf("Usage: %s [args]\n", executable_path); 29 printf("The output shows the number of multiplied and summed points right after the\n"); 30 printf("function name. The letter 'g' indicates that one of the points is the generator.\n"); 31 printf("The benchmarks are divided by the number of points.\n"); 32 printf("\n"); 33 printf("default (ecmult_multi): picks pippenger_wnaf or strauss_wnaf depending on the\n"); 34 printf(" batch size\n"); 35 printf("pippenger_wnaf: for all batch sizes\n"); 36 printf("strauss_wnaf: for all batch sizes\n"); 37 printf("simple: multiply and sum each point individually\n"); 38 } 39 40 typedef struct { 41 /* Setup once in advance */ 42 secp256k1_context* ctx; 43 secp256k1_scratch_space* scratch; 44 secp256k1_scalar* scalars; 45 secp256k1_ge* pubkeys; 46 secp256k1_gej* pubkeys_gej; 47 secp256k1_scalar* seckeys; 48 secp256k1_gej* expected_output; 49 secp256k1_ecmult_multi_func ecmult_multi; 50 51 /* Changes per benchmark */ 52 size_t count; 53 int includes_g; 54 55 /* Changes per benchmark iteration, used to pick different scalars and pubkeys 56 * in each run. */ 57 size_t offset1; 58 size_t offset2; 59 60 /* Benchmark output. */ 61 secp256k1_gej* output; 62 secp256k1_fe* output_xonly; 63 } bench_data; 64 65 /* Hashes x into [0, POINTS) twice and store the result in offset1 and offset2. */ 66 static void hash_into_offset(bench_data* data, size_t x) { 67 data->offset1 = (x * 0x537b7f6f + 0x8f66a481) % POINTS; 68 data->offset2 = (x * 0x7f6f537b + 0x6a1a8f49) % POINTS; 69 } 70 71 /* Check correctness of the benchmark by computing 72 * sum(outputs) ?= (sum(scalars_gen) + sum(seckeys)*sum(scalars))*G */ 73 static void bench_ecmult_teardown_helper(bench_data* data, size_t* seckey_offset, size_t* scalar_offset, size_t* scalar_gen_offset, int iters) { 74 int i; 75 secp256k1_gej sum_output, tmp; 76 secp256k1_scalar sum_scalars; 77 78 secp256k1_gej_set_infinity(&sum_output); 79 secp256k1_scalar_set_int(&sum_scalars, 0); 80 for (i = 0; i < iters; ++i) { 81 secp256k1_gej_add_var(&sum_output, &sum_output, &data->output[i], NULL); 82 if (scalar_gen_offset != NULL) { 83 secp256k1_scalar_add(&sum_scalars, &sum_scalars, &data->scalars[(*scalar_gen_offset+i) % POINTS]); 84 } 85 if (seckey_offset != NULL) { 86 secp256k1_scalar s = data->seckeys[(*seckey_offset+i) % POINTS]; 87 secp256k1_scalar_mul(&s, &s, &data->scalars[(*scalar_offset+i) % POINTS]); 88 secp256k1_scalar_add(&sum_scalars, &sum_scalars, &s); 89 } 90 } 91 secp256k1_ecmult_gen(&data->ctx->ecmult_gen_ctx, &tmp, &sum_scalars); 92 CHECK(secp256k1_gej_eq_var(&tmp, &sum_output)); 93 } 94 95 static void bench_ecmult_setup(void* arg) { 96 bench_data* data = (bench_data*)arg; 97 /* Re-randomize offset to ensure that we're using different scalars and 98 * group elements in each run. */ 99 hash_into_offset(data, data->offset1); 100 } 101 102 static void bench_ecmult_gen(void* arg, int iters) { 103 bench_data* data = (bench_data*)arg; 104 int i; 105 106 for (i = 0; i < iters; ++i) { 107 secp256k1_ecmult_gen(&data->ctx->ecmult_gen_ctx, &data->output[i], &data->scalars[(data->offset1+i) % POINTS]); 108 } 109 } 110 111 static void bench_ecmult_gen_teardown(void* arg, int iters) { 112 bench_data* data = (bench_data*)arg; 113 bench_ecmult_teardown_helper(data, NULL, NULL, &data->offset1, iters); 114 } 115 116 static void bench_ecmult_const(void* arg, int iters) { 117 bench_data* data = (bench_data*)arg; 118 int i; 119 120 for (i = 0; i < iters; ++i) { 121 secp256k1_ecmult_const(&data->output[i], &data->pubkeys[(data->offset1+i) % POINTS], &data->scalars[(data->offset2+i) % POINTS]); 122 } 123 } 124 125 static void bench_ecmult_const_teardown(void* arg, int iters) { 126 bench_data* data = (bench_data*)arg; 127 bench_ecmult_teardown_helper(data, &data->offset1, &data->offset2, NULL, iters); 128 } 129 130 static void bench_ecmult_const_xonly(void* arg, int iters) { 131 bench_data* data = (bench_data*)arg; 132 int i; 133 134 for (i = 0; i < iters; ++i) { 135 const secp256k1_ge* pubkey = &data->pubkeys[(data->offset1+i) % POINTS]; 136 const secp256k1_scalar* scalar = &data->scalars[(data->offset2+i) % POINTS]; 137 int known_on_curve = 1; 138 secp256k1_ecmult_const_xonly(&data->output_xonly[i], &pubkey->x, NULL, scalar, known_on_curve); 139 } 140 } 141 142 static void bench_ecmult_const_xonly_teardown(void* arg, int iters) { 143 bench_data* data = (bench_data*)arg; 144 int i; 145 146 /* verify by comparing with x coordinate of regular ecmult result */ 147 for (i = 0; i < iters; ++i) { 148 const secp256k1_gej* pubkey_gej = &data->pubkeys_gej[(data->offset1+i) % POINTS]; 149 const secp256k1_scalar* scalar = &data->scalars[(data->offset2+i) % POINTS]; 150 secp256k1_gej expected_gej; 151 secp256k1_ecmult(&expected_gej, pubkey_gej, scalar, NULL); 152 CHECK(secp256k1_gej_eq_x_var(&data->output_xonly[i], &expected_gej)); 153 } 154 } 155 156 static void bench_ecmult_1p(void* arg, int iters) { 157 bench_data* data = (bench_data*)arg; 158 int i; 159 160 for (i = 0; i < iters; ++i) { 161 secp256k1_ecmult(&data->output[i], &data->pubkeys_gej[(data->offset1+i) % POINTS], &data->scalars[(data->offset2+i) % POINTS], NULL); 162 } 163 } 164 165 static void bench_ecmult_1p_teardown(void* arg, int iters) { 166 bench_data* data = (bench_data*)arg; 167 bench_ecmult_teardown_helper(data, &data->offset1, &data->offset2, NULL, iters); 168 } 169 170 static void bench_ecmult_0p_g(void* arg, int iters) { 171 bench_data* data = (bench_data*)arg; 172 int i; 173 174 for (i = 0; i < iters; ++i) { 175 secp256k1_ecmult(&data->output[i], NULL, &secp256k1_scalar_zero, &data->scalars[(data->offset1+i) % POINTS]); 176 } 177 } 178 179 static void bench_ecmult_0p_g_teardown(void* arg, int iters) { 180 bench_data* data = (bench_data*)arg; 181 bench_ecmult_teardown_helper(data, NULL, NULL, &data->offset1, iters); 182 } 183 184 static void bench_ecmult_1p_g(void* arg, int iters) { 185 bench_data* data = (bench_data*)arg; 186 int i; 187 188 for (i = 0; i < iters/2; ++i) { 189 secp256k1_ecmult(&data->output[i], &data->pubkeys_gej[(data->offset1+i) % POINTS], &data->scalars[(data->offset2+i) % POINTS], &data->scalars[(data->offset1+i) % POINTS]); 190 } 191 } 192 193 static void bench_ecmult_1p_g_teardown(void* arg, int iters) { 194 bench_data* data = (bench_data*)arg; 195 bench_ecmult_teardown_helper(data, &data->offset1, &data->offset2, &data->offset1, iters/2); 196 } 197 198 static void run_ecmult_bench(bench_data* data, int iters) { 199 char str[32]; 200 sprintf(str, "ecmult_gen"); 201 run_benchmark(str, bench_ecmult_gen, bench_ecmult_setup, bench_ecmult_gen_teardown, data, 10, iters); 202 sprintf(str, "ecmult_const"); 203 run_benchmark(str, bench_ecmult_const, bench_ecmult_setup, bench_ecmult_const_teardown, data, 10, iters); 204 sprintf(str, "ecmult_const_xonly"); 205 run_benchmark(str, bench_ecmult_const_xonly, bench_ecmult_setup, bench_ecmult_const_xonly_teardown, data, 10, iters); 206 /* ecmult with non generator point */ 207 sprintf(str, "ecmult_1p"); 208 run_benchmark(str, bench_ecmult_1p, bench_ecmult_setup, bench_ecmult_1p_teardown, data, 10, iters); 209 /* ecmult with generator point */ 210 sprintf(str, "ecmult_0p_g"); 211 run_benchmark(str, bench_ecmult_0p_g, bench_ecmult_setup, bench_ecmult_0p_g_teardown, data, 10, iters); 212 /* ecmult with generator and non-generator point. The reported time is per point. */ 213 sprintf(str, "ecmult_1p_g"); 214 run_benchmark(str, bench_ecmult_1p_g, bench_ecmult_setup, bench_ecmult_1p_g_teardown, data, 10, 2*iters); 215 } 216 217 static int bench_ecmult_multi_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, void* arg) { 218 bench_data* data = (bench_data*)arg; 219 if (data->includes_g) ++idx; 220 if (idx == 0) { 221 *sc = data->scalars[data->offset1]; 222 *ge = secp256k1_ge_const_g; 223 } else { 224 *sc = data->scalars[(data->offset1 + idx) % POINTS]; 225 *ge = data->pubkeys[(data->offset2 + idx - 1) % POINTS]; 226 } 227 return 1; 228 } 229 230 static void bench_ecmult_multi(void* arg, int iters) { 231 bench_data* data = (bench_data*)arg; 232 233 int includes_g = data->includes_g; 234 int iter; 235 int count = data->count; 236 iters = iters / data->count; 237 238 for (iter = 0; iter < iters; ++iter) { 239 data->ecmult_multi(&data->ctx->error_callback, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_ecmult_multi_callback, arg, count - includes_g); 240 data->offset1 = (data->offset1 + count) % POINTS; 241 data->offset2 = (data->offset2 + count - 1) % POINTS; 242 } 243 } 244 245 static void bench_ecmult_multi_setup(void* arg) { 246 bench_data* data = (bench_data*)arg; 247 hash_into_offset(data, data->count); 248 } 249 250 static void bench_ecmult_multi_teardown(void* arg, int iters) { 251 bench_data* data = (bench_data*)arg; 252 int iter; 253 iters = iters / data->count; 254 /* Verify the results in teardown, to avoid doing comparisons while benchmarking. */ 255 for (iter = 0; iter < iters; ++iter) { 256 secp256k1_gej tmp; 257 secp256k1_gej_add_var(&tmp, &data->output[iter], &data->expected_output[iter], NULL); 258 CHECK(secp256k1_gej_is_infinity(&tmp)); 259 } 260 } 261 262 static void generate_scalar(const secp256k1_context *ctx, uint32_t num, secp256k1_scalar* scalar) { 263 secp256k1_sha256 sha256; 264 unsigned char c[10] = {'e', 'c', 'm', 'u', 'l', 't', 0, 0, 0, 0}; 265 unsigned char buf[32]; 266 int overflow = 0; 267 c[6] = num; 268 c[7] = num >> 8; 269 c[8] = num >> 16; 270 c[9] = num >> 24; 271 secp256k1_sha256_initialize(&sha256); 272 secp256k1_sha256_write(secp256k1_get_hash_context(ctx), &sha256, c, sizeof(c)); 273 secp256k1_sha256_finalize(secp256k1_get_hash_context(ctx), &sha256, buf); 274 secp256k1_scalar_set_b32(scalar, buf, &overflow); 275 CHECK(!overflow); 276 } 277 278 static void run_ecmult_multi_bench(bench_data* data, size_t count, int includes_g, int num_iters) { 279 char str[32]; 280 size_t iters = 1 + num_iters / count; 281 size_t iter; 282 283 data->count = count; 284 data->includes_g = includes_g; 285 286 /* Compute (the negation of) the expected results directly. */ 287 hash_into_offset(data, data->count); 288 for (iter = 0; iter < iters; ++iter) { 289 secp256k1_scalar tmp; 290 secp256k1_scalar total = data->scalars[(data->offset1++) % POINTS]; 291 size_t i = 0; 292 for (i = 0; i + 1 < count; ++i) { 293 secp256k1_scalar_mul(&tmp, &data->seckeys[(data->offset2++) % POINTS], &data->scalars[(data->offset1++) % POINTS]); 294 secp256k1_scalar_add(&total, &total, &tmp); 295 } 296 secp256k1_scalar_negate(&total, &total); 297 secp256k1_ecmult(&data->expected_output[iter], NULL, &secp256k1_scalar_zero, &total); 298 } 299 300 /* Run the benchmark. */ 301 if (includes_g) { 302 sprintf(str, "ecmult_multi_%ip_g", (int)count - 1); 303 } else { 304 sprintf(str, "ecmult_multi_%ip", (int)count); 305 } 306 run_benchmark(str, bench_ecmult_multi, bench_ecmult_multi_setup, bench_ecmult_multi_teardown, data, 10, count * iters); 307 } 308 309 int main(int argc, char **argv) { 310 bench_data data; 311 int i, p; 312 size_t scratch_size; 313 314 int default_iters = 10000; 315 int iters = get_iters(default_iters); 316 if (iters == 0) { 317 help(argv[0], default_iters); 318 return EXIT_FAILURE; 319 } 320 321 data.ecmult_multi = secp256k1_ecmult_multi_var; 322 323 if (argc > 1) { 324 if(have_flag(argc, argv, "-h") 325 || have_flag(argc, argv, "--help") 326 || have_flag(argc, argv, "help")) { 327 help(argv[0], default_iters); 328 return EXIT_SUCCESS; 329 } else if(have_flag(argc, argv, "pippenger_wnaf")) { 330 printf("Using pippenger_wnaf:\n"); 331 data.ecmult_multi = secp256k1_ecmult_pippenger_batch_single; 332 } else if(have_flag(argc, argv, "strauss_wnaf")) { 333 printf("Using strauss_wnaf:\n"); 334 data.ecmult_multi = secp256k1_ecmult_strauss_batch_single; 335 } else if(have_flag(argc, argv, "simple")) { 336 printf("Using simple algorithm:\n"); 337 } else { 338 fprintf(stderr, "%s: unrecognized argument '%s'.\n\n", argv[0], argv[1]); 339 help(argv[0], default_iters); 340 return EXIT_FAILURE; 341 } 342 } 343 344 data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_NONE); 345 scratch_size = secp256k1_strauss_scratch_size(POINTS) + STRAUSS_SCRATCH_OBJECTS*ALIGNMENT; 346 if (!have_flag(argc, argv, "simple")) { 347 data.scratch = secp256k1_scratch_space_create(data.ctx, scratch_size); 348 } else { 349 data.scratch = NULL; 350 } 351 352 /* Allocate stuff */ 353 data.scalars = malloc(sizeof(secp256k1_scalar) * POINTS); 354 data.seckeys = malloc(sizeof(secp256k1_scalar) * POINTS); 355 data.pubkeys = malloc(sizeof(secp256k1_ge) * POINTS); 356 data.pubkeys_gej = malloc(sizeof(secp256k1_gej) * POINTS); 357 data.expected_output = malloc(sizeof(secp256k1_gej) * (iters + 1)); 358 data.output = malloc(sizeof(secp256k1_gej) * (iters + 1)); 359 data.output_xonly = malloc(sizeof(secp256k1_fe) * (iters + 1)); 360 361 /* Generate a set of scalars, and private/public keypairs. */ 362 secp256k1_gej_set_ge(&data.pubkeys_gej[0], &secp256k1_ge_const_g); 363 secp256k1_scalar_set_int(&data.seckeys[0], 1); 364 for (i = 0; i < POINTS; ++i) { 365 generate_scalar(data.ctx, i, &data.scalars[i]); 366 if (i) { 367 secp256k1_gej_double_var(&data.pubkeys_gej[i], &data.pubkeys_gej[i - 1], NULL); 368 secp256k1_scalar_add(&data.seckeys[i], &data.seckeys[i - 1], &data.seckeys[i - 1]); 369 } 370 } 371 secp256k1_ge_set_all_gej_var(data.pubkeys, data.pubkeys_gej, POINTS); 372 373 374 print_output_table_header_row(); 375 /* Initialize offset1 and offset2 */ 376 hash_into_offset(&data, 0); 377 run_ecmult_bench(&data, iters); 378 379 for (i = 1; i <= 8; ++i) { 380 run_ecmult_multi_bench(&data, i, 1, iters); 381 } 382 383 /* This is disabled with low count of iterations because the loop runs 77 times even with iters=1 384 * and the higher it goes the longer the computation takes(more points) 385 * So we don't run this benchmark with low iterations to prevent slow down */ 386 if (iters > 2) { 387 for (p = 0; p <= 11; ++p) { 388 for (i = 9; i <= 16; ++i) { 389 run_ecmult_multi_bench(&data, i << p, 1, iters); 390 } 391 } 392 } else { 393 printf("Skipping some benchmarks due to SECP256K1_BENCH_ITERS <= 2\n"); 394 } 395 396 if (data.scratch != NULL) { 397 secp256k1_scratch_space_destroy(data.ctx, data.scratch); 398 } 399 secp256k1_context_destroy(data.ctx); 400 free(data.scalars); 401 free(data.pubkeys); 402 free(data.pubkeys_gej); 403 free(data.seckeys); 404 free(data.output_xonly); 405 free(data.output); 406 free(data.expected_output); 407 408 return EXIT_SUCCESS; 409 }