# benchmarks/bench_fp_double_precision.nim
# Constantine
# Copyright (c) 2018-2019 Status Research & Development GmbH
# Copyright (c) 2020-Present Mamy André-Ratsimbazafy
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

# ############################################################
#
# Benchmark of finite fields
#
# ############################################################

import
  # Internals
  ../constantine/platforms/abstractions,
  ../constantine/math/config/curves,
  ../constantine/math/arithmetic,
  ../constantine/math/extension_fields,
  # Helpers
  ../helpers/prng_unsafe,
  ./platforms,
  # Standard library
  std/[monotimes, times, strformat, strutils]

# Module-level PRNG shared by the benchmark procs of this file.
# It is seeded from the current Unix time and the seed is printed
# so that a run can be identified from its output.
var rng: RngState
let seed = uint32(getTime().toUnix() and (1'i64 shl 32 - 1)) # unixTime mod 2^32
rng.seed(seed)
echo "bench xoshiro512** seed: ", seed

# warmup
proc warmup*() =
  ## Runs a fixed 300 million iteration integer loop before any
  ## measurement and prints how long it took (CPU time).
  # Warmup - make sure cpu is on max perf
  let start = cpuTime()
  var foo = 123
  for i in 0 ..< 300_000_000:
    foo += i*i mod 456
    foo = foo mod 789

  # Compiler shouldn't optimize away the results as cpuTime rely on sideeffects
  let stop = cpuTime()
  # `foo` is printed so that the loop has an observable result.
  echo &"Warmup: {stop - start:>4.4f} s, result {foo} (displayed to avoid compiler optimizing warmup away)\n"

warmup()

# Report which C compiler backend was used to build the benchmark.
when defined(gcc):
  echo "\nCompiled with GCC"
elif defined(clang):
  echo "\nCompiled with Clang"
elif defined(vcc):
  echo "\nCompiled with MSVC"
elif defined(icc):
  echo "\nCompiled with ICC"
else:
  echo "\nCompiled with an unknown compiler"

# Report the build flags that influence the measurements.
echo "Optimization level => "
echo " no optimization: ", not defined(release)
echo " release: ", defined(release)
echo " danger: ", defined(danger)
echo " inline assembly: ", UseASM_X86_64

when CTT_32:
  echo "⚠️ Warning: using Constantine with 32-bit limbs"
else:
  echo "Using Constantine with 64-bit limbs"

when SupportsCPUName:
  echo "Running on ", cpuName(), ""

when SupportsGetTicks:
  echo "\n⚠️ Cycles measurements are approximate and use the CPU nominal clock: Turbo-Boost and overclocking will skew them."
  echo "i.e. a 20% overclock will be about 20% off (assuming no dynamic frequency scaling)"

echo "\n=================================================================================================================\n"

proc separator*() =
  ## Prints a horizontal rule of 145 dashes between groups of results.
  echo "-".repeat(145)

proc report(op, field: string, start, stop: MonoTime, startClk, stopClk: int64, iters: int) =
  ## Prints one result line for a benchmark.
  ##
  ## - `op`, `field`: labels printed in the first two columns.
  ## - `start`, `stop`: monotonic timestamps taken around the timed loop.
  ## - `startClk`, `stopClk`: tick counter readings taken around the loop;
  ##   only printed when `SupportsGetTicks` is true.
  ## - `iters`: number of iterations the loop executed.
  let elapsed = stop - start
  # ns/op is displayed as an integer (truncated).
  let ns = inNanoseconds(elapsed div iters)
  # Throughput is derived from the total elapsed time rather than from the
  # truncated `ns`: with operations lasting only a few nanoseconds the
  # truncation error would otherwise dominate the ops/s figure
  # (and a sub-nanosecond op would be reported as `inf`).
  let throughput = float64(iters) * 1e9 / float64(inNanoseconds(elapsed))
  when SupportsGetTicks:
    echo &"{op:<28} {field:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op {(stopClk - startClk) div iters:>9} CPU cycles (approx)"
  else:
    echo &"{op:<28} {field:<40} {throughput:>15.3f} ops/s {ns:>9} ns/op"

proc notes*() =
  ## Prints caveats about compilers and how to re-run the benchmarks
  ## with a specific compiler, with or without the assembly backend.
  echo "Notes:"
  echo " - Compilers:"
  echo " Compilers are severely limited on multiprecision arithmetic."
  echo " Constantine compile-time assembler is used by default (nimble bench_fp)."
  echo " GCC is significantly slower than Clang on multiprecision arithmetic due to catastrophic handling of carries."
  echo " GCC also seems to have issues with large temporaries and register spilling."
  echo " This is somewhat alleviated by Constantine compile-time assembler."
  echo " Bench on specific compiler with assembler: \"nimble bench_ec_g1_gcc\" or \"nimble bench_ec_g1_clang\"."
  # The `_noasm` tasks are the builds *without* the assembler.
  echo " Bench on specific compiler without assembler: \"nimble bench_ec_g1_gcc_noasm\" or \"nimble bench_ec_g1_clang_noasm\"."
  echo " - The simplest operations might be optimized away by the compiler."
template bench(op: string, desc: string, iters: int, body: untyped): untyped =
  ## Executes `body` `iters` times and hands the measurements to `report`.
  ##
  ## The monotonic clock is read outermost and the tick counter innermost.
  ## When `SupportsGetTicks` is false, both tick values are set to -1.
  let wallBegin = getMonotime()
  when SupportsGetTicks:
    let ticksBegin = getTicks()
  else:
    let ticksBegin = -1'i64
  for _ in 0 ..< iters:
    body
  when SupportsGetTicks:
    let ticksEnd = getTicks()
  else:
    let ticksEnd = -1'i64
  let wallEnd = getMonotime()

  report(op, desc, wallBegin, wallEnd, ticksBegin, ticksEnd, iters)

func random_unsafe(rng: var RngState, a: var FpDbl, Base: typedesc) =
  ## Fills the double-width element `a` from two random `Base` elements.
  ## The value is not reduced modulo p², it is only meant for benchmarking.
  # The high half is drawn first, then the low half.
  let upper = rng.random_unsafe(Base)
  let lower = rng.random_unsafe(Base)
  let lowerLen = lower.mres.limbs.len
  # Low limbs come from `lower` ...
  for i in 0 ..< lowerLen:
    a.limbs2x[i] = lower.mres.limbs[i]
  # ... and `upper` is written right after them.
  for j in 0 ..< upper.mres.limbs.len:
    a.limbs2x[lowerLen + j] = upper.mres.limbs[j]

proc sumUnr(T: typedesc, iters: int) =
  ## Times `sumUnr` on two random elements of `T`.
  var acc: T
  let lhs = rng.random_unsafe(T)
  let rhs = rng.random_unsafe(T)
  bench("Addition unreduced", $T, iters):
    acc.sumUnr(lhs, rhs)

proc sum(T: typedesc, iters: int) =
  ## Times `sum` on two random elements of `T`.
  var acc: T
  let lhs = rng.random_unsafe(T)
  let rhs = rng.random_unsafe(T)
  bench("Addition", $T, iters):
    acc.sum(lhs, rhs)

proc diffUnr(T: typedesc, iters: int) =
  ## Times `diffUnr` on two random elements of `T`.
  var acc: T
  let lhs = rng.random_unsafe(T)
  let rhs = rng.random_unsafe(T)
  bench("Substraction unreduced", $T, iters):
    acc.diffUnr(lhs, rhs)

proc diff(T: typedesc, iters: int) =
  ## Times `diff` on two random elements of `T`.
  var acc: T
  let lhs = rng.random_unsafe(T)
  let rhs = rng.random_unsafe(T)
  bench("Substraction", $T, iters):
    acc.diff(lhs, rhs)

proc neg(T: typedesc, iters: int) =
  ## Times `neg` on one random element of `T`.
  var acc: T
  let operand = rng.random_unsafe(T)
  bench("Negation", $T, iters):
    acc.neg(operand)

proc sum2xUnreduce(T: typedesc, iters: int) =
  ## Times `sum2xUnr` on random `doublePrec(T)` values.
  ## The destination is randomized as well before the loop.
  var acc: doublePrec(T)
  var lhs: doublePrec(T)
  var rhs: doublePrec(T)
  rng.random_unsafe(acc, T)
  rng.random_unsafe(lhs, T)
  rng.random_unsafe(rhs, T)
  bench("Addition 2x unreduced", $doublePrec(T), iters):
    acc.sum2xUnr(lhs, rhs)

proc sum2x(T: typedesc, iters: int) =
  ## Times `sum2xMod` on random `doublePrec(T)` values.
  ## The destination is randomized as well before the loop.
  var acc: doublePrec(T)
  var lhs: doublePrec(T)
  var rhs: doublePrec(T)
  rng.random_unsafe(acc, T)
  rng.random_unsafe(lhs, T)
  rng.random_unsafe(rhs, T)
  bench("Addition 2x reduced", $doublePrec(T), iters):
    acc.sum2xMod(lhs, rhs)

proc diff2xUnreduce(T: typedesc, iters: int) =
  ## Times `diff2xUnr` on random `doublePrec(T)` values.
  ## The destination is randomized as well before the loop.
  var acc: doublePrec(T)
  var lhs: doublePrec(T)
  var rhs: doublePrec(T)
  rng.random_unsafe(acc, T)
  rng.random_unsafe(lhs, T)
  rng.random_unsafe(rhs, T)
  bench("Substraction 2x unreduced", $doublePrec(T), iters):
    acc.diff2xUnr(lhs, rhs)

proc diff2x(T: typedesc, iters: int) =
  ## Times `diff2xMod` on random `doublePrec(T)` values.
  ## The destination is randomized as well before the loop.
  var acc: doublePrec(T)
  var lhs: doublePrec(T)
  var rhs: doublePrec(T)
  rng.random_unsafe(acc, T)
  rng.random_unsafe(lhs, T)
  rng.random_unsafe(rhs, T)
  bench("Substraction 2x reduced", $doublePrec(T), iters):
    acc.diff2xMod(lhs, rhs)

proc neg2x(T: typedesc, iters: int) =
  ## Times `neg2xMod` on one random `doublePrec(T)` value.
  ## Only the operand is randomized here, not the destination.
  var acc: doublePrec(T)
  var operand: doublePrec(T)
  rng.random_unsafe(operand, T)
  bench("Negation 2x reduced", $doublePrec(T), iters):
    acc.neg2xMod(operand)

proc prod2xBench*(rLen, aLen, bLen: static int, iters: int) =
  ## Times `prod` of a random `BigInt[aLen]` by a random `BigInt[bLen]`
  ## into a `BigInt[rLen]`.
  var product: BigInt[rLen]
  let lhs = rng.random_unsafe(BigInt[aLen])
  let rhs = rng.random_unsafe(BigInt[bLen])
  bench("Multiplication 2x", $rLen & " <- " & $aLen & " x " & $bLen, iters):
    product.prod(lhs, rhs)

proc square2xBench*(rLen, aLen: static int, iters: int) =
  ## Times `square` of a random `BigInt[aLen]` into a `BigInt[rLen]`.
  var squared: BigInt[rLen]
  let operand = rng.random_unsafe(BigInt[aLen])
  bench("Squaring 2x", $rLen & " <- " & $aLen & "²", iters):
    squared.square(operand)

proc reduce2x*(T: typedesc, iters: int) =
  ## Times `redc2x` from a random `doublePrec(T)` value into a `T`.
  var reduced: T
  var wide: doublePrec(T)
  rng.random_unsafe(wide, T)

  bench("Redc 2x", $T & " <- " & $doublePrec(T), iters):
    reduced.redc2x(wide)

proc reduce2xViaDivision*(T: typedesc, iters: int) =
  ## Times `reduce` of a random big integer, twice as wide as the curve
  ## bit width of `T`, by `T.fieldMod()`.

  const bits2x = 2 * T.C.getCurveBitWidth()
  var reduced: matchingBigInt(T.C)
  let wide = rng.random_unsafe(BigInt[bits2x])

  bench("Reduction via division", $T & " <- " & $doublePrec(T), iters):
    reduced.reduce(wide, T.fieldMod())

proc main() =
  ## Runs every benchmark of this file, grouped by separators.
  const
    manyIters = 10_000_000 # used by all benchmarks except reduction via division
    fewIters = 10_000      # used by the reduction via division benchmarks

  # Single-width field operations
  separator()
  sum(Fp[BLS12_381], iters = manyIters)
  sumUnr(Fp[BLS12_381], iters = manyIters)
  diff(Fp[BLS12_381], iters = manyIters)
  diffUnr(Fp[BLS12_381], iters = manyIters)
  neg(Fp[BLS12_381], iters = manyIters)

  # Double-width field operations
  separator()
  sum2x(Fp[BLS12_381], iters = manyIters)
  sum2xUnreduce(Fp[BLS12_381], iters = manyIters)
  diff2x(Fp[BLS12_381], iters = manyIters)
  diff2xUnreduce(Fp[BLS12_381], iters = manyIters)
  neg2x(Fp[BLS12_381], iters = manyIters)

  # Wide products, squarings and reductions
  separator()
  prod2xBench(512, 256, 256, iters = manyIters)
  prod2xBench(768, 384, 384, iters = manyIters)
  square2xBench(512, 256, iters = manyIters)
  square2xBench(768, 384, iters = manyIters)
  reduce2x(Fp[BN254_Snarks], iters = manyIters)
  reduce2x(Fp[BLS12_381], iters = manyIters)
  reduce2xViaDivision(Fp[BN254_Snarks], iters = fewIters)
  reduce2xViaDivision(Fp[BLS12_381], iters = fewIters)
  separator()

main()
notes()