primitives.nim
1 # Constantine 2 # Copyright (c) 2018-2019 Status Research & Development GmbH 3 # Copyright (c) 2020-Present Mamy André-Ratsimbazafy 4 # Licensed and distributed under either of 5 # * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT). 6 # * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0). 7 # at your option. This file may not be copied, modified, or distributed except according to those terms. 8 9 import 10 ./config, 11 constant_time/[ 12 ct_types, 13 ct_routines, 14 multiplexers, 15 ct_division 16 ], 17 intrinsics/[ 18 addcarry_subborrow, 19 extended_precision 20 ], 21 ./bithacks, 22 ./static_for, 23 ./allocs 24 25 export 26 config, 27 ct_types, 28 ct_routines, 29 multiplexers, 30 addcarry_subborrow, 31 extended_precision, 32 ct_division, 33 bithacks, 34 staticFor, 35 allocs 36 37 # Note: 38 # - cpuinfo_x86 initialize globals with following CPU features detection. 39 # This will impact benchmarks that do not need it, such as the threadpool. 
when X86 and GCC_Compatible:
  import isa/[cpuinfo_x86, macro_assembler_x86]
  export cpuinfo_x86, macro_assembler_x86

# No exceptions allowed in core cryptographic operations
{.push raises: [].}
{.push checks: off.}

# ############################################################
#
#                      Instrumentation
#
# ############################################################

template debug*(body: untyped): untyped =
  ## Expand to `body` only when compiled with `-d:CTT_DEBUG`.
  ## Without that define the template expands to nothing.
  when defined(CTT_DEBUG):
    body

# Binding to the C compiler builtin `__builtin_unreachable`.
# `nodecl`: no C declaration is generated, the builtin is used as-is.
proc builtin_unreachable(){.nodecl, importc: "__builtin_unreachable".}

func unreachable*() {.noReturn, inline.} =
  ## Mark a code path that must never be executed.
  ## Fails a `doAssert` with the message "Unreachable"; on GCC-compatible
  ## compilers a call to `__builtin_unreachable` is emitted right after it.
  doAssert false, "Unreachable"
  when GCC_Compatible:
    builtin_unreachable()

# ############################################################
#
#                      Arithmetic
#
# ############################################################

func ceilDiv_vartime*(a, b: auto): auto {.inline.} =
  ## ceil division, to be used only on length or at compile-time
  ## ceil(a / b)
  ##
  ## Computed as `(a + b - 1) div b`.
  # "LengthInDigits: static int" doesn't match "int"
  # if "SomeInteger" is used instead of "auto"
  (a + b - 1) div b

# ############################################################
#
#                      Buffers
#
# ############################################################

func setZero*(a: var openArray[SomeNumber]){.inline.} =
  ## Overwrite every element of `a` with 0.
  for i in 0 ..< a.len:
    a[i] = 0

func setOne*(a: var openArray[SomeNumber]){.inline.} =
  ## Set `a[0]` to 1 and every other element of `a` to 0.
  ##
  ## An empty `a` is left untouched. Checks are switched off for this
  ## module (`{.push checks: off.}`), so an unconditional store to `a[0]`
  ## would be an unchecked out-of-bounds write on an empty buffer.
  if a.len == 0:
    return
  a[0] = 1
  for i in 1 ..< a.len:
    a[i] = 0

func asBytes*(s: static string): auto =
  ## Reinterpret a compile-time string as an array of bytes
  ##
  ## Returns an `array[s.len, byte]` holding `byte(s[i])` at index `i`.
  const N = s.len
  var r: array[N, byte]
  for i in 0 ..< s.len:
    r[i] = byte s[i]
  return r

func rawCopy*(
       dst: var openArray[byte],
       dStart: SomeInteger,
       src: openArray[byte],
       sStart: SomeInteger,
       len: SomeInteger) {.inline.} =
  ## Copy dst[dStart ..< dStart+len] = src[sStart ..< sStart+len]
  ## Unlike the standard library, this cannot throw
  ## even a defect.
  ##
  ## The range validation below only exists in `-d:CTT_DEBUG` builds;
  ## otherwise the caller is responsible for passing in-range arguments.
  debug:
    doAssert 0 <= dStart and int(dStart+len) <= dst.len, "dStart: " & $dStart & ", dStart+len: " & $(dStart+len) & ", dst.len: " & $dst.len
    doAssert 0 <= sStart and int(sStart+len) <= src.len, "sStart: " & $sStart & ", sStart+len: " & $(sStart+len) & ", src.len: " & $src.len

  # NOTE(review): this push has no matching `{.pop.}` in this proc and the
  # module already pushes `checks: off` at the top; kept as-is to preserve
  # the existing pragma stack.
  {.push checks: off.} # No OverflowError or IndexError allowed
  for i in 0 ..< len:
    dst[dStart + i] = src[sStart + i]

func rotateRight*[N: static int, T](a: var array[N, T]) {.inline.} =
  ## Rotate `a` by one position toward the higher indices:
  ## each `a[i]` receives the old `a[i-1]` and `a[0]` receives the old
  ## last element.
  # Rotate right (Somehow we can't use a generic template here)
  # Inline
  # The hope is that the compiler sees that N rounds of rotation
  # can be optimized away with register renaming
  let tmp = a[a.len-1]
  # Walk downwards so that a[i-1] is read before it is overwritten.
  staticForCountdown i, a.len-1, 1:
    a[i] = a[i-1]
  a[0] = tmp

func rotateLeft*[N: static int, T](a: var array[N, T]) {.inline.} =
  ## Rotate `a` by one position toward the lower indices:
  ## each `a[i]` receives the old `a[i+1]` and the last element receives
  ## the old `a[0]`.
  # Rotate left (Somehow we can't use a generic template here)
  # Inline
  # The hope is that the compiler sees that N rounds of rotation
  # can be optimized away with register renaming
  let tmp = a[0]
  # NOTE(review): presumably `staticFor`'s upper bound is exclusive, so the
  # last iteration reads a[a.len-1] - confirm in ./static_for.
  staticFor i, 0, a.len-1:
    a[i] = a[i+1]
  a[a.len-1] = tmp

# ############################################################
#
#                      Pointer arithmetics
#
# ############################################################

template asUnchecked*[T](a: openArray[T]): ptr UncheckedArray[T] =
  ## View `a` as a `ptr UncheckedArray[T]` by taking the address of `a[0]`.
  # NOTE(review): `a[0]` is evaluated at the call site, so an empty `a`
  # is presumably not a valid input - confirm against callers.
  cast[ptr UncheckedArray[T]](a[0].unsafeAddr)

# Warning for pointer arithmetic via inline C:
# be careful not to pass a `var ptr` to a function,
# as `var` parameters are passed by hidden pointers in Nim
# and the wrong pointer would be modified. Templates are fine.
func `+%`*(p: ptr or pointer, offset: SomeInteger): type(p) {.inline, noInit.}=
  ## Pointer increment
  ##
  ## Implemented by emitting the C statement `result = p + offset;`,
  ## so the step size is whatever the C compiler applies to the C type
  ## of `p`.
  # NOTE(review): for a typed `ptr T` this presumably advances by
  # `offset` elements, and for an untyped `pointer` by `offset` bytes
  # (GCC/Clang extension on `void*`) - confirm against the generated C.
  {.emit: [result, " = ", p, " + ", offset, ";"].}

func `+%=`*(p: var (ptr or pointer), offset: SomeInteger){.inline.}=
  ## In-place pointer increment: `p = p +% offset`.
  p = p +% offset

# ############################################################
#
#                      Prefetching
#
# ############################################################

type
  PrefetchRW* {.size: cint.sizeof.} = enum
    ## Access kind passed as the 2nd argument of `__builtin_prefetch`.
    ## Stored with the size of a C `int`.
    Read = 0
    Write = 1
  PrefetchLocality* {.size: cint.sizeof.} = enum
    ## Temporal locality passed as the 3rd argument of
    ## `__builtin_prefetch`. Stored with the size of a C `int`.
    NoTemporalLocality = 0 # Data can be discarded from CPU cache after access
    LowTemporalLocality = 1
    ModerateTemporalLocality = 2
    HighTemporalLocality = 3 # Data should be left in all levels of cache possible
    # Translation
    # 0 - use no cache eviction level
    # 1 - L1 cache eviction level
    # 2 - L2 cache eviction level
    # 3 - L1 and L2 cache eviction level

when GCC_Compatible:
  # Binding to the C compiler builtin `__builtin_prefetch`.
  proc builtin_prefetch(data: pointer, rw: PrefetchRW, locality: PrefetchLocality) {.importc: "__builtin_prefetch", noDecl.}

template prefetch*(
            data: ptr or pointer,
            rw: static PrefetchRW = Read,
            locality: static PrefetchLocality = HighTemporalLocality) =
  ## Ask the CPU to prefetch the memory at `data`.
  ## Expands to `__builtin_prefetch` on GCC-compatible compilers
  ## and to nothing elsewhere.
  ##
  ## Prefetch examples:
  ## - https://scripts.mit.edu/~birge/blog/accelerating-code-using-gccs-prefetch-extension/
  ## - https://stackoverflow.com/questions/7327994/prefetching-examples
  ## - https://lemire.me/blog/2018/04/30/is-software-prefetching-__builtin_prefetch-useful-for-performance/
  ## - https://www.naftaliharris.com/blog/2x-speedup-with-one-line-of-code/
  when GCC_Compatible:
    builtin_prefetch(data, rw, locality)
  else:
    discard

func prefetchLarge*[T](
      data: ptr T,
      rw: static PrefetchRW = Read,
      locality: static PrefetchLocality = HighTemporalLocality,
      maxCacheLines: static int = 0) {.inline.} =
  ## Prefetch a large value, one 64-byte cache line at a time.
  ##
  ## - `data`: address of the value to prefetch.
  ## - `rw`, `locality`: forwarded unchanged to `prefetch`.
  ## - `maxCacheLines`: upper bound on the number of prefetches issued;
  ##   0 (the default) means "as many as `sizeof(T)` requires".
  ##
  ## The number of lines is `ceil(sizeof(T) / 64)`, so the trailing
  ## partial line of the value is prefetched as well and a value smaller
  ## than 64 bytes still gets one prefetch. Every prefetched address
  ## `data + i*64` stays strictly below `data + sizeof(T)`.
  const CacheLineBytes = 64 # 64 byte cache line
  # Go through an untyped `pointer` so that `+%` is applied to byte offsets.
  let pdata = pointer(data)
  const span = (sizeof(T) + CacheLineBytes - 1) div CacheLineBytes
  const N = if maxCacheLines == 0: span else: min(span, maxCacheLines)
  for i in 0 ..< N:
    prefetch(pdata +% (i*CacheLineBytes), rw, locality)