/ constantine / platforms / primitives.nim
primitives.nim
  1  # Constantine
  2  # Copyright (c) 2018-2019    Status Research & Development GmbH
  3  # Copyright (c) 2020-Present Mamy André-Ratsimbazafy
  4  # Licensed and distributed under either of
  5  #   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
  6  #   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
  7  # at your option. This file may not be copied, modified, or distributed except according to those terms.
  8  
  9  import
 10    ./config,
 11    constant_time/[
 12      ct_types,
 13      ct_routines,
 14      multiplexers,
 15      ct_division
 16    ],
 17    intrinsics/[
 18      addcarry_subborrow,
 19      extended_precision
 20    ],
 21    ./bithacks,
 22    ./static_for,
 23    ./allocs
 24  
 25  export
 26    config,
 27    ct_types,
 28    ct_routines,
 29    multiplexers,
 30    addcarry_subborrow,
 31    extended_precision,
 32    ct_division,
 33    bithacks,
 34    staticFor,
 35    allocs
 36  
# Note:
# - cpuinfo_x86 (imported below) initializes globals by running CPU feature detection.
#   This will impact benchmarks that do not need it, such as the threadpool.
 40  
 41  when X86 and GCC_Compatible:
 42    import isa/[cpuinfo_x86, macro_assembler_x86]
 43    export cpuinfo_x86, macro_assembler_x86
 44  
 45  # No exceptions allowed in core cryptographic operations
 46  {.push raises: [].}
 47  {.push checks: off.}
 48  
 49  # ############################################################
 50  #
 51  #                      Instrumentation
 52  #
 53  # ############################################################
 54  
 55  template debug*(body: untyped): untyped =
 56    when defined(CTT_DEBUG):
 57      body
 58  
 59  proc builtin_unreachable(){.nodecl, importc: "__builtin_unreachable".}
 60  
 61  func unreachable*() {.noReturn, inline.} =
 62    doAssert false, "Unreachable"
 63    when GCC_Compatible:
 64      builtin_unreachable()
 65  
 66  # ############################################################
 67  #
 68  #                       Arithmetic
 69  #
 70  # ############################################################
 71  
 72  func ceilDiv_vartime*(a, b: auto): auto {.inline.} =
 73    ## ceil division, to be used only on length or at compile-time
 74    ## ceil(a / b)
 75    # "LengthInDigits: static int" doesn't match "int"
 76    # if "SomeInteger" is used instead of "auto"
 77    (a + b - 1) div b
 78  
 79  # ############################################################
 80  #
 81  #                         Buffers
 82  #
 83  # ############################################################
 84  
 85  func setZero*(a: var openArray[SomeNumber]){.inline.} =
 86    for i in 0 ..< a.len:
 87      a[i] = 0
 88  
 89  func setOne*(a: var openArray[SomeNumber]){.inline.} =
 90    a[0] = 1
 91    for i in 1 ..< a.len:
 92      a[i] = 0
 93  
 94  func asBytes*(s: static string): auto =
 95    ## Reinterpret a compile-time string as an array of bytes
 96    const N = s.len
 97    var r: array[N, byte]
 98    for i in 0 ..< s.len:
 99      r[i] = byte s[i]
100    return r
101  
func rawCopy*(
       dst: var openArray[byte],
       dStart: SomeInteger,
       src: openArray[byte],
       sStart: SomeInteger,
       len: SomeInteger) {.inline.} =
  ## Byte-by-byte copy:
  ##   dst[dStart ..< dStart+len] = src[sStart ..< sStart+len]
  ##
  ## Unlike the standard library, this cannot throw, not even a defect:
  ## the bounds are only verified inside `debug:` blocks, and
  ## runtime checks are switched off for the copy loop.
  debug:
    doAssert(
      0 <= dStart and int(dStart+len) <= dst.len,
      "dStart: " & $dStart & ", dStart+len: " & $(dStart+len) & ", dst.len: " & $dst.len)
    doAssert(
      0 <= sStart and int(sStart+len) <= src.len,
      "sStart: " & $sStart & ", sStart+len: " & $(sStart+len) & ", src.len: " & $src.len)

  {.push checks: off.} # No OverflowError or IndexError allowed
  for offset in 0 ..< len:
    dst[dStart + offset] = src[sStart + offset]
118  
func rotateRight*[N: static int, T](a: var array[N, T]) {.inline.} =
  ## Rotate `a` right by one position, in place:
  ## the last element is saved, each a[i] receives a[i-1],
  ## and the saved element is stored into a[0].
  #
  # (Somehow we can't use a generic template here.)
  # Inlined, with the loop driven by `staticForCountdown`, in the hope
  # that the compiler sees that N rounds of rotation can be optimized
  # away with register renaming.
  # NOTE(review): presumably `staticForCountdown i, a.len-1, 1` visits
  # i = a.len-1 down to 1 inclusive - confirm against static_for.
  let wrapped = a[a.high]
  staticForCountdown i, a.len-1, 1:
    a[i] = a[i-1]
  a[0] = wrapped
128  
func rotateLeft*[N: static int, T](a: var array[N, T]) {.inline.} =
  ## Rotate `a` left by one position, in place:
  ## the first element is saved, each a[i] receives a[i+1],
  ## and the saved element is stored into the last slot.
  #
  # (Somehow we can't use a generic template here.)
  # Inlined, with the loop driven by `staticFor`, in the hope
  # that the compiler sees that N rounds of rotation can be optimized
  # away with register renaming.
  # NOTE(review): presumably `staticFor i, 0, a.len-1` excludes the
  # upper bound, so a[i+1] stays in range - confirm against static_for.
  let wrapped = a[0]
  staticFor i, 0, a.len-1:
    a[i] = a[i+1]
  a[a.high] = wrapped
138  
139  # ############################################################
140  #
141  #                    Pointer arithmetics
142  #
143  # ############################################################
144  
template asUnchecked*[T](a: openArray[T]): ptr UncheckedArray[T] =
  ## View the storage of `a` as a `ptr UncheckedArray[T]`.
  ## The pointer is the address of `a[0]`, so `a` is indexed at 0
  ## to obtain it.
  cast[ptr UncheckedArray[T]](unsafeAddr(a[0]))
147  
# Warning for pointer arithmetic via inline C:
# be careful not to pass a `var ptr` to a function,
# as `var` parameters are passed by hidden pointer in Nim
# and the wrong pointer would be modified. Templates are fine.
152  
func `+%`*(p: ptr or pointer, offset: SomeInteger): typeof(p) {.noInit, inline.} =
  ## Pointer increment.
  ##
  ## Implemented with inline C: the emitted statement is
  ## `result = p + offset;`, so the step size follows C pointer
  ## arithmetic on the C type generated for `p`.
  # NOTE(review): for an untyped `pointer` this presumably relies on the
  # compiler accepting arithmetic on `void*` - confirm for the targets used.
  {.emit: [result, " = ", p, " + ", offset, ";"].}
156  
func `+%=`*(p: var (ptr or pointer), offset: SomeInteger) {.inline.} =
  ## In-place pointer increment: `p` is replaced by `p +% offset`.
  # The new address is computed from the current value of `p`
  # and only then assigned back to `p`.
  let advanced = p +% offset
  p = advanced
160  
161  # ############################################################
162  #
163  #                       Prefetching
164  #
165  # ############################################################
166  
type
  # Access intent passed to `prefetch`; stored with the size of a C int.
  PrefetchRW* {.size: sizeof(cint).} = enum
    Read = 0
    Write = 1

  # Temporal locality hint passed to `prefetch`; stored with the size of a C int.
  PrefetchLocality* {.size: sizeof(cint).} = enum
    # Data can be discarded from CPU cache after access
    NoTemporalLocality = 0
    LowTemporalLocality = 1
    ModerateTemporalLocality = 2
    # Data should be left in all levels of cache possible
    HighTemporalLocality = 3
    # Translation
    # 0 - use no cache eviction level
    # 1 - L1 cache eviction level
    # 2 - L2 cache eviction level
    # 3 - L1 and L2 cache eviction level
181  
when GCC_Compatible:
  # Binding to the C compiler builtin `__builtin_prefetch`.
  # Declared only for GCC-compatible compilers; `noDecl` means Nim
  # does not generate a C declaration for it.
  proc builtin_prefetch(
         data: pointer,
         rw: PrefetchRW,
         locality: PrefetchLocality) {.noDecl, importc: "__builtin_prefetch".}
184  
template prefetch*(
            data: ptr or pointer,
            rw: static PrefetchRW = Read,
            locality: static PrefetchLocality = HighTemporalLocality) =
  ## Issue a prefetch for the memory at `data`.
  ##
  ## - `rw`: whether the data is about to be read or written (default: `Read`)
  ## - `locality`: temporal locality hint (default: `HighTemporalLocality`)
  ##
  ## Forwards to `__builtin_prefetch` on GCC-compatible compilers
  ## and does nothing on any other compiler.
  ##
  ## Prefetch examples:
  ##   - https://scripts.mit.edu/~birge/blog/accelerating-code-using-gccs-prefetch-extension/
  ##   - https://stackoverflow.com/questions/7327994/prefetching-examples
  ##   - https://lemire.me/blog/2018/04/30/is-software-prefetching-__builtin_prefetch-useful-for-performance/
  ##   - https://www.naftaliharris.com/blog/2x-speedup-with-one-line-of-code/
  when not GCC_Compatible:
    discard
  else:
    builtin_prefetch(data, rw, locality)
198  
func prefetchLarge*[T](
        data: ptr T,
        rw: static PrefetchRW = Read,
        locality: static PrefetchLocality = HighTemporalLocality,
        maxCacheLines: static int = 0) {.inline.} =
  ## Prefetch a large value, one 64-byte cache line at a time.
  ##
  ## - `rw`, `locality`: forwarded unchanged to `prefetch`
  ## - `maxCacheLines`: upper bound on the number of lines prefetched;
  ##   0 (the default) means no bound besides the size of `T`.
  #
  # NOTE(review): the line count is `sizeof(T) div 64` (rounded down),
  # so a `T` smaller than 64 bytes triggers no prefetch and a trailing
  # partial line is not prefetched - confirm this is intended.
  const cacheLineBytes = 64 # 64 byte cache line
  const fullLines = sizeof(T) div cacheLineBytes
  const lineCount =
    if maxCacheLines == 0: fullLines
    else: min(fullLines, maxCacheLines)
  let base = pointer(data)
  for line in 0 ..< lineCount:
    prefetch(base +% (line*cacheLineBytes), rw, locality)