/ constantine / serialization / codecs.nim
codecs.nim
  1  # Constantine
  2  # Copyright (c) 2018-2019    Status Research & Development GmbH
  3  # Copyright (c) 2020-Present Mamy André-Ratsimbazafy
  4  # Licensed and distributed under either of
  5  #   * MIT license (license terms in the root directory or at http://opensource.org/licenses/MIT).
  6  #   * Apache v2 license (license terms in the root directory or at http://www.apache.org/licenses/LICENSE-2.0).
  7  # at your option. This file may not be copied, modified, or distributed except according to those terms.
  8  
  9  import ../platforms/abstractions
 10  
 11  # ############################################################
 12  #
 13  #                         Codecs
 14  #
 15  # ############################################################
 16  
 17  template sw(a: auto): SecretWord = SecretWord(a)
 18  template ssw(a: auto): SignedSecretWord = SignedSecretWord(a)
 19  
 20  # ############################################################
 21  #
 22  #                      Hexadecimal
 23  #
 24  # ############################################################
 25  
 26  func readHexChar(c: char): SecretWord {.inline.} =
 27    ## Converts an hex char to an int
 28    const OOR = ssw 256        # Push chars out-of-range
 29    var c = ssw(c) + OOR
 30  
 31    # '0' -> '9' maps to [0, 9]
 32    c.csub(OOR + ssw('0') - ssw  0, c.isInRangeMask(ssw('0') + OOR, ssw('9') + OOR))
 33    # 'A' -> 'Z' maps to [10, 16)
 34    c.csub(OOR + ssw('A') - ssw 10, c.isInRangeMask(ssw('A') + OOR, ssw('Z') + OOR))
 35    # 'a' -> 'z' maps to [10, 16)
 36    c.csub(OOR + ssw('a') - ssw 10, c.isInRangeMask(ssw('a') + OOR, ssw('z') + OOR))
 37  
 38    c = c and ssw(0xF) # Prevent overflow of invalid inputs
 39    return sw(c)
 40  
 41  func paddedFromHex*(output: var openArray[byte], hexStr: string, order: static[Endianness]) =
 42    ## Read a hex string and store it in a byte array `output`.
 43    ## The string may be shorter than the byte array.
 44    ##
 45    ## The source string must be hex big-endian.
 46    ## The destination array can be big or little endian
 47    ##
 48    ## Only characters accepted are 0x or 0X prefix
 49    ## and 0-9,a-f,A-F in particular spaces and _ are not valid.
 50    ##
 51    ## Procedure is constant-time except for the presence (or absence) of the 0x prefix.
 52    ##
 53    ## This procedure is intended for configuration, prototyping, research and debugging purposes.
 54    ## You MUST NOT use it for production.
 55  
 56    var
 57      skip = Zero
 58      dstIdx: int
 59      shift = 4
 60  
 61    if hexStr.len >= 2:
 62      skip = sw(2)*(
 63        sw(hexStr[0] == '0') and
 64        (sw(hexStr[1] == 'x') or sw(hexStr[1] == 'X'))
 65      )
 66  
 67    let maxStrSize = output.len * 2
 68    let size = hexStr.len - skip.int
 69  
 70    doAssert size <= maxStrSize, "size: " & $size & ", maxSize: " & $maxStrSize
 71  
 72    if size < maxStrSize:
 73      # include extra byte if odd length
 74      dstIdx = output.len - (size + 1) shr 1
 75      # start with shl of 4 if length is even
 76      shift = 4 - (size and 1) * 4
 77  
 78    for srcIdx in skip.int ..< hexStr.len:
 79      let c = hexStr[srcIdx]
 80      let nibble = byte(c.readHexChar() shl shift)
 81      when order == bigEndian:
 82        output[dstIdx] = output[dstIdx] or nibble
 83      else:
 84        output[output.high - dstIdx] = output[output.high - dstIdx] or nibble
 85      shift = (shift + 4) and 4
 86      dstIdx += shift shr 2
 87  
 88  func toHex*(bytes: openarray[byte]): string =
 89    ## Convert a byte-array to its hex representation
 90    ## Output is in lowercase and prefixed with 0x
 91    const hexChars = "0123456789abcdef"
 92    result = newString(2 + 2 * bytes.len)
 93    result[0] = '0'
 94    result[1] = 'x'
 95    for i in 0 ..< bytes.len:
 96      let bi = bytes[i]
 97      result[2 + 2*i] = hexChars.secretLookup(SecretWord bi shr 4 and 0xF)
 98      result[2 + 2*i+1] = hexChars.secretLookup(SecretWord bi and 0xF)
 99  
func fromHex*(dst: var openArray[byte], hex: string) =
  ## Decode the hex string `hex` into `dst`, in big-endian byte order.
  ## Thin wrapper over `paddedFromHex`; see it for the accepted input
  ## format and size constraints.
  paddedFromHex(dst, hex, bigEndian)
102  
func fromHex*[N: static int](T: type array[N, byte], hex: string): T =
  ## Decode the hex string `hex` into a new `array[N, byte]`,
  ## in big-endian byte order.
  ## Thin wrapper over `paddedFromHex`; see it for the accepted input
  ## format and size constraints.
  paddedFromHex(result, hex, bigEndian)
105  
106  
107  # ############################################################
108  #
109  #                        Base64
110  #
111  # ############################################################
112  
func base64_decode(
       dst: var openArray[byte],
       src: openArray[char]): int {.used.} =
  ## Decode a Base64 string/bytearray input into
  ## an octet string
  ## This procedure is constant-time, except for new lines, padding and invalid base64 characters
  ##
  ## Characters are consumed in groups of 4 (sextets and '=' padding).
  ## Any other character (whitespace, newlines, invalid bytes) is skipped
  ## and does not take a position in its group.
  ## If the input ends in the middle of a group, the missing positions
  ## are handled like padding.
  ##
  ## Returns -1 if the buffer is too small
  ## or the number of bytes written.
  ## Bytes are written from the start of the buffer
  ## (on a -1 return, `dst` has been filled up to its capacity).

  # TODO: unexposed, missing comprehensive test suite.

  const OOR = ssw 256        # Push chars out-of-range

  var s, d = 0

  while s < src.len:
    # Declared inside the loop so that every group starts from zeroed
    # sextets: nothing leaks from the previous group into padded positions.
    var vals: array[4, SecretWord]
    var bytes: array[3, byte]
    var slots = 0            # positions of the current group already filled
    var padding = 0          # number of '=' seen in the current group

    while slots < 4 and s < src.len:
      let ch = src[s]
      s += 1

      # '=' is padding: it occupies a position but carries no data
      if ch == '=':
        padding += 1
        slots += 1
        continue

      var c = ssw(ch) + OOR

      # 'A' -> 'Z' maps to [0, 26)
      c.csub(OOR + ssw('A'),          c.isInRangeMask(ssw('A') + OOR, ssw('Z') + OOR))
      # 'a' -> 'z' maps to [26, 52)
      c.csub(OOR + ssw('a') - ssw 26, c.isInRangeMask(ssw('a') + OOR, ssw('z') + OOR))
      # '0' -> '9' maps to [52, 62)
      c.csub(OOR + ssw('0') - ssw 52, c.isInRangeMask(ssw('0') + OOR, ssw('9') + OOR))
      # '+' maps to 62
      c.csub(OOR + ssw('+') - ssw 62, c.isInRangeMask(ssw('+') + OOR, ssw('+') + OOR))
      # '/' maps to 63
      c.csub(OOR + ssw('/') - ssw 63, c.isInRangeMask(ssw('/') + OOR, ssw('/') + OOR))

      # https://www.rfc-editor.org/rfc/rfc7468#section-2
      # "Furthermore, parsers SHOULD ignore whitespace and other non-
      #  base64 characters and MUST handle different newline conventions."
      #
      # Unfortunately, there is no way to deal with newlines, padding and invalid characters
      # without revealing that they exist when we do not increment the destination index
      #
      # A character that was not remapped is still >= OOR: skip it
      # without consuming a position in the group.
      if c.int >= OOR.int:
        continue

      vals[slots] = SecretWord(c)
      slots += 1

    bytes[0] = byte((vals[0] shl 2) or (vals[1] shr 4))
    bytes[1] = byte((vals[1] shl 4) or (vals[2] shr 2))
    bytes[2] = byte((vals[2] shl 6) or  vals[3]       )

    # Positions never filled because the input ended count as padding.
    # The count may be <= 0 (e.g. only skipped characters remained),
    # in which case nothing is written.
    let missing = 4 - slots
    let produced = 3 - padding - missing

    for i in 0 ..< produced:
      if d >= dst.len:
        return -1
      dst[d] = bytes[i]
      d += 1
  return d