/ yaml / hints.nim
hints.nim
  1  #            NimYAML - YAML implementation in Nim
  2  #        (c) Copyright 2015-2023 Felix Krause
  3  #
  4  #    See the file "copying.txt", included in this
  5  #    distribution, for details about the copyright.
  6  
  7  ## =================
  8  ## Module yaml/hints
  9  ## =================
 10  ##
 11  ## The hints API enables you to guess the type of YAML scalars.
 12  
 13  import macros
 14  import private/internal
 15  
 16  type
 17    TypeHint* = enum
 18      ## A type hint can be computed from scalar content and tells you what
 19      ## NimYAML thinks the scalar's type is. It is generated by
 20      ## `guessType <#guessType,string>`_ The first matching RegEx
 21      ## in the following table will be the type hint of a scalar string.
 22      ##
 23      ## You can use it to determine the type of YAML scalars that have a '?'
 24      ## non-specific tag, but using this feature is completely optional.
 25      ##
 26      ## See also: https://yaml.org/spec/1.2.2/#103-core-schema
 27      ##
 28      ## ================== =========================
 29      ## Name               RegEx
 30      ## ================== =========================
 31      ## ``yTypeInteger``   ``[-+]? [0-9]+``
 32      ## ``yTypeFloat``     ``[-+]? ( \. [0-9]+ | [0-9]+ ( \. [0-9]* )? ) ( [eE] [-+]? [0-9]+ )?``
 33      ## ``yTypeFloatInf``  ``[-+]? ( \.inf | \.Inf | \.INF )``
 34      ## ``yTypeFloatNaN``  ``\.nan | \.NaN | \.NAN``
 35      ## ``yTypeBoolTrue``  ``true | True | TRUE``
 36      ## ``yTypeBoolFalse`` ``false | False | FALSE``
 37      ## ``yTypeNull``      ``null | Null | NULL | ~``
 38      ## ``yTypeTimestamp`` see `here <http://yaml.org/type/timestamp.html>`_.
 39      ## ``yTypeUnknown``   ``*``
 40      ## ================== =========================
 41      yTypeInteger, yTypeFloat, yTypeFloatInf, yTypeFloatNaN, yTypeBoolTrue,
 42      yTypeBoolFalse, yTypeNull, yTypeUnknown, yTypeTimestamp
 43  
 44    YamlTypeHintState = enum
 45      ythInitial,
 46      ythF, ythFA, ythFAL, ythFALS, ythFALSE,
 47      ythN, ythNU, ythNUL, ythNULL,
 48      ythT, ythTR, ythTRU, ythTRUE,
 49  
 50      ythPoint, ythPointI, ythPointIN, ythPointINF,
 51                ythPointN, ythPointNA, ythPointNAN,
 52  
 53      ythLowerF, ythLowerFA, ythLowerFAL, ythLowerFALS,
 54      ythLowerN, ythLowerNU, ythLowerNUL,
 55      ythLowerT, ythLowerTR, ythLowerTRU,
 56  
 57      ythPointLowerI, ythPointLowerIN,
 58      ythPointLowerN, ythPointLowerNA,
 59  
 60      ythMinus, ythPlus, ythInt1, ythInt2, ythInt3, ythInt4, ythInt,
 61      ythDecimal, ythNumE, ythNumEPlusMinus, ythExponent,
 62  
 63      ythYearMinus, ythMonth1, ythMonth2, ythMonthMinus, ythMonthMinusNoYmd,
 64      ythDay1, ythDay1NoYmd, ythDay2, ythDay2NoYmd,
 65      ythAfterDayT, ythAfterDaySpace, ythHour1, ythHour2, ythHourColon,
 66      ythMinute1, ythMinute2, ythMinuteColon, ythSecond1, ythSecond2, ythFraction,
 67      ythAfterTimeSpace, ythAfterTimeZ, ythAfterTimePlusMinus, ythTzHour1,
 68      ythTzHour2, ythTzHourColon, ythTzMinute1, ythTzMinute2
 69  
 70  macro typeHintStateMachine(c: untyped, content: varargs[untyped]) =
 71    yAssert content.kind == nnkArgList
 72    result = newNimNode(nnkCaseStmt, content).add(copyNimNode(c))
 73    for branch in content.children:
 74      yAssert branch.kind == nnkOfBranch
 75      var
 76        charBranch = newNimNode(nnkOfBranch, branch)
 77        i = 0
 78        stateBranches = newNimNode(nnkCaseStmt, branch).add(
 79            newIdentNode("typeHintState"))
 80      while branch[i].kind != nnkStmtList:
 81        charBranch.add(copyNimTree(branch[i]))
 82        inc(i)
 83      for rule in branch[i].children:
 84        yAssert rule.kind == nnkInfix
 85        yAssert rule[0].strVal == "=>"
 86        var stateBranch = newNimNode(nnkOfBranch, rule)
 87        case rule[1].kind
 88        of nnkBracket:
 89          for item in rule[1].children: stateBranch.add(item)
 90        of nnkIdent: stateBranch.add(rule[1])
 91        else: internalError("Invalid rule kind: " & $rule[1].kind)
 92        if rule[2].kind == nnkNilLit:
 93          stateBranch.add(newStmtList(newNimNode(nnkDiscardStmt).add(
 94                          newEmptyNode())))
 95        else:
 96          stateBranch.add(newStmtList(newAssignment(
 97                          newIdentNode("typeHintState"), copyNimTree(rule[2]))))
 98        stateBranches.add(stateBranch)
 99      stateBranches.add(newNimNode(nnkElse).add(newStmtList(
100          newNimNode(nnkReturnStmt).add(newIdentNode("yTypeUnknown")))))
101      charBranch.add(newStmtList(stateBranches))
102      result.add(charBranch)
103    result.add(newNimNode(nnkElse).add(newStmtList(
104               newNimNode(nnkReturnStmt).add(newIdentNode("yTypeUnknown")))))
105  
template advanceTypeHint(ch: char) {.dirty.} =
  # Feeds one character into the type hint state machine.
  #
  # The template is dirty on purpose: the expansion reads and assigns the
  # variable `typeHintState` of the calling routine and executes
  # `return yTypeUnknown` in it for every (character, state) pair that is not
  # listed below. A rule `a => b` moves from state a to state b, a bracketed
  # list applies the rule to several source states, and `=> nil` stays in the
  # current state.
  typeHintStateMachine ch:
  # ---- digits ------------------------------------------------------------
  of '0'..'9':
    # integers; the first four digits of an unsigned number are counted
    ythInitial                   => ythInt1
    ythInt1                      => ythInt2
    ythInt2                      => ythInt3
    ythInt3                      => ythInt4
    [ythMinus, ythPlus, ythInt4] => ythInt
    # floats
    ythPoint                     => ythDecimal
    [ythNumEPlusMinus, ythNumE]  => ythExponent
    # states in which further digits change nothing
    [ythInt, ythDecimal, ythExponent, ythFraction] => nil
    # timestamp: date
    ythYearMinus                 => ythMonth1
    ythMonth1                    => ythMonth2
    ythMonthMinus                => ythDay1
    ythDay1                      => ythDay2
    ythMonthMinusNoYmd           => ythDay1NoYmd
    ythDay1NoYmd                 => ythDay2NoYmd
    # timestamp: time
    [ythAfterDayT, ythAfterDaySpace] => ythHour1
    ythHour1                     => ythHour2
    ythHourColon                 => ythMinute1
    ythMinute1                   => ythMinute2
    ythMinuteColon               => ythSecond1
    ythSecond1                   => ythSecond2
    # timestamp: time zone
    ythAfterTimePlusMinus        => ythTzHour1
    ythTzHour1                   => ythTzHour2
    ythTzHourColon               => ythTzMinute1
    ythTzMinute1                 => ythTzMinute2

  # ---- punctuation and whitespace -----------------------------------------
  of '+':
    ythInitial => ythPlus
    ythNumE    => ythNumEPlusMinus
    [ythSecond2, ythFraction] => ythAfterTimePlusMinus
  of '-':
    ythInitial => ythMinus
    ythNumE    => ythNumEPlusMinus
    ythInt4    => ythYearMinus          # four digits, then '-': a year
    ythMonth2  => ythMonthMinus
    ythMonth1  => ythMonthMinusNoYmd    # single-digit month
    [ythSecond2, ythFraction] => ythAfterTimePlusMinus
  of '.':
    [ythInitial, ythPlus, ythMinus] => ythPoint
    [ythInt, ythInt1, ythInt2, ythInt3, ythInt4] => ythDecimal
    ythSecond2 => ythFraction
  of '_':
    # underscores are tolerated inside the digits of a number
    [ythInt4, ythInt3, ythInt2, ythInt1] => ythInt
    [ythDecimal, ythInt] => nil
  of ':':
    [ythHour2, ythHour1]     => ythHourColon
    ythMinute2               => ythMinuteColon
    [ythTzHour2, ythTzHour1] => ythTzHourColon
  of '~':
    ythInitial => ythNULL
  of ' ', '\t':
    [ythDay1, ythDay2, ythDay1NoYmd, ythDay2NoYmd] => ythAfterDaySpace
    [ythSecond2, ythFraction]             => ythAfterTimeSpace
    [ythAfterDaySpace, ythAfterTimeSpace] => nil

  # ---- letters of null / true / false / .inf / .nan, exponent, T and Z ----
  of 'a':
    [ythLowerF, ythF] => ythLowerFA
    ythPointLowerN    => ythPointLowerNA
    ythPointN         => ythPointNA
  of 'A':
    ythF      => ythFA
    ythPointN => ythPointNA
  of 'e':
    [ythInt1, ythInt2, ythInt3, ythInt4, ythInt, ythDecimal] => ythNumE
    ythLowerTRU  => ythTRUE
    ythLowerFALS => ythFALSE
  of 'E':
    [ythInt1, ythInt2, ythInt3, ythInt4, ythInt, ythDecimal] => ythNumE
    ythTRU  => ythTRUE
    ythFALS => ythFALSE
  of 'f':
    ythInitial      => ythLowerF
    ythPointLowerIN => ythPointINF
  of 'F':
    ythInitial => ythF
    ythPointIN => ythPointINF
  of 'i':
    ythPoint => ythPointLowerI
  of 'I':
    ythPoint => ythPointI
  of 'l':
    ythLowerFA  => ythLowerFAL
    ythLowerNU  => ythLowerNUL
    ythLowerNUL => ythNULL
  of 'L':
    ythFA  => ythFAL
    ythNU  => ythNUL
    ythNUL => ythNULL
  of 'n':
    ythInitial      => ythLowerN
    ythPoint        => ythPointLowerN
    ythPointLowerNA => ythPointNAN
    [ythPointLowerI, ythPointI] => ythPointLowerIN
  of 'N':
    ythInitial => ythN
    ythPoint   => ythPointN
    ythPointNA => ythPointNAN
    ythPointI  => ythPointIN
  of 'r':
    [ythLowerT, ythT] => ythLowerTR
  of 'R':
    ythT => ythTR
  of 's':
    ythLowerFAL => ythLowerFALS
  of 'S':
    ythFAL => ythFALS
  of 't':
    ythInitial => ythLowerT
    [ythDay1, ythDay2, ythDay1NoYmd, ythDay2NoYmd] => ythAfterDayT
  of 'T':
    ythInitial => ythT
    [ythDay1, ythDay2, ythDay1NoYmd, ythDay2NoYmd] => ythAfterDayT
  of 'u':
    [ythLowerN, ythN] => ythLowerNU
    ythLowerTR        => ythLowerTRU
  of 'U':
    ythN  => ythNU
    ythTR => ythTRU
  of 'Z':
    [ythSecond2, ythFraction, ythAfterTimeSpace] => ythAfterTimeZ
222  
proc guessType*(scalar: string): TypeHint {.raises: [].} =
  ## Guesses the type of ``scalar`` according to the RegEx table documented
  ## at `TypeHint <#TypeHint>`_.
  ##
  ## The scalar is scanned once, character by character. As soon as a
  ## character cannot continue any recognised pattern, ``yTypeUnknown`` is
  ## returned. Otherwise the hint is derived from the state the scan ended
  ## in; an empty ``scalar`` yields ``yTypeNull``.
  # The variable must be named `typeHintState`: the dirty template
  # advanceTypeHint reads and assigns it by name, and may return
  # yTypeUnknown from this proc.
  var typeHintState: YamlTypeHintState = ythInitial
  for c in scalar: advanceTypeHint(c)
  case typeHintState
  of ythNULL, ythInitial: result = yTypeNull
  of ythTRUE: result = yTypeBoolTrue
  of ythFALSE: result = yTypeBoolFalse
  of ythInt1, ythInt2, ythInt3, ythInt4, ythInt: result = yTypeInteger
  of ythDecimal, ythExponent: result = yTypeFloat
  of ythPointINF: result = yTypeFloatInf
  of ythPointNAN: result = yTypeFloatNaN
  # ythTzMinute1 is deliberately not accepted: the minutes of a time zone
  # offset need two digits, just like the minutes and seconds of the time
  # itself (ythMinute1 and ythSecond1 are not accepted either).
  of ythDay2, ythSecond2, ythFraction, ythAfterTimeZ, ythTzHour1, ythTzHour2,
     ythTzMinute2: result = yTypeTimestamp
  else: result = yTypeUnknown