/ test / testEventParser.nim
testEventParser.nim
  1  #            NimYAML - YAML implementation in Nim
  2  #        (c) Copyright 2015-2023 Felix Krause
  3  #
  4  #    See the file "copying.txt", included in this
  5  #    distribution, for details about the copyright.
  6  
  7  import ../yaml, ../yaml/data
  8  import lexbase, streams, tables, strutils
  9  
 10  type
 11    LexerToken = enum
 12      plusStr, minusStr, plusDoc, minusDoc, plusMap, minusMap, plusSeq, minusSeq,
 13      mapBraces, seqBrackets,
 14      eqVal, eqAli, chevTag, andAnchor, starAnchor, colonContent, sqContent,
 15      dqContent, litContent, foContent,
 16      explDirEnd, explDocEnd, noToken
 17  
 18    StreamPos = enum
 19      beforeStream, inStream, afterStream
 20  
 21    EventLexer = object of BaseLexer
 22      content: string
 23  
 24    EventStreamError = object of ValueError
 25  
 26  proc nextToken(lex: var EventLexer): LexerToken =
 27    while true:
 28      case lex.buf[lex.bufpos]
 29      of ' ', '\t': lex.bufpos.inc()
 30      of '\r': lex.bufpos = lex.handleCR(lex.bufpos)
 31      of '\l': lex.bufpos = lex.handleLF(lex.bufpos)
 32      else: break
 33    if lex.buf[lex.bufpos] == EndOfFile: return noToken
 34    case lex.buf[lex.bufpos]
 35    of ':', '"', '\'', '|', '>':
 36      let t = case lex.buf[lex.bufpos]
 37      of ':': colonContent
 38      of '"': dqContent
 39      of '\'': sqContent
 40      of '|': litContent
 41      of '>': foContent
 42      else: colonContent
 43  
 44      lex.content = ""
 45      lex.bufpos.inc()
 46      while true:
 47        case lex.buf[lex.bufpos]
 48        of EndOfFile: break
 49        of '\c':
 50          lex.bufpos = lex.handleCR(lex.bufpos)
 51          break
 52        of '\l':
 53          lex.bufpos = lex.handleLF(lex.bufpos)
 54          break
 55        of '\\':
 56          lex.bufpos.inc()
 57          case lex.buf[lex.bufpos]
 58          of 'n': lex.content.add('\l')
 59          of 'r': lex.content.add('\r')
 60          of '0': lex.content.add('\0')
 61          of 'b': lex.content.add('\b')
 62          of 't': lex.content.add('\t')
 63          of '\\': lex.content.add('\\')
 64          else: raise newException(EventStreamError,
 65                          "Unknown escape character: " & lex.buf[lex.bufpos])
 66        else: lex.content.add(lex.buf[lex.bufpos])
 67        lex.bufpos.inc()
 68      result = t
 69    of '<':
 70      lex.content = ""
 71      lex.bufpos.inc()
 72      while lex.buf[lex.bufpos] != '>':
 73        lex.content.add(lex.buf[lex.bufpos])
 74        lex.bufpos.inc()
 75        if lex.buf[lex.bufpos] == EndOfFile:
 76          raise newException(EventStreamError, "Unclosed tag URI!")
 77      result = chevTag
 78      lex.bufpos.inc()
 79    of '&':
 80      lex.content = ""
 81      lex.bufpos.inc()
 82      while lex.buf[lex.bufpos] notin {' ', '\t', '\r', '\l', EndOfFile}:
 83        lex.content.add(lex.buf[lex.bufpos])
 84        lex.bufpos.inc()
 85      result = andAnchor
 86    of '*':
 87      lex.content = ""
 88      lex.bufpos.inc()
 89      while lex.buf[lex.bufpos] notin {' ', '\t', '\r', '\l', EndOfFile}:
 90        lex.content.add(lex.buf[lex.bufpos])
 91        lex.bufpos.inc()
 92      result = starAnchor
 93    of '{':
 94      lex.bufpos.inc()
 95      if lex.buf[lex.bufpos] == '}':
 96        result = mapBraces
 97      else: raise newException(EventStreamError, "Invalid token: {" & lex.buf[lex.bufpos])
 98      lex.bufpos.inc()
 99    of '[':
100      lex.bufpos.inc()
101      if lex.buf[lex.bufpos] == ']':
102        result = seqBrackets
103      else: raise newException(EventStreamError, "Invalid token: [" & lex.buf[lex.bufpos])
104      lex.bufpos.inc()
105    else:
106      lex.content = ""
107      while lex.buf[lex.bufpos] notin {' ', '\t', '\r', '\l', EndOfFile}:
108        lex.content.add(lex.buf[lex.bufpos])
109        lex.bufpos.inc()
110      case lex.content
111      of "+STR": result = plusStr
112      of "-STR": result = minusStr
113      of "+DOC": result = plusDoc
114      of "-DOC": result = minusDoc
115      of "+MAP": result = plusMap
116      of "-MAP": result = minusMap
117      of "+SEQ": result = plusSeq
118      of "-SEQ": result = minusSeq
119      of "=VAL": result = eqVal
120      of "=ALI": result = eqAli
121      of "---": result = explDirEnd
122      of "...": result = explDocEnd
123      else: raise newException(EventStreamError, "Invalid token: " & lex.content)
124  
template assertInEvent(name: string) {.dirty.} =
  ## Raises `EventStreamError` mentioning `name` unless the instantiating
  ## scope's `inEvent` flag is set.
  if inEvent: discard
  else:
    raise newException(EventStreamError, "Illegal token: " & name)
128  
template yieldEvent() {.dirty.} =
  ## Yields `curEvent` if an event is pending and clears the `inEvent` flag;
  ## does nothing otherwise.
  if inEvent:
    inEvent = false
    yield curEvent
133  
template setTag(t: Tag) {.dirty.} =
  ## Stores `t` as the tag of `curEvent` when it is a scalar, mapping start or
  ## sequence start. Events of any other kind are silently left unchanged.
  case curEvent.kind
  of yamlScalar:
    curEvent.scalarProperties.tag = t
  of yamlStartMap:
    curEvent.mapProperties.tag = t
  of yamlStartSeq:
    curEvent.seqProperties.tag = t
  else:
    discard
140  
template setAnchor(a: Anchor) {.dirty.} =
  ## Stores `a` as the anchor of `curEvent` (for an alias event: as its
  ## target). Events that carry neither are silently left unchanged.
  case curEvent.kind
  of yamlAlias:
    curEvent.aliasTarget = a
  of yamlScalar:
    curEvent.scalarProperties.anchor = a
  of yamlStartMap:
    curEvent.mapProperties.anchor = a
  of yamlStartSeq:
    curEvent.seqProperties.anchor = a
  else:
    discard
148  
template curTag(): Tag =
  ## Evaluates to the tag of `curEvent`.
  ##
  ## Raises `EventStreamError` if `curEvent` is not a scalar, mapping start or
  ## sequence start.
  case curEvent.kind
  of yamlScalar: curEvent.scalarProperties.tag
  of yamlStartMap: curEvent.mapProperties.tag
  of yamlStartSeq: curEvent.seqProperties.tag
  else:
    raise newException(EventStreamError,
                       $curEvent.kind & " may not have a tag")
158  
template setCurTag(val: Tag) =
  ## Stores `val` as the tag of `curEvent`.
  ##
  ## Raises `EventStreamError` if `curEvent` is not a scalar, mapping start or
  ## sequence start.
  case curEvent.kind
  of yamlScalar:
    curEvent.scalarProperties.tag = val
  of yamlStartMap:
    curEvent.mapProperties.tag = val
  of yamlStartSeq:
    curEvent.seqProperties.tag = val
  else:
    raise newException(EventStreamError,
                       $curEvent.kind & " may not have a tag")
166  
template curAnchor(): Anchor =
  ## Evaluates to the anchor of `curEvent`; for an alias event this is the
  ## alias target.
  ##
  ## Raises `EventStreamError` if `curEvent` is of a kind that carries neither.
  var found: Anchor
  case curEvent.kind
  of yamlStartSeq: found = curEvent.seqProperties.anchor
  of yamlStartMap: found = curEvent.mapProperties.anchor
  of yamlScalar: found = curEvent.scalarProperties.anchor
  of yamlAlias: found = curEvent.aliasTarget
  else:
    # The leading space keeps the event kind and the text apart, matching
    # the message produced by `setCurAnchor`.
    raise newException(EventStreamError,
                       $curEvent.kind & " may not have an anchor")
  found
177  
template setCurAnchor(val: Anchor) =
  ## Stores `val` as the anchor of `curEvent`; for an alias event it becomes
  ## the alias target.
  ##
  ## Raises `EventStreamError` if `curEvent` is of a kind that carries neither.
  case curEvent.kind
  of yamlAlias:
    curEvent.aliasTarget = val
  of yamlScalar:
    curEvent.scalarProperties.anchor = val
  of yamlStartMap:
    curEvent.mapProperties.anchor = val
  of yamlStartSeq:
    curEvent.seqProperties.anchor = val
  else:
    raise newException(EventStreamError,
                       $curEvent.kind & " may not have an anchor")
186  
template eventStart(k: EventKind) {.dirty.} =
  ## Begins a new pending event of kind `k`.
  ##
  ## Before the stream has started, an implicit `yamlStartStream` event is
  ## yielded first; otherwise the previously pending event (if any) is
  ## yielded. The new event starts out with `yTagQuestionMark` as tag and
  ## `yAnchorNone` as anchor where its kind has those properties.
  case streamPos
  of beforeStream:
    yield Event(kind: yamlStartStream)
    streamPos = inStream
  of inStream, afterStream:
    yieldEvent()
  curEvent = Event(kind: k)
  setTag(yTagQuestionMark)
  setAnchor(yAnchorNone)
  inEvent = true
196  
template assertEventKind(expected: EventKind, name, errMsg: string) {.dirty.} =
  ## Raises `EventStreamError` unless an event of kind `expected` is pending.
  ## Must run before any kind-specific field of `curEvent` is written, so
  ## that malformed input is reported as `EventStreamError` rather than
  ## tripping the variant field check.
  assertInEvent(name)
  if curEvent.kind != expected:
    raise newException(EventStreamError, errMsg)

proc parseEventStream*(input: Stream): YamlStream =
  ## Builds a `YamlStream` from the textual event notation read from `input`
  ## (`+STR`, `+DOC`, `=VAL :content`, ...).
  ##
  ## The input is tokenized inside the iterator handed to `initYamlStream`.
  ## If the input has no `+STR`, a `yamlStartStream` event is emitted before
  ## the first event; if it has no `-STR`, a `yamlEndStream` event is emitted
  ## after the last one.
  ##
  ## The iterator raises `EventStreamError` for malformed input, e.g. unknown
  ## tokens, properties or content attached to an event kind that cannot
  ## carry them, duplicate tags / anchors, or misplaced `+STR` / `-STR`.
  var backend = iterator(): Event =
    var lex: EventLexer
    lex.open(input)
    var
      inEvent = false
      curEvent: Event
      streamPos: StreamPos = beforeStream
    while lex.buf[lex.bufpos] != EndOfFile:
      case lex.nextToken()
      of plusStr:
        if streamPos != beforeStream:
          raise newException(EventStreamError, "Illegal +STR")
        # Advance first so that eventStart does not emit an implicit
        # stream start in addition to the explicit one.
        streamPos = inStream
        eventStart(yamlStartStream)
      of minusStr:
        if streamPos != inStream:
          raise newException(EventStreamError, "Illegal -STR")
        streamPos = afterStream
        eventStart(yamlEndStream)
      of plusDoc: eventStart(yamlStartDoc)
      of minusDoc: eventStart(yamlEndDoc)
      of plusMap: eventStart(yamlStartMap)
      of minusMap: eventStart(yamlEndMap)
      of plusSeq: eventStart(yamlStartSeq)
      of minusSeq: eventStart(yamlEndSeq)
      of eqVal: eventStart(yamlScalar)
      of eqAli: eventStart(yamlAlias)
      of mapBraces:
        assertEventKind(yamlStartMap, "braces",
                        "Unexpected braces in " & $curEvent.kind)
        curEvent.mapStyle = csFlow
      of seqBrackets:
        assertEventKind(yamlStartSeq, "brackets",
                        "Unexpected brackets in " & $curEvent.kind)
        curEvent.seqStyle = csFlow
      of chevTag:
        assertInEvent("tag")
        if curTag() != yTagQuestionMark:
          raise newException(EventStreamError,
                             "Duplicate tag in " & $curEvent.kind)
        setCurTag(Tag(lex.content))
      of andAnchor:
        assertInEvent("anchor")
        if curAnchor() != yAnchorNone:
          raise newException(EventStreamError,
                             "Duplicate anchor in " & $curEvent.kind)
        setCurAnchor(lex.content.Anchor)
      of starAnchor:
        assertInEvent("alias")
        if curEvent.kind != yamlAlias:
          raise newException(EventStreamError, "Unexpected alias: " &
              escape(lex.content))
        elif curEvent.aliasTarget != yAnchorNone:
          raise newException(EventStreamError, "Duplicate alias target: " &
              escape(lex.content))
        else:
          curEvent.aliasTarget = lex.content.Anchor
      of colonContent:
        assertEventKind(yamlScalar, "scalar content",
                        "scalar content in non-scalar tag")
        curEvent.scalarContent = lex.content
      of sqContent:
        assertEventKind(yamlScalar, "scalar content",
                        "scalar content in non-scalar tag")
        curEvent.scalarContent = lex.content
        # Quoted scalars without an explicit tag get the `!` tag.
        if curTag() == yTagQuestionMark: setCurTag(yTagExclamationMark)
        curEvent.scalarStyle = ssSingleQuoted
      of dqContent:
        assertEventKind(yamlScalar, "scalar content",
                        "scalar content in non-scalar tag")
        curEvent.scalarContent = lex.content
        if curTag() == yTagQuestionMark: setCurTag(yTagExclamationMark)
        curEvent.scalarStyle = ssDoubleQuoted
      of litContent:
        assertEventKind(yamlScalar, "scalar content",
                        "scalar content in non-scalar tag")
        curEvent.scalarContent = lex.content
        curEvent.scalarStyle = ssLiteral
      of foContent:
        assertEventKind(yamlScalar, "scalar content",
                        "scalar content in non-scalar tag")
        curEvent.scalarContent = lex.content
        curEvent.scalarStyle = ssFolded
      of explDirEnd:
        assertInEvent("explicit directives end")
        if curEvent.kind != yamlStartDoc:
          raise newException(EventStreamError,
                             "Unexpected explicit directives end")
      of explDocEnd:
        if curEvent.kind != yamlEndDoc:
          raise newException(EventStreamError,
                             "Unexpected explicit document end")
      of noToken: discard
    # Flush the last pending event and close the stream implicitly if the
    # input did not contain -STR.
    yieldEvent()
    if streamPos == inStream:
      yield Event(kind: yamlEndStream)
  result = initYamlStream(backend)
292    
proc parseEventString*(input: sink string): YamlStream =
  ## Convenience wrapper around `parseEventStream` for event notation that is
  ## held in a string: wraps `input` in a string stream and parses that.
  parseEventStream(newStringStream(input))