/ test / tlex.nim
tlex.nim
  1  #            NimYAML - YAML implementation in Nim
  2  #        (c) Copyright 2015-2023 Felix Krause
  3  #
  4  #    See the file "copying.txt", included in this
  5  #    distribution, for details about the copyright.
  6  
  7  import ../yaml/private/lex
  8  
  9  import unittest, strutils
 10  
 11  const
 12    tokensWithValue =
 13      {Token.Plain, Token.SingleQuoted, Token.DoubleQuoted, Token.Literal,
 14       Token.Folded, Token.Suffix, Token.VerbatimTag,
 15       Token.UnknownDirective}
 16    tokensWithFullLexeme =
 17      {Token.DirectiveParam, Token.TagHandle}
 18    tokensWithShortLexeme = {Token.Anchor, Token.Alias}
 19  
 20  
type
  # Description of one token the lexer is expected to emit: the token kind
  # plus, depending on that kind, the payload to compare against.
  TokenWithValue = object
    case kind: Token
    of tokensWithValue:
      # expected content, checked against `lex.evaluated`
      value: string
    of tokensWithFullLexeme:
      # expected text, checked against `lex.fullLexeme()`
      lexeme: string
    of tokensWithShortLexeme:
      # expected text, checked against `lex.shortLexeme()`
      slexeme: string
    of Indentation:
      # expected result of `lex.currentIndentation()`
      indentation: int
    # every other token kind is compared by kind only
    else: discard
 33  
 34  proc actualRepr(lex: Lexer, t: Token): string =
 35    result = $t
 36    case t
 37    of tokensWithValue + {Token.TagHandle}:
 38      result.add("(" & escape(lex.evaluated) & ")")
 39    of Indentation:
 40      result.add("(" & $lex.currentIndentation() & ")")
 41    else: discard
 42  
 43  proc assertEquals(input: string, expected: varargs[TokenWithValue]) =
 44    var
 45      lex: Lexer
 46      i = 0
 47    lex.init(input)
 48    for expectedToken in expected:
 49      inc(i)
 50      try:
 51        lex.next()
 52        doAssert lex.cur == expectedToken.kind, "Wrong token kind at #" & $i &
 53            ": Expected " & $expectedToken.kind & ", got " &
 54            lex.actualRepr(lex.cur)
 55        case expectedToken.kind
 56        of tokensWithValue:
 57          doAssert lex.evaluated == expectedToken.value, "Wrong token content at #" &
 58              $i & ": Expected " & escape(expectedToken.value) &
 59              ", got " & escape(lex.evaluated)
 60        of tokensWithFullLexeme:
 61          doAssert lex.fullLexeme() == expectedToken.lexeme, "Wrong token lexeme at #" &
 62              $i & ": Expected" & escape(expectedToken.lexeme) &
 63              ", got " & escape(lex.fullLexeme())
 64        of tokensWithShortLexeme:
 65          doAssert lex.shortLexeme() == expectedToken.slexeme, "Wrong token slexeme at #" &
 66              $i & ": Expected" & escape(expectedToken.slexeme) &
 67              ", got " & escape(lex.shortLexeme())
 68        of Indentation:
 69          doAssert lex.currentIndentation() == expectedToken.indentation,
 70              "Wrong indentation length at #" & $i & ": Expected " &
 71              $expectedToken.indentation & ", got " & $lex.currentIndentation()
 72        else: discard
 73      except LexerError as e:
 74        echo "Error at line", e.line, ", column", e.column, ":", e.msg
 75        echo e.lineContent
 76        assert false
 77  
 78  proc i(indent: int): TokenWithValue =
 79    TokenWithValue(kind: Token.Indentation, indentation: indent)
 80  proc pl(v: string): TokenWithValue =
 81    TokenWithValue(kind: Token.Plain, value: v)
 82  proc sq(v: string): TokenWithValue =
 83    TokenWithValue(kind: Token.SingleQuoted, value: v)
 84  proc dq(v: string): TokenWithValue =
 85    TokenWithValue(kind: Token.DoubleQuoted, value: v)
 86  proc e(): TokenWithValue = TokenWithValue(kind: Token.StreamEnd)
 87  proc mk(): TokenWithValue = TokenWithValue(kind: Token.MapKeyInd)
 88  proc mv(): TokenWithValue = TokenWithValue(kind: Token.MapValueInd)
 89  proc si(): TokenWithValue = TokenWithValue(kind: Token.SeqItemInd)
 90  proc dy(): TokenWithValue = TokenWithValue(kind: Token.YamlDirective)
 91  proc dt(): TokenWithValue = TokenWithValue(kind: Token.TagDirective)
 92  proc du(v: string): TokenWithValue =
 93    TokenWithValue(kind: Token.UnknownDirective, value: v)
 94  proc dp(v: string): TokenWithValue =
 95    TokenWithValue(kind: Token.DirectiveParam, lexeme: v)
 96  proc th(v: string): TokenWithValue =
 97    TokenWithValue(kind: Token.TagHandle, lexeme: v)
 98  proc ts(v: string): TokenWithValue =
 99    TokenWithValue(kind: Token.Suffix, value: v)
100  proc tv(v: string): TokenWithValue =
101    TokenWithValue(kind: Token.VerbatimTag, value: v)
proc dirE(): TokenWithValue =
  ## Expected `Token.DirectivesEnd` (no payload).
  TokenWithValue(kind: Token.DirectivesEnd)

proc docE(): TokenWithValue =
  ## Expected `Token.DocumentEnd` (no payload).
  TokenWithValue(kind: Token.DocumentEnd)

proc ls(v: string): TokenWithValue =
  ## Expected `Token.Literal` with content `v`.
  TokenWithValue(kind: Token.Literal, value: v)

proc fs(v: string): TokenWithValue =
  ## Expected `Token.Folded` with content `v`.
  TokenWithValue(kind: Token.Folded, value: v)

proc ss(): TokenWithValue =
  ## Expected `Token.SeqStart` (no payload).
  TokenWithValue(kind: Token.SeqStart)

proc se(): TokenWithValue =
  ## Expected `Token.SeqEnd` (no payload).
  TokenWithValue(kind: Token.SeqEnd)

proc ms(): TokenWithValue =
  ## Expected `Token.MapStart` (no payload).
  TokenWithValue(kind: Token.MapStart)

proc me(): TokenWithValue =
  ## Expected `Token.MapEnd` (no payload).
  TokenWithValue(kind: Token.MapEnd)

proc sep(): TokenWithValue =
  ## Expected `Token.SeqSep` (no payload).
  TokenWithValue(kind: Token.SeqSep)

proc an(v: string): TokenWithValue =
  ## Expected `Token.Anchor` whose short lexeme is `v`.
  TokenWithValue(kind: Token.Anchor, slexeme: v)

proc al(v: string): TokenWithValue =
  ## Expected `Token.Alias` whose short lexeme is `v`.
  TokenWithValue(kind: Token.Alias, slexeme: v)
113  
suite "Lexer":
  # Each test passes one input snippet to `assertEquals` together with the
  # exact token sequence the lexer is expected to produce for it. Expected
  # tokens are grouped so that each group starts at an indentation token
  # or document marker where the input has one.

  test "Empty document":
    assertEquals("", e())

  test "Single-line scalar":
    assertEquals("scalar",
        i(0), pl("scalar"), e())

  test "Multiline scalar":
    assertEquals("scalar\l  line two",
        i(0), pl("scalar line two"), e())

  test "Single-line mapping":
    assertEquals("key: value",
        i(0), pl("key"), mv(), pl("value"), e())

  test "Multiline mapping":
    assertEquals("key:\n  value",
        i(0), pl("key"), mv(),
        i(2), pl("value"), e())

  test "Explicit mapping":
    assertEquals("? key\n: value",
        i(0), mk(), pl("key"),
        i(0), mv(), pl("value"), e())

  test "Sequence":
    assertEquals("- a\n- b",
        i(0), si(), pl("a"),
        i(0), si(), pl("b"), e())

  test "Single-line single-quoted scalar":
    assertEquals("'quoted  scalar'",
        i(0), sq("quoted  scalar"), e())

  test "Multiline single-quoted scalar":
    assertEquals("'quoted\l  multi line  \l\lscalar'",
        i(0), sq("quoted multi line\lscalar"), e())

  test "Single-line double-quoted scalar":
    assertEquals("\"quoted  scalar\"",
        i(0), dq("quoted  scalar"), e())

  test "Multiline double-quoted scalar":
    assertEquals("\"quoted\l  multi line  \l\lscalar\"",
        i(0), dq("quoted multi line\lscalar"), e())

  test "Escape sequences":
    # The input is a raw (triple-quoted) literal, so the backslash escapes
    # reach the lexer unprocessed.
    assertEquals(""""\n\x31\u0032\U00000033"""",
        i(0), dq("\l123"), e())

  test "Directives":
    assertEquals("%YAML 1.2\n---\n%TAG\n...\n\n%TAG ! example.html",
        dy(), dp("1.2"), dirE(),
        i(0), pl("%TAG"), docE(),
        dt(), th("!"), ts("example.html"), e())

  test "Markers and Unknown Directive":
    assertEquals("---\n---\n...\n%UNKNOWN warbl",
        dirE(), dirE(), docE(),
        du("UNKNOWN"), dp("warbl"), e())

  test "Block scalar":
    assertEquals("|\l  a\l\l  b\l # comment",
        i(0), ls("a\l\lb\l"), e())

  test "Block Scalars":
    assertEquals("one : >2-\l   foo\l  bar\ltwo: |+\l bar\l  baz",
        i(0), pl("one"), mv(), fs(" foo\nbar"),
        i(0), pl("two"), mv(), ls("bar\l baz"), e())

  test "Flow indicators":
    assertEquals("bla]: {c: d, [e]: f}",
        i(0), pl("bla]"), mv(),
        ms(), pl("c"), mv(), pl("d"), sep(),
        ss(), pl("e"), se(), mv(), pl("f"),
        me(), e())

  test "Adjacent map values in flow style":
    assertEquals("{\"foo\":bar, [1]\l :egg}",
        i(0), ms(), dq("foo"), mv(), pl("bar"), sep(),
        ss(), pl("1"), se(), mv(), pl("egg"),
        me(), e())

  test "Tag handles":
    assertEquals("- !!str string\l- !local local\l- !e! e",
        i(0), si(), th("!!"), ts("str"), pl("string"),
        i(0), si(), th("!"), ts("local"), pl("local"),
        i(0), si(), th("!e!"), ts(""), pl("e"), e())

  test "Literal tag handle":
    assertEquals("!<tag:yaml.org,2002:str> string",
        i(0), tv("tag:yaml.org,2002:str"), pl("string"), e())

  test "Anchors and aliases":
    assertEquals("&a foo: {&b b: *a, *b : c}",
        i(0), an("a"), pl("foo"), mv(),
        ms(), an("b"), pl("b"), mv(), al("a"), sep(),
        al("b"), mv(), pl("c"),
        me(), e())

  test "Space at implicit key":
    assertEquals("foo   :\n  bar",
        i(0), pl("foo"), mv(),
        i(2), pl("bar"), e())

  test "inline anchor at implicit key":
    assertEquals("top6: \l  &anchor6 'key6' : scalar6",
        i(0), pl("top6"), mv(),
        i(2), an("anchor6"), sq("key6"), mv(), pl("scalar6"), e())

  test "adjacent anchors":
    assertEquals("foo: &a\n  &b bar",
        i(0), pl("foo"), mv(), an("a"),
        i(2), an("b"), pl("bar"), e())

  test "comment at empty key/value pair":
    assertEquals(": # foo\nbar:",
        i(0), mv(),
        i(0), pl("bar"), mv(), e())

  test "Map in Sequence":
    assertEquals("""-
  a: b
  c: d
""",
        i(0), si(),
        i(2), pl("a"), mv(), pl("b"),
        i(2), pl("c"), mv(), pl("d"), e())

  test "dir end after multiline scalar":
    assertEquals("foo:\n  bar\n  baz\n---\nderp",
        i(0), pl("foo"), mv(),
        i(2), pl("bar baz"),
        dirE(),
        i(0), pl("derp"), e())

  test "Sequence with compact maps":
    assertEquals("- a: drzw\n- b",
        i(0), si(), pl("a"), mv(), pl("drzw"),
        i(0), si(), pl("b"), e())

  test "Empty lines":
    assertEquals("""block: foo

  bar

    baz
flow: {
  foo

  bar: baz


  mi
}""",
        i(0), pl("block"), mv(), pl("foo\nbar\nbaz"),
        i(0), pl("flow"), mv(),
        ms(), pl("foo\nbar"), mv(), pl("baz\n\nmi"), me(), e())
235      i(0), pl("flow"), mv(), ms(), pl("foo\nbar"), mv(),
236      pl("baz\n\nmi"), me(), e())