tlex.nim
#            NimYAML - YAML implementation in Nim
#        (c) Copyright 2015-2023 Felix Krause
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.

import ../yaml/private/lex

import unittest, strutils

const
  # Token kinds whose expected payload is compared against `lex.evaluated`.
  tokensWithValue =
    {Token.Plain, Token.SingleQuoted, Token.DoubleQuoted, Token.Literal,
     Token.Folded, Token.Suffix, Token.VerbatimTag,
     Token.UnknownDirective}
  # Token kinds whose expected payload is compared against `lex.fullLexeme()`.
  tokensWithFullLexeme =
    {Token.DirectiveParam, Token.TagHandle}
  # Token kinds whose expected payload is compared against `lex.shortLexeme()`.
  tokensWithShortLexeme = {Token.Anchor, Token.Alias}


type
  TokenWithValue = object
    ## An expected token: its kind plus the payload `assertEquals` verifies
    ## for that kind. Kinds outside the sets above carry no payload.
    case kind: Token
    of tokensWithValue:
      value: string
    of tokensWithFullLexeme:
      lexeme: string
    of tokensWithShortLexeme:
      slexeme: string
    of Indentation:
      indentation: int
    else: discard

proc actualRepr(lex: Lexer, t: Token): string =
  ## Renders `t` for failure messages: the token name, followed in
  ## parentheses by the escaped `lex.evaluated` (value tokens and tag
  ## handles) or by the current indentation (Indentation tokens).
  result = $t
  case t
  of tokensWithValue + {Token.TagHandle}:
    result.add("(" & escape(lex.evaluated) & ")")
  of Indentation:
    result.add("(" & $lex.currentIndentation() & ")")
  else: discard

proc assertEquals(input: string, expected: varargs[TokenWithValue]) =
  ## Lexes `input` and checks, token by token, that the lexer yields the
  ## kinds and payloads listed in `expected`.
  ##
  ## Only `expected.len` tokens are pulled from the lexer; anything the
  ## lexer would produce afterwards is not inspected.
  ##
  ## Fails via `doAssert` on the first mismatch. A `LexerError` raised while
  ## lexing is printed with its position and then also fails the check.
  var
    lex: Lexer
    tokenNo = 0 # 1-based position of the token being checked (for messages)
  lex.init(input)
  for expectedToken in expected:
    inc(tokenNo)
    try:
      lex.next()
      doAssert lex.cur == expectedToken.kind,
          "Wrong token kind at #" & $tokenNo & ": Expected " &
          $expectedToken.kind & ", got " & lex.actualRepr(lex.cur)
      case expectedToken.kind
      of tokensWithValue:
        doAssert lex.evaluated == expectedToken.value,
            "Wrong token content at #" & $tokenNo & ": Expected " &
            escape(expectedToken.value) & ", got " & escape(lex.evaluated)
      of tokensWithFullLexeme:
        doAssert lex.fullLexeme() == expectedToken.lexeme,
            "Wrong token lexeme at #" & $tokenNo & ": Expected " &
            escape(expectedToken.lexeme) & ", got " &
            escape(lex.fullLexeme())
      of tokensWithShortLexeme:
        doAssert lex.shortLexeme() == expectedToken.slexeme,
            "Wrong token slexeme at #" & $tokenNo & ": Expected " &
            escape(expectedToken.slexeme) & ", got " &
            escape(lex.shortLexeme())
      of Indentation:
        doAssert lex.currentIndentation() == expectedToken.indentation,
            "Wrong indentation length at #" & $tokenNo & ": Expected " &
            $expectedToken.indentation & ", got " &
            $lex.currentIndentation()
      else: discard
    except LexerError as e:
      echo "Error at line ", e.line, ", column ", e.column, ": ", e.msg
      echo e.lineContent
      # doAssert (not assert): plain `assert` is compiled out when
      # assertions are disabled, which would let a lexer error pass silently.
      doAssert false, "Lexer raised an error at token #" & $tokenNo

# Short constructors for expected tokens, used by the test cases.

proc i(indent: int): TokenWithValue =
  TokenWithValue(kind: Token.Indentation, indentation: indent)
proc pl(v: string): TokenWithValue =
  TokenWithValue(kind: Token.Plain, value: v)
proc sq(v: string): TokenWithValue =
  TokenWithValue(kind: Token.SingleQuoted, value: v)
proc dq(v: string): TokenWithValue =
  TokenWithValue(kind: Token.DoubleQuoted, value: v)
proc e(): TokenWithValue = TokenWithValue(kind: Token.StreamEnd)
proc mk(): TokenWithValue = TokenWithValue(kind: Token.MapKeyInd)
proc mv(): TokenWithValue = TokenWithValue(kind: Token.MapValueInd)
proc si(): TokenWithValue = TokenWithValue(kind: Token.SeqItemInd)
proc dy(): TokenWithValue = TokenWithValue(kind: Token.YamlDirective)
proc dt(): TokenWithValue = TokenWithValue(kind: Token.TagDirective)
proc du(v: string): TokenWithValue =
  TokenWithValue(kind: Token.UnknownDirective, value: v)
proc dp(v: string): TokenWithValue =
  TokenWithValue(kind: Token.DirectiveParam, lexeme: v)
proc th(v: string): TokenWithValue =
  TokenWithValue(kind: Token.TagHandle, lexeme: v)
proc ts(v: string): TokenWithValue =
  TokenWithValue(kind: Token.Suffix, value: v)
proc tv(v: string): TokenWithValue =
  TokenWithValue(kind: Token.VerbatimTag, value: v)
proc dirE(): TokenWithValue = TokenWithValue(kind: Token.DirectivesEnd)
proc docE(): TokenWithValue = TokenWithValue(kind: Token.DocumentEnd)
proc ls(v: string): TokenWithValue =
  TokenWithValue(kind: Token.Literal, value: v)
proc fs(v: string): TokenWithValue =
  TokenWithValue(kind: Token.Folded, value: v)
# Short constructors for expected flow-collection and anchor/alias tokens.

proc ss(): TokenWithValue = TokenWithValue(kind: Token.SeqStart)
proc se(): TokenWithValue = TokenWithValue(kind: Token.SeqEnd)
proc ms(): TokenWithValue = TokenWithValue(kind: Token.MapStart)
proc me(): TokenWithValue = TokenWithValue(kind: Token.MapEnd)
proc sep(): TokenWithValue = TokenWithValue(kind: Token.SeqSep)
proc an(v: string): TokenWithValue =
  TokenWithValue(kind: Token.Anchor, slexeme: v)
proc al(v: string): TokenWithValue =
  TokenWithValue(kind: Token.Alias, slexeme: v)

# Each test feeds a YAML snippet to the lexer and lists the tokens expected
# in order. Where an input is followed by i(2), the indented line carries
# exactly two leading spaces so that the literal agrees with the expectation.
suite "Lexer":
  test "Empty document":
    assertEquals("", e())

  test "Single-line scalar":
    assertEquals("scalar", i(0), pl("scalar"), e())

  test "Multiline scalar":
    assertEquals("scalar\l line two", i(0), pl("scalar line two"), e())

  test "Single-line mapping":
    assertEquals("key: value", i(0), pl("key"), mv(), pl("value"), e())

  test "Multiline mapping":
    assertEquals("key:\n  value", i(0), pl("key"), mv(), i(2), pl("value"),
        e())

  test "Explicit mapping":
    assertEquals("? key\n: value", i(0), mk(), pl("key"), i(0), mv(),
        pl("value"), e())

  test "Sequence":
    assertEquals("- a\n- b", i(0), si(), pl("a"), i(0), si(), pl("b"), e())

  test "Single-line single-quoted scalar":
    assertEquals("'quoted scalar'", i(0), sq("quoted scalar"), e())

  test "Multiline single-quoted scalar":
    assertEquals("'quoted\l multi line \l\lscalar'", i(0),
        sq("quoted multi line\lscalar"), e())

  test "Single-line double-quoted scalar":
    assertEquals("\"quoted scalar\"", i(0), dq("quoted scalar"), e())

  test "Multiline double-quoted scalar":
    assertEquals("\"quoted\l multi line \l\lscalar\"", i(0),
        dq("quoted multi line\lscalar"), e())

  test "Escape sequences":
    assertEquals(""""\n\x31\u0032\U00000033"""", i(0), dq("\l123"), e())

  test "Directives":
    assertEquals("%YAML 1.2\n---\n%TAG\n...\n\n%TAG ! example.html",
        dy(), dp("1.2"), dirE(), i(0), pl("%TAG"), docE(), dt(),
        th("!"), ts("example.html"), e())

  test "Markers and Unknown Directive":
    assertEquals("---\n---\n...\n%UNKNOWN warbl", dirE(), dirE(),
        docE(), du("UNKNOWN"), dp("warbl"), e())

  test "Block scalar":
    # The comment line is indented less than the content so that it is not
    # part of the expected scalar value.
    assertEquals("|\l  a\l\l  b\l # comment", i(0), ls("a\l\lb\l"), e())

  test "Block Scalars":
    # `>2-` fixes the content indentation at two spaces; "foo" has one extra
    # space, which is why the expected value starts with a space.
    assertEquals("one : >2-\l   foo\l  bar\ltwo: |+\l bar\l  baz", i(0),
        pl("one"), mv(), fs(" foo\nbar"), i(0), pl("two"), mv(),
        ls("bar\l baz"), e())

  test "Flow indicators":
    assertEquals("bla]: {c: d, [e]: f}", i(0), pl("bla]"), mv(), ms(),
        pl("c"), mv(), pl("d"), sep(), ss(), pl("e"), se(), mv(), pl("f"),
        me(), e())

  test "Adjacent map values in flow style":
    assertEquals("{\"foo\":bar, [1]\l :egg}", i(0), ms(), dq("foo"), mv(),
        pl("bar"), sep(), ss(), pl("1"), se(), mv(), pl("egg"), me(), e())

  test "Tag handles":
    assertEquals("- !!str string\l- !local local\l- !e! e", i(0), si(),
        th("!!"), ts("str"), pl("string"), i(0), si(), th("!"), ts("local"),
        pl("local"), i(0), si(), th("!e!"), ts(""), pl("e"), e())

  test "Literal tag handle":
    assertEquals("!<tag:yaml.org,2002:str> string", i(0),
        tv("tag:yaml.org,2002:str"), pl("string"), e())

  test "Anchors and aliases":
    assertEquals("&a foo: {&b b: *a, *b : c}", i(0), an("a"), pl("foo"),
        mv(), ms(), an("b"), pl("b"), mv(), al("a"), sep(), al("b"), mv(),
        pl("c"), me(), e())

  test "Space at implicit key":
    assertEquals("foo :\n  bar", i(0), pl("foo"), mv(), i(2), pl("bar"), e())

  test "inline anchor at implicit key":
    assertEquals("top6: \l  &anchor6 'key6' : scalar6", i(0), pl("top6"),
        mv(), i(2), an("anchor6"), sq("key6"), mv(), pl("scalar6"), e())

  test "adjacent anchors":
    assertEquals("foo: &a\n  &b bar", i(0), pl("foo"), mv(), an("a"), i(2),
        an("b"), pl("bar"), e())

  test "comment at empty key/value pair":
    assertEquals(": # foo\nbar:", i(0), mv(), i(0), pl("bar"), mv(), e())

  test "Map in Sequence":
    assertEquals("-\n  a: b\n  c: d\n", i(0), si(), i(2), pl("a"), mv(),
        pl("b"), i(2), pl("c"), mv(), pl("d"), e())

  test "dir end after multiline scalar":
    assertEquals("foo:\n  bar\n  baz\n---\nderp", i(0), pl("foo"), mv(),
        i(2), pl("bar baz"), dirE(), i(0), pl("derp"), e())

  test "Sequence with compact maps":
    assertEquals("- a: drzw\n- b", i(0), si(), pl("a"), mv(), pl("drzw"),
        i(0), si(), pl("b"), e())

  test "Empty lines":
    # NOTE(review): the amount of indentation on the continuation lines does
    # not appear in the expected tokens; two spaces are used throughout.
    assertEquals(
        "block: foo\n" &
        "\n" &
        "  bar\n" &
        "\n" &
        "  baz\n" &
        "flow: {\n" &
        "  foo\n" &
        "\n" &
        "  bar: baz\n" &
        "\n" &
        "\n" &
        "  mi\n" &
        "}",
        i(0), pl("block"), mv(), pl("foo\nbar\nbaz"),
        i(0), pl("flow"), mv(), ms(), pl("foo\nbar"), mv(),
        pl("baz\n\nmi"), me(), e())