edn.nim
import
  lexbase, streams, strutils, unicode, nre, tables, hashes, options

type
  TokenKind* = enum
    tkError,
    tkEof,
    tkString,
    tkInt,
    tkFloat

  EdnError* = enum
    errNone,
    errInvalidToken,
    errEofExpected,
    errQuoteExpected

  ConditionalExpressionsHandling* = enum
    asError, asTagged, cljSource, cljsSource, ignoreConditionals

  CommentsHandling* = enum
    discardComments, keepComments

  ParseOptions* = object
    eof_is_error*: bool
    eof_value*: EdnNode
    suppress_read*: bool
    conditional_exprs*: ConditionalExpressionsHandling
    comments_handling*: CommentsHandling

  EdnParser* = object of BaseLexer
    a: string                 # scratch buffer for the token being read
    token*: TokenKind
    err: EdnError
    filename: string
    options*: ParseOptions

  EdnNodeKind* = enum
    EdnNil
    EdnBool
    EdnCharacter
    EdnInt
    EdnRatio
    EdnFloat
    EdnString
    EdnSymbol
    EdnKeyword
    EdnList
    EdnMap
    EdnVector
    EdnSet
    EdnTaggedValue
    EdnCommentLine
    EdnRegex
    EdnVarQuote

  CommentPlacement* = enum
    Before
    After
    Inside

  KeywordNamespacing* = enum
    NoNamespace               # just :foo
    LocalNamespace            # e.g. ::foo
    NonLocalNamespace         # e.g. ::some-ns/foo
    FullNamespace             # e.g. :some.namespace/foo

  CommentsObj* = object
    placement*: CommentPlacement
    comment_lines*: seq[string]
  Comment* = ref CommentsObj

  EdnNode* = ref EdnNodeObj
  EdnNodeObj* {.acyclic.} = object
    case kind*: EdnNodeKind
    of EdnNil:
      nil
    of EdnBool:
      boolVal*: bool
    of EdnCharacter:
      character*: string      # to support unicode chars
    of EdnInt:
      num*: BiggestInt
    of EdnRatio:
      rnum*: tuple[numerator, denominator: BiggestInt]
    of EdnFloat:
      fnum*: float
    of EdnString:
      str*: string
    of EdnSymbol:
      symbol*: tuple[ns, name: string]
      symbol_meta*: HMap
    of EdnKeyword:
      keyword*: tuple[ns, name: string]
      namespacing*: KeywordNamespacing
    of EdnList:
      list*: seq[EdnNode]
      list_meta*: HMap
    of EdnMap:
      map*: HMap
      map_meta*: HMap
    of EdnVector:
      vec*: seq[EdnNode]
      vec_meta*: HMap
    of EdnSet:
      set_elems*: HMap
      set_meta*: HMap
    of EdnTaggedValue:
      tag*: tuple[ns, name: string]
      value*: EdnNode
    of EdnCommentLine:
      comment*: string
    of EdnRegex:
      regex*: string
    of EdnVarQuote:
      var_symbol*: EdnNode
    line*: int
    column*: int
    comments*: seq[Comment]

  HMapEntryObj = tuple[key: EdnNode, value: EdnNode]
  HMapEntry = ref HMapEntryObj
  HMapObj* = object
    count*: int
    buckets*: seq[seq[HMapEntry]]
  HMap* = ref HMapObj

  ParseError* = object of CatchableError
  ParseInfo* = tuple[line, col: int]

  MacroReader = proc(p: var EdnParser): EdnNode {.gcsafe.}
  MacroArray = array[char, MacroReader]

const non_constituents = ['@', '`', '~']

converter to_int(c: char): int = result = ord(c)

var
  macros {.threadvar.}: MacroArray
  dispatch_macros {.threadvar.}: MacroArray

proc non_constituent(c: char): bool =
  ## True when `c` may not appear inside a token.
  result = c in non_constituents

proc is_macro(c: char): bool =
  ## True when a reader macro is registered for `c`.
  result = c.to_int < macros.len and macros[c] != nil

proc is_terminating_macro(c: char): bool =
  ## True when `c` is a macro character that ends the current token
  ## (every macro character except `#` and `'`).
  result = c != '#' and c != '\'' and is_macro(c)

proc get_macro(ch: char): MacroReader =
  ## Returns the reader registered for `ch` (nil when there is none).
  result = macros[ch]

## ============== HMAP TYPE AND FWD DECLS ===========

proc new_hmap*(capacity: int = 16): HMap {.gcsafe.}

proc `[]=`*(m: HMap, key: EdnNode, val: EdnNode) {.gcsafe.}

proc val_at*(m: HMap, key: EdnNode, default: EdnNode = nil): EdnNode {.gcsafe.}

proc `[]`*(m: HMap, key: EdnNode): Option[EdnNode] {.gcsafe.}

proc len*(m: HMap): int = m.count

iterator items*(m: HMap): HMapEntry =
  ## Yields every entry of the map, bucket by bucket.
  for bucket in m.buckets:
    for entry in bucket:
      yield entry

proc merge_maps*(m1, m2: HMap): void {.gcsafe.}

proc add_meta*(node: EdnNode, meta: HMap): EdnNode {.gcsafe.}

## ============== NEW OBJ FACTORIES =================

var
  edn_true* {.threadvar.}: EdnNode
  edn_false* {.threadvar.}: EdnNode
  # when a new thread is instantiated, the bool value is not copied to new thread,
  # however we can rely on its default value to be false (uninitialised)
  initialised {.threadvar.}: bool

proc new_edn_string_move(s: string): EdnNode =
  ## Wraps `s` in a string node, sharing the buffer where the runtime
  ## still offers `shallowCopy` and copying otherwise.
  result = EdnNode(kind: EdnString)
  when declared(shallowCopy):
    shallowCopy(result.str, s)
  else:
    result.str = s

proc new_edn_int*(s: string): EdnNode =
  ## Integer node parsed from `s`; raises `ValueError` on malformed input.
  return EdnNode(kind: EdnInt, num: parseBiggestInt(s))

proc new_edn_int*(val: int): EdnNode =
  return EdnNode(kind: EdnInt, num: val)

proc new_edn_ratio*(nom, denom: BiggestInt): EdnNode =
  return EdnNode(kind: EdnRatio, rnum: (nom, denom))

proc new_edn_float*(s: string): EdnNode =
  ## Float node parsed from `s`; raises `ValueError` on malformed input.
  return EdnNode(kind: EdnFloat, fnum: parseFloat(s))

proc new_edn_float*(val: float): EdnNode =
  return EdnNode(kind: EdnFloat, fnum: val)

proc new_edn_bool*(val: bool): EdnNode =
  ## Returns the per-thread shared boolean node. These are nil until
  ## `init_edn_readers` has run on the current thread.
  if val: edn_true else: edn_false

proc new_edn_bool*(s: string): EdnNode =
  ## Boolean node parsed with `parseBool`.
  let parsed: bool = parseBool(s)
  return new_edn_bool(parsed)

proc new_edn_symbol*(ns, name: string): EdnNode =
  return EdnNode(kind: EdnSymbol, symbol: (ns, name))

proc new_edn_keyword*(ns, name: string): EdnNode =
  return EdnNode(kind: EdnKeyword, keyword: (ns, name))

proc new_edn_nil*(): EdnNode =
  ## A fresh node of the default kind, `EdnNil`.
  new(result)

### === VALS ===

var
  EdnTrue {.threadvar.}: EdnNode
  EdnFalse {.threadvar.}: EdnNode
  KeyTag* {.threadvar.}: EdnNode
  CljTag {.threadvar.}: EdnNode
  CljsTag {.threadvar.}: EdnNode
  DefaultTag {.threadvar.}: EdnNode
  LineKw {.threadvar.}: EdnNode
  ColumnKw {.threadvar.}: EdnNode
  SplicedQKw* {.threadvar.}: EdnNode


### === ERROR HANDLING UTILS ===

proc err_info*(p: EdnParser): ParseInfo =
  ## Current (line, column) of the parser.
  result = (p.line_number, get_col_number(p, p.bufpos))

### === MACRO READERS ===

proc read*(p: var EdnParser): EdnNode {.gcsafe.}

proc valid_utf8_alpha(c: char): bool =
  ## ASCII letter, or a byte >= 0xC0 (lead byte of a multi-byte sequence).
  return c.isAlphaAscii() or c >= 0xc0

proc handle_hex_char(c: char, x: var int): bool =
  ## Shifts hex digit `c` into `x`; returns false when `c` is not a hex digit.
  result = true
  case c
  of '0'..'9': x = (x shl 4) or (ord(c) - ord('0'))
  of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10)
  of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
  else: result = false

proc parse_escaped_utf16(buf: cstring, pos: var int): int =
  ## Reads four hex digits at `pos`, advancing `pos` past each accepted
  ## digit. Returns the value, or -1 when a non-hex character is met.
  result = 0
  for _ in 0..3:
    if handle_hex_char(buf[pos], result):
      inc(pos)
    else:
      return -1

proc parse_string(p: var EdnParser): TokenKind =
  ## Reads the body of a string literal (opening quote already consumed)
  ## into `p.a`. Failures are reported through `p.err`.
  result = tkString
  var pos = p.bufpos
  var buf = p.buf
  while true:
    case buf[pos]
    of '\0':
      # unterminated string: stop here instead of spinning on EOF
      p.err = errQuoteExpected
      break
    of '"':
      inc(pos)
      break
    of '\\':
      case buf[pos+1]
      of '\\', '"', '\'', '/':
        add(p.a, buf[pos+1])
        inc(pos, 2)
      of 'b':
        add(p.a, '\b')
        inc(pos, 2)
      of 'f':
        add(p.a, '\f')
        inc(pos, 2)
      of 'n':
        add(p.a, '\L')
        inc(pos, 2)
      of 'r':
        add(p.a, '\C')
        inc(pos, 2)
      of 't':
        add(p.a, '\t')
        inc(pos, 2)
      of 'u':
        inc(pos, 2)
        var r = parse_escaped_utf16(buf, pos)
        if r < 0:
          p.err = errInvalidToken
          break
        # deal with surrogates
        if (r and 0xfc00) == 0xd800:
          if buf[pos] != '\\' or buf[pos + 1] != 'u':
            p.err = errInvalidToken
            break
          inc(pos, 2)
          let s = parse_escaped_utf16(buf, pos)
          if (s and 0xfc00) == 0xdc00 and s > 0:
            r = 0x10000 + (((r - 0xd800) shl 10) or (s - 0xdc00))
          else:
            p.err = errInvalidToken
            break
        add(p.a, toUTF8(Rune(r)))
      else:
        # unknown escape: keep the backslash, carry on with the next char
        add(p.a, buf[pos])
        inc(pos)
    of '\c':
      pos = lexbase.handleCR(p, pos)
      buf = p.buf
      add(p.a, '\c')
    of '\L':
      pos = lexbase.handleLF(p, pos)
      buf = p.buf
      add(p.a, '\L')
    else:
      add(p.a, buf[pos])
      inc(pos)
  p.bufpos = pos

proc read_string(p: var EdnParser): EdnNode =
  ## Reader for `"`. Raises `ParseError` on a malformed or unterminated
  ## string.
  p.err = errNone             # do not let an earlier failure leak in
  discard parse_string(p)
  if p.err != errNone:
    p.a = ""
    raise newException(ParseError, "read_string failure: " & $p.err)
  result = new_edn_string_move(p.a)
  p.a = ""

proc read_quoted_internal(p: var EdnParser, quote_name: string): EdnNode =
  ## Reads the next form and wraps it as the list `(quote_name form)`.
  let quoted = read(p)
  result = EdnNode(kind: EdnList)
  result.list = @[new_edn_symbol("", quote_name), quoted]

proc read_quoted*(p: var EdnParser): EdnNode =
  return read_quoted_internal(p, "quote")

proc read_quasiquoted*(p: var EdnParser): EdnNode =
  return read_quoted_internal(p, "quasiquote")

proc read_unquoted*(p: var EdnParser): EdnNode =
  return read_quoted_internal(p, "unquote")

proc read_deref*(p: var EdnParser): EdnNode =
  return read_quoted_internal(p, "deref")

# TODO: read comment as continuous blocks, not just lines
proc read_comment(p: var EdnParser): EdnNode =
  ## Reader for `;`. Consumes the rest of the line (or input) and returns
  ## an `EdnCommentLine` node; the text is stored only when the parser
  ## options ask for `keepComments`.
  result = EdnNode(kind: EdnCommentLine)
  let keep = p.options.comments_handling == keepComments
  var pos = p.bufpos
  var buf = p.buf
  while true:
    case buf[pos]
    of '\L':
      pos = lexbase.handleLF(p, pos)
      break
    of '\c':
      pos = lexbase.handleCR(p, pos)
      break
    of EndOfFile:
      # a comment on the last line needs no trailing newline
      break
    else:
      if keep:
        add(p.a, buf[pos])
      inc(pos)
  p.bufpos = pos
  if keep:
    result.comment = p.a
    p.a = ""

proc read_token(p: var EdnParser, lead_constituent: bool): string =
  ## Reads a token starting at the current position. The first character
  ## is always taken; reading stops at EOF, whitespace (commas included)
  ## or a terminating macro character. Raises `ParseError` on characters
  ## that may not appear in a token.
  var pos = p.bufpos
  var ch = p.buf[pos]
  if lead_constituent and non_constituent(ch):
    raise new_exception(ParseError, "Invalid leading character " & ch)
  result = ""
  result.add(ch)
  while true:
    inc(pos)
    ch = p.buf[pos]
    if ch == EndOfFile or ch == ',' or isSpaceAscii(ch) or
        is_terminating_macro(ch):
      break
    elif non_constituent(ch):
      raise new_exception(ParseError, "Invalid constituent character: " & ch)
    result.add(ch)
  p.bufpos = pos

proc read_character(p: var EdnParser): EdnNode =
  ## Reader for `\`. Supports single (possibly multi-byte) characters, the
  ## named characters and `\uXXXX`. Raises `ParseError` for anything else.
  if p.buf[p.bufpos] == EndOfFile:
    raise new_exception(ParseError, "EOF while reading character")

  result = EdnNode(kind: EdnCharacter)
  let token = read_token(p, false)
  if token.runeLen == 1:
    # checked first so that `\u` and `\o` are plain characters
    result.character = token
  elif token == "newline":
    result.character = "\L"
  elif token == "space":
    result.character = " "
  elif token == "tab":
    result.character = "\t"
  elif token == "backspace":
    result.character = "\b"
  elif token == "formfeed":
    result.character = "\f"
  elif token == "return":
    result.character = "\r"
  elif token.startsWith("u"):
    try:
      result.character = toUTF8(Rune(parseHexInt(token.substr(1))))
    except ValueError:
      raise new_exception(ParseError, "Invalid unicode character: \\" & token)
  elif token.startsWith("o"):
    # TODO: impl unicode char reading
    raise new_exception(ParseError, "Not implemented: reading unicode chars")
  else:
    raise new_exception(ParseError, "Unsupported character: \\" & token)

proc skip_ws(p: var EdnParser) =
  ## Skips blanks, line breaks and commas.
  # commas are whitespace in edn collections
  var pos = p.bufpos
  var buf = p.buf
  while true:
    case buf[pos]
    of ' ', '\t', ',':
      inc(pos)
    of '\c':
      pos = lexbase.handleCR(p, pos)
      buf = p.buf
    of '\L':
      pos = lexbase.handleLF(p, pos)
      buf = p.buf
    else:
      break
  p.bufpos = pos

proc match_symbol(s: string): EdnNode =
  ## Interprets token `s` as a symbol or keyword. Returns nil when the
  ## token contains more than one `/`.
  let
    ns_pat = re"[:]?([\D].*)"
    name_pat = re"(\D.*)"
    split_sym = s.split('/')
  var
    ns: string
    name: string
  case split_sym.len
  of 1:
    ns = ""
    name = split_sym[0]
  of 2:
    ns = split_sym[0]
    name = split_sym[1]
  else:
    return nil

  if ns != "":
    let ns_m = ns.match(ns_pat)
    if ns_m.is_some():
      ns = ns_m.get().captures[0]
  if name != "":
    let name_m = name.match(name_pat)
    if name_m.is_some():
      name = name_m.get().captures[0]
  if s[0] == ':':
    result = EdnNode(kind: EdnKeyword)
    if split_sym.len == 1:
      # locally namespaced kw (e.g. ::foo); `::a` qualifies as well
      if s.len > 2 and s[1] == ':':
        result.keyword = (ns, name.substr(2, name.len))
        result.namespacing = LocalNamespace
      else:
        result.keyword = (ns, name.substr(1, name.len))
        result.namespacing = NoNamespace
    else:
      result.keyword = (ns.replace(":", ""), name)
      if s[1] == ':':
        result.namespacing = NonLocalNamespace
      else:
        result.namespacing = FullNamespace
  else:
    result = EdnNode(kind: EdnSymbol)
    result.symbol = (ns, name)

proc interpret_token(token: string): EdnNode =
  ## Maps a token to nil/true/false, or else to a symbol or keyword.
  ## Raises `ParseError` when the token is none of these.
  case token
  of "nil":
    result = new_edn_nil()
  of "true", "false":
    result = new_edn_bool(token)
  else:
    result = nil

  if result == nil:
    result = match_symbol(token)
  if result == nil:
    raise new_exception(ParseError, "Invalid token: " & token)


proc attach_comment_lines(node: EdnNode, comment_lines: seq[string], placement: CommentPlacement): void =
  ## Appends a comment block with the given placement to `node`.
  let co = Comment(placement: placement, comment_lines: comment_lines)
  if node.comments.len == 0: node.comments = @[co]
  else: node.comments.add(co)

type DelimitedListResult = object
  list: seq[EdnNode]
  comment_lines: seq[string]
  comment_placement: CommentPlacement

type DelimitedListReadOptions = enum
  Recursive

proc read_delimited_list(
    p: var EdnParser, delimiter: char,
    opts: Table[DelimitedListReadOptions, bool]): DelimitedListResult =
  ## Reads forms up to and including `delimiter`; the opening character
  ## must already be consumed. With `keepComments`, comment lines are
  ## attached to the following form (`Before`), to the last form when they
  ## trail it (`After`), or returned in the result when the collection
  ## holds no form to attach them to. `opts` is currently not consulted.
  ## Raises `ParseError` on EOF.
  var list: seq[EdnNode] = @[]
  var comment_lines: seq[string] = @[]
  let with_comments = keepComments == p.options.comments_handling
  while true:
    skip_ws(p)
    let pos = p.bufpos
    let ch = p.buf[pos]
    if ch == EndOfFile:
      let msg = "EOF while reading list $# $# $#"
      raise new_exception(ParseError, format(msg, delimiter, p.filename, p.line_number))

    if ch == delimiter:
      p.bufpos = pos + 1
      # make sure any comments get attached
      if with_comments and list.len > 0 and comment_lines.len > 0:
        attach_comment_lines(list[list.high], comment_lines, After)
        comment_lines = @[]
      break

    var node: EdnNode
    var is_comment: bool
    if is_macro(ch):
      let reader = get_macro(ch)
      p.bufpos = pos + 1
      node = reader(p)
      is_comment = node != nil and ch == ';' and node.kind == EdnCommentLine
    else:
      node = read(p)
      is_comment = node != nil and node.kind == EdnCommentLine

    if node == nil:
      continue                # e.g. a discarded form
    if is_comment:
      if with_comments:
        comment_lines.add(node.comment)
    else:
      # attach comments encountered before this node
      if with_comments and comment_lines.len > 0:
        attach_comment_lines(node, comment_lines, Before)
        comment_lines = @[]
      list.add(node)

  result.comment_lines = comment_lines
  if comment_lines.len > 0:
    result.comment_placement = Inside
  result.list = list

proc add_line_col_info(p: var EdnParser, node: var EdnNode): void =
  ## Stamps `node` with the parser's current line and column.
  node.line = p.line_number
  node.column = getColNumber(p, p.bufpos)

proc maybe_add_comments(node: EdnNode, list_result: DelimitedListResult): EdnNode =
  ## Attaches the unattached comment lines of `list_result`, if any, to
  ## `node` with placement `Inside`. Returns `node`.
  if list_result.comment_lines.len > 0:
    attach_comment_lines(node, list_result.comment_lines, Inside)
  return node

proc get_meta*(node: EdnNode): HMap {.gcsafe.}

proc is_element_spliced(node: EdnNode): bool =
  ## True for a vector or list whose metadata marks it as spliced.
  case node.kind
  of EdnVector, EdnList:
    let meta = node.get_meta()
    if meta == nil: return false

    let is_spliced = meta[SplicedQKw]
    return is_spliced.is_some() and is_spliced.get() == EdnTrue
  else:
    return false

proc splice_conditional_exprs(list_result: DelimitedListResult): seq[EdnNode] =
  ## Returns the forms of `list_result` with every spliced vector or list
  ## replaced by its elements.
  var has_splice = false
  for item in list_result.list:
    if is_element_spliced(item):
      has_splice = true
      break
  if not has_splice:
    return list_result.list

  result = @[]
  for item in list_result.list:
    if is_element_spliced(item):
      # is_element_spliced only accepts vectors and lists
      let elems = if item.kind == EdnVector: item.vec else: item.list
      for elem in elems:
        result.add(elem)
    else:
      result.add(item)

proc read_list(p: var EdnParser): EdnNode =
  ## Reader for `(`.
  result = EdnNode(kind: EdnList)
  add_line_col_info(p, result)
  let delimited_result = read_delimited_list(p, ')', {Recursive: true}.to_table())
  result.list = splice_conditional_exprs(delimited_result)
  discard maybe_add_comments(result, delimited_result)

const
  MAP_EVEN = "Map literal must contain even number of forms "

proc read_map(p: var EdnParser): EdnNode =
  ## Reader for `{`. Raises `ParseError` on an odd number of forms.
  result = EdnNode(kind: EdnMap)
  let list_result = read_delimited_list(p, '}', {Recursive: true}.to_table())
  let list = splice_conditional_exprs(list_result)
  if (list.len and 1) == 1:
    let position = (p.line_number, get_col_number(p, p.bufpos))
    raise new_exception(ParseError, MAP_EVEN & $position & " " & $list.len & " " & p.filename)
  result.map = new_hmap()
  var i = 0
  while i < list.high:
    result.map[list[i]] = list[i+1]
    inc(i, 2)
  add_line_col_info(p, result)
  discard maybe_add_comments(result, list_result)

const
  NS_MAP_INVALID = "Namespaced map must specify a valid namespace: kind $#, namespace $#, $#:$#"
  NS_MAP_EVEN = "Namespaced map literal must contain an even number of forms"

proc read_ns_map(p: var EdnParser): EdnNode =
  ## Reader for `#:ns{...}`. Keyword and symbol keys without a namespace
  ## get `ns`; keys in namespace `_` lose theirs; other keys are kept.
  ## Raises `ParseError` on a bad namespace, a missing map or an odd
  ## number of forms.
  let n = read(p)
  if n.kind != EdnSymbol or n.symbol.ns != "":
    # only touch `symbol` when the node really is a symbol
    let ns_str =
      if n.kind != EdnSymbol or n.symbol.ns == "": "nil"
      else: n.symbol.ns
    raise new_exception(ParseError, format(NS_MAP_INVALID, n.kind, ns_str, p.filename, p.line_number))

  skip_ws(p)

  if p.buf[p.bufpos] != '{':
    raise new_exception(ParseError, "Namespaced map must specify a map")
  inc(p.bufpos)
  let list_result = read_delimited_list(p, '}', {Recursive: true}.to_table())
  let list = list_result.list #TODO: handle conditional splicing here
  if (list.len and 1) == 1:
    raise new_exception(ParseError, NS_MAP_EVEN)
  var
    map = new_hmap()
    i = 0
  while i < list.high:
    let key = list[i]
    let value = list[i + 1]
    inc(i, 2)
    case key.kind
    of EdnKeyword:
      if key.keyword.ns == "":
        map[new_edn_keyword(n.symbol.name, key.keyword.name)] = value
      elif key.keyword.ns == "_":
        map[new_edn_keyword("", key.keyword.name)] = value
      else:
        map[key] = value
    of EdnSymbol:
      if key.symbol.ns == "":
        map[new_edn_symbol(n.symbol.name, key.symbol.name)] = value
      elif key.symbol.ns == "_":
        map[new_edn_symbol("", key.symbol.name)] = value
      else:
        map[key] = value
    else:
      map[key] = value

  result = EdnNode(kind: EdnMap, map: map)
  discard maybe_add_comments(result, list_result)

proc read_vector(p: var EdnParser): EdnNode =
  ## Reader for `[`.
  result = EdnNode(kind: EdnVector)
  add_line_col_info(p, result)
  let delimited_result = read_delimited_list(p, ']', {Recursive: true}.to_table())
  result.vec = splice_conditional_exprs(delimited_result)
  discard maybe_add_comments(result, delimited_result)

proc read_set(p: var EdnParser): EdnNode =
  ## Reader for `#{`. Elements become keys mapped to the true node.
  result = EdnNode(kind: EdnSet)
  add_line_col_info(p, result)
  let list_result = read_delimited_list(p, '}', {Recursive: true}.to_table())
  discard maybe_add_comments(result, list_result)
  # TODO: hmap_capacity(len(elements))
  result.set_elems = new_hmap()
  for element in list_result.list:
    result.set_elems[element] = new_edn_bool(true)

proc read_anonymous_fn*(p: var EdnParser): EdnNode =
  ## Reader for `#(`. Produces `(fn form...)` whose metadata carries
  ## `:from-reader-macro true`.
  # TODO: extract arglist from fn body
  result = EdnNode(kind: EdnList)
  result.list = @[new_edn_symbol("", "fn")]
  # remember this one came from a macro
  let meta = new_hmap()
  meta[new_edn_keyword("", "from-reader-macro")] = new_edn_bool(true)
  result.list_meta = meta

  let list_result = read_delimited_list(p, ')', {Recursive: true}.to_table())
  for item in list_result.list:
    result.list.add(item)
  discard maybe_add_comments(result, list_result)
  return result

proc safely_add_meta(node: EdnNode, meta: HMap): EdnNode {.gcsafe.}

const
  READER_COND_MSG = "reader conditional should be a list: "
  READER_COND_FEAT_KW = "feature should be a keyword: "
  READER_COND_AS_TAGGED_ERR = "'asTagged' option not available for reader conditionals"

proc read_reader_conditional(p: var EdnParser): EdnNode =
  ## Reader for `#?(...)` and `#?@(...)`. Returns the form selected by
  ## `p.options.conditional_exprs`, falling back to `:default`, or nil
  ## when nothing matches. Spliced results are marked through metadata.
  ## Raises `ParseError` on malformed input and for `asError`/`asTagged`.
  # '#? (:clj ...)'
  let is_spliced = p.buf[p.bufpos] == '@'
  if is_spliced:
    inc(p.bufpos)

  let exp = read(p)
  if exp.kind != EdnList:
    raise new_exception(ParseError, READER_COND_MSG & $exp.kind)
  var
    i = 0
    m = new_hmap()
  while i <= exp.list.high:
    let feature = exp.list[i]
    if feature.kind != EdnKeyword:
      raise new_exception(ParseError, READER_COND_FEAT_KW & $feature.kind & " line " & $p.line_number)
    inc(i)
    if i > exp.list.high:
      let msg = format("No value for platform tag: $#, line $#", feature.keyword, feature.line)
      raise new_exception(ParseError, msg)
    let val = exp.list[i]
    # TODO: does not verify if we're trying to splice at toplevel
    if is_spliced and (val.kind != EdnVector):
      raise new_exception(ParseError, "Trying to splice a conditional expression with: " & $val.kind & ", element " & $i)
    inc(i)
    m[feature] = val

  case p.options.conditional_exprs
  of asError:
    raise new_exception(ParseError, "Reader conditional occured")
  of asTagged:
    raise new_exception(ParseError, READER_COND_AS_TAGGED_ERR)
  of cljSource:
    let val = m[CljTag]
    result = if is_some(val): val.get else: nil
  of cljsSource:
    let val = m[CljsTag]
    result = if is_some(val): val.get else: nil
  of ignoreConditionals:
    return nil

  # try the :default case
  if result == nil:
    let default_val = m[DefaultTag]
    if is_some(default_val):
      result = default_val.get

  #TODO: better handle splicing - new node type or sth else?
  if result != nil and is_spliced:
    let hmap = new_hmap()
    hmap[SplicedQKw] = EdnTrue
    discard add_meta(result, hmap)

  return result


const META_CANNOT_APPLY_MSG =
  "Metadata can be applied only to symbols, lists, vectors and maps and sets. Got :"

proc add_meta*(node: EdnNode, meta: HMap): EdnNode =
  ## Replaces the metadata of `node` with `meta` and returns `node`.
  ## Raises `ParseError` for kinds that carry no metadata.
  case node.kind
  of EdnSymbol:
    node.symbol_meta = meta
  of EdnList:
    node.list_meta = meta
  of EdnMap:
    node.map_meta = meta
  of EdnVector:
    node.vec_meta = meta
  of EdnSet:
    node.set_meta = meta
  else:
    raise new_exception(ParseError, META_CANNOT_APPLY_MSG & $node.kind)
  result = node

proc get_meta*(node: EdnNode): HMap =
  ## Returns the metadata of `node` (possibly nil). Raises `ParseError`
  ## for kinds that carry no metadata.
  case node.kind
  of EdnSymbol:
    return node.symbol_meta
  of EdnList:
    return node.list_meta
  of EdnMap:
    return node.map_meta
  of EdnVector:
    return node.vec_meta
  of EdnSet:
    return node.set_meta
  else:
    raise new_exception(ParseError, "Given type does not support metadata: " & $node.kind)

proc safely_add_meta(node: EdnNode, meta: HMap): EdnNode =
  ## Like `add_meta`, but merges into metadata that is already present.
  let node_meta = get_meta(node)
  if node_meta == nil:
    return add_meta(node, meta)
  merge_maps(node_meta, meta)
  return node

const META_INVALID_MSG =
  "Metadata must be EdnSymbol, EdnKeyword, EdnString or EdnMap"

proc read_metadata(p: var EdnParser): EdnNode =
  ## Reader for `^`. Reads the metadata form, then the form it applies
  ## to, and returns the latter with the metadata merged in. Parser
  ## options are restored on every exit path. Raises `ParseError` when
  ## the metadata form has an unsupported kind.
  let old_opts = p.options
  p.options.eof_is_error = true
  try:
    let meta = read(p)
    var m: HMap
    case meta.kind
    of EdnSymbol, EdnString:
      m = new_hmap()
      m[KeyTag] = meta
    of EdnKeyword:
      m = new_hmap()
      m[meta] = EdnTrue
    of EdnMap:
      m = meta.map
    else:
      raise new_exception(ParseError, META_INVALID_MSG)
    # read the actual data
    let node = read(p)
    result = safely_add_meta(node, m) # need to make sure we don't overwrite
  finally:
    p.options = old_opts

proc read_tagged(p: var EdnParser): EdnNode =
  ## Reads `tag value`; the tag must be a symbol.
  let node = read(p)
  if node.kind != EdnSymbol:
    raise new_exception(ParseError, "tag should be a symbol: " & $node.kind)
  result = EdnNode(kind: EdnTaggedValue, tag: node.symbol, value: read(p))

proc read_cond_as_tagged(p: var EdnParser): EdnNode =
  # reads forms like #+clj foo as EdnTaggedValue
  let tagged = read_tagged(p)
  tagged.tag = ("", "+" & tagged.tag.name)
  return tagged

proc read_cond_matching(p: var EdnParser, tag: string): EdnNode =
  ## Returns the value of a `#+tag value` form when its tag equals `tag`,
  ## nil otherwise.
  let tagged = read_cond_as_tagged(p)
  if tagged.kind != EdnTaggedValue:
    raise new_exception(ParseError, "Expected a tagged value, got: " & $tagged.kind)
  if tagged.tag.name == tag:
    return tagged.value
  return nil

proc read_cond_clj(p: var EdnParser): EdnNode =
  return read_cond_matching(p, "+clj")

proc read_cond_cljs(p: var EdnParser): EdnNode =
  return read_cond_matching(p, "+cljs")

proc hash*(node: EdnNode): Hash =
  ## Hash over the kind and the payload of `node`.
  var h: Hash = 0
  h = h !& hash(node.kind)
  case node.kind
  of EdnNil:
    h = h !& hash(0)
  of EdnBool:
    h = h !& hash(node.bool_val)
  of EdnCharacter:
    h = h !& hash(node.character)
  of EdnInt:
    h = h !& hash(node.num)
  of EdnRatio:
    h = h !& hash(node.rnum)
  of EdnFloat:
    h = h !& hash(node.fnum)
  of EdnString:
    h = h !& hash(node.str)
  of EdnSymbol:
    h = h !& hash(node.symbol)
  of EdnKeyword:
    h = h !& hash(node.keyword)
    h = h !& hash(node.namespacing)
  of EdnList:
    h = h !& hash(node.list)
  of EdnMap:
    for entry in node.map:
      h = h !& hash(entry.key)
      h = h !& hash(entry.value)
  of EdnVector:
    h = h !& hash(node.vec)
  of EdnSet:
    for entry in node.set_elems:
      h = h !& hash(entry.key)
      h = h !& hash(entry.value)
  of EdnTaggedValue:
    h = h !& hash(node.tag)
    h = h !& hash(node.value)
  of EdnCommentLine:
    h = h !& hash(node.comment)
  of EdnRegex:
    h = h !& hash(node.regex)
  of EdnVarQuote:
    h = h !& hash(node.var_symbol)
  result = !$h

proc `==`*(this, that: EdnNode): bool =
  ## Equality by kind and payload; two nil nodes are equal.
  if this.is_nil or that.is_nil:
    return this.is_nil and that.is_nil
  if this.kind != that.kind:
    return false
  case this.kind
  of EdnNil:
    return true
  of EdnBool:
    return this.boolVal == that.boolVal
  of EdnCharacter:
    return this.character == that.character
  of EdnInt:
    return this.num == that.num
  of EdnRatio:
    return this.rnum == that.rnum
  of EdnFloat:
    return this.fnum == that.fnum
  of EdnString:
    return this.str == that.str
  of EdnSymbol:
    return this.symbol == that.symbol
  of EdnKeyword:
    return this.keyword == that.keyword and this.namespacing == that.namespacing
  of EdnList:
    return this.list == that.list
  of EdnMap:
    # NOTE(review): no `==` for HMap is visible here, so this looks like a
    # reference comparison - confirm whether content equality is intended
    return this.map == that.map
  of EdnVector:
    return this.vec == that.vec
  of EdnSet:
    return this.set_elems == that.set_elems
  of EdnTaggedValue:
    return this.tag == that.tag and this.value == that.value
  of EdnCommentLine:
    return this.comment == that.comment
  of EdnRegex:
    return this.regex == that.regex
  of EdnVarQuote:
    return this.var_symbol == that.var_symbol

proc read_var_quote(p: var EdnParser): EdnNode =
  ## Reader for `#'`; the quoted form must be a symbol.
  let node = read(p)
  if node.kind != EdnSymbol:
    raise new_exception(ParseError, "Attempted to read a var quote, but got " & $node.kind)
  result = EdnNode(kind: EdnVarQuote, var_symbol: node)

proc read_regex(p: var EdnParser): EdnNode =
  ## Reader for `#"`; the pattern is read with string escape rules.
  let s = read_string(p)
  result = EdnNode(kind: EdnRegex, regex: s.str)

proc read_unmatched_delimiter(p: var EdnParser): EdnNode =
  # NOTE(review): read_delimited_list advances bufpos past the macro char
  # before calling a reader, so this may name the following character -
  # confirm against `read`
  raise new_exception(ParseError, "Unmatched delimiter: " & p.buf[p.bufpos])

proc read_discard(p: var EdnParser): EdnNode =
  ## Reader for `#_`: reads one form and drops it.
  discard read(p)
  result = nil

proc read_dispatch(p: var EdnParser): EdnNode =
  ## Reader for `#`. Uses the dispatch table, or reads a tagged value when
  ## the next character can start a tag. Raises `ParseError` otherwise.
  let pos = p.bufpos
  let ch = p.buf[pos]
  if ch == EndOfFile:
    raise new_exception(ParseError, "EOF while reading dispatch macro")
  let m = dispatch_macros[ch]
  if m != nil:
    p.bufpos = pos + 1
    result = m(p)
  elif valid_utf8_alpha(ch):
    result = read_tagged(p)
  else:
    raise new_exception(ParseError, "No dispatch macro for: " & ch)

proc init_macro_array() =
  macros['"'] = read_string
  macros['\''] = read_quoted
  macros['`'] = read_quasi_quoted
  macros[';'] = read_comment
  macros['~'] = read_unquoted
  macros['@'] = read_deref
  macros['#'] = read_dispatch
  macros['^'] = read_metadata
  macros['\\'] = read_character
  macros['('] = read_list
  macros['{'] = read_map
  macros['['] = read_vector
  macros[')'] = read_unmatched_delimiter
  macros[']'] = read_unmatched_delimiter
  macros['}'] = read_unmatched_delimiter

proc init_dispatch_macro_array() =
  dispatch_macros['^'] = read_metadata
  dispatch_macros[':'] = read_ns_map
  dispatch_macros['{'] = read_set
  # dispatch_macros['<'] = nil # new UnreadableReader();
  dispatch_macros['_'] = read_discard
  dispatch_macros['('] = read_anonymous_fn
  dispatch_macros['?'] = read_reader_conditional
  dispatch_macros['"'] = read_regex
  dispatch_macros['\''] = read_var_quote

proc init_edn_readers() =
  ## Creates the shared per-thread nodes and fills both macro tables.
  edn_true = EdnNode(kind: EdnBool, bool_val: true)
  edn_false = EdnNode(kind: EdnBool, bool_val: false)
  EdnTrue = edn_true
  EdnFalse = edn_false
  KeyTag = new_edn_keyword("", "tag")
  CljTag = new_edn_keyword("", "clj")
  CljsTag = new_edn_keyword("", "cljs")
  DefaultTag = new_edn_keyword("", "default")
  LineKw = new_edn_keyword("edn.nim", "line")
  ColumnKw = new_edn_keyword("edn.nim", "column")
  SplicedQKw = new_edn_keyword("edn.nim", "spliced?")
  init_macro_array()
  init_dispatch_macro_array()
  initialised = true

proc init_edn_readers*(options: ParseOptions) =
  ## Initialises the readers of the current thread and installs the
  ## `#+clj` / `#+cljs` handler that matches `options.conditional_exprs`.
  case options.conditional_exprs
  of asError, ignoreConditionals:
    # drop a handler left over from an earlier call on this thread, so
    # that #+clj / #+cljs is rejected again
    dispatch_macros['+'] = nil
  of asTagged:
    dispatch_macros['+'] = read_cond_as_tagged
  of cljSource:
    dispatch_macros['+'] = read_cond_clj
  of cljsSource:
    dispatch_macros['+'] = read_cond_cljs

  init_edn_readers()

### === HMap: a simple hash map ====

proc new_hmap(capacity: int = 16): HMap =
  ## Creates an empty map. The bucket count is `capacity` rounded up to a
  ## power of two (at least 1), because indexing masks the hash with
  ## `buckets.high`.
  assert capacity >= 0
  var cap = 1
  while cap < capacity:
    cap = cap shl 1
  new(result)
  result.buckets = new_seq[seq[HMapEntry]](cap)
  result.count = 0

proc `[]=`*(m: HMap, key: EdnNode, val: EdnNode) =
  ## Inserts or overwrites `key`. The table is rebuilt at twice the size
  ## once the load threshold (about 0.75) would be exceeded.
  let h = hash(key)
  if m.count + 1 > int(0.75 * float(m.buckets.high)):
    # double the bucket *count*, keeping it a power of two for the mask
    let new_cap = if m.count == 0: 8 else: 2 * m.buckets.len
    let tmp_map = new_hmap(new_cap)
    for entry in m:
      tmp_map[entry.key] = entry.value
    tmp_map[key] = val
    m[] = tmp_map[]
  else:
    let bucket_index = h and m.buckets.high
    for item in m.buckets[bucket_index]:
      if item.key == key:
        item.value = val
        return
    var entry = new(HMapEntry)
    entry.key = key
    entry.value = val
    if m.buckets[bucket_index].len == 0:
      m.buckets[bucket_index] = @[entry]
    else:
      m.buckets[bucket_index].add(entry)
    inc(m.count)

proc val_at*(m: HMap, key: EdnNode, default: EdnNode = nil): EdnNode =
  ## Returns the value stored under `key`, or `default` when absent.
  result = default
  if m.buckets.len == 0:
    return
  let bucket_index = hash(key) and m.buckets.high
  for entry in m.buckets[bucket_index]:
    if entry.key == key:
      result = entry.value
      break


proc `[]`*(m: HMap, key: EdnNode): Option[EdnNode] =
  ## Returns `some(value)` for a present key and `none` otherwise.
  # a fresh node serves as a sentinel; found values are told apart by identity
  let sentinel = EdnNode(kind: EdnBool, bool_val: true)
  let found = val_at(m, key, sentinel)
  if cast[pointer](found) == cast[pointer](sentinel):
    return none(EdnNode)
  return some(found)

proc merge_maps*(m1, m2: HMap): void =
  ## Copies every entry of `m2` into `m1`; on equal keys `m2` wins.
  for entry in m2:
    m1[entry.key] = entry.value

### === TODO: name for this section ====

proc open*(p: var EdnParser, input: Stream, filename: string) =
  ## Attaches the parser to `input`; `filename` is used in error messages.
  lexbase.open(p, input)
  p.filename = filename
  p.a = ""

proc close*(p: var EdnParser) {.inline.} =
  lexbase.close(p)

proc get_line(p: EdnParser): int {.inline.} =
  result = p.line_number

proc get_column(p: EdnParser): int {.inline.} =
  result = get_col_number(p, p.bufpos)

proc get_filename(p: EdnParser): string =
  result = p.filename

proc parse_number(p: var EdnParser): TokenKind =
  ## Scans a number into `p.a` and reports `tkInt` or `tkFloat`. A
  ## leading `.` is normalised to `0.`.
  result = TokenKind.tkEof
  var
    pos = p.bufpos
    buf = p.buf
  if (buf[pos] == '-') or (buf[pos] == '+'):
    add(p.a, buf[pos])
    inc(pos)
  if buf[pos] == '.':
    add(p.a, "0.")
    inc(pos)
    result = tkFloat
  else:
    result = tkInt
  while buf[pos] in Digits:
    add(p.a, buf[pos])
    inc(pos)
  if buf[pos] == '.':
    add(p.a, '.')
    inc(pos)
    result = tkFloat
  # digits after the dot
  while buf[pos] in Digits:
    add(p.a, buf[pos])
    inc(pos)
  if buf[pos] in {'E', 'e'}:
    add(p.a, buf[pos])
    inc(pos)
    result = tkFloat
    if buf[pos] in {'+', '-'}:
      add(p.a, buf[pos])
      inc(pos)
    while buf[pos] in Digits:
      add(p.a, buf[pos])
      inc(pos)
  p.bufpos = pos

proc read_num(p: var EdnParser): EdnNode
= 1275 var num_result = parse_number(p) 1276 let opts = p.options 1277 case num_result 1278 of tkEof: 1279 if opts.eof_is_error: 1280 raise new_exception(ParseError, "EOF while reading") 1281 else: 1282 result = nil 1283 of tkInt: 1284 if p.buf[p.bufpos] == '/': 1285 if not isDigit(p.buf[p.bufpos+1]): 1286 let e = err_info(p) 1287 raise new_exception(ParseError, "error reading a ratio: " & $e) 1288 var numerator = new_edn_int(p.a) 1289 inc(p.bufpos) 1290 p.a = "" 1291 var denom_tok = parse_number(p) 1292 if denom_tok == tkInt: 1293 var denom = new_edn_int(p.a) 1294 result = new_edn_ratio(numerator.num, denom.num) 1295 else: 1296 raise new_exception(ParseError, "error reading a ratio: " & p.a) 1297 elif p.buf[p.bufpos] == 'M': #TODO: for now... 1298 inc(p.bufpos) 1299 result = new_edn_int(p.a) 1300 else: 1301 result = new_edn_int(p.a) 1302 of tkFloat: 1303 if p.buf[p.bufpos] == 'M': #TODO: for now... 1304 inc(p.bufpos) 1305 result = new_edn_float(p.a) 1306 of tkError: 1307 raise new_exception(ParseError, "error reading a number: " & p.a) 1308 else: 1309 raise new_exception(ParseError, "error reading a number (?): " & p.a) 1310 1311 proc read_internal(p: var EdnParser): EdnNode = 1312 setLen(p.a, 0) 1313 skip_ws(p) 1314 let ch = p.buf[p.bufpos] 1315 let opts = p.options 1316 var token: string 1317 case ch 1318 of EndOfFile: 1319 if opts.eof_is_error: 1320 let position = (p.line_number, get_col_number(p, p.bufpos)) 1321 raise new_exception(ParseError, "EOF while reading " & $position) 1322 else: 1323 p.token = tkEof 1324 return opts.eof_value 1325 of '0'..'9': 1326 return read_num(p) 1327 elif is_macro(ch): 1328 let m = macros[ch] # save line:col metadata here? 
1329 inc(p.bufpos) 1330 return m(p) 1331 elif ch in {'+', '-'}: 1332 if isDigit(p.buf[p.bufpos + 1]): 1333 return read_num(p) 1334 else: 1335 let column = getColNumber(p, p.bufpos) 1336 token = read_token(p, false) 1337 let line_num = p.line_number 1338 result = interpret_token(token) 1339 result.line = line_num 1340 result.column = column 1341 return result 1342 1343 1344 let column = getColNumber(p, p.bufpos) 1345 token = read_token(p, true) 1346 if opts.suppress_read: 1347 result = nil 1348 else: 1349 let line_num = p.line_number 1350 result = interpret_token(token) 1351 result.line = line_num 1352 result.column = column 1353 1354 proc read*(p: var EdnParser): EdnNode = 1355 if not initialised: 1356 init_edn_readers() 1357 init_edn_readers(p.options) 1358 result = read_internal(p) 1359 let noComments = p.options.comments_handling == discardComments 1360 while result != nil and noComments and result.kind == EdnCommentLine: 1361 result = read_internal(p) 1362 1363 proc read*(s: Stream, filename: string): EdnNode = 1364 var p: EdnParser 1365 var opts: ParseOptions 1366 opts.eof_is_error = true 1367 opts.suppress_read = false 1368 opts.conditional_exprs = asError 1369 opts.comments_handling = discardComments 1370 p.options = opts 1371 p.open(s, filename) 1372 defer: p.close() 1373 result = read(p) 1374 1375 proc read*(buffer: string): EdnNode = 1376 result = read(new_string_stream(buffer), "*input*") 1377 1378 proc read*(buffer: string, options: ParseOptions): EdnNode = 1379 var 1380 p: EdnParser 1381 s = new_string_stream(buffer) 1382 p.options = options 1383 p.open(s, "*input*") 1384 defer: p.close() 1385 result = read(p) 1386 1387 proc `$`*(node: EdnNode): string = 1388 case node.kind 1389 of EdnKeyword: 1390 case node.namespacing 1391 of NoNamespace: 1392 result = ":" & node.keyword.name 1393 of LocalNamespace: 1394 result = "::" & node.keyword.name 1395 of NonLocalNamespace: 1396 result = "::" & node.keyword.ns & "/" & node.keyword.name 1397 of FullNamespace: 
1398 result = ":" & node.keyword.ns & "/" & node.keyword.name 1399 of EdnSymbol: 1400 if node.symbol.ns == "": 1401 result = node.symbol.name 1402 else: 1403 result = node.symbol.ns & "/" & node.symbol.name 1404 else: 1405 assert(false) 1406 1407 # DONE: handling cond forms that are returned as nil (e.g. ommited) 1408 # TODO: special comments handlers experimenting with literate progrmming 1409 # TODO: util method for reading strings but accepting ParseOpts 1410 # TODO: asTagged untested or not working? maybe drop?