parser.lua
1 -- MIT License 2 -- 3 -- Copyright (c) 2018 Kouhei Sutou <kou@clear-code.com> 4 -- 5 -- Permission is hereby granted, free of charge, to any person obtaining a copy 6 -- of this software and associated documentation files (the "Software"), to deal 7 -- in the Software without restriction, including without limitation the rights 8 -- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 -- copies of the Software, and to permit persons to whom the Software is 10 -- furnished to do so, subject to the following conditions: 11 -- 12 -- The above copyright notice and this permission notice shall be included in all 13 -- copies or substantial portions of the Software. 14 -- 15 -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 -- SOFTWARE. 
-- CHANGE LOG
-- (16 May 2024, chmod777)
--   * Replace spaces with tabs
--   * Parse full rulesets

-- RESOURCES
--   * https://www.w3.org/TR/CSS21/grammar.html


local Parser = {}

-- Collaborators loaded through the Spring VFS.  Both stay nil when no `VFS`
-- global exists.  `PropertyParser` is indexed by CSS property name and
-- `Source` provides the input cursor created in `Parser.new`.
-- They are declared local on purpose: assigning `PropertyParser` without a
-- local declaration would create a global.
local PropertyParser
local Source
if VFS then
	local luaWidgetDir = 'LuaUI/Widgets/'
	PropertyParser = VFS.Include(luaWidgetDir..'chmod777_includes/html_css/css_property_parser.lua').CSSPropertySelector
	Source = VFS.Include(luaWidgetDir..'chmod777_includes/html_css/modified_luacs/source.lua')
end

-- Methods shared by every parser instance; instances resolve missing keys
-- here through the metatable's __index.
local methods = {}

local metatable = { __index = methods }

-- Specification: https://www.w3.org/TR/selectors-3/
--
-- Grammar:
--
-- selectors_group
--   : selector [ COMMA S* selector ]*
--   ;
--
-- selector
--   : simple_selector_sequence [ combinator simple_selector_sequence ]*
--   ;
--
-- combinator
--   /* combinators can be surrounded by whitespace */
--   : PLUS S* | GREATER S* | TILDE S* | S+
--   ;
--
-- simple_selector_sequence
--   : [ type_selector | universal ]
--     [ HASH | class | attrib | pseudo | negation ]*
--   | [ HASH | class | attrib | pseudo | negation ]+
--   ;
--
-- type_selector
--   : [ namespace_prefix ]? element_name
--   ;
--
-- namespace_prefix
--   : [ IDENT | '*' ]? '|'
--   ;
--
-- element_name
--   : IDENT
--   ;
--
-- universal
--   : [ namespace_prefix ]? '*'
--   ;
--
-- class
--   : '.' IDENT
--   ;
--
-- attrib
--   : '[' S* [ namespace_prefix ]? IDENT S*
--         [ [ PREFIXMATCH |
--             SUFFIXMATCH |
--             SUBSTRINGMATCH |
--             '=' |
--             INCLUDES |
--             DASHMATCH ] S* [ IDENT | STRING ] S*
--         ]? ']'
--   ;
--
-- pseudo
--   /* '::' starts a pseudo-element, ':' a pseudo-class */
--   /* Exceptions: :first-line, :first-letter, :before and :after. */
--   /* Note that pseudo-elements are restricted to one per selector and */
--   /* occur only in the last simple_selector_sequence. */
--   : ':' ':'? [ IDENT | functional_pseudo ]
--   ;
--
-- functional_pseudo
--   : FUNCTION S* expression ')'
--   ;
--
-- expression
--   /* In CSS3, the expressions are identifiers, strings, */
--   /* or of the form "an+b" */
--   : [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+
--   ;
--
-- negation
--   : NOT S* negation_arg S* ')'
--   ;
--
-- negation_arg
--   : type_selector | universal | HASH | class | attrib | pseudo
--   ;
--
--
-- Lexer:
--
--   %option case-insensitive
--
--   ident     [-]?{nmstart}{nmchar}*
--   name      {nmchar}+
--   nmstart   [_a-z]|{nonascii}|{escape}
--   nonascii  [^\0-\177]
--   unicode   \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
--   escape    {unicode}|\\[^\n\r\f0-9a-f]
--   nmchar    [_a-z0-9-]|{nonascii}|{escape}
--   num       [0-9]+|[0-9]*\.[0-9]+
--   string    {string1}|{string2}
--   string1   \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*\"
--   string2   \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*\'
--   invalid   {invalid1}|{invalid2}
--   invalid1  \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*
--   invalid2  \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*
--   nl        \n|\r\n|\r|\f
--   w         [ \t\r\n\f]*
--
--   D         d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?
--   E         e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?
--   N         n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n
--   O         o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o
--   T         t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t
--   V         v|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\v
--
--   %%
--
--   [ \t\r\n\f]+     return S;
--
--   "~="             return INCLUDES;
--   "|="             return DASHMATCH;
--   "^="             return PREFIXMATCH;
--   "$="             return SUFFIXMATCH;
--   "*="             return SUBSTRINGMATCH;
--   {ident}          return IDENT;
--   {string}         return STRING;
--   {ident}"("       return FUNCTION;
--   {num}            return NUMBER;
--   "#"{name}        return HASH;
--   {w}"+"           return PLUS;
--   {w}">"           return GREATER;
--   {w}","           return COMMA;
--   {w}"~"           return TILDE;
--   ":"{N}{O}{T}"("  return NOT;
--   @{ident}         return ATKEYWORD;
--   {invalid}        return INVALID;
--   {num}%           return PERCENTAGE;
--   {num}{ident}     return DIMENSION;
--   "<!--"           return CDO;
--   "-->"            return CDC;
--
--   \/\*[^*]*\*+([^/*][^*]*\*+)*\/                    /* ignore comments */
--
--   .                return *yytext;

--- Dispatch a parse event to the listener.
-- Calls `listener["on_" .. name](listener, ...)` when the listener defines
-- that handler; events without a handler are silently ignored.
-- @param parser the parser whose `listener` receives the event
-- @param name   event name without the "on_" prefix
-- @param ...    event arguments forwarded to the handler
local function on(parser, name, ...)
	local listener = parser.listener
	local callback = listener["on_" .. name]
	-- Spring.Echo("on_" .. name, ...)
	if callback then
		callback(listener, ...)
	end
end

--- type_selector : [ namespace_prefix ]? element_name
-- Emits "type_selector"(namespace_prefix, element_name) on success.
-- @return true on success; false with the source position restored otherwise
local function type_selector(parser)
	local source = parser.source
	local position = source.position
	local namespace_prefix = source:match_namespace_prefix()
	local element_name = source:match_ident()

	if not element_name then
		-- Undo a namespace prefix that may already have been consumed.
		source:seek(position)
		return false
	end

	on(parser, "type_selector", namespace_prefix, element_name)
	return true
end

--- universal : [ namespace_prefix ]? '*'
-- Emits "universal"(namespace_prefix) on success.
-- @return true on success; false with the source position restored otherwise
local function universal(parser)
	local source = parser.source
	local position = source.position
	local namespace_prefix = source:match_namespace_prefix()
	local asterisk = source:match("%*")

	if not asterisk then
		source:seek(position)
		return false
	end

	on(parser, "universal", namespace_prefix)
	return true
end

--- HASH ('#' name).  Emits "hash"(name) on success.
-- @return true when `source:match_hash()` yields a name, false otherwise
local function hash(parser)
	local name = parser.source:match_hash()
	if not name then
		return false
	end
	on(parser, "hash", name)
	return true
end

--- class : '.' IDENT
-- Emits "class"(name) on success.
-- @return true on success; false with the source position restored otherwise
local function class(parser)
	local source = parser.source
	local position = source.position

	if not source:match("%.") then
		return false
	end

	local name = source:match_ident()
	if not name then
		-- Give the '.' back.
		source:seek(position)
		return false
	end

	on(parser, "class", name)
	return true
end

--- attrib : '[' S* [ namespace_prefix ]? IDENT S* [ operator S* value S* ]? ']'
-- Emits "attribute"(namespace_prefix, name, operator, value) on success;
-- `operator` and `value` are nil for a bare `[name]` selector.
-- @return true on success; false with the source position restored otherwise
local function attribute(parser)
	local source = parser.source
	local position = source.position

	if not source:match("%[") then
		return false
	end

	source:match_whitespaces()

	local position_name = source.position
	local namespace_prefix = source:match_namespace_prefix()

	local name = source:match_ident()
	if not name then
		-- No identifier after the prefix match: rewind and retry the name
		-- without a namespace prefix.
		source:seek(position_name)
		namespace_prefix = nil
		name = source:match_ident()
		if not name then
			source:seek(position)
			return false
		end
	end

	source:match_whitespaces()

	local operator = nil
	if source:match("%^=") then
		operator = "^="
	elseif source:match("%$=") then
		operator = "$="
	elseif source:match("%*=") then
		operator = "*="
	elseif source:match("=") then
		operator = "="
	elseif source:match("~=") then
		operator = "~="
	elseif source:match("|=") then
		operator = "|="
	end

	local value = nil
	if operator then
		-- An operator must be followed by an identifier or a string.
		source:match_whitespaces()
		value = source:match_ident()
		if not value then
			value = source:match_string()
		end
		if not value then
			source:seek(position)
			return false
		end
		source:match_whitespaces()
	end

	if not source:match("%]") then
		source:seek(position)
		return false
	end

	on(parser, "attribute", namespace_prefix, name, operator, value)
	return true
end

--- Parse one component of a functional pseudo-class expression and append it
-- to `expression` as a `{kind[, text]}` pair.  Kinds are "plus", "minus",
-- "dimension", "number", "string" and "name", tried in that order.
-- @param parser     the parser
-- @param expression array receiving the parsed component
-- @return true when a component was consumed, false otherwise
local function expression_component(parser, expression)
	local source = parser.source

	if source:match("%+") then
		table.insert(expression, {"plus"})
		return true
	end

	if source:match("-") then
		table.insert(expression, {"minus"})
		return true
	end

	local dimension = source:match_dimension()
	if dimension then
		table.insert(expression, {"dimension", dimension})
		return true
	end

	local number = source:match_number()
	if number then
		table.insert(expression, {"number", number})
		return true
	end

	local string = source:match_string()
	if string then
		table.insert(expression, {"string", string})
		return true
	end

	local name = source:match_ident()
	if name then
		table.insert(expression, {"name", name})
		return true
	end

	return false
end

--- functional_pseudo : FUNCTION S* expression ')'
-- Emits "functional_pseudo"(function_name, expression) on success, where
-- `expression` is the non-empty array built by `expression_component`.
-- @return true on success; false otherwise (position restored once the
--         function name has been consumed)
local function functional_pseudo(parser)
	local source = parser.source
	local position = source.position

	local function_name = source:match_ident()
	if not function_name then
		return false
	end

	if not source:match("%(") then
		source:seek(position)
		return false
	end

	local expression = {}
	while true do
		source:match_whitespaces()
		if not expression_component(parser, expression) then
			break
		end
	end

	-- The grammar requires at least one expression component.
	if #expression == 0 then
		source:seek(position)
		return false
	end

	if not source:match("%)") then
		source:seek(position)
		return false
	end

	on(parser, "functional_pseudo", function_name, expression)
	return true
end

--- pseudo : ':' ':'? [ IDENT | functional_pseudo ]
-- A plain identifier emits "pseudo_element"(name) after '::' and
-- "pseudo_class"(name) after ':'.  A functional pseudo emits only the
-- "functional_pseudo" event, regardless of how many colons preceded it.
-- @return true on success; false with the source position restored otherwise
local function pseudo(parser)
	local source = parser.source
	local position = source.position

	if not source:match(":") then
		return false
	end

	local event_name
	if source:match(":") then
		event_name = "pseudo_element"
	else
		event_name = "pseudo_class"
	end

	if functional_pseudo(parser) then
		return true
	end

	local name = source:match_ident()
	if not name then
		source:seek(position)
		return false
	end

	on(parser, event_name, name)
	return true
end

--- negation : NOT S* negation_arg S* ')'
-- Emits "start_negation", then the events of the argument, then
-- "end_negation".  "start_negation" is emitted as soon as ":not(" matches,
-- so a negation that later fails to parse leaves it without a matching
-- "end_negation".
-- @return true on success; false with the source position restored otherwise
local function negation(parser)
	local source = parser.source
	local position = source.position

	if not source:match(":not%(") then
		return false
	end

	on(parser, "start_negation")
	source:match_whitespaces()
	local has_argument = type_selector(parser) or
		universal(parser) or
		hash(parser) or
		class(parser) or
		attribute(parser) or
		pseudo(parser)
	if not has_argument then
		source:seek(position)
		return false
	end

	source:match_whitespaces()
	if not source:match("%)") then
		source:seek(position)
		return false
	end

	on(parser, "end_negation")
	return true
end

--- simple_selector_sequence
--   : [ type_selector | universal ] [ HASH | class | attrib | pseudo | negation ]*
--   | [ HASH | class | attrib | pseudo | negation ]+
-- Emits "start_simple_selector_sequence" unconditionally and
-- "end_simple_selector_sequence" only when the sequence is valid.
-- @return true when a valid sequence was parsed, false otherwise
local function simple_selector_sequence(parser)
	on(parser, "start_simple_selector_sequence")
	-- Without a leading type/universal selector at least one qualifier is
	-- required.
	local n_required = 1
	if type_selector(parser) or universal(parser) then
		n_required = 0
	end
	local n_occurred = 0
	-- negation is tried before pseudo so that ":not(" is not read as a
	-- functional pseudo-class.
	while hash(parser) or
		class(parser) or
		attribute(parser) or
		negation(parser) or
		pseudo(parser) do
		n_occurred = n_occurred + 1
	end
	local success = (n_occurred >= n_required)
	if success then
		on(parser, "end_simple_selector_sequence")
	end
	return success
end

--- combinator : PLUS S* | GREATER S* | TILDE S* | S+
-- Emits "combinator"(symbol), where symbol is "+", ">", "~" or " ".
-- @return the combinator symbol, or false (position restored) when there is
--         neither an explicit combinator nor whitespace
local function combinator(parser)
	local source = parser.source
	local position = source.position

	local whitespaces = source:match_whitespaces()

	if source:match("%+") then
		source:match_whitespaces()
		on(parser, "combinator", "+")
		return "+"
	elseif source:match(">") then
		source:match_whitespaces()
		on(parser, "combinator", ">")
		return ">"
	elseif source:match("~") then
		source:match_whitespaces()
		on(parser, "combinator", "~")
		return "~"
	elseif whitespaces then
		-- Whitespace alone is the descendant combinator.
		on(parser, "combinator", " ")
		return " "
	else
		source:seek(position)
		return false
	end
end

--- selector : simple_selector_sequence [ combinator simple_selector_sequence ]*
-- Emits "start_selector" unconditionally and "end_selector" on success.
-- Whitespace that is not followed by another sequence (for example before
-- '{' or ',') is still reported as a " " combinator before the selector
-- ends.
-- @return true on success, false otherwise
local function parse_selector(parser)
	on(parser, "start_selector")

	if not simple_selector_sequence(parser) then
		return false
	end

	while true do
		local combinator_current = combinator(parser)
		if not combinator_current then
			break
		end
		if not simple_selector_sequence(parser) then
			-- Trailing whitespace is allowed; an explicit combinator must be
			-- followed by a sequence.
			if combinator_current == " " then
				break
			end
			return false
		end
	end
	on(parser, "end_selector")
	return true
end

--- selectors_group : selector [ COMMA S* selector ]*
-- Emits "start_selectors_group" after skipping leading whitespace and
-- "end_selectors_group" on success.  Input after the group is not treated as
-- an error here: `parse_ruleset` goes on to parse the declaration block.
-- @return true on success, false otherwise
local function parse_selectors_group(parser)
	local source = parser.source

	source:match_whitespaces()
	on(parser, "start_selectors_group")
	if not parse_selector(parser) then
		-- error("Failed to parse CSS selectors group: " ..
		--         "must have at least one selector: " ..
		--         "<" .. parser.source:inspect() .. ">")
		return false
	end
	while true do
		source:match_whitespaces()
		if not source:match(",") then
			break
		end
		source:match_whitespaces()
		if not parse_selector(parser) then
			-- error("Failed to parse CSS selectors group: " ..
			--         "must have selector after ',': " ..
			--         "<" .. parser.source:inspect() .. ">")
			return false
		end
	end
	source:match_whitespaces()
	on(parser, "end_selectors_group")
	return true
end

-- Shorthand properties that are reported as four longhand declarations.
-- The parsed shorthand's `value.value` array is read in the same order as
-- the names listed here: top, right, bottom, left.
local BOX_SHORTHANDS = {
	["padding"] = {
		"padding-top", "padding-right", "padding-bottom", "padding-left",
	},
	["border-width"] = {
		"border-top-width", "border-right-width",
		"border-bottom-width", "border-left-width",
	},
	["margin"] = {
		"margin-top", "margin-right", "margin-bottom", "margin-left",
	},
}

--- Parse one `property: value;` declaration.
-- When `PropertyParser` has an entry for the property, that parser produces
-- the value and the declaration must end with ';'.  The shorthands listed in
-- BOX_SHORTHANDS are reported as four "declaration" events, one per side,
-- each with a `{ value = side }` table.  Otherwise (no entry, or the entry
-- returned nothing) the raw text up to the next ';' is reported as the
-- value.
-- Emits "declaration"(property_name, value).
-- @return true when a declaration was reported, false otherwise
local function parse_declaration(parser)
	local source = parser.source
	local position = source.position

	local property_name = source:match_ident()
	if not property_name then
		source:seek(position)
		return false
	end

	if not source:match(":") then
		source:seek(position)
		return false
	end
	source:match_whitespaces()

	-- PropertyParser is nil when the property parsers could not be loaded;
	-- fall back to the raw scan below instead of raising.
	local property_parser = PropertyParser and PropertyParser[property_name]
	if property_parser then
		local value = property_parser(property_name, parser)
		if value then
			source:match_whitespaces()
			if not source:match(";") then
				Spring.Echo("bad ending: did not find ';'")
				return false
			end

			local longhands = BOX_SHORTHANDS[property_name]
			if longhands then
				local sides = value.value
				for i = 1, #longhands do
					on(parser, "declaration", longhands[i], { value = sides[i] })
				end
			else
				on(parser, "declaration", property_name, value)
			end
			return true
		end
	end

	-- Raw fallback: take everything up to the next ';' verbatim.
	local start_value = source.position
	local data_length = #source.data
	while not source:match(";") do
		if source.position > data_length then
			-- Reached the end of the input without a terminating ';'.  Stop
			-- here (the scan would otherwise never terminate) and leave the
			-- source where the declaration started.
			source:seek(position)
			return false
		end
		source:seek(source.position + 1)
	end

	-- The cursor now sits just after ';', so the value ends two characters
	-- earlier.  Only an empty value is rejected; one-character values such
	-- as "0" or "1" are valid.
	local end_value = source.position - 2
	if end_value < start_value then
		return false
	end
	local value = source.data:sub(start_value, end_value)

	source:match_whitespaces()
	on(parser, "declaration", property_name, value)
	return true
end

--- Parse a declaration block: '{' S* declaration* S* '}'.
-- @return true when the block was opened and closed, false otherwise
local function parse_declarations(parser)
	local source = parser.source
	if not source:match("{") then
		return false
	end
	source:match_whitespaces()
	while parse_declaration(parser) do
		source:match_whitespaces()
	end
	source:match_whitespaces()
	return source:match("}") and true or false
end

--- Parse one ruleset: a selectors group followed by a declaration block.
-- Emits "start_ruleset" unconditionally and "end_ruleset" on success.
-- @return true on success, false otherwise
local function parse_ruleset(parser)
	on(parser, "start_ruleset")
	if not parse_selectors_group(parser) then
		return false
	end
	if not parse_declarations(parser) then
		return false
	end
	on(parser, "end_ruleset")
	return true
end

--- Parse rulesets until one fails to parse.
-- Emits "start_css" first and "end_css" last; the final, failing
-- `parse_ruleset` call still emits its own start events.
function methods:parse()
	on(self, "start_css")
	while parse_ruleset(self) do
		self.source:match_whitespaces()
	end
	on(self, "end_css")
end

--- Create a parser.
-- @param input    passed to `Source.new` to build the parser's source
-- @param listener table whose optional `on_<event>` methods receive the
--                 parse events
-- @return the new parser; call `parser:parse()` to run it
function Parser.new(input, listener)
	local parser = {
		source = Source.new(input),
		listener = listener,
	}
	setmetatable(parser, metatable)
	return parser
end

return Parser