parser.lua
  1  -- MIT License
  2  -- 
  3  -- Copyright (c) 2018 Kouhei Sutou <kou@clear-code.com>
  4  -- 
  5  -- Permission is hereby granted, free of charge, to any person obtaining a copy
  6  -- of this software and associated documentation files (the "Software"), to deal
  7  -- in the Software without restriction, including without limitation the rights
  8  -- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9  -- copies of the Software, and to permit persons to whom the Software is
 10  -- furnished to do so, subject to the following conditions:
 11  -- 
 12  -- The above copyright notice and this permission notice shall be included in all
 13  -- copies or substantial portions of the Software.
 14  -- 
 15  -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16  -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17  -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18  -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19  -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20  -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21  -- SOFTWARE.
 22  
 23  -- CHANGE LOG
 24  -- (16 May 2024, chmod777)
 25  --  * Replace spaces with tabs
 26  --  * Parse full rulesets
 27  
 28  -- RESOURCES
 29  -- * https://www.w3.org/TR/CSS21/grammar.html
 30  
 31  
 32  local Parser = {}
 33  
 34  local Property
 35  local Source
 36  if VFS then
 37  	local luaWidgetDir = 'LuaUI/Widgets/'
 38  	PropertyParser = VFS.Include(luaWidgetDir..'chmod777_includes/html_css/css_property_parser.lua').CSSPropertySelector
 39  	Source = VFS.Include(luaWidgetDir..'chmod777_includes/html_css/modified_luacs/source.lua')
 40  end
 41  
 42  local methods = {}
 43  
 44  local metatable = {}
 45  function metatable.__index(parser, key)
 46  	return methods[key]
 47  end
 48  
 49  -- Specification: https://www.w3.org/TR/selectors-3/
 50  --
 51  -- Grammar:
 52  --
 53  -- selectors_group
 54  --   : selector [ COMMA S* selector ]*
 55  --   ;
 56  --
 57  -- selector
 58  --   : simple_selector_sequence [ combinator simple_selector_sequence ]*
 59  --   ;
 60  --
 61  -- combinator
 62  --   /* combinators can be surrounded by whitespace */
 63  --   : PLUS S* | GREATER S* | TILDE S* | S+
 64  --   ;
 65  --
 66  -- simple_selector_sequence
 67  --   : [ type_selector | universal ]
 68  --     [ HASH | class | attrib | pseudo | negation ]*
 69  --   | [ HASH | class | attrib | pseudo | negation ]+
 70  --   ;
 71  --
 72  -- type_selector
 73  --   : [ namespace_prefix ]? element_name
 74  --   ;
 75  --
 76  -- namespace_prefix
 77  --   : [ IDENT | '*' ]? '|'
 78  --   ;
 79  --
 80  -- element_name
 81  --   : IDENT
 82  --   ;
 83  --
 84  -- universal
 85  --   : [ namespace_prefix ]? '*'
 86  --   ;
 87  --
 88  -- class
 89  --   : '.' IDENT
 90  --   ;
 91  --
 92  -- attrib
 93  --   : '[' S* [ namespace_prefix ]? IDENT S*
 94  --         [ [ PREFIXMATCH |
 95  --             SUFFIXMATCH |
 96  --             SUBSTRINGMATCH |
 97  --             '=' |
 98  --             INCLUDES |
 99  --             DASHMATCH ] S* [ IDENT | STRING ] S*
100  --         ]? ']'
101  --   ;
102  --
103  -- pseudo
104  --   /* '::' starts a pseudo-element, ':' a pseudo-class */
105  --   /* Exceptions: :first-line, :first-letter, :before and :after. */
106  --   /* Note that pseudo-elements are restricted to one per selector and */
107  --   /* occur only in the last simple_selector_sequence. */
108  --   : ':' ':'? [ IDENT | functional_pseudo ]
109  --   ;
110  --
111  -- functional_pseudo
112  --   : FUNCTION S* expression ')'
113  --   ;
114  --
115  -- expression
116  --   /* In CSS3, the expressions are identifiers, strings, */
117  --   /* or of the form "an+b" */
118  --   : [ [ PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT ] S* ]+
119  --   ;
120  --
121  -- negation
122  --   : NOT S* negation_arg S* ')'
123  --   ;
124  --
125  -- negation_arg
126  --   : type_selector | universal | HASH | class | attrib | pseudo
127  --   ;
128  --
129  --
130  -- Lexer:
131  --
132  -- %option case-insensitive
133  --
134  -- ident     [-]?{nmstart}{nmchar}*
135  -- name      {nmchar}+
136  -- nmstart   [_a-z]|{nonascii}|{escape}
137  -- nonascii  [^\0-\177]
138  -- unicode   \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
139  -- escape    {unicode}|\\[^\n\r\f0-9a-f]
140  -- nmchar    [_a-z0-9-]|{nonascii}|{escape}
141  -- num       [0-9]+|[0-9]*\.[0-9]+
142  -- string    {string1}|{string2}
143  -- string1   \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*\"
144  -- string2   \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*\'
145  -- invalid   {invalid1}|{invalid2}
146  -- invalid1  \"([^\n\r\f\\"]|\\{nl}|{nonascii}|{escape})*
147  -- invalid2  \'([^\n\r\f\\']|\\{nl}|{nonascii}|{escape})*
148  -- nl        \n|\r\n|\r|\f
149  -- w         [ \t\r\n\f]*
150  --
151  -- D         d|\\0{0,4}(44|64)(\r\n|[ \t\r\n\f])?
152  -- E         e|\\0{0,4}(45|65)(\r\n|[ \t\r\n\f])?
153  -- N         n|\\0{0,4}(4e|6e)(\r\n|[ \t\r\n\f])?|\\n
154  -- O         o|\\0{0,4}(4f|6f)(\r\n|[ \t\r\n\f])?|\\o
155  -- T         t|\\0{0,4}(54|74)(\r\n|[ \t\r\n\f])?|\\t
156  -- V         v|\\0{0,4}(58|78)(\r\n|[ \t\r\n\f])?|\\v
157  --
158  -- %%
159  --
160  -- [ \t\r\n\f]+     return S;
161  --
162  -- "~="             return INCLUDES;
163  -- "|="             return DASHMATCH;
164  -- "^="             return PREFIXMATCH;
165  -- "$="             return SUFFIXMATCH;
166  -- "*="             return SUBSTRINGMATCH;
167  -- {ident}          return IDENT;
168  -- {string}         return STRING;
169  -- {ident}"("       return FUNCTION;
170  -- {num}            return NUMBER;
171  -- "#"{name}        return HASH;
172  -- {w}"+"           return PLUS;
173  -- {w}">"           return GREATER;
174  -- {w}","           return COMMA;
175  -- {w}"~"           return TILDE;
176  -- ":"{N}{O}{T}"("  return NOT;
177  -- @{ident}         return ATKEYWORD;
178  -- {invalid}        return INVALID;
179  -- {num}%           return PERCENTAGE;
180  -- {num}{ident}     return DIMENSION;
181  -- "<!--"           return CDO;
182  -- "-->"            return CDC;
183  --
184  -- \/\*[^*]*\*+([^/*][^*]*\*+)*\/                    /* ignore comments */
185  --
186  -- .                return *yytext;
187  
-- Dispatch a parse event to the parser's listener.
--
-- Looks up `listener["on_" .. name]`; when that callback exists it is called
-- with the listener itself followed by the event arguments. Events without a
-- matching callback are dropped.
--
-- parser: table whose `listener` field holds the callbacks (may be nil).
-- name:   event name without the "on_" prefix.
-- ...:    event arguments forwarded unchanged.
local function on(parser, name, ...)
	local listener = parser.listener
	-- A parser created without a listener must not crash on the first event.
	if listener == nil then
		return
	end
	local callback = listener["on_" .. name]
	if callback then
		callback(listener, ...)
	end
end
196  
-- type_selector : [ namespace_prefix ]? element_name
--
-- Emits "type_selector" with (namespace_prefix, element_name) when an element
-- name is found. Returns true on a match; otherwise the source is sought back
-- to where it started and false is returned.
local function type_selector(parser)
	local src = parser.source
	local start = src.position
	local prefix = src:match_namespace_prefix()
	local ident = src:match_ident()

	if ident then
		on(parser, "type_selector", prefix, ident)
		return true
	end

	src:seek(start)
	return false
end
211  
-- universal : [ namespace_prefix ]? '*'
--
-- Emits "universal" with the namespace prefix when the asterisk is present.
-- Returns true on a match; otherwise seeks back to the starting position and
-- returns false.
local function universal(parser)
	local src = parser.source
	local start = src.position
	local prefix = src:match_namespace_prefix()

	if src:match("%*") then
		on(parser, "universal", prefix)
		return true
	end

	src:seek(start)
	return false
end
226  
-- HASH : "#" name
--
-- Delegates the matching to the source; emits "hash" with the value it
-- returned. Returns true when a hash was matched, false otherwise.
local function hash(parser)
	local id = parser.source:match_hash()
	if not id then
		return false
	end
	on(parser, "hash", id)
	return true
end
236  
-- class : '.' IDENT
--
-- Emits "class" with the identifier. Returns true on a match. When the dot is
-- present but no identifier follows, the source is sought back to the dot and
-- false is returned.
local function class(parser)
	local src = parser.source
	local start = src.position

	if src:match("%.") then
		local ident = src:match_ident()
		if ident then
			on(parser, "class", ident)
			return true
		end
		src:seek(start)
	end

	return false
end
254  
-- Attribute match operators, probed in this order. `pattern` is what is handed
-- to source:match, `operator` is what is reported to the listener.
local ATTRIBUTE_OPERATORS = {
	{ pattern = "%^=", operator = "^=" },
	{ pattern = "%$=", operator = "$=" },
	{ pattern = "%*=", operator = "*=" },
	{ pattern = "=", operator = "=" },
	{ pattern = "~=", operator = "~=" },
	{ pattern = "|=", operator = "|=" },
}

-- attrib : '[' S* [ namespace_prefix ]? IDENT S*
--          [ operator S* [ IDENT | STRING ] S* ]? ']'
--
-- Emits "attribute" with (namespace_prefix, name, operator, value); operator
-- and value are nil for a bare `[name]`. Returns true on a match. Once the
-- opening bracket has been consumed, every failure seeks back to it and
-- returns false.
local function attribute(parser)
	local src = parser.source
	local start = src.position

	if not src:match("%[") then
		return false
	end
	src:match_whitespaces()

	-- Try "prefix + name" first; if no name follows, rewind and retry
	-- without a prefix.
	local name_start = src.position
	local prefix = src:match_namespace_prefix()
	local attr_name = src:match_ident()
	if not attr_name then
		src:seek(name_start)
		prefix = nil
		attr_name = src:match_ident()
	end
	if not attr_name then
		src:seek(start)
		return false
	end
	src:match_whitespaces()

	local operator = nil
	for _, candidate in ipairs(ATTRIBUTE_OPERATORS) do
		if src:match(candidate.pattern) then
			operator = candidate.operator
			break
		end
	end

	local value = nil
	if operator then
		-- An operator must be followed by an identifier or a string.
		src:match_whitespaces()
		value = src:match_ident() or src:match_string()
		if not value then
			src:seek(start)
			return false
		end
		src:match_whitespaces()
	end

	if not src:match("%]") then
		src:seek(start)
		return false
	end

	on(parser, "attribute", prefix, attr_name, operator, value)
	return true
end
318  
-- Parse one component of a functional pseudo-class expression:
--   PLUS | '-' | DIMENSION | NUMBER | STRING | IDENT
--
-- On a match a tagged entry is appended to `expression`:
--   {"plus"}, {"minus"}, {"dimension", v}, {"number", v}, {"string", v},
--   {"name", v}
-- and true is returned. Returns false, appending nothing, when none of the
-- alternatives matches at the current position.
--
-- parser:     table whose `source` field provides the match_* methods.
-- expression: array the component is appended to.
local function expression_component(parser, expression)
	local source = parser.source

	if source:match("%+") then
		expression[#expression + 1] = {"plus"}
		return true
	end

	-- "-" is a magic character in Lua patterns; escape it so the literal
	-- match does not depend on where it sits in the final pattern.
	if source:match("%-") then
		expression[#expression + 1] = {"minus"}
		return true
	end

	-- Dimension is tried before number so that "2n" is not split.
	local dimension = source:match_dimension()
	if dimension then
		expression[#expression + 1] = {"dimension", dimension}
		return true
	end

	local number = source:match_number()
	if number then
		expression[#expression + 1] = {"number", number}
		return true
	end

	-- Named `text` rather than `string` to avoid shadowing the string library.
	local text = source:match_string()
	if text then
		expression[#expression + 1] = {"string", text}
		return true
	end

	local name = source:match_ident()
	if name then
		expression[#expression + 1] = {"name", name}
		return true
	end

	return false
end
358  
-- functional_pseudo : FUNCTION S* expression ')'
--
-- Reads an identifier immediately followed by "(", then one or more
-- expression components (each may be preceded by whitespace) and the closing
-- ")". Emits "functional_pseudo" with (function_name, expression) and returns
-- true on success. On failure the source is sought back to the starting
-- position and false is returned.
local function functional_pseudo(parser)
	local src = parser.source
	local start = src.position

	local fn_name = src:match_ident()
	if not fn_name then
		return false
	end

	local components = {}
	local opened = src:match("%(")
	if opened then
		-- Collect components until one fails to match.
		repeat
			src:match_whitespaces()
		until not expression_component(parser, components)
	end

	-- The closing parenthesis is only probed when at least one component
	-- was collected.
	if opened and #components > 0 and src:match("%)") then
		on(parser, "functional_pseudo", fn_name, components)
		return true
	end

	src:seek(start)
	return false
end
394  
-- pseudo : ':' ':'? [ IDENT | functional_pseudo ]
--
-- A single colon reports "pseudo_class", a double colon "pseudo_element",
-- each with the identifier that follows. The functional form is tried first
-- and reports its own event. Returns true on a match; when the colon is not
-- followed by anything usable, the source is sought back and false is
-- returned.
local function pseudo(parser)
	local src = parser.source
	local start = src.position

	if not src:match(":") then
		return false
	end

	local event = src:match(":") and "pseudo_element" or "pseudo_class"

	if functional_pseudo(parser) then
		return true
	end

	local ident = src:match_ident()
	if not ident then
		src:seek(start)
		return false
	end

	on(parser, event, ident)
	return true
end
423  
-- negation : NOT S* negation_arg S* ')'
-- negation_arg : type_selector | universal | HASH | class | attrib | pseudo
--
-- Emits "start_negation", then whatever events the argument produces, then
-- "end_negation". Returns true on success. On failure the source is sought
-- back to the starting position and false is returned; note that
-- "start_negation" (and any argument events) have already been sent by then
-- and no "end_negation" follows.
local function negation(parser)
	local source = parser.source
	local position = source.position

	-- The selector lexer is case-insensitive (NOT is ":"{N}{O}{T}"("), so
	-- ":NOT(" and ":Not(" must be accepted as well. Escaped spellings of the
	-- letters are not handled here.
	if not source:match(":[nN][oO][tT]%(") then
		return false
	end

	on(parser, "start_negation")
	source:match_whitespaces()
	local matched = type_selector(parser) or
		universal(parser) or
		hash(parser) or
		class(parser) or
		attribute(parser) or
		pseudo(parser)
	if matched then
		source:match_whitespaces()
		if source:match("%)") then
			on(parser, "end_negation")
			return true
		end
	end

	source:seek(position)
	return false
end
453  
-- simple_selector_sequence
--   : [ type_selector | universal ] [ HASH | class | attrib | pseudo | negation ]*
--   | [ HASH | class | attrib | pseudo | negation ]+
--
-- Always emits "start_simple_selector_sequence" first. Emits
-- "end_simple_selector_sequence" and returns true when the sequence contains
-- a leading type/universal selector or at least one of the other parts;
-- returns false (without the end event) otherwise.
local function simple_selector_sequence(parser)
	on(parser, "start_simple_selector_sequence")

	local has_head = type_selector(parser) or universal(parser)

	-- negation is probed before pseudo, as both start with ':'.
	local count = 0
	while true do
		local matched = hash(parser) or
			class(parser) or
			attribute(parser) or
			negation(parser) or
			pseudo(parser)
		if not matched then
			break
		end
		count = count + 1
	end

	if not has_head and count == 0 then
		return false
	end

	on(parser, "end_simple_selector_sequence")
	return true
end
474  
-- Explicit combinators, probed in this order after optional whitespace.
local EXPLICIT_COMBINATORS = {
	{ pattern = "%+", symbol = "+" },
	{ pattern = ">", symbol = ">" },
	{ pattern = "~", symbol = "~" },
}

-- combinator : PLUS S* | GREATER S* | TILDE S* | S+
--
-- Emits "combinator" with "+", ">", "~" or " " and returns that same string.
-- Whitespace alone counts as the descendant combinator " ". When nothing
-- matches, the source is sought back and false is returned.
local function combinator(parser)
	local src = parser.source
	local start = src.position

	local leading_space = src:match_whitespaces()

	for _, entry in ipairs(EXPLICIT_COMBINATORS) do
		if src:match(entry.pattern) then
			src:match_whitespaces()
			on(parser, "combinator", entry.symbol)
			return entry.symbol
		end
	end

	if leading_space then
		on(parser, "combinator", " ")
		return " "
	end

	src:seek(start)
	return false
end
501  
-- selector : simple_selector_sequence [ combinator simple_selector_sequence ]*
--
-- Emits "start_selector" first and "end_selector" on success.
-- Returns true when at least one simple selector sequence was parsed.
-- A whitespace combinator that is not followed by another sequence is treated
-- as trailing whitespace and ends the selector successfully; any other
-- dangling combinator is a failure.
-- On failure the source is sought back to where the selector started, like
-- the other parse functions in this file do (events already sent to the
-- listener are not undone).
local function parse_selector(parser)
	on(parser, "start_selector")
	local source = parser.source
	local position = source.position

	if not simple_selector_sequence(parser) then
		source:seek(position)
		return false
	end

	while true do
		local combinator_current = combinator(parser)
		if not combinator_current then
			break
		end
		if not simple_selector_sequence(parser) then
			if combinator_current == " " then
				-- Trailing whitespace, e.g. the blank before "{" or ",".
				break
			end
			source:seek(position)
			return false
		end
	end
	on(parser, "end_selector")
	return true
end
526  
-- selectors_group : selector [ COMMA S* selector ]*
--
-- Skips leading whitespace, emits "start_selectors_group", parses one or more
-- comma-separated selectors and emits "end_selectors_group" on success.
-- Returns true on success. Returns false when the first selector is missing
-- or a comma is not followed by a selector; in that case the source is sought
-- back to where the group started (events already sent are not undone) and no
-- end event is emitted.
--
-- Whatever follows the group is left for the caller, so there is no "garbage
-- after the group" check here.
local function parse_selectors_group(parser)
	local source = parser.source
	local position = source.position

	source:match_whitespaces()
	on(parser, "start_selectors_group")
	if not parse_selector(parser) then
		source:seek(position)
		return false
	end
	while true do
		source:match_whitespaces()
		if not source:match(",") then
			break
		end
		source:match_whitespaces()
		if not parse_selector(parser) then
			-- A comma must be followed by another selector.
			source:seek(position)
			return false
		end
	end
	source:match_whitespaces()
	on(parser, "end_selectors_group")
	return true
end
561  
-- Longhand property names generated for each four-sided shorthand, in the
-- order of the parsed value list (top, right, bottom, left).
local SHORTHAND_LONGHANDS = {
	["padding"] = {
		"padding-top", "padding-right", "padding-bottom", "padding-left",
	},
	["border-width"] = {
		"border-top-width", "border-right-width",
		"border-bottom-width", "border-left-width",
	},
	["margin"] = {
		"margin-top", "margin-right", "margin-bottom", "margin-left",
	},
}

-- Emit the "declaration" event(s) for a value produced by a property parser.
-- Four-sided shorthands are expanded into one event per longhand, each
-- carrying `{ value = <entry of value.value> }`.
local function emit_parsed_declaration(parser, property_name, value)
	local longhands = SHORTHAND_LONGHANDS[property_name]
	if not longhands then
		on(parser, "declaration", property_name, value)
		return
	end
	local sides = value.value
	for index = 1, #longhands do
		on(parser, "declaration", longhands[index], { value = sides[index] })
	end
end

-- Recognise the end of a declaration. A ";" is consumed. A "}" also ends the
-- declaration (the last one in a block may omit its ";") but is left in the
-- source for the caller that closes the block.
local function match_declaration_end(source)
	if source:match(";") then
		return true
	end
	local before = source.position
	if source:match("}") then
		source:seek(before)
		return true
	end
	return false
end

-- declaration : property ':' S* value [ ';' | lookahead '}' ]
--
-- When PropertyParser has an entry for the property, that parser is given
-- (property_name, parser) and its truthy result becomes the value. Otherwise,
-- or when that parser returns nothing, the raw text up to the terminator is
-- reported as a string (untrimmed; terminators inside strings or url() are
-- not recognised).
--
-- Emits "declaration" with (property_name, value); the padding, border-width
-- and margin shorthands are expanded into their four longhands.
--
-- Returns true when a declaration was parsed. Returns false otherwise:
--   * no property name / no ':'      -> source sought back to the start
--   * parsed value, bad terminator   -> source sought back to the start
--   * raw value with no terminator   -> source sought back to the start
--   * empty raw value                -> the empty declaration is skipped
--                                       (a ';' terminator is consumed)
local function parse_declaration(parser)
	local source = parser.source
	local position = source.position

	local property_name = source:match_ident()
	if not property_name or not source:match(":") then
		source:seek(position)
		return false
	end
	source:match_whitespaces()
	local value_start = source.position

	local property_parser = PropertyParser and PropertyParser[property_name]
	if property_parser then
		local value = property_parser(property_name, parser)
		if value then
			source:match_whitespaces()
			if not match_declaration_end(source) then
				Spring.Echo("bad ending: did not find ';'")
				source:seek(position)
				return false
			end

			-- NOTE(review): looks like a debugging aid; kept as-is.
			if property_name == 'background-position' then
				value:pretty_print()
			end

			emit_parsed_declaration(parser, property_name, value)
			return true
		end
		-- The property parser may have consumed part of the value before
		-- giving up; the raw fallback must see all of it.
		source:seek(value_start)
	end

	-- Raw fallback: take everything up to the next ';' or '}'. Searching
	-- the data directly cannot run past the end of the input.
	local data = source.data
	local stop = data:find("[;}]", value_start)
	if not stop then
		source:seek(position)
		return false
	end

	local value = data:sub(value_start, stop - 1)
	if data:sub(stop, stop) == ";" then
		source:seek(stop + 1)
	else
		source:seek(stop)
	end
	if value == "" then
		return false
	end

	source:match_whitespaces()
	on(parser, "declaration", property_name, value)
	return true
end
635  
-- Declaration block: '{' S* declaration* S* '}'
--
-- Parses declarations until one fails, then requires the closing brace.
-- Returns true when the block was closed. Returns false when there is no
-- opening brace, or when the closing brace is missing; in the latter case the
-- source is sought back to the opening brace (declaration events already sent
-- to the listener are not undone).
--
-- Declared local, like the other parse functions, so it does not leak into
-- the global environment.
local function parse_declarations(parser)
	local source = parser.source
	local position = source.position
	if not source:match("{") then
		return false
	end
	source:match_whitespaces()
	while parse_declaration(parser) do
		source:match_whitespaces()
	end
	-- A failed declaration may leave whitespace in front of the brace.
	source:match_whitespaces()
	if not source:match("}") then
		source:seek(position)
		return false
	end
	return true
end
652  
-- ruleset : selectors_group declaration-block
--
-- Emits "start_ruleset" before anything is parsed and "end_ruleset" once both
-- the selectors group and the declaration block have been parsed.
-- Returns true on success; returns false (without "end_ruleset") as soon as
-- either part fails.
--
-- Declared local, like the other parse functions, so it does not leak into
-- the global environment.
local function parse_ruleset(parser)
	on(parser, "start_ruleset")
	if not parse_selectors_group(parser) then
		return false
	end
	if not parse_declarations(parser) then
		return false
	end
	on(parser, "end_ruleset")
	return true
end
664  
-- Parse the whole input as a sequence of rulesets.
--
-- Emits "start_css", then the events of every ruleset, then "end_css".
-- Parsing stops at the first ruleset that fails.
--
-- Returns true when the entire input was consumed, false when parsing stopped
-- early (earlier versions returned nothing).
function methods:parse()
	local source = self.source
	on(self, "start_css")
	source:match_whitespaces()
	-- `position` is 1-based, so it is past the data once it exceeds its
	-- length. Do not attempt a ruleset there: the attempt would fire
	-- start_* events for a ruleset that does not exist.
	while source.position <= #source.data and parse_ruleset(self) do
		source:match_whitespaces()
	end
	on(self, "end_css")
	return source.position > #source.data
end
672  
-- Create a parser for `input`.
--
-- input:    text to parse; wrapped with Source.new.
-- listener: table of optional "on_<event>" callbacks notified while parsing.
--
-- Returns a parser table whose methods (e.g. parse) are resolved through the
-- shared metatable.
function Parser.new(input, listener)
	return setmetatable({
		listener = listener,
		source = Source.new(input),
	}, metatable)
end

return Parser