/ chmod777_includes / html_css / html_lexer.lua
html_lexer.lua
  1  -- File: html_lexer.lua
  2  
  3  --[[
  4  Copyright (C) 2024 chmod777
  5  
  6  This program is free software: you can redistribute it and/or modify it under
  7  the terms of the GNU Affero General Public License version 3 as published by the
  8  Free Software Foundation.
  9  
 10  This program is distributed in the hope that it will be useful, but WITHOUT ANY
 11  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 12  PARTICULAR PURPOSE. See the GNU Affero General Public License for more details.
 13  
 14  You should have received a copy of the GNU Affero General Public License along
 15  with this program. If not, see <https://www.gnu.org/licenses/>. 
 16  ]]
 17  
 18  LexemeType = {
 19  	OPEN = 1,         -- "<"
 20  	CLOSE = 2,        -- ">"
 21  	END_OPEN = 3,     -- "</"
 22  	SELF_CLOSING = 4, -- "/>"
 23  	EQ = 5,           -- "="
 24  	SINGLE_QUOTE = 6, --  '
 25  	DOUBLE_QUOTE = 7, --  "
 26  	WHITESPACE = 8,   -- \t\n\b
 27  	IDENTIFIER = 9,   -- tag
 28  }
 29  
 30  local Lexer = {}
 31  function Lexer:new(source)
 32  	local this = {}
 33  	this.source = source
 34  	this.index = 1
 35  	
 36  	function is_whitespace(c)
 37  		return c == ' ' or c == '\n' or c == '\t' or c == '\r' or c == '\b'
 38  	end
 39  	function is_special(c)
 40  		return c=='<' or c=='>' or c=='/' or c=='=' or c=='"' or c=="'"
 41  	end
 42  	function is_digit(c)
 43  		return c=='0' or c=='1' or c=='2' or c=='3' or c=='4' or c=='5' or c=='6' or c=='7' or c=='8' or c=='9'
 44  	end
 45  
 46  	function this:next()
 47  		local current = this:current_char()
 48  		if current == nil then
 49  			return nil
 50  		end
 51  
 52  		local start = this.index
 53  
 54  		if current == '<' then
 55  			this:advance()
 56  			local next = this:current_char()
 57  			if next == '/' then
 58  				local source = this.source:sub(start, this.index)
 59  				this:advance()
 60  				return LexemeType.END_OPEN, source
 61  			else
 62  				return LexemeType.OPEN, current
 63  			end
 64  		elseif current == '/' and this:next_char() ~= nil and this:next_char() == '>' then
 65  			this:advance()
 66  			this:advance()
 67  			return LexemeType.SELF_CLOSING, this.source:sub(start, this.index)
 68  		elseif current == '>' then
 69  			this:advance()
 70  			return LexemeType.CLOSE, current
 71  		elseif current == '=' then
 72  			this:advance()
 73  			return LexemeType.EQ, current
 74  		elseif current == "'" then
 75  			this:advance()
 76  			return LexemeType.SINGLE_QUOTE, current
 77  		elseif current == '"' then
 78  			this:advance()
 79  			return LexemeType.DOUBLE_QUOTE, current
 80  		elseif is_whitespace(current) then
 81  			while this:next_char() ~= nil and is_whitespace(this:next_char()) do
 82  				this:advance()
 83  			end
 84  			local source = this.source:sub(start, this.index)
 85  			this:advance()
 86  			return LexemeType.WHITESPACE, source
 87  		else
 88  			while this:next_char() ~= nil and not is_special(this:next_char()) and not is_whitespace(this:next_char()) do
 89  				this:advance()
 90  			end
 91  			local source = this.source:sub(start, this.index)
 92  			this:advance()
 93  			return LexemeType.IDENTIFIER, source
 94  		end
 95  	end
 96  	function this:advance()
 97  		this.index = this.index+1
 98  	end
 99  	function this:current_char()
100  		if this.index > #this.source then
101  			return nil
102  		end
103  		return this.source:sub(this.index, this.index)
104  	end
105  	function this:next_char()
106  		return this.source:sub(this.index+1, this.index+1)
107  	end
108  
109  	return this
110  end
111  
-- Public interface of this file, returned to whoever loads it.
-- The table is held in a local: assigning it to the global name `module`
-- would leak a global and, on Lua 5.1, overwrite the built-in `module`
-- function.
local M = {
	LexemeType = LexemeType,
	Lexer = Lexer,
}

return M