source.lua
  1  -- MIT License
  2  -- 
  3  -- Copyright (c) 2018 Kouhei Sutou <kou@clear-code.com>
  4  -- 
  5  -- Permission is hereby granted, free of charge, to any person obtaining a copy
  6  -- of this software and associated documentation files (the "Software"), to deal
  7  -- in the Software without restriction, including without limitation the rights
  8  -- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9  -- copies of the Software, and to permit persons to whom the Software is
 10  -- furnished to do so, subject to the following conditions:
 11  -- 
 12  -- The above copyright notice and this permission notice shall be included in all
 13  -- copies or substantial portions of the Software.
 14  -- 
 15  -- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16  -- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17  -- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18  -- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19  -- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20  -- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21  -- SOFTWARE.
 22  
 23  -- CHANGE LOG
 24  -- (16 May 2024, chmod777)
 25  --  * Replace spaces with tabs
 26  --  * Remove unicode support
 27  -- (13 May 2024, chmod777)
 28  --  * Add method match_one_of_idents
 29  --  * Add method match_ident_from_table
 30  
 31  -- local utf8 = require("lua-utf8")
 32  
 33  local Source = {}
 34  
 35  local methods = {}
 36  
 37  local metatable = {}
 38  function metatable.__index(parser, key)
 39  	return methods[key]
 40  end
 41  
 42  function methods:inspect()
 43  	return
 44  		self.data:sub(1, self.position - 1) .. "|@|" ..
 45  		self.data:sub(self.position)
 46  end
 47  
 48  function methods:peek()
 49  	return self.data[self.position]
 50  end
 51  
 52  function methods:seek(position)
 53  	self.position = position
 54  end
 55  
 56  function methods:match(pattern)
 57  	local start, last = self.data:find("^" .. pattern, self.position)
 58  	if start then
 59  		self:seek(last + 1)
 60  		return self.data:sub(start, last)
 61  	else
 62  		return nil
 63  	end
 64  end
 65  
 66  function methods:match_whitespaces()
 67  	local pattern = "[ \t\r\n\f]+"
 68  	local whitespaces = self:match(pattern)
 69  	while true do
 70  		local comment = self:match_comment()
 71  		if not comment then
 72  			break
 73  		end
 74  		local sub_whitespaces = self:match(pattern)
 75  		if sub_whitespaces then
 76  			if whitespaces then
 77  				whitespaces = whitespaces .. sub_whitespaces
 78  			else
 79  				whitespaces = sub_whitespaces
 80  			end
 81  		end
 82  	end
 83  	return whitespaces
 84  end
 85  
 86  -- 
 87  function methods:match_one_of_idents(idents_list)
 88  	local position = self.position
 89  	local ident = self:match_ident()
 90  	for i = 1, #idents_list do
 91  		if idents_list[i] == ident then
 92  			return ident
 93  		end
 94  	end
 95  	self:seek(position)
 96  	return nil
 97  end
 98  
 99  function methods:match_ident_from_table(t)
100  	local position = self.position
101  
102  	local ident = self:match_ident()
103  	local value = t[ident]
104  	if value then
105  		return ident,value
106  	end
107  	self:seek(position)
108  	return nil
109  end
110  
--- Consume a `/* ... */` comment at the current position.
-- The lazy `.-` stops at the first closing `*/`.
-- @treturn ?string the text between the delimiters, or nil when no such
--   comment starts here
function methods:match_comment_c_style()
	local comment = self:match("/%*.-%*/")
	if not comment then
		return nil
	end
	-- Drop the two-character opener and closer.
	return comment:sub(3, -3)
end
119  
--- Consume a `<!-- ... -->` comment at the current position.
-- The lazy `.-` stops at the first closing `-->`.
-- @treturn ?string the text between the delimiters, or nil when no such
--   comment starts here
function methods:match_comment_sgml_style()
	local comment = self:match("<!%-%-.-%-%->")
	if not comment then
		return nil
	end
	-- Drop the four-character opener and the three-character closer.
	return comment:sub(5, -4)
end
128  
--- Consume one comment of either supported style.
-- The C style (`/* */`) is tried first, then the SGML style (`<!-- -->`).
-- An empty comment body is returned as "" (which is truthy in Lua).
-- @treturn ?string the comment body, or nil when no comment starts here
function methods:match_comment()
	return self:match_comment_c_style()
		or self:match_comment_sgml_style()
		or nil
end
142  
--- Consume a single hyphen at the current position.
-- @treturn ?string "-" when a hyphen was consumed, otherwise nil
function methods:match_hyphen()
	local hyphen = self:match("-")
	return hyphen
end
146  
147  -- function methods:match_non_ascii()
148  -- 	local data = self.data:sub(self.position)
149  -- 	if #data == 0 then
150  -- 		return nil
151  -- 	end
152  
153  -- 	local code_point = utf8.codepoint(data)
154  -- 	if code_point < 0x80 then
155  -- 		return nil
156  -- 	end
157  
158  -- 	local next_offset, next_code_point = utf8.offset(data, 1)
159  -- 	if next_offset then
160  -- 		self:seek(self.position + next_offset - 1)
161  -- 	else
162  -- 		self:seek(#self.data + 1)
163  -- 	end
164  -- 	return utf8.char(code_point)
165  -- end
166  
--- Consume a simple backslash escape and return the escaped character.
-- Accepts a backslash followed by one character that is not a newline,
-- carriage return, form feed, ASCII digit or ASCII letter. Hexadecimal
-- (unicode) escapes are not handled; the former handling is kept below as
-- commented-out code.
-- @treturn ?string the character after the backslash, or nil when no such
--   escape starts at the current position
function methods:match_escape()
	-- Only referenced by the disabled unicode branch below.
	local position = self.position

	-- local unicode_escape = self:match("\\[0-9a-zA-Z]+")
	-- if unicode_escape then
	-- 	if #unicode_escape > 7 then
	-- 		self:seek(self.position - (#unicode_escape - 7))
	-- 		unicode_escape = unicode_escape:sub(1, 7)
	-- 	end
	-- 	local code_point = tonumber("0x" .. unicode_escape:sub(2))
	-- 	if not self:match("\r\n") then
	-- 		self:match("[ \n\r\t\f]")
	-- 	end
	-- 	return utf8.char(code_point)
	-- end

	-- self:seek(position)
	local sequence = self:match("\\[^\n\r\f0-9a-zA-Z]")
	if not sequence then
		return nil
	end

	-- Strip the leading backslash.
	return sequence:sub(2)
end
191  
--- Consume one character that may appear in an identifier.
-- A start character is an underscore or an ASCII letter; any later character
-- may also be an ASCII digit or a hyphen. If no such character is present, a
-- backslash escape is tried instead.
-- @tparam boolean is_start truthy to apply the rules for the first character
-- @treturn ?string the consumed character (for an escape, the character after
--   the backslash), or nil when nothing matched
function methods:match_name_character(is_start)
	local pattern
	if is_start then
		pattern = "[_a-zA-Z]"
	else
		pattern = "[_a-zA-Z0-9-]"
	end

	-- local non_ascii = self:match_non_ascii()
	-- if non_ascii then
	-- 	return non_ascii
	-- end

	return self:match(pattern) or self:match_escape() or nil
end
215  
--- Consume an identifier at the current position.
-- An identifier is an optional leading hyphen, one start character (see
-- `match_name_character(true)`), then any number of further name characters.
-- Escapes contribute the escaped character, without the backslash.
-- @treturn ?string the identifier text, or nil when no identifier starts here
--   (the position is restored, including a consumed leading hyphen)
function methods:match_ident()
	local position = self.position
	-- Collect the pieces and join them once instead of concatenating per
	-- character.
	local parts = {}

	local hyphen = self:match_hyphen()
	if hyphen then
		-- The original referenced a misspelled (undefined) variable here,
		-- raising an error for every identifier starting with a hyphen.
		parts[#parts + 1] = hyphen
	end

	local name_start = self:match_name_character(true)
	if not name_start then
		self:seek(position)
		return nil
	end
	parts[#parts + 1] = name_start

	while true do
		local name_character = self:match_name_character(false)
		if not name_character then
			break
		end
		parts[#parts + 1] = name_character
	end

	return table.concat(parts)
end
242  
--- Consume one logical character inside a quoted string.
-- Tried in order: a backslash-newline continuation (yields ""), a backslash
-- escape (yields the escaped character), then any single character other than
-- a newline, carriage return, form feed or backslash.
-- @treturn ?string the text this character contributes to the string (may be
--   ""), or nil when nothing can be consumed
function methods:match_string_character()
	-- A backslash followed by a line break is a continuation and adds nothing.
	-- "\r\n" is tried first so it is consumed as one line break.
	if self:match("\\\r\n") or self:match("\\[\n\r\f]") then
		return ""
	end

	-- local non_ascii = self:match_non_ascii()
	-- if non_ascii then
	-- 	return non_ascii
	-- end

	return self:match_escape() or self:match("[^\n\r\f\\]") or nil
end
269  
--- Consume a single- or double-quoted string.
-- The string ends at the first unescaped occurrence of the quote character
-- that opened it.
-- @treturn ?string the string contents without the quotes and with escapes
--   resolved, or nil when no string starts here or it is not terminated
--   (the position is restored in the latter case)
function methods:match_string()
	local start = self.position

	local quote = self:match("[\"']")
	if not quote then
		return nil
	end

	local pieces = {}
	while not self:match(quote) do
		local character = self:match_string_character()
		if not character then
			-- Unterminated or invalid string: rewind past the opening quote.
			self:seek(start)
			return nil
		end
		pieces[#pieces + 1] = character
	end

	return table.concat(pieces)
end
293  
--- Consume an unsigned number at the current position.
-- Accepts either digits with a fractional part ("1.5", "12.25", ".5") or
-- plain digits ("42"). The fractional form is tried first so that "1.5" is
-- consumed as a whole instead of stopping after the integer part; a trailing
-- dot with no digits after it ("1.") is not consumed.
-- @treturn ?number the parsed value, or nil when no number starts here (the
--   position is unchanged, since a failed `match` never moves it)
function methods:match_number()
	local number = self:match("%d*%.%d+") or self:match("%d+")
	if not number then
		return nil
	end
	return tonumber(number)
end
309  
--- Consume a dimension: a number immediately followed by an identifier.
-- @treturn ?string the raw source text covering both the number and the
--   identifier, or nil when either part is missing (the position is restored)
function methods:match_dimension()
	local start = self.position

	if not self:match_number() then
		return nil
	end

	if not self:match_ident() then
		-- A bare number is not a dimension; give the number back.
		self:seek(start)
		return nil
	end

	return self.data:sub(start, self.position - 1)
end
326  
--- Consume a namespace prefix terminated by "|".
-- Three forms are accepted: a bare "|" (empty prefix), a name followed by
-- "|", and "*|".
-- @treturn ?string the prefix without the trailing "|" ("" for a bare "|"),
--   or nil when no prefix starts here
function methods:match_namespace_prefix()
	if self:match("|") then
		return ""
	end

	local prefix = self:match("-?[_%a][_%a%d-]*|") or self:match("%*|")
	if not prefix then
		return nil
	end

	-- Strip the trailing "|".
	return prefix:sub(1, -2)
end
345  
--- Consume a hash token: "#" followed by one or more name characters
-- (underscore, letters, digits or hyphen).
-- @treturn ?string the name without the leading "#", or nil when no hash
--   token starts here
function methods:match_hash()
	local hash = self:match("#[_%a%d-]+")
	if not hash then
		return nil
	end
	return hash:sub(2)
end
354  
--- Create a source object over the given text.
-- The object starts at position 1 and gets its methods through `metatable`.
-- @tparam string data the text to scan
-- @treturn table the new source object
function Source.new(data)
	return setmetatable({
		data = data,
		position = 1,
	}, metatable)
end
363  
364  return Source