source.lua
-- MIT License
--
-- Copyright (c) 2018 Kouhei Sutou <kou@clear-code.com>
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy
-- of this software and associated documentation files (the "Software"), to deal
-- in the Software without restriction, including without limitation the rights
-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-- copies of the Software, and to permit persons to whom the Software is
-- furnished to do so, subject to the following conditions:
--
-- The above copyright notice and this permission notice shall be included in all
-- copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-- SOFTWARE.

-- CHANGE LOG
-- (16 May 2024, chmod777)
--   * Replace spaces with tabs
--   * Remove unicode support
-- (13 May 2024, chmod777)
--   * Add method match_one_of_idents
--   * Add method match_ident_from_table

-- local utf8 = require("lua-utf8")

--- Cursor-based scanner over a string.
-- A source object holds the text in `data` and a 1-based byte offset in
-- `position`. Every `match_*` method tries to consume input starting at
-- `position`; on success it advances `position` past what it consumed, on
-- failure it returns nil and leaves `position` where it was (exceptions are
-- noted on the individual methods).
local Source = {}

local methods = {}

local metatable = {}
-- Method lookup for source objects: anything not stored on the object itself
-- is resolved in the `methods` table.
function metatable.__index(parser, key)
	return methods[key]
end

--- Render the scanner state for debugging.
-- @treturn string the whole input with the marker "|@|" inserted at the
--   current position
function methods:inspect()
	return
		self.data:sub(1, self.position - 1) .. "|@|" ..
		self.data:sub(self.position)
end

--- Look at the character under the cursor without consuming it.
-- @treturn ?string the one-byte string at `position`, or nil when the cursor
--   is outside the input
function methods:peek()
	-- Indexing a string with a number (data[i]) looks the key up in the
	-- string library table and always yields nil, so use string.sub.
	local character = self.data:sub(self.position, self.position)
	if character == "" then
		return nil
	end
	return character
end

--- Move the cursor.
-- @tparam number position new 1-based offset into the input
function methods:seek(position)
	self.position = position
end

--- Try to match a Lua pattern exactly at the cursor.
-- The pattern is anchored by prepending "^", so it can only match text that
-- starts at `position`.
-- @tparam string pattern Lua pattern (without a leading anchor)
-- @treturn ?string the matched text (cursor advanced past it), or nil when
--   the pattern does not match (cursor unchanged)
function methods:match(pattern)
	local start, last = self.data:find("^" .. pattern, self.position)
	if not start then
		return nil
	end
	self:seek(last + 1)
	return self.data:sub(start, last)
end

--- Consume a run of whitespace, skipping over any comments inside it.
-- Comments are consumed but are not part of the returned text.
-- @treturn ?string the concatenated whitespace that was consumed, or nil when
--   no whitespace character was consumed (comments may still have been
--   consumed in that case)
function methods:match_whitespaces()
	local pattern = "[ \t\r\n\f]+"
	local whitespaces = self:match(pattern)
	while true do
		local comment = self:match_comment()
		if not comment then
			break
		end
		local sub_whitespaces = self:match(pattern)
		if sub_whitespaces then
			if whitespaces then
				whitespaces = whitespaces .. sub_whitespaces
			else
				whitespaces = sub_whitespaces
			end
		end
	end
	return whitespaces
end

--- Consume an identifier only if it is one of the given names.
-- @tparam table idents_list sequence of accepted identifier strings
-- @treturn ?string the identifier that was consumed, or nil (cursor restored)
--   when there is no identifier at the cursor or it is not in the list
function methods:match_one_of_idents(idents_list)
	local position = self.position
	local ident = self:match_ident()
	if ident then
		for i = 1, #idents_list do
			if idents_list[i] == ident then
				return ident
			end
		end
	end
	self:seek(position)
	return nil
end

--- Consume an identifier only if it is a key of the given table.
-- An entry whose value is false is treated like a missing entry.
-- @tparam table t lookup table keyed by identifier
-- @treturn ?string the identifier that was consumed, or nil (cursor restored)
-- @return the value stored in `t` under that identifier (only on success)
function methods:match_ident_from_table(t)
	local position = self.position

	local ident = self:match_ident()
	if ident then
		local value = t[ident]
		if value then
			return ident, value
		end
	end
	self:seek(position)
	return nil
end

--- Consume a C-style comment: "/*" up to the first following "*/".
-- @treturn ?string the text between the delimiters, or nil
function methods:match_comment_c_style()
	local comment = self:match("/%*.-%*/")
	if comment then
		-- Strip the two-character opener and closer.
		return comment:sub(3, -3)
	else
		return nil
	end
end

--- Consume an SGML-style comment: "<!--" up to the first following "-->".
-- @treturn ?string the text between the delimiters, or nil
function methods:match_comment_sgml_style()
	local comment = self:match("<!%-%-.-%-%->")
	if comment then
		-- Strip the four-character opener and the three-character closer.
		return comment:sub(5, -4)
	else
		return nil
	end
end

--- Consume one comment of either supported style.
-- @treturn ?string the comment body, or nil when no comment starts here
function methods:match_comment()
	return self:match_comment_c_style() or self:match_comment_sgml_style()
end

--- Consume a single "-".
-- @treturn ?string "-" or nil
function methods:match_hyphen()
	-- "-" is a magic character in Lua patterns; escape it to make the
	-- literal intent explicit.
	return self:match("%-")
end

-- function methods:match_non_ascii()
-- 	local data = self.data:sub(self.position)
-- 	if #data == 0 then
-- 		return nil
-- 	end

-- 	local code_point = utf8.codepoint(data)
-- 	if code_point < 0x80 then
-- 		return nil
-- 	end

-- 	local next_offset, next_code_point = utf8.offset(data, 1)
-- 	if next_offset then
-- 		self:seek(self.position + next_offset - 1)
-- 	else
-- 		self:seek(#self.data + 1)
-- 	end
-- 	return utf8.char(code_point)
-- end

--- Consume a backslash escape.
-- Only a backslash followed by one character that is not a newline, carriage
-- return, form feed, digit or ASCII letter is accepted (the unicode escape
-- form is disabled, see the commented-out code).
-- @treturn ?string the escaped character without the backslash, or nil
function methods:match_escape()
	-- local position = self.position

	-- local unicode_escape = self:match("\\[0-9a-zA-Z]+")
	-- if unicode_escape then
	-- 	if #unicode_escape > 7 then
	-- 		self:seek(self.position - (#unicode_escape - 7))
	-- 		unicode_escape = unicode_escape:sub(1, 7)
	-- 	end
	-- 	local code_point = tonumber("0x" .. unicode_escape:sub(2))
	-- 	if not self:match("\r\n") then
	-- 		self:match("[ \n\r\t\f]")
	-- 	end
	-- 	return utf8.char(code_point)
	-- end

	-- self:seek(position)
	local escape = self:match("\\[^\n\r\f0-9a-zA-Z]")
	if escape then
		return escape:sub(2)
	end

	return nil
end

--- Consume one character of an identifier.
-- @tparam boolean is_start true for the first name character (letters and
--   "_" only); false for later characters (digits and "-" also allowed)
-- @treturn ?string the character (with escapes resolved), or nil
function methods:match_name_character(is_start)
	local in_ascii
	if is_start then
		in_ascii = self:match("[_a-zA-Z]")
	else
		in_ascii = self:match("[_a-zA-Z0-9-]")
	end
	if in_ascii then
		return in_ascii
	end

	-- local non_ascii = self:match_non_ascii()
	-- if non_ascii then
	-- 	return non_ascii
	-- end

	local escaped = self:match_escape()
	if escaped then
		return escaped
	end

	return nil
end

--- Consume an identifier: an optional leading "-", one name-start character,
-- then any number of name characters.
-- @treturn ?string the identifier with escapes resolved, or nil (cursor
--   restored) when no identifier starts here
function methods:match_ident()
	local position = self.position
	local parts = {}

	local hyphen = self:match_hyphen()
	if hyphen then
		parts[#parts + 1] = hyphen
	end

	local name_start = self:match_name_character(true)
	if not name_start then
		-- Un-consume a lone leading hyphen.
		self:seek(position)
		return nil
	end
	parts[#parts + 1] = name_start

	while true do
		local name_character = self:match_name_character(false)
		if not name_character then
			break
		end
		parts[#parts + 1] = name_character
	end

	return table.concat(parts)
end

--- Consume one character inside a quoted string.
-- A backslash followed by a line break (CR LF, LF, CR or FF) is a line
-- continuation and contributes nothing to the string.
-- @treturn ?string the character ("" for a line continuation), or nil when
--   the next input is an unescaped line break, a backslash that does not
--   start an accepted escape, or the end of input
function methods:match_string_character()
	if self:match("\\\r\n") then
		return ""
	end

	if self:match("\\[\n\r\f]") then
		return ""
	end

	-- local non_ascii = self:match_non_ascii()
	-- if non_ascii then
	-- 	return non_ascii
	-- end

	local escaped = self:match_escape()
	if escaped then
		return escaped
	end

	local normal_character = self:match("[^\n\r\f\\]")
	if normal_character then
		return normal_character
	end

	return nil
end

--- Consume a string delimited by single or double quotes.
-- @treturn ?string the string content without the quotes and with escapes
--   resolved, or nil (cursor restored) when no string starts here or the
--   string is not properly terminated
function methods:match_string()
	local position = self.position

	local delimiter = self:match("[\"']")
	if not delimiter then
		return nil
	end

	local parts = {}
	while true do
		-- Check for the closing quote first: the "normal character" pattern
		-- would otherwise swallow it.
		if self:match(delimiter) then
			return table.concat(parts)
		end

		local character = self:match_string_character()
		if not character then
			self:seek(position)
			return nil
		end
		parts[#parts + 1] = character
	end
end

--- Consume an unsigned number: digits with an optional fractional part
-- ("12", "1.5", ".5").
-- @treturn ?number the parsed value, or nil when no number starts here
function methods:match_number()
	-- Try the fractional form first; matching "%d+" first would consume only
	-- the integer part of "1.5" and leave ".5" behind.
	local number = self:match("%d*%.%d+") or self:match("%d+")
	if number then
		return tonumber(number)
	end
	-- A failed match() leaves the cursor untouched, nothing to restore.
	return nil
end

--- Consume a dimension: a number immediately followed by an identifier
-- (for example "10px").
-- @treturn ?string the raw source text of the dimension, or nil (cursor
--   restored) when the input is not a number followed by an identifier
function methods:match_dimension()
	local position = self.position

	local number = self:match_number()
	if not number then
		return nil
	end

	local ident = self:match_ident()
	if not ident then
		self:seek(position)
		return nil
	end

	return self.data:sub(position, self.position - 1)
end

--- Consume a namespace prefix terminated by "|".
-- Accepted forms are a bare "|" (empty prefix), a name followed by "|", and
-- "*|".
-- @treturn ?string the prefix without the trailing "|" ("" for a bare "|"),
--   or nil when no namespace prefix starts here
function methods:match_namespace_prefix()
	if self:match("|") then
		return ""
	end

	local prefix = self:match("-?[_%a][_%a%d-]*|")
	if not prefix then
		prefix = self:match("%*|")
	end

	if prefix then
		-- Drop the trailing "|".
		return prefix:sub(1, -2)
	else
		return nil
	end
end

--- Consume a hash token: "#" followed by one or more name characters.
-- @treturn ?string the name without the leading "#", or nil
function methods:match_hash()
	local matched = self:match("#[_%a%d-]+")
	if matched then
		return matched:sub(2)
	else
		return nil
	end
end

--- Create a scanner positioned at the start of `data`.
-- @tparam string data the text to scan
-- @treturn table a new source object
function Source.new(data)
	local source = {
		data = data,
		position = 1,
	}
	setmetatable(source, metatable)
	return source
end

return Source