/ src / toml.lua
toml.lua
  1  local TOML = {
  2  	-- denotes the current supported TOML version
  3  	version = 0.40,
  4  
  5  	-- sets whether the parser should follow the TOML spec strictly
  6  	-- currently, no errors are thrown for the following rules if strictness is turned off:
  7  	--   tables having mixed keys
  8  	--   redefining a table
  9  	--   redefining a key within a table
 10  	strict = true,
 11  }
 12  
 13  -- converts TOML data into a lua table
 14  TOML.parse = function(toml, options)
	options = options or {}

	-- an explicit options.strict (including false) overrides the module-wide
	-- default; the `a and b or c` idiom cannot be used for this because it
	-- falls through to the default whenever the option is false
	local strict = options.strict
	if strict == nil then
		strict = TOML.strict
	end

	-- the official TOML definition of whitespace (tab or space)
	local ws = "[\009\032]"

	-- the official TOML definition of newline
	-- this is a character class, so a CRLF pair is matched one character at a
	-- time (first the CR, then the LF)
	local nl = "[\10\13]"

	-- stores text data
	---@type number | string | nil
	local buffer = ""

	-- the current location within the string to parse
	local cursor = 1

	-- the output table
	local out = {}

	-- the current table to write to
	local obj = out
 41  
	-- returns the single character found n (default: 0) positions after the
	-- cursor; the result is an empty string when that position lies outside
	-- the document
	local function char(n)
		local index = cursor + (n or 0)
		return toml:sub(index, index)
	end
 47  
	-- advances the cursor by n characters (one character when n is omitted)
	local function step(n)
		cursor = cursor + (n or 1)
	end
 53  
	-- advances the cursor past a run of TOML whitespace (tabs and spaces),
	-- stopping on the first character that is not whitespace
	local function skipWhitespace()
		while true do
			if not char():match(ws) then
				return
			end
			step()
		end
	end
 60  
	-- remove the (Lua) whitespace at the beginning and end of a string
	-- returns exactly one value: the surrounding parentheses discard the
	-- substitution count that string.gsub returns as its second result, so the
	-- call is safe to use as the last argument of another call
	local function trim(str)
		return (str:gsub("^%s*(.-)%s*$", "%1"))
	end
 65  
	-- breaks str into the pieces found between occurrences of delim
	--   str:   text to divide; an empty string yields an empty table
	--   delim: delimiter, interpreted as a Lua pattern
	-- returns a sequence holding the pieces in order
	local function split(str, delim)
		local pieces = {}
		if str == "" then
			return pieces
		end

		-- a terminator is appended so the final piece is followed by a
		-- delimiter too; for a delimiter written with pattern escapes the
		-- terminator is the delimiter with its "%" characters removed
		local terminator = delim
		if delim:match("%%") then
			terminator = (delim:gsub("%%", ""))
		end

		local subject = str .. terminator
		for piece in subject:gmatch("(.-)" .. delim) do
			pieces[#pieces + 1] = piece
		end
		return pieces
	end
 81  
	-- produce a parsing error message
	-- the error contains the line number of the current position
	--   message:    description of the problem
	--   strictOnly: when true, the error is only raised while strict mode is
	--               on; otherwise the call returns without doing anything
	local function err(message, strictOnly)
		if strictOnly and not strict then
			return
		end

		-- the line number is one more than the number of line feeds that
		-- precede the cursor; counting only LF makes a CRLF pair count as a
		-- single line break
		local _, breaks = toml:sub(1, cursor - 1):gsub("\10", "")
		local line = breaks + 1

		error("TOML: " .. message .. " on line " .. line .. ".", 4)
	end
 98  
	-- reports whether the cursor still points at a character of the document
	-- loops use this as their termination guard so they cannot run past the
	-- end of the input
	local function bounds()
		return not (cursor > toml:len())
	end
104  
105  	local function parseString()
106  		local quoteType = char() -- should be single or double quote
107  
108  		-- this is a multiline string if the next 2 characters match
109  		local multiline = (char(1) == char(2) and char(1) == char())
110  
111  		-- buffer to hold the string
112  		local str = ""
113  
114  		-- skip the quotes
115  		step(multiline and 3 or 1)
116  
117  		while bounds() do
118  			if multiline and char():match(nl) and str == "" then
119  				-- skip line break line at the beginning of multiline string
120  				step()
121  			end
122  
123  			-- keep going until we encounter the quote character again
124  			if char() == quoteType then
125  				if multiline then
126  					if char(1) == char(2) and char(1) == quoteType then
127  						step(3)
128  						break
129  					end
130  				else
131  					step()
132  					break
133  				end
134  			end
135  
136  			if char():match(nl) and not multiline then
137  				err("Single-line string cannot contain line break")
138  			end
139  
140  			-- if we're in a double-quoted string, watch for escape characters!
141  			if quoteType == '"' and char() == "\\" then
142  				if multiline and char(1):match(nl) then
143  					-- skip until first non-whitespace character
144  					step(1) -- go past the line break
145  					while bounds() do
146  						if not char():match(ws) and not char():match(nl) then
147  							break
148  						end
149  						step()
150  					end
151  				else
152  					-- all available escape characters
153  					local escape = {
154  						b = "\b",
155  						t = "\t",
156  						n = "\n",
157  						f = "\f",
158  						r = "\r",
159  						['"'] = '"',
160  						["\\"] = "\\",
161  					}
162  					-- utf function from http://stackoverflow.com/a/26071044
163  					-- converts \uXXX into actual unicode
164  					local function utf(char)
165  						local bytemarkers = { { 0x7ff, 192 }, { 0xffff, 224 }, { 0x1fffff, 240 } }
166  						if char < 128 then
167  							return string.char(char)
168  						end
169  						local charbytes = {}
170  						for bytes, vals in pairs(bytemarkers) do
171  							if char <= vals[1] then
172  								for b = bytes + 1, 2, -1 do
173  									local mod = char % 64
174  									char = (char - mod) / 64
175  									charbytes[b] = string.char(128 + mod)
176  								end
177  								charbytes[1] = string.char(vals[2] + char)
178  								break
179  							end
180  						end
181  						return table.concat(charbytes)
182  					end
183  
184  					if escape[char(1)] then
185  						-- normal escape
186  						str = str .. escape[char(1)]
187  						step(2) -- go past backslash and the character
188  					elseif char(1) == "u" then
189  						-- utf-16
190  						step()
191  						local uni = char(1) .. char(2) .. char(3) .. char(4)
192  						step(5)
193  						uni = tonumber(uni, 16)
194  						if (uni >= 0 and uni <= 0xd7ff) and not (uni >= 0xe000 and uni <= 0x10ffff) then
195  							str = str .. utf(uni)
196  						else
197  							err("Unicode escape is not a Unicode scalar")
198  						end
199  					elseif char(1) == "U" then
200  						-- utf-32
201  						step()
202  						local uni = char(1) .. char(2) .. char(3) .. char(4) .. char(5) .. char(6) .. char(7) .. char(8)
203  						step(9)
204  						uni = tonumber(uni, 16)
205  						if (uni >= 0 and uni <= 0xd7ff) and not (uni >= 0xe000 and uni <= 0x10ffff) then
206  							str = str .. utf(uni)
207  						else
208  							err("Unicode escape is not a Unicode scalar")
209  						end
210  					else
211  						err("Invalid escape")
212  					end
213  				end
214  			else
215  				-- if we're not in a double-quoted string, just append it to our buffer raw and keep going
216  				str = str .. char()
217  				step()
218  			end
219  		end
220  
221  		return { value = str, type = "string" }
222  	end
223  
224  	local function parseNumber()
225  		---@type string | number | nil
226  		local num = ""
227  		local exp
228  		local date = false
229  		while bounds() do
230  			if char():match("[%+%-%.eE_0-9]") then
231  				if not exp then
232  					if char():lower() == "e" then
233  						-- as soon as we reach e or E, start appending to exponent buffer instead of
234  						-- number buffer
235  						exp = ""
236  					elseif char() ~= "_" then
237  						num = num .. char()
238  					end
239  				elseif char():match("[%+%-0-9]") then
240  					exp = exp .. char()
241  				else
242  					err("Invalid exponent")
243  				end
244  			elseif
245  				char():match(ws)
246  				or char() == "#"
247  				or char():match(nl)
248  				or char() == ","
249  				or char() == "]"
250  				or char() == "}"
251  			then
252  				break
253  			elseif char() == "T" or char() == "Z" then
254  				-- parse the date (as a string, since lua has no date object)
255  				date = true
256  				while bounds() do
257  					if char() == "," or char() == "]" or char() == "#" or char():match(nl) or char():match(ws) then
258  						break
259  					end
260  					num = num .. char()
261  					step()
262  				end
263  			else
264  				err("Invalid number")
265  			end
266  			step()
267  		end
268  
269  		if date then
270  			return { value = num, type = "date" }
271  		end
272  
273  		local float = false
274  		if num == nil then
275  			err("Invalid number")
276  		elseif num:match("%.") then
277  			float = true
278  		end
279  
280  		exp = exp and tonumber(exp) or 0
281  		num = tonumber(num)
282  
283  		if not float then
284  			return {
285  				-- lua will automatically convert the result
286  				-- of a power operation to a float, so we have
287  				-- to convert it back to an int with math.floor
288  				value = math.floor(num * 10 ^ exp),
289  				type = "int",
290  			}
291  		end
292  
293  		return { value = num * 10 ^ exp, type = "float" }
294  	end
295  
296  	local parseArray, getValue
297  
298  	function parseArray()
299  		step() -- skip [
300  		skipWhitespace()
301  
302  		local arrayType
303  		local array = {}
304  
305  		while bounds() do
306  			if char() == "]" then
307  				break
308  			elseif char():match(nl) then
309  				-- skip
310  				step()
311  				skipWhitespace()
312  			elseif char() == "#" then
313  				while bounds() and not char():match(nl) do
314  					step()
315  				end
316  			else
317  				-- get the next object in the array
318  				local v = getValue()
319  				if not v then
320  					break
321  				end
322  
323  				-- set the type if it hasn't been set before
324  				if arrayType == nil then
325  					arrayType = v.type
326  				elseif arrayType ~= v.type then
327  					err("Mixed types in array", true)
328  				end
329  
330  				array = array or {}
331  				table.insert(array, v.value)
332  
333  				if char() == "," then
334  					step()
335  				end
336  				skipWhitespace()
337  			end
338  		end
339  		step()
340  
341  		return { value = array, type = "array" }
342  	end
343  
344  	local function parseInlineTable()
345  		step() -- skip opening brace
346  
347  		---@type number | string
348  		local buffer = ""
349  		local quoted = false
350  		local tbl = {}
351  
352  		while bounds() do
353  			if char() == "}" then
354  				break
355  			elseif char() == "'" or char() == '"' then
356  				buffer = parseString().value
357  				quoted = true
358  			elseif char() == "=" then
359  				if not quoted then
360  					buffer = trim(buffer)
361  				end
362  
363  				step() -- skip =
364  				skipWhitespace()
365  
366  				if char():match(nl) then
367  					err("Newline in inline table")
368  				end
369  
370  				local v = getValue().value
371  				tbl[buffer] = v
372  
373  				skipWhitespace()
374  
375  				if char() == "," then
376  					step()
377  				elseif char():match(nl) then
378  					err("Newline in inline table")
379  				end
380  
381  				quoted = false
382  				buffer = ""
383  			else
384  				buffer = buffer .. char()
385  				step()
386  			end
387  		end
388  		step() -- skip closing brace
389  
390  		return { value = tbl, type = "array" }
391  	end
392  
393  	local function parseBoolean()
394  		local v
395  		if toml:sub(cursor, cursor + 3) == "true" then
396  			step(4)
397  			v = { value = true, type = "boolean" }
398  		elseif toml:sub(cursor, cursor + 4) == "false" then
399  			step(5)
400  			v = { value = false, type = "boolean" }
401  		else
402  			err("Invalid primitive")
403  		end
404  
405  		skipWhitespace()
406  		if char() == "#" then
407  			while not char():match(nl) do
408  				step()
409  			end
410  		end
411  
412  		return v
413  	end
414  
415  	-- figure out the type and get the next value in the document
416  	function getValue()
417  		if char() == '"' or char() == "'" then
418  			return parseString()
419  		elseif char():match("[%+%-0-9]") then
420  			return parseNumber()
421  		elseif char() == "[" then
422  			return parseArray()
423  		elseif char() == "{" then
424  			return parseInlineTable()
425  		else
426  			return parseBoolean()
427  		end
428  		-- date regex (for possible future support):
429  		-- %d%d%d%d%-[0-1][0-9]%-[0-3][0-9]T[0-2][0-9]%:[0-6][0-9]%:[0-6][0-9][Z%:%+%-%.0-9]*
430  	end
431  
432  	-- track whether the current key was quoted or not
433  	local quotedKey = false
434  
435  	-- parse the document!
436  	while cursor <= toml:len() do
437  		-- skip comments and whitespace
438  		if char() == "#" then
439  			while not char():match(nl) do
440  				step()
441  			end
442  		end
443  
444  		if char():match(nl) then
445  			-- skip
446  		end
447  
448  		if char() == "=" then
449  			step()
450  			skipWhitespace()
451  
452  			-- trim key name
453  			buffer = trim(buffer)
454  
455  			if buffer:match("^[0-9]*$") and not quotedKey then
456  				buffer = tonumber(buffer)
457  			end
458  
459  			if buffer == "" and not quotedKey then
460  				err("Empty key name")
461  			end
462  
463  			local v = getValue()
464  			if v then
465  				-- if the key already exists in the current object, throw an error
466  				if obj[buffer] then
467  					err('Cannot redefine key "' .. buffer .. '"', true)
468  				end
469  				-- Ensure v is not nil before accessing v.value (already handled by outer 'if v then')
470  				if buffer then
471  					obj[buffer] = v.value
472  				end
473  			end
474  
475  			-- clear the buffer
476  			buffer = ""
477  			quotedKey = false
478  
479  			-- skip whitespace and comments
480  			skipWhitespace()
481  			if char() == "#" then
482  				while bounds() and not char():match(nl) do
483  					step()
484  				end
485  			end
486  
487  			-- if there is anything left on this line after parsing a key and its value,
488  			-- throw an error
489  			if not char():match(nl) and cursor < toml:len() then
490  				err("Invalid primitive")
491  			end
492  		elseif char() == "[" then
493  			buffer = ""
494  			step()
495  			local tableArray = false
496  
497  			-- if there are two brackets in a row, it's a table array!
498  			if char() == "[" then
499  				tableArray = true
500  				step()
501  			end
502  
503  			obj = out
504  
505  			local function processKey(isLast)
506  				isLast = isLast or false
507  				buffer = trim(buffer)
508  
509  				if not quotedKey and buffer == "" then
510  					err("Empty table name")
511  				end
512  
513  				if isLast and obj[buffer] and not tableArray and #obj[buffer] > 0 then
514  					err("Cannot redefine table", true)
515  				end
516  
517  				-- set obj to the appropriate table so we can start
518  				-- filling it with values!
519  				if tableArray then
520  					-- push onto cache
521  					if obj[buffer] then
522  						obj = obj[buffer]
523  						if isLast then
524  							table.insert(obj, {})
525  						end
526  						obj = obj[#obj]
527  					else
528  						obj[buffer] = {}
529  						obj = obj[buffer]
530  						if isLast then
531  							table.insert(obj, {})
532  							obj = obj[1]
533  						end
534  					end
535  				else
536  					obj[buffer] = obj[buffer] or {}
537  					obj = obj[buffer]
538  				end
539  			end
540  
541  			while bounds() do
542  				if char() == "]" then
543  					if tableArray then
544  						if char(1) ~= "]" then
545  							err("Mismatching brackets")
546  						else
547  							step() -- skip inside bracket
548  						end
549  					end
550  					step() -- skip outside bracket
551  
552  					processKey(true)
553  					buffer = ""
554  					break
555  				elseif char() == '"' or char() == "'" then
556  					buffer = parseString().value
557  					quotedKey = true
558  				elseif char() == "." then
559  					step() -- skip period
560  					processKey()
561  					buffer = ""
562  				else
563  					buffer = buffer .. char()
564  					step()
565  				end
566  			end
567  
568  			buffer = ""
569  			quotedKey = false
570  		elseif char() == '"' or char() == "'" then
571  			-- quoted key
572  			buffer = parseString().value
573  			quotedKey = true
574  		end
575  
576  		buffer = buffer .. (char():match(nl) and "" or char())
577  		step()
578  	end
579  
580  	return out
581  end
582  
-- converts a lua table into TOML text
--   tbl: table whose keys are strings or numbers and whose values are
--        booleans, numbers, strings or (nested) tables; values of any other
--        type are skipped
-- returns the TOML document as a string without a trailing line break
-- the input table is never modified
-- layout rules for a table value:
--   every key a number and every value a table -> [[array of tables]]
--   every key a number otherwise               -> key = [ ... ]
--   anything else                              -> [table] section
-- an empty table produces no output; tables that contain themselves are
-- not supported (the recursion would not terminate)
TOML.encode = function(tbl)
	-- output lines, joined once at the end
	local lines = {}

	-- formatted names of the sections enclosing the table being written
	local path = {}

	-- escape sequences TOML defines for basic strings
	local escapes = {
		["\b"] = "\\b",
		["\t"] = "\\t",
		["\n"] = "\\n",
		["\f"] = "\\f",
		["\r"] = "\\r",
		['"'] = '\\"',
		["\\"] = "\\\\",
	}

	-- escapes quotes, backslashes and control characters
	-- with keepNewlines set, line feeds are left untouched (multi-line strings)
	local function escape(str, keepNewlines)
		return (str:gsub('[%c"\\]', function(c)
			if c == "\n" and keepNewlines then
				return nil -- keep the character as it is
			end
			return escapes[c] or string.format("\\u%04X", c:byte())
		end))
	end

	-- formats a string value; strings containing line breaks are written as
	-- multi-line strings when allowMultiline is set
	local function formatString(str, allowMultiline)
		if allowMultiline and str:find("\n", 1, true) then
			local body = escape(str, true)
			-- a raw line break right after the opening delimiter would be
			-- dropped by a parser, so a leading one is written as an escape
			if body:sub(1, 1) == "\n" then
				body = "\\n" .. body:sub(2)
			end
			return '"""' .. body .. '"""'
		end
		return '"' .. escape(str) .. '"'
	end

	-- formats a key: bare when it only uses letters, digits, "_" and "-",
	-- quoted otherwise
	local function formatKey(key)
		local name = tostring(key)
		if name:match("^[A-Za-z0-9_%-]+$") then
			return name
		end
		return '"' .. escape(name) .. '"'
	end

	-- decides how a table value is laid out (see the rules above)
	local function classify(t)
		local array, arrayTable = true, true
		for k, v in pairs(t) do
			if type(k) ~= "number" then
				array = false
			end
			if type(v) ~= "table" then
				arrayTable = false
			end
		end
		if not array then
			return "table"
		end
		return arrayTable and "tablearray" or "array"
	end

	-- returns the keys of a number-keyed table in ascending order
	local function numericKeys(t)
		local keys = {}
		for k in pairs(t) do
			keys[#keys + 1] = k
		end
		table.sort(keys)
		return keys
	end

	-- formats a value for use inside an array or inline table
	-- returns nil for values that cannot be represented
	local function formatValue(v)
		local kind = type(v)
		if kind == "boolean" or kind == "number" then
			return tostring(v)
		elseif kind == "string" then
			return formatString(v, false)
		elseif kind == "table" then
			local items = {}
			if classify(v) == "table" then
				for k, item in pairs(v) do
					local text = formatValue(item)
					if text then
						items[#items + 1] = formatKey(k) .. " = " .. text
					end
				end
				return "{ " .. table.concat(items, ", ") .. " }"
			end
			for _, k in ipairs(numericKeys(v)) do
				local text = formatValue(v[k])
				if text then
					items[#items + 1] = text
				end
			end
			return "[" .. table.concat(items, ", ") .. "]"
		end
		return nil
	end

	-- writes the contents of one table: first every "key = value" line, then
	-- the nested sections, so no value can end up under the wrong header
	local function write(t)
		local sections = {}

		for k, v in pairs(t) do
			local kind = type(v)
			if kind == "table" then
				local layout = classify(v)
				if layout == "array" then
					-- plain array, one element per line
					lines[#lines + 1] = formatKey(k) .. " = ["
					for _, i in ipairs(numericKeys(v)) do
						local text = formatValue(v[i])
						if text then
							lines[#lines + 1] = text .. ","
						end
					end
					lines[#lines + 1] = "]"
				else
					sections[#sections + 1] = { key = k, value = v, layout = layout }
				end
			elseif kind == "string" then
				lines[#lines + 1] = formatKey(k) .. " = " .. formatString(v, true)
			elseif kind == "boolean" or kind == "number" then
				lines[#lines + 1] = formatKey(k) .. " = " .. tostring(v)
			end
		end

		for _, section in ipairs(sections) do
			path[#path + 1] = formatKey(section.key)
			local name = table.concat(path, ".")
			if section.layout == "table" then
				-- just a key/value table
				lines[#lines + 1] = "[" .. name .. "]"
				write(section.value)
			else
				-- double bracket syntax, one header per element
				for _, i in ipairs(numericKeys(section.value)) do
					lines[#lines + 1] = "[[" .. name .. "]]"
					write(section.value[i])
				end
			end
			path[#path] = nil
		end
	end

	write(tbl)

	return table.concat(lines, "\n")
end
667  
668  return TOML