-- csv_parser.lua
--- CSV/TSV parser that works on an array of lines (one string per row).
--
-- A quoted field may span several rows; embedded line breaks are reported
-- as "\n" in the field value. All rows/columns are 1-based and columns are
-- byte offsets into the row string.
--
-- Field table:
--   value           string   decoded content (quotes stripped, "" -> ")
--   quoted          boolean  true when the field started with a quote
--   multiline       true|nil set when the field ends on a later row
--   byte_start_row, byte_start_col   position of the first byte
--   byte_end_row,   byte_end_col     position of the last byte
--   delim_row,      delim_col        position of the delimiter that follows
--                                    the field (nil for the last field)
--
-- Record table:
--   fields          array of field tables
--   buf_row_start   first row of the record
--   buf_row_end     last row of the record (never greater than #lines)
--   multiline       true|nil set when any field is multiline
local M = {}

local QUOTE = '"'

--- Return the field delimiter for a format name.
-- @tparam[opt] string format "tsv" selects a tab; anything else a comma
-- @treturn string the one-character delimiter
local function delimiter_for_format(format)
  if format == "tsv" then
    return "\t"
  end
  return ","
end

M.delimiter_for_format = delimiter_for_format

--- Read a quoted field whose opening quote is at (row, col).
-- Fills `field` in place and returns the position where scanning resumes.
-- @tparam table lines array of row strings
-- @tparam number row row of the opening quote
-- @tparam number col column of the opening quote
-- @tparam string delim field delimiter
-- @tparam number nl number of rows in `lines`
-- @tparam table field field table to populate
-- @treturn number row where the field ended (always <= nl)
-- @treturn number column of the first byte after the field
local function read_quoted_field(lines, row, col, delim, nl, field)
  field.quoted = true
  field.byte_start_row = row
  field.byte_start_col = col

  local parts = {}
  local last_row, last_col
  local closed = false
  col = col + 1 -- step over the opening quote

  while true do
    local line = lines[row] or ""
    -- Plain search: QUOTE is a literal, not a pattern.
    local q = line:find(QUOTE, col, true)
    if not q then
      -- No quote on the rest of this row: all of it is field content.
      parts[#parts + 1] = line:sub(col)
      if row >= nl then
        -- Unterminated quote at end of input. Stop on the last real row so
        -- callers never see a row past the buffer, and do not invent a
        -- trailing line break that is not present in the input.
        last_row, last_col = row, math.max(1, #line)
        col = #line + 1
        break
      end
      parts[#parts + 1] = "\n"
      row = row + 1
      col = 1
    elseif line:sub(q + 1, q + 1) == QUOTE then
      -- Doubled quote: keep the text plus a single literal quote.
      parts[#parts + 1] = line:sub(col, q)
      col = q + 2
    else
      -- Closing quote.
      parts[#parts + 1] = line:sub(col, q - 1)
      last_row, last_col = row, q
      col = q + 1
      closed = true
      break
    end
  end

  if closed then
    -- Malformed input such as `"abc"def,ghi`: keep the text that follows the
    -- closing quote as part of this field instead of silently dropping the
    -- rest of the row.
    local line = lines[row] or ""
    local stop = line:find(delim, col, true) or (#line + 1)
    if stop > col then
      parts[#parts + 1] = line:sub(col, stop - 1)
      last_col = stop - 1
      col = stop
    end
  end

  field.value = table.concat(parts)
  field.byte_end_row = last_row
  field.byte_end_col = last_col
  if last_row > field.byte_start_row then
    field.multiline = true
  end
  return row, col
end

--- Read an unquoted field starting at (row, col).
-- The field runs up to, but not including, the next delimiter or the end of
-- the row. An empty field has byte_end_col == byte_start_col - 1.
-- @tparam table lines array of row strings
-- @tparam number row row to read from
-- @tparam number col column of the first byte of the field
-- @tparam string delim field delimiter
-- @tparam table field field table to populate
-- @treturn number the same row
-- @treturn number column of the delimiter, or one past the end of the row
local function read_unquoted_field(lines, row, col, delim, field)
  local line = lines[row] or ""
  -- Plain search: the delimiter is a literal, not a pattern.
  local stop = line:find(delim, col, true) or math.max(col, #line + 1)

  field.quoted = false
  field.byte_start_row = row
  field.byte_start_col = col
  field.value = line:sub(col, stop - 1)
  field.byte_end_row = row
  field.byte_end_col = stop - 1
  return row, stop
end

--- Read one field, dispatching on whether it starts with a quote.
-- @treturn table the new field
-- @treturn number row where scanning resumes
-- @treturn number column where scanning resumes
local function read_field(lines, row, col, delim, nl)
  local field = {}
  local line = lines[row] or ""
  if line:sub(col, col) == QUOTE then
    return field, read_quoted_field(lines, row, col, delim, nl, field)
  end
  return field, read_unquoted_field(lines, row, col, delim, field)
end

--- Read the record that starts at `row`.
-- @tparam table lines array of row strings
-- @tparam number row first row of the record
-- @tparam string delim field delimiter
-- @tparam number nl number of rows in `lines`
-- @treturn table the record; `buf_row_end` is the row it finished on
local function read_record(lines, row, delim, nl)
  local record = {
    buf_row_start = row,
    fields = {},
  }
  local col = 1
  while true do
    local field
    field, row, col = read_field(lines, row, col, delim, nl)
    record.fields[#record.fields + 1] = field
    if field.multiline then
      record.multiline = true
    end
    local line = lines[row] or ""
    -- sub() past the end of the row yields "", which never equals delim.
    if line:sub(col, col) ~= delim then
      break
    end
    field.delim_row = row
    field.delim_col = col
    col = col + 1
  end
  record.buf_row_end = row
  return record
end

--- Parse every row into records.
-- @tparam table lines array of row strings
-- @tparam[opt] string format "tsv" for tab-separated input; default comma
-- @treturn table array of record tables (see the module header)
function M.parse(lines, format)
  local delim = delimiter_for_format(format)
  local nl = #lines
  local records = {}
  local row = 1
  while row <= nl do
    local record = read_record(lines, row, delim, nl)
    records[#records + 1] = record
    row = record.buf_row_end + 1
  end
  return records
end

--- Find the first row of the record that contains `target_row`.
--
-- Records are walked from row 1 with the same reader `M.parse` uses, so the
-- result always agrees with the `buf_row_start` that `M.parse` reports. The
-- cost is linear in the number of rows before `target_row`. Inspecting only
-- the quote counts of the preceding rows is not reliable: an interior row of
-- a multi-row quoted field may hold no quotes at all, and the row that closes
-- a quoted field holds an odd number of them.
--
-- @tparam table lines array of row strings
-- @tparam number target_row row to locate; values past the end are clamped
-- @tparam[opt] string format "tsv" for tab-separated input; default comma
-- @treturn number first row of the enclosing record; 1 when `target_row` is
--   below 1 or `lines` is empty
function M.find_record_start(lines, target_row, format)
  local nl = #lines
  if target_row < 1 or nl == 0 then
    return 1
  end
  local target = math.min(target_row, nl)
  local delim = delimiter_for_format(format)
  local row = 1
  while true do
    local last = read_record(lines, row, delim, nl).buf_row_end
    if last >= target then
      return row
    end
    row = last + 1
  end
end

return M