/ lua / cellmode / codec / csv_parser.lua
csv_parser.lua
-- Module table: the public functions are attached to it and it is returned
-- at the end of the file.
local M = {}

-- Character that opens and closes a quoted field; inside a quoted field a
-- doubled occurrence stands for one literal quote.
local QUOTE = '"'
  4  
  5  local function delimiter_for_format(format)
  6    if format == "tsv" then
  7      return "\t"
  8    end
  9    return ","
 10  end
 11  
-- Expose the delimiter lookup on the module table.
M.delimiter_for_format = delimiter_for_format
 13  
 14  local function read_quoted_field(lines, row, col, nl, field)
 15    field.quoted = true
 16    field.byte_start_row = row
 17    field.byte_start_col = col
 18    col = col + 1
 19    local last_row, last_col = row, col - 1
 20    local parts = {}
 21    while true do
 22      local line = lines[row] or ""
 23      if col > #line then
 24        parts[#parts + 1] = "\n"
 25        row = row + 1
 26        col = 1
 27        if row > nl then
 28          last_row, last_col = row - 1, math.max(1, #(lines[row - 1] or ""))
 29          break
 30        end
 31      else
 32        local ch = line:sub(col, col)
 33        if ch == QUOTE then
 34          local nx = line:sub(col + 1, col + 1)
 35          if nx == QUOTE then
 36            parts[#parts + 1] = QUOTE
 37            col = col + 2
 38          else
 39            last_row, last_col = row, col
 40            col = col + 1
 41            break
 42          end
 43        else
 44          parts[#parts + 1] = ch
 45          col = col + 1
 46        end
 47      end
 48    end
 49    field.value = table.concat(parts)
 50    field.byte_end_row = last_row
 51    field.byte_end_col = last_col
 52    if last_row > field.byte_start_row then
 53      field.multiline = true
 54    end
 55    return row, col
 56  end
 57  
 58  local function read_unquoted_field(lines, row, col, delim, field)
 59    field.quoted = false
 60    field.byte_start_row = row
 61    field.byte_start_col = col
 62    local line = lines[row] or ""
 63    local parts = {}
 64    local last_col = col - 1
 65    while col <= #line do
 66      local ch = line:sub(col, col)
 67      if ch == delim then
 68        break
 69      end
 70      parts[#parts + 1] = ch
 71      last_col = col
 72      col = col + 1
 73    end
 74    field.value = table.concat(parts)
 75    field.byte_end_row = row
 76    field.byte_end_col = math.max(field.byte_start_col - 1, last_col)
 77    return row, col
 78  end
 79  
 80  local function read_field(lines, row, col, delim, nl)
 81    local field = {}
 82    local line = lines[row] or ""
 83    local ch = line:sub(col, col)
 84    if ch == QUOTE then
 85      return field, read_quoted_field(lines, row, col, nl, field)
 86    end
 87    return field, read_unquoted_field(lines, row, col, delim, field)
 88  end
 89  
 90  function M.parse(lines, format)
 91    local delim = delimiter_for_format(format)
 92    local records = {}
 93    local nl = #lines
 94    local row = 1
 95    while row <= nl do
 96      local record = {
 97        buf_row_start = row,
 98        fields = {},
 99      }
100      local col = 1
101      while true do
102        local field, new_row, new_col = read_field(lines, row, col, delim, nl)
103        record.fields[#record.fields + 1] = field
104        if field.multiline then
105          record.multiline = true
106        end
107        row = new_row
108        col = new_col
109        local line = lines[row] or ""
110        if col <= #line and line:sub(col, col) == delim then
111          field.delim_row = row
112          field.delim_col = col
113          col = col + 1
114        else
115          break
116        end
117      end
118      record.buf_row_end = row
119      records[#records + 1] = record
120      row = row + 1
121    end
122    return records
123  end
124  
--- Find the first row of the record that contains `target_row`.
--
-- Rows are scanned from the top while tracking whether the scan position is
-- inside a quoted field (each quote character toggles the state; an escaped
-- `""` pair toggles twice and so cancels out). A row starts a record when
-- the state at its beginning is "outside quotes". This handles quoted fields
-- spanning any number of rows, including rows that contain no quote at all,
-- and does not step back into the previous record when the row above merely
-- closes a multi-line field.
--
-- The scan assumes quote characters only occur as field quoting/escaping;
-- a stray quote inside an unquoted field will skew the result.
--
-- @param lines      array of line strings
-- @param target_row 1-based row of interest
-- @return row of the record start. Returns 1 when target_row < 1; rows past
--         the end are treated as the last row; for an empty `lines` with
--         target_row >= 1 the result is 0 (i.e. #lines).
function M.find_record_start(lines, target_row)
  local nl = #lines
  if target_row < 1 then
    return 1
  end
  local last = math.min(target_row, nl)
  if last < 1 then
    return last
  end

  local start = 1
  local inside_quotes = false
  for r = 1, last - 1 do
    local line = lines[r] or ""
    local pos = line:find(QUOTE, 1, true)
    while pos do
      inside_quotes = not inside_quotes
      pos = line:find(QUOTE, pos + 1, true)
    end
    -- If row r ends outside quotes, row r + 1 begins a new record.
    if not inside_quotes then
      start = r + 1
    end
  end
  return start
end
152  
153  return M