/ jq.pyx
jq.pyx
  1  import io
  2  import json
  3  import threading
  4  
  5  from cpython.bytes cimport PyBytes_AsString
  6  from cpython.bytes cimport PyBytes_AsStringAndSize
  7  from libc.float cimport DBL_MAX
  8  from libc.math cimport INFINITY, modf
  9  
 10  
# C declarations for jq's JSON value library (jv.h), as used by this module.
# NOTE(review): jv values appear to be reference counted and most calls seem
# to consume their jv arguments (call sites in this file wrap arguments in
# jv_copy() when they need the value afterwards) -- confirm the ownership
# rule of each function against jv.h before changing any call site.
cdef extern from "jv.h":
    # Kind tag returned by jv_get_kind(); _jv_to_python dispatches on it.
    ctypedef enum jv_kind:
      JV_KIND_INVALID,
      JV_KIND_NULL,
      JV_KIND_FALSE,
      JV_KIND_TRUE,
      JV_KIND_NUMBER,
      JV_KIND_STRING,
      JV_KIND_ARRAY,
      JV_KIND_OBJECT

    # Opaque value handle; its fields are never accessed from Cython.
    ctypedef struct jv:
        pass

    # Inspection, copying and release of values.
    jv_kind jv_get_kind(jv)
    int jv_is_valid(jv)
    jv jv_copy(jv)
    void jv_free(jv)
    # Error message carried by an invalid value, if any.
    jv jv_invalid_get_msg(jv)
    int jv_invalid_has_msg(jv)
    # String access: raw bytes plus their length in bytes.
    char* jv_string_value(jv)
    jv jv_dump_string(jv, int flags)
    int jv_string_length_bytes(jv)
    # Number access.
    int jv_is_integer(jv)
    double jv_number_value(jv)
    # Array construction and access.
    jv jv_array()
    jv jv_array_append(jv, jv)
    int jv_array_length(jv)
    jv jv_array_get(jv, int)
    # Object iteration: an int cursor is advanced until it is no longer valid.
    int jv_object_iter(jv)
    int jv_object_iter_next(jv, int)
    int jv_object_iter_valid(jv, int)
    jv jv_object_iter_key(jv, int)
    jv jv_object_iter_value(jv, int)

    # Incremental JSON parser, fed from a caller-supplied buffer.
    cdef struct jv_parser:
        pass

    jv_parser* jv_parser_new(int)
    void jv_parser_free(jv_parser*)
    # Arguments: parser, buffer, length, flag.
    # NOTE(review): the final int is passed as 0 by this module; presumably an
    # "is partial" flag -- confirm against jv.h.
    void jv_parser_set_buf(jv_parser*, const char*, int, int)
    jv jv_parser_next(jv_parser*)

    # One-shot parse of a NUL-terminated JSON text.
    jv jv_parse(const char*)
 55  
 56  
# C declarations for the jq interpreter API (jq.h), as used by this module.
cdef extern from "jq.h":
    # Opaque interpreter state; created by jq_init, destroyed by jq_teardown.
    ctypedef struct jq_state:
        pass

    # Error callback signature: (user data pointer, error value).
    ctypedef void (*jq_err_cb)(void *, jv)

    jq_state *jq_init()
    # Takes the address of the state pointer.
    void jq_teardown(jq_state **)
    # Compile a program; this module treats a zero return as failure.
    int jq_compile(jq_state *, const char* str)
    # As jq_compile, with program arguments supplied as a jv value.
    int jq_compile_args(jq_state *, const char* str, jv)
    # Begin running the compiled program against one input value.
    void jq_start(jq_state *, jv value, int flags)
    # Fetch the next output; an invalid jv signals an error or end of output.
    jv jq_next(jq_state *)
    # Install / query the error callback and its user data pointer.
    void jq_set_error_cb(jq_state *, jq_err_cb, void *)
    void jq_get_error_cb(jq_state *, jq_err_cb *, void **)
 71  
 72  
 73  cdef object _jv_to_python(jv value):
 74      """Unpack a jv value into a Python value"""
 75      cdef jv_kind kind = jv_get_kind(value)
 76      cdef int idx
 77      cdef jv property_key
 78      cdef jv property_value
 79      cdef object python_value
 80      cdef double number_value
 81  
 82      if kind == JV_KIND_INVALID:
 83          raise ValueError("Invalid value")
 84      elif kind == JV_KIND_NULL:
 85          python_value = None
 86      elif kind == JV_KIND_FALSE:
 87          python_value = False
 88      elif kind == JV_KIND_TRUE:
 89          python_value = True
 90      elif kind == JV_KIND_NUMBER:
 91          number_value = jv_number_value(value)
 92          if number_value == INFINITY:
 93              python_value = DBL_MAX
 94          elif number_value == -INFINITY:
 95              python_value = -DBL_MAX
 96          elif number_value != number_value:
 97              python_value = None
 98          elif _is_integer(number_value):
 99              python_value = int(number_value)
100          else:
101              python_value = number_value
102      elif kind == JV_KIND_STRING:
103          python_value = jv_string_to_py_string(value)
104      elif kind == JV_KIND_ARRAY:
105          python_value = []
106          for idx in range(0, jv_array_length(jv_copy(value))):
107              property_value = jv_array_get(jv_copy(value), idx)
108              python_value.append(_jv_to_python(property_value))
109      elif kind == JV_KIND_OBJECT:
110          python_value = {}
111          idx = jv_object_iter(value)
112          while jv_object_iter_valid(value, idx):
113              property_key = jv_object_iter_key(value, idx)
114              property_value = jv_object_iter_value(value, idx)
115              try:
116                  python_value[jv_string_to_py_string(property_key)] = \
117                      _jv_to_python(property_value)
118              finally:
119                  jv_free(property_key)
120              idx = jv_object_iter_next(value, idx)
121      else:
122          raise ValueError("Invalid value kind: " + str(kind))
123      jv_free(value)
124      return python_value
125  
126  
cdef int _is_integer(double value) noexcept:
    """Return non-zero when ``value`` has no fractional component."""
    # modf() requires somewhere to store the integral part; only the
    # fractional part it returns is of interest here.
    cdef double whole
    return modf(value, &whole) == 0
132  
133  
def compile(object program, args=None):
    """Compile a jq program.

    ``program`` is the program text; it is encoded as UTF-8 before being
    handed to ``_Program``. ``args`` is forwarded unchanged as the program's
    arguments. Returns the resulting ``_Program``.
    """
    return _Program(program.encode("utf8"), args=args)
137  
138  
# Module-wide lock held by _compile() while it installs the error callback
# and compiles a program, so those steps run for one program at a time.
_compilation_lock = threading.Lock()
140  
141  
cdef jq_state* _compile(object program_bytes, object args) except NULL:
    """Create a jq state and compile ``program_bytes`` into it.

    Args:
        program_bytes: the UTF-8 encoded program text.
        args: ``None``, or a JSON-serialisable object/array of program
            arguments; it is serialised with ``json.dumps`` and handed to
            ``jq_compile_args``.

    Returns:
        A compiled ``jq_state*`` owned by the caller, who must eventually
        pass it to ``jq_teardown``.

    Raises:
        ValueError: if jq reports compilation errors, the program is not
            valid, or ``args`` does not serialise to a JSON object or array.
        Exception: if ``jq_init`` fails.
    """
    cdef jq_state *jq = jq_init()
    cdef _ErrorStore error_store
    cdef jv jv_args
    cdef jv_kind args_kind
    cdef int compiled = 0
    try:
        if not jq:
            raise Exception("jq_init failed")

        error_store = _ErrorStore()

        with _compilation_lock:
            jq_set_error_cb(jq, _store_error, <void*>error_store)
            try:
                if args is None:
                    compiled = jq_compile(jq, program_bytes)
                else:
                    args_bytes = json.dumps(args).encode("utf-8")
                    jv_args = jv_parse(PyBytes_AsString(args_bytes))
                    # jq takes program arguments as an object or an array;
                    # reject anything else here instead of passing it on.
                    args_kind = jv_get_kind(jv_args)
                    if args_kind != JV_KIND_ARRAY and args_kind != JV_KIND_OBJECT:
                        jv_free(jv_args)
                        raise ValueError("args must be a JSON object or array")
                    compiled = jq_compile_args(jq, program_bytes, jv_args)
            finally:
                # error_store is a local object: once this function returns
                # nothing keeps it alive, so the raw pointer registered above
                # must not remain installed in the long-lived jq state.
                # NOTE(review): a NULL callback is expected to make jq fall
                # back to its default error handler -- confirm against the
                # bundled jq version.
                jq_set_error_cb(jq, NULL, NULL)

            if error_store.has_errors():
                raise ValueError(error_store.error_string())

        if not compiled:
            raise ValueError("program was not valid")
    except:
        # Cleanup only: release the half-built state, then re-raise.
        jq_teardown(&jq)
        raise

    return jq
174  
175  
cdef void _store_error(void* store_ptr, jv error) noexcept:
    """Error callback registered with jq by ``_compile``.

    ``store_ptr`` is the ``_ErrorStore`` that was passed as user data; the
    error is converted to text and appended to it. ``error`` is released
    here once it has been recorded.
    """
    cdef _ErrorStore target = <_ErrorStore>store_ptr

    target.store_error(_jq_error_to_py_string(error))

    jv_free(error)
183  
184  
cdef unicode _jq_error_to_py_string(jv error) noexcept:
    """Render a jq error value as text.

    String errors are decoded directly (falling back to ``"Internal error"``
    if decoding fails); any other value is converted to Python and dumped as
    JSON.

    ``error`` is only borrowed: the caller remains responsible for calling
    ``jv_free`` on it, as every caller in this file does.
    """
    if jv_get_kind(error) == JV_KIND_STRING:
        try:
            # jv_string_to_py_string does not release its argument, so no
            # extra reference is taken here; the previous unconditional
            # jv_copy was never freed on this path and leaked a reference.
            return jv_string_to_py_string(error)
        except Exception:
            return u"Internal error"
    else:
        # _jv_to_python takes ownership of what it is given, so hand it a
        # copy and leave the caller's reference intact.
        return json.dumps(_jv_to_python(jv_copy(error)))
195  
196  
cdef class _ErrorStore(object):
    """Collects the error messages jq reports through ``_store_error``."""
    cdef object _errors  # list of message strings, in arrival order

    def __cinit__(self):
        # Start with no recorded errors.
        self._errors = []

    cdef int has_errors(self):
        """Return the number of stored messages (non-zero means errors)."""
        return len(self._errors)

    cdef object error_string(self):
        """Return every stored message, one per line."""
        return "\n".join(self._errors)

    cdef void store_error(self, unicode error):
        """Record one more message."""
        self._errors.append(error)

    cdef void clear(self):
        """Drop all stored messages."""
        self._errors = []
214  
215  
class _EmptyValue(object):
    """Type of the sentinel that marks an argument as not supplied."""


# Default for optional ``value`` / ``text`` parameters; compared by identity
# so that ``None`` remains a legitimate argument value.
_NO_VALUE = _EmptyValue()
220  
221  
cdef class _JqStatePool(object):
    """Hands out compiled jq states for one program.

    At most one idle state is cached. ``acquire`` returns the cached state
    when there is one and compiles a fresh state otherwise; ``release`` puts
    a state back into the empty slot, or tears it down if the slot is
    already occupied. Both operations run under ``_lock``.
    """
    cdef jq_state* _jq_state      # the idle state, or NULL when handed out
    cdef object _program_bytes    # program text, kept for recompilation
    cdef object _args             # program arguments, kept for recompilation
    cdef object _lock

    def __cinit__(self, program_bytes, args):
        self._program_bytes = program_bytes
        self._args = args
        # Compiling eagerly surfaces program errors at construction time.
        self._jq_state = _compile(self._program_bytes, args=self._args)
        self._lock = threading.Lock()

    def __dealloc__(self):
        jq_teardown(&self._jq_state)

    cdef jq_state* acquire(self) except NULL:
        """Return a compiled state owned by the caller until ``release``.

        Declared ``except NULL`` so that a failure in ``_compile`` (or in
        taking the lock) propagates as a Python exception rather than being
        lost; a successful call never returns NULL.
        """
        cdef jq_state* state
        with self._lock:
            if self._jq_state == NULL:
                return _compile(self._program_bytes, args=self._args)
            else:
                state = self._jq_state
                self._jq_state = NULL
                return state

    cdef void release(self, jq_state* state):
        """Give ``state`` back: cache it if the slot is empty, else free it."""
        with self._lock:
            if self._jq_state == NULL:
                self._jq_state = state
            else:
                jq_teardown(&state)
252  
253  
cdef class _Program(object):
    """A jq program compiled once and applied to any number of inputs.

    Each ``input*`` method returns a ``_ProgramWithInput`` that runs the
    program over the supplied data.
    """
    cdef object _program_bytes
    cdef _JqStatePool _jq_state_pool

    def __cinit__(self, program_bytes, args):
        self._program_bytes = program_bytes
        self._jq_state_pool = _JqStatePool(program_bytes, args=args)

    def input(self, value=_NO_VALUE, text=_NO_VALUE):
        """Bind input given as exactly one of ``value`` or ``text``.

        Raises ``ValueError`` when both or neither are supplied.
        """
        has_value = value is not _NO_VALUE
        has_text = text is not _NO_VALUE
        if has_value == has_text:
            raise ValueError("Either the value or text argument should be set")

        if has_text:
            return self.input_text(text)
        return self.input_value(value)

    def input_value(self, value):
        """Bind a single Python value, serialised to JSON."""
        serialised = json.dumps(value)
        return self.input_text(serialised)

    def input_values(self, values):
        """Bind several Python values, one JSON document per line."""
        buffer = io.StringIO()
        for item in values:
            json.dump(item, buffer)
            buffer.write("\n")
        return self.input_text(buffer.getvalue())

    def input_text(self, text, *, slurp=False):
        """Bind JSON text; ``slurp`` is forwarded to ``_ProgramWithInput``."""
        encoded = text.encode("utf8")
        return _ProgramWithInput(self._jq_state_pool, encoded, slurp=slurp)

    @property
    def program_string(self):
        """The program source as text."""
        return self._program_bytes.decode("utf8")

    def __repr__(self):
        return "jq.compile({!r})".format(self.program_string)

    # Support the 0.1.x API for backwards compatibility
    def transform(self, value=_NO_VALUE, text=_NO_VALUE, text_output=False, multiple_output=False):
        """Run the program and return its output in the requested form.

        ``text_output`` takes precedence over ``multiple_output``; with
        neither set, only the first result is returned.
        """
        bound = self.input(value, text=text)
        if text_output:
            return bound.text()
        if multiple_output:
            return bound.all()
        return bound.first()
300  
301  
cdef class _ProgramWithInput(object):
    """A compiled program paired with the input it will run over.

    Iterating the object creates a fresh ``_ResultIterator`` each time.
    """
    cdef _JqStatePool _jq_state_pool
    cdef object _bytes_input
    cdef bint _slurp

    def __cinit__(self, jq_state_pool, bytes_input, *, bint slurp):
        self._slurp = slurp
        self._bytes_input = bytes_input
        self._jq_state_pool = jq_state_pool

    def __iter__(self):
        return self._make_iterator()

    cdef _ResultIterator _make_iterator(self):
        """Build a new iterator over the program's results."""
        return _ResultIterator(
            self._jq_state_pool,
            self._bytes_input,
            slurp=self._slurp,
        )

    def text(self):
        """Return every result as JSON, one result per line."""
        # Performance testing suggests that using _jv_to_python (within the
        # result iterator) followed by json.dumps is faster than using
        # jv_dump_string to generate the string directly from the jv values.
        # See: https://github.com/mwilliamson/jq.py/pull/50
        return "\n".join(json.dumps(result) for result in self)

    def all(self):
        """Return every result as a list."""
        return list(self)

    def first(self):
        """Return the first result."""
        results = _iter(self)
        return next(results)
330  
331  
cdef class _ResultIterator(object):
    """Iterator over the results of running a jq program on some input.

    The input bytes are fed through a ``jv_parser``. Each parsed value (or,
    in slurp mode, one array holding all of them) is started on a jq state
    borrowed from the pool, and the program's outputs are yielded as Python
    values.
    """
    cdef _JqStatePool _jq_state_pool
    cdef jq_state* _jq
    cdef jv_parser* _parser
    # Holds a reference to the input for the iterator's lifetime.
    # NOTE(review): presumably required because the parser reads from this
    # buffer without copying it -- confirm against jv_parser_set_buf.
    cdef bytes _bytes_input
    cdef bint _slurp
    cdef bint _ready  # True while the current input still has outputs pending

    def __dealloc__(self):
        # __cinit__ may have failed part-way, so release only what was
        # actually obtained.
        if self._jq != NULL:
            if self._jq_state_pool is not None:
                self._jq_state_pool.release(self._jq)
            else:
                jq_teardown(&self._jq)
            self._jq = NULL
        if self._parser != NULL:
            jv_parser_free(self._parser)
            self._parser = NULL

    def __cinit__(self, _JqStatePool jq_state_pool, bytes bytes_input, *, bint slurp):
        cdef char* cbytes_input
        cdef ssize_t clen_input

        self._jq = NULL
        self._parser = NULL
        self._jq_state_pool = jq_state_pool
        self._bytes_input = bytes_input
        self._slurp = slurp
        self._ready = False

        # Validate the input before acquiring any C resources.
        PyBytes_AsStringAndSize(bytes_input, &cbytes_input, &clen_input)
        # jv_parser_set_buf takes the length as an int; refuse input whose
        # size would be silently truncated by that conversion.
        if clen_input != <ssize_t>(<int>clen_input):
            raise ValueError("input is too large")

        self._jq = jq_state_pool.acquire()
        self._parser = jv_parser_new(0)
        jv_parser_set_buf(self._parser, cbytes_input, <int>clen_input, 0)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next program output as a Python value.

        Raises ``ValueError`` when jq reports an error or the input fails to
        parse, and ``StopIteration`` once the input is exhausted.
        """
        cdef jv result
        cdef jv error_message
        while True:
            if not self._ready:
                self._ready_next_input()
                self._ready = True

            result = jq_next(self._jq)
            if jv_is_valid(result):
                return _jv_to_python(result)
            elif jv_invalid_has_msg(jv_copy(result)):
                error_message = jv_invalid_get_msg(result)
                message = _jq_error_to_py_string(error_message)
                jv_free(error_message)
                raise ValueError(message)
            else:
                # No more outputs for this input; move on to the next one.
                jv_free(result)
                self._ready = False

    cdef bint _ready_next_input(self) except 1:
        """Start the program on the next input value.

        In slurp mode all remaining inputs are gathered into a single array,
        after which slurp mode is switched off so that the following call
        ends the iteration. ``StopIteration`` propagates when no input is
        left.
        """
        cdef int jq_flags = 0
        cdef jv value
        cdef jv next_value

        if self._slurp:
            value = jv_array()
            try:
                while True:
                    try:
                        next_value = self._parse_next_input()
                        value = jv_array_append(value, next_value)
                    except StopIteration:
                        self._slurp = False
                        break
            except BaseException:
                # A parse error interrupted slurping: release the partially
                # built array instead of leaking it, then re-raise.
                jv_free(value)
                raise
        else:
            value = self._parse_next_input()

        jq_start(self._jq, value, jq_flags)
        return 0

    cdef inline jv _parse_next_input(self) except *:
        """Return the next parsed input value.

        Raises ``ValueError`` on a parse error and ``StopIteration`` when the
        parser has nothing more to offer.
        """
        cdef jv value = jv_parser_next(self._parser)
        cdef jv error_message
        if jv_is_valid(value):
            return value
        elif jv_invalid_has_msg(jv_copy(value)):
            error_message = jv_invalid_get_msg(value)
            message = _jq_error_to_py_string(error_message)
            jv_free(error_message)
            raise ValueError(u"parse error: " + message)
        else:
            jv_free(value)
            raise StopIteration()
410  
411  
def all(program, value=_NO_VALUE, text=_NO_VALUE):
    """Compile ``program``, run it on the input and return all results as a list."""
    bound = compile(program).input(value, text=text)
    return bound.all()
414  
415  
def first(program, value=_NO_VALUE, text=_NO_VALUE):
    """Compile ``program``, run it on the input and return the first result."""
    bound = compile(program).input(value, text=text)
    return bound.first()
418  
419  
# Keep a handle on the builtin ``iter`` before the module-level ``iter``
# function defined below shadows it.
_iter = iter
421  
422  
def iter(program, value=_NO_VALUE, text=_NO_VALUE):
    """Compile ``program``, run it on the input and return an iterator of results."""
    bound = compile(program).input(value, text=text)
    return _iter(bound)
425  
426  
def text(program, value=_NO_VALUE, text=_NO_VALUE):
    """Compile ``program``, run it on the input and return the results as JSON text."""
    bound = compile(program).input(value, text=text)
    return bound.text()
429  
430  
431  # Support the 0.1.x API for backwards compatibility
def jq(object program):
    """Alias of ``compile`` taking only the program text."""
    compiled_program = compile(program)
    return compiled_program
434  
435  
cdef unicode jv_string_to_py_string(jv value):
    """Decode a jv string into a Python ``str`` (UTF-8).

    This function never calls ``jv_free`` on ``value``; only the copy passed
    to the length query is handed away, so the caller still has to release
    ``value`` itself.
    """
    cdef int byte_count = jv_string_length_bytes(jv_copy(value))
    cdef char* raw_bytes = jv_string_value(value)
    # Slice by explicit length rather than relying on a NUL terminator.
    return raw_bytes[:byte_count].decode("utf-8")