/ jq.pyx
jq.pyx
import io
import json
import threading

from cpython.bytes cimport PyBytes_AsString
from cpython.bytes cimport PyBytes_AsStringAndSize
from libc.float cimport DBL_MAX
from libc.limits cimport INT_MAX
from libc.math cimport INFINITY, modf


cdef extern from "jv.h":
    ctypedef enum jv_kind:
        JV_KIND_INVALID,
        JV_KIND_NULL,
        JV_KIND_FALSE,
        JV_KIND_TRUE,
        JV_KIND_NUMBER,
        JV_KIND_STRING,
        JV_KIND_ARRAY,
        JV_KIND_OBJECT

    ctypedef struct jv:
        pass

    jv_kind jv_get_kind(jv)
    int jv_is_valid(jv)
    jv jv_copy(jv)
    void jv_free(jv)
    jv jv_invalid_get_msg(jv)
    int jv_invalid_has_msg(jv)
    char* jv_string_value(jv)
    jv jv_dump_string(jv, int flags)
    int jv_string_length_bytes(jv)
    int jv_is_integer(jv)
    double jv_number_value(jv)
    jv jv_array()
    jv jv_array_append(jv, jv)
    int jv_array_length(jv)
    jv jv_array_get(jv, int)
    int jv_object_iter(jv)
    int jv_object_iter_next(jv, int)
    int jv_object_iter_valid(jv, int)
    jv jv_object_iter_key(jv, int)
    jv jv_object_iter_value(jv, int)

    cdef struct jv_parser:
        pass

    jv_parser* jv_parser_new(int)
    void jv_parser_free(jv_parser*)
    void jv_parser_set_buf(jv_parser*, const char*, int, int)
    jv jv_parser_next(jv_parser*)

    jv jv_parse(const char*)


cdef extern from "jq.h":
    ctypedef struct jq_state:
        pass

    ctypedef void (*jq_err_cb)(void *, jv)

    jq_state *jq_init()
    void jq_teardown(jq_state **)
    int jq_compile(jq_state *, const char* str)
    int jq_compile_args(jq_state *, const char* str, jv)
    void jq_start(jq_state *, jv value, int flags)
    jv jq_next(jq_state *)
    void jq_set_error_cb(jq_state *, jq_err_cb, void *)
    void jq_get_error_cb(jq_state *, jq_err_cb *, void **)


cdef object _jv_to_python(jv value):
    """Unpack a jv value into a Python value.

    Takes ownership of ``value``: the reference is released before this
    function returns, whether it succeeds or raises.

    Numbers are mapped as follows: +/-infinity is clamped to +/-DBL_MAX,
    NaN becomes ``None``, values with no fractional part become ``int``,
    and everything else becomes ``float``.

    Raises ValueError if ``value`` is invalid or of an unknown kind.
    """
    cdef jv_kind kind = jv_get_kind(value)
    cdef int idx
    cdef jv property_key
    cdef jv property_value
    cdef object python_value
    cdef object converted
    cdef double number_value

    try:
        if kind == JV_KIND_INVALID:
            raise ValueError("Invalid value")
        elif kind == JV_KIND_NULL:
            python_value = None
        elif kind == JV_KIND_FALSE:
            python_value = False
        elif kind == JV_KIND_TRUE:
            python_value = True
        elif kind == JV_KIND_NUMBER:
            number_value = jv_number_value(value)
            if number_value == INFINITY:
                python_value = DBL_MAX
            elif number_value == -INFINITY:
                python_value = -DBL_MAX
            elif number_value != number_value:
                # NaN is the only value that is not equal to itself.
                python_value = None
            elif _is_integer(number_value):
                python_value = int(number_value)
            else:
                python_value = number_value
        elif kind == JV_KIND_STRING:
            python_value = jv_string_to_py_string(value)
        elif kind == JV_KIND_ARRAY:
            python_value = []
            # The array accessors are handed a copy so that `value` itself
            # stays alive for the whole loop.
            for idx in range(0, jv_array_length(jv_copy(value))):
                property_value = jv_array_get(jv_copy(value), idx)
                # The recursive call releases property_value.
                python_value.append(_jv_to_python(property_value))
        elif kind == JV_KIND_OBJECT:
            python_value = {}
            idx = jv_object_iter(value)
            while jv_object_iter_valid(value, idx):
                property_key = jv_object_iter_key(value, idx)
                property_value = jv_object_iter_value(value, idx)
                try:
                    # Convert the value first: the recursive call releases
                    # property_value even if the key conversion then fails.
                    converted = _jv_to_python(property_value)
                    python_value[jv_string_to_py_string(property_key)] = converted
                finally:
                    jv_free(property_key)
                idx = jv_object_iter_next(value, idx)
        else:
            raise ValueError("Invalid value kind: " + str(kind))
        return python_value
    finally:
        # Release our reference on every path, including errors.
        jv_free(value)


cdef int _is_integer(double value) noexcept:
    """Return non-zero if ``value`` has no fractional part."""
    cdef double integral_part
    cdef double fractional_part = modf(value, &integral_part)

    return fractional_part == 0


def compile(object program, args=None):
    """Compile a jq program given as a string.

    ``args``, if given, is serialised with ``json.dumps`` and passed to jq
    as the program's arguments.

    Raises ValueError if the program does not compile.
    """
    cdef object program_bytes = program.encode("utf8")
    return _Program(program_bytes, args=args)


# Serialises compilation across threads.
_compilation_lock = threading.Lock()


cdef jq_state* _compile(object program_bytes, object args) except NULL:
    """Create a jq state and compile ``program_bytes`` into it.

    Returns a state the caller must eventually pass to ``jq_teardown``.
    On any failure the state is torn down here and the exception is
    re-raised; compilation errors are raised as ValueError carrying the
    messages jq reported.
    """
    cdef jq_state *jq = jq_init()
    cdef _ErrorStore error_store
    cdef jv jv_args
    cdef int compiled
    try:
        if not jq:
            raise Exception("jq_init failed")

        error_store = _ErrorStore()

        with _compilation_lock:
            jq_set_error_cb(jq, _store_error, <void*>error_store)
            try:
                if args is None:
                    compiled = jq_compile(jq, program_bytes)
                else:
                    args_bytes = json.dumps(args).encode("utf-8")
                    jv_args = jv_parse(PyBytes_AsString(args_bytes))
                    compiled = jq_compile_args(jq, program_bytes, jv_args)
            finally:
                # error_store only lives for the duration of this call, so
                # the state must not keep a pointer to it. Passing NULL
                # restores jq's default error callback.
                jq_set_error_cb(jq, NULL, NULL)

            if error_store.has_errors():
                raise ValueError(error_store.error_string())

        if not compiled:
            raise ValueError("program was not valid")
    except:
        jq_teardown(&jq)
        raise

    return jq


cdef void _store_error(void* store_ptr, jv error) noexcept:
    """jq error callback: record ``error`` in the _ErrorStore at ``store_ptr``.

    Owns ``error`` and releases it before returning.
    """
    cdef _ErrorStore store = <_ErrorStore>store_ptr

    try:
        error_string = _jq_error_to_py_string(error)
        store.store_error(error_string)
    finally:
        jv_free(error)


cdef unicode _jq_error_to_py_string(jv error) noexcept:
    """Render a jq error value as text.

    Strings are returned as they are; any other value is rendered as JSON.
    ``error`` is only borrowed: the caller remains responsible for
    releasing it.
    """
    if jv_get_kind(error) == JV_KIND_STRING:
        try:
            # jv_string_to_py_string does not release its argument.
            return jv_string_to_py_string(error)
        except Exception:
            return u"Internal error"
    else:
        # _jv_to_python releases whatever it is given, so hand it a
        # reference of its own.
        return json.dumps(_jv_to_python(jv_copy(error)))


cdef class _ErrorStore(object):
    """Collects the error messages reported through the jq error callback."""
    cdef object _errors

    def __cinit__(self):
        self.clear()

    cdef int has_errors(self):
        """Return the number of stored errors (non-zero if there are any)."""
        return len(self._errors)

    cdef object error_string(self):
        """Return all stored errors joined by newlines."""
        return "\n".join(self._errors)

    cdef void store_error(self, unicode error):
        """Append ``error`` to the stored errors."""
        self._errors.append(error)

    cdef void clear(self):
        """Discard all stored errors."""
        self._errors = []


class _EmptyValue(object):
    """Type of the sentinel used to detect arguments that were not passed."""
    pass

_NO_VALUE = _EmptyValue()


cdef class _JqStatePool(object):
    """Holds at most one idle compiled jq state for a program.

    ``acquire`` hands out the idle state if there is one and otherwise
    compiles a fresh one; ``release`` keeps a returned state if the slot is
    empty and tears it down otherwise.
    """
    cdef jq_state* _jq_state
    cdef object _program_bytes
    cdef object _args
    cdef object _lock

    def __cinit__(self, program_bytes, args):
        self._program_bytes = program_bytes
        self._args = args
        # Compiling eagerly surfaces program errors when the pool is built.
        self._jq_state = _compile(self._program_bytes, args=self._args)
        self._lock = threading.Lock()

    def __dealloc__(self):
        jq_teardown(&self._jq_state)

    cdef jq_state* acquire(self):
        """Return a compiled state; the caller must hand it to ``release``."""
        with self._lock:
            if self._jq_state == NULL:
                return _compile(self._program_bytes, args=self._args)
            else:
                state = self._jq_state
                self._jq_state = NULL
                return state

    cdef void release(self, jq_state* state):
        """Take back ``state``, keeping it for reuse or tearing it down."""
        with self._lock:
            if self._jq_state == NULL:
                self._jq_state = state
            else:
                jq_teardown(&state)


cdef class _Program(object):
    """A compiled jq program that can be applied to inputs."""
    cdef object _program_bytes
    cdef _JqStatePool _jq_state_pool

    def __cinit__(self, program_bytes, args):
        self._program_bytes = program_bytes
        self._jq_state_pool = _JqStatePool(program_bytes, args=args)

    def input(self, value=_NO_VALUE, text=_NO_VALUE):
        """Bind the program to an input given as exactly one of:

        - ``value``: a Python value, serialised with ``json.dumps``
        - ``text``: a string of JSON text

        Raises ValueError if both or neither are passed.
        """
        if (value is _NO_VALUE) == (text is _NO_VALUE):
            raise ValueError("Either the value or text argument should be set")

        if text is not _NO_VALUE:
            return self.input_text(text)
        else:
            return self.input_value(value)

    def input_value(self, value):
        """Bind the program to a single Python value."""
        return self.input_text(json.dumps(value))

    def input_values(self, values):
        """Bind the program to several Python values, one input per value."""
        fileobj = io.StringIO()
        for value in values:
            json.dump(value, fileobj)
            fileobj.write("\n")
        return self.input_text(fileobj.getvalue())

    def input_text(self, text, *, slurp=False):
        """Bind the program to JSON text.

        With ``slurp=True`` all values in ``text`` are collected into one
        array that is passed to the program as a single input.
        """
        return _ProgramWithInput(self._jq_state_pool, text.encode("utf8"), slurp=slurp)

    @property
    def program_string(self):
        """The source text of the program."""
        return self._program_bytes.decode("utf8")

    def __repr__(self):
        return "jq.compile({!r})".format(self.program_string)

    # Support the 0.1.x API for backwards compatibility
    def transform(self, value=_NO_VALUE, text=_NO_VALUE, text_output=False, multiple_output=False):
        program_with_input = self.input(value, text=text)
        if text_output:
            return program_with_input.text()
        elif multiple_output:
            return program_with_input.all()
        else:
            return program_with_input.first()


cdef class _ProgramWithInput(object):
    """A compiled program paired with its input; iterating yields the outputs."""
    cdef _JqStatePool _jq_state_pool
    cdef object _bytes_input
    cdef bint _slurp

    def __cinit__(self, jq_state_pool, bytes_input, *, bint slurp):
        self._jq_state_pool = jq_state_pool
        self._bytes_input = bytes_input
        self._slurp = slurp

    def __iter__(self):
        return self._make_iterator()

    cdef _ResultIterator _make_iterator(self):
        return _ResultIterator(self._jq_state_pool, self._bytes_input, slurp=self._slurp)

    def text(self):
        """Return the outputs as JSON text, one output per line."""
        # Performance testing suggests that using _jv_to_python (within the
        # result iterator) followed by json.dumps is faster than using
        # jv_dump_string to generate the string directly from the jv values.
        # See: https://github.com/mwilliamson/jq.py/pull/50
        return "\n".join(json.dumps(v) for v in self)

    def all(self):
        """Return every output as a list."""
        return list(self)

    def first(self):
        """Return the first output; raises StopIteration if there is none."""
        return next(_iter(self))


cdef class _ResultIterator(object):
    """Iterator that runs the program over each input and yields the outputs."""
    cdef _JqStatePool _jq_state_pool
    cdef jq_state* _jq
    cdef jv_parser* _parser
    cdef bytes _bytes_input
    cdef bint _slurp
    cdef bint _ready

    def __dealloc__(self):
        # __cinit__ may have failed part-way through, so either resource
        # can still be unset here.
        if self._jq != NULL:
            if self._jq_state_pool is not None:
                self._jq_state_pool.release(self._jq)
            else:
                jq_teardown(&self._jq)
            self._jq = NULL
        if self._parser != NULL:
            jv_parser_free(self._parser)
            self._parser = NULL

    def __cinit__(self, _JqStatePool jq_state_pool not None,
                  bytes bytes_input not None, *, bint slurp):
        cdef char* cbytes_input
        cdef Py_ssize_t clen_input

        self._jq_state_pool = jq_state_pool
        # Keeping a reference to the bytes object keeps the buffer handed
        # to the parser alive for as long as the iterator exists.
        self._bytes_input = bytes_input
        self._slurp = slurp
        self._ready = False

        # Validate the buffer before acquiring anything that needs cleanup.
        PyBytes_AsStringAndSize(bytes_input, &cbytes_input, &clen_input)
        if clen_input > INT_MAX:
            # jv_parser_set_buf takes the length as a C int.
            raise ValueError("input is too large")

        self._jq = jq_state_pool.acquire()
        self._parser = jv_parser_new(0)
        jv_parser_set_buf(self._parser, cbytes_input, <int>clen_input, 0)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next output of the program.

        Raises ValueError if jq reports an error or the input cannot be
        parsed, and StopIteration once every input has been processed.
        """
        cdef jv result
        cdef jv error_message

        while True:
            if not self._ready:
                self._ready_next_input()
                self._ready = True

            result = jq_next(self._jq)
            if jv_is_valid(result):
                return _jv_to_python(result)
            elif jv_invalid_has_msg(jv_copy(result)):
                error_message = jv_invalid_get_msg(result)
                try:
                    message = _jq_error_to_py_string(error_message)
                finally:
                    jv_free(error_message)
                raise ValueError(message)
            else:
                # No more outputs for the current input: move to the next.
                jv_free(result)
                self._ready = False

    cdef bint _ready_next_input(self) except 1:
        """Parse the next input (or slurp all of them) and start jq on it.

        Raises StopIteration when no inputs are left.
        """
        cdef int jq_flags = 0
        cdef jv value
        cdef jv next_value

        if self._slurp:
            value = jv_array()
            try:
                while True:
                    next_value = self._parse_next_input()
                    value = jv_array_append(value, next_value)
            except StopIteration:
                # Everything has been slurped into one array; the next call
                # goes through the non-slurp path and ends the iteration.
                self._slurp = False
            except:
                # Do not leak the partially built array on a parse error.
                jv_free(value)
                raise
        else:
            value = self._parse_next_input()

        jq_start(self._jq, value, jq_flags)
        return 0

    cdef inline jv _parse_next_input(self) except *:
        """Return the next value parsed from the input.

        Raises ValueError on a parse error and StopIteration once the
        parser has no more values.
        """
        cdef jv value = jv_parser_next(self._parser)
        cdef jv error_message

        if jv_is_valid(value):
            return value
        elif jv_invalid_has_msg(jv_copy(value)):
            error_message = jv_invalid_get_msg(value)
            try:
                message = _jq_error_to_py_string(error_message)
            finally:
                jv_free(error_message)
            raise ValueError(u"parse error: " + message)
        else:
            jv_free(value)
            raise StopIteration()


def all(program, value=_NO_VALUE, text=_NO_VALUE):
    """Compile ``program``, run it on the input and return all outputs as a list."""
    return compile(program).input(value, text=text).all()


def first(program, value=_NO_VALUE, text=_NO_VALUE):
    """Compile ``program``, run it on the input and return the first output."""
    return compile(program).input(value, text=text).first()


# Keep a handle on the builtin before the module-level ``iter`` shadows it.
_iter = iter


def iter(program, value=_NO_VALUE, text=_NO_VALUE):
    """Compile ``program``, run it on the input and return an iterator of outputs."""
    return _iter(compile(program).input(value, text=text))


def text(program, value=_NO_VALUE, text=_NO_VALUE):
    """Compile ``program``, run it on the input and return the outputs as JSON text."""
    return compile(program).input(value, text=text).text()


# Support the 0.1.x API for backwards compatibility
def jq(object program):
    return compile(program)


cdef unicode jv_string_to_py_string(jv value):
    """Decode a jv string as UTF-8 into a Python string.

    Does not release ``value``; the caller keeps ownership.
    """
    # The length accessor is given a copy so that `value` stays alive.
    cdef int length = jv_string_length_bytes(jv_copy(value))
    cdef char* string_value = jv_string_value(value)
    return string_value[:length].decode("utf-8")