# extensions.pxi
# support for extension functions in XPath and XSLT

cdef class XPathError(LxmlError):
    """Base class of all XPath errors.
    """

cdef class XPathEvalError(XPathError):
    """Error during XPath evaluation.
    """

cdef class XPathFunctionError(XPathEvalError):
    """Internal error looking up an XPath extension function.
    """

cdef class XPathResultError(XPathEvalError):
    """Error handling an XPath result.
    """


# forward declarations

ctypedef int (*_register_function)(void* ctxt, name_utf, ns_uri_utf)
cdef class _ExsltRegExp

################################################################################
# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ...

@cython.internal
cdef class _BaseContext:
    cdef xpath.xmlXPathContext* _xpathCtxt
    cdef _Document _doc
    # {(ns_utf, name_utf): function} or None
    cdef dict _extensions
    # [(prefix_utf, ns_uri_utf)] or None
    cdef list _namespaces
    # [prefix_utf] of the prefixes registered via the "global" methods
    cdef list _global_namespaces
    # {original string: UTF-8 encoded bytes}, see "_to_utf()"
    cdef dict _utf_refs
    # {ns_utf: {name_utf: function}}
    cdef dict _function_cache
    cdef dict _eval_context_dict
    cdef bint _build_smart_strings
    # for exception handling and temporary reference keeping:
    cdef _TempStore _temp_refs
    cdef set _temp_documents
    cdef _ExceptionContext _exc
    cdef _ErrorLog _error_log

    def __cinit__(self):
        self._xpathCtxt = NULL

    def __init__(self, namespaces, extensions, error_log, enable_regexp,
                 build_smart_strings):
        # 'extensions' may be None, a single dict or an iterable of dicts
        # that map (ns_uri, name) to a function.
        # 'namespaces' may be None, a dict or an iterable of
        # (prefix, ns_uri) pairs.
        # Raises ValueError for an extension name of None and TypeError
        # for an empty/None namespace prefix or namespace URI.
        cdef _ExsltRegExp _regexp
        cdef dict new_extensions
        cdef list ns
        self._utf_refs = {}
        self._global_namespaces = []
        self._function_cache = {}
        self._eval_context_dict = None
        self._error_log = error_log

        if extensions is not None:
            # convert extensions to UTF-8
            if isinstance(extensions, dict):
                extensions = (extensions,)
            # format: [ {(ns, name):function} ] -> {(ns_utf, name_utf):function}
            new_extensions = {}
            for extension in extensions:
                for (ns_uri, name), function in extension.items():
                    if name is None:
                        raise ValueError, u"extensions must have non empty names"
                    ns_utf = self._to_utf(ns_uri)
                    name_utf = self._to_utf(name)
                    new_extensions[(ns_utf, name_utf)] = function
            extensions = new_extensions or None

        if namespaces is not None:
            if isinstance(namespaces, dict):
                namespaces = namespaces.items()
            if namespaces:
                ns = []
                for prefix, ns_uri in namespaces:
                    if prefix is None or not prefix:
                        raise TypeError, \
                            u"empty namespace prefix is not supported in XPath"
                    if ns_uri is None or not ns_uri:
                        raise TypeError, \
                            u"setting default namespace is not supported in XPath"
                    prefix_utf = self._to_utf(prefix)
                    ns_uri_utf = self._to_utf(ns_uri)
                    ns.append( (prefix_utf, ns_uri_utf) )
                namespaces = ns
            else:
                namespaces = None

        self._doc = None
        self._exc = _ExceptionContext()
        self._extensions = extensions
        self._namespaces = namespaces
        self._temp_refs = _TempStore()
        self._temp_documents = set()
        self._build_smart_strings = build_smart_strings

        if enable_regexp:
            _regexp = _ExsltRegExp()
            _regexp._register_in_context(self)

    cdef _BaseContext _copy(self):
        # Build a new context of the same class that gets its own copies
        # of the namespace list and of the extension function mapping.
        # The regexp functions are not registered again (enable_regexp is
        # passed as False); whatever is in self._extensions is copied.
        cdef _BaseContext context
        if self._namespaces is not None:
            namespaces = self._namespaces[:]
        else:
            namespaces = None
        context = self.__class__(namespaces, None, self._error_log, False,
                                 self._build_smart_strings)
        if self._extensions is not None:
            context._extensions = self._extensions.copy()
        return context

    cdef bytes _to_utf(self, s):
        u"Convert to UTF-8 and keep a reference to the encoded string"
        cdef python.PyObject* dict_result
        if s is None:
            return None
        dict_result = python.PyDict_GetItem(self._utf_refs, s)
        if dict_result is not NULL:
            return <bytes>dict_result
        utf = _utf8(s)
        self._utf_refs[s] = utf
        if python.IS_PYPY:
            # use C level refs, PyPy refs are not enough!
            python.Py_INCREF(utf)
        return utf

    cdef void _set_xpath_context(self, xpath.xmlXPathContext* xpathCtxt):
        # Remember the libxml2 context and point its user data and error
        # handler back at this object.
        self._xpathCtxt = xpathCtxt
        xpathCtxt.userData = <void*>self
        xpathCtxt.error = _receiveXPathError

    @cython.final
    cdef _register_context(self, _Document doc):
        # Set the document for the evaluation and reset the stored exception.
        self._doc = doc
        self._exc.clear()

    @cython.final
    cdef _cleanup_context(self):
        #xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
        #self.unregisterGlobalNamespaces()
        if python.IS_PYPY:
            # clean up double refs in PyPy (see "_to_utf()" method)
            for ref in self._utf_refs.itervalues():
                python.Py_DECREF(ref)
        self._utf_refs.clear()
        self._eval_context_dict = None
        self._doc = None

    @cython.final
    cdef _release_context(self):
        # Detach from the libxml2 context (if any) without freeing it.
        if self._xpathCtxt is not NULL:
            self._xpathCtxt.userData = NULL
            self._xpathCtxt = NULL

    # namespaces (internal UTF-8 methods with leading '_')

    cdef addNamespace(self, prefix, ns_uri):
        # Add or replace a prefix mapping in the local namespace list and,
        # if an XPath context is currently set, register it there as well.
        cdef list namespaces
        if prefix is None:
            raise TypeError, u"empty prefix is not supported in XPath"
        prefix_utf = self._to_utf(prefix)
        ns_uri_utf = self._to_utf(ns_uri)
        new_item = (prefix_utf, ns_uri_utf)
        if self._namespaces is None:
            self._namespaces = [new_item]
        else:
            namespaces = []
            for item in self._namespaces:
                if item[0] == prefix_utf:
                    # replace the existing entry in place
                    item = new_item
                    new_item = None
                namespaces.append(item)
            if new_item is not None:
                # prefix was not known before => append
                namespaces.append(new_item)
            self._namespaces = namespaces
        if self._xpathCtxt is not NULL:
            xpath.xmlXPathRegisterNs(
                self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef registerNamespace(self, prefix, ns_uri):
        # Register a prefix directly in the XPath context and remember it
        # in the list of global prefixes.
        if prefix is None:
            raise TypeError, u"empty prefix is not supported in XPath"
        prefix_utf = self._to_utf(prefix)
        ns_uri_utf = self._to_utf(ns_uri)
        self._global_namespaces.append(prefix_utf)
        xpath.xmlXPathRegisterNs(self._xpathCtxt,
                                 _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef registerLocalNamespaces(self):
        # Register all prefixes of the local namespace list in the
        # XPath context.
        if self._namespaces is None:
            return
        for prefix_utf, ns_uri_utf in self._namespaces:
            xpath.xmlXPathRegisterNs(
                self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef registerGlobalNamespaces(self):
        # Register the prefixes returned by _find_all_extension_prefixes()
        # and remember them for unregisterGlobalNamespaces().
        cdef list ns_prefixes = _find_all_extension_prefixes()
        if python.PyList_GET_SIZE(ns_prefixes) > 0:
            for prefix_utf, ns_uri_utf in ns_prefixes:
                self._global_namespaces.append(prefix_utf)
                xpath.xmlXPathRegisterNs(
                    self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))

    cdef unregisterGlobalNamespaces(self):
        # Re-register each remembered global prefix with a NULL URI and
        # empty the list.
        if python.PyList_GET_SIZE(self._global_namespaces) > 0:
            for prefix_utf in self._global_namespaces:
                xpath.xmlXPathRegisterNs(self._xpathCtxt,
                                         _xcstr(prefix_utf), NULL)
            del self._global_namespaces[:]

    cdef void _unregisterNamespace(self, prefix_utf):
        xpath.xmlXPathRegisterNs(self._xpathCtxt,
                                 _xcstr(prefix_utf), NULL)

    # extension functions

    cdef int _addLocalExtensionFunction(self, ns_utf, name_utf, function) except -1:
        # Store a function in the local extension mapping, creating the
        # mapping on first use.
        if self._extensions is None:
            self._extensions = {}
        self._extensions[(ns_utf, name_utf)] = function
        return 0

    cdef registerGlobalFunctions(self, void* ctxt,
                                 _register_function reg_func):
        # Copy all functions from __FUNCTION_NAMESPACE_REGISTRIES into the
        # function cache and call reg_func() for each of them.
        cdef python.PyObject* dict_result
        cdef dict d
        for ns_utf, ns_functions in __FUNCTION_NAMESPACE_REGISTRIES.iteritems():
            dict_result = python.PyDict_GetItem(
                self._function_cache, ns_utf)
            if dict_result is not NULL:
                d = <dict>dict_result
            else:
                d = {}
                self._function_cache[ns_utf] = d
            for name_utf, function in ns_functions.iteritems():
                d[name_utf] = function
                reg_func(ctxt, name_utf, ns_utf)

    cdef registerLocalFunctions(self, void* ctxt,
                                _register_function reg_func):
        # Copy all local extension functions into the function cache and
        # call reg_func() for each of them.
        cdef python.PyObject* dict_result
        cdef dict d
        if self._extensions is None:
            return # done
        last_ns = None
        d = None
        for (ns_utf, name_utf), function in self._extensions.iteritems():
            # only look up the per-namespace dict again when the namespace
            # object differs from the one of the previous entry
            if ns_utf is not last_ns or d is None:
                last_ns = ns_utf
                dict_result = python.PyDict_GetItem(
                    self._function_cache, ns_utf)
                if dict_result is not NULL:
                    d = <dict>dict_result
                else:
                    d = {}
                    self._function_cache[ns_utf] = d
            d[name_utf] = function
            reg_func(ctxt, name_utf, ns_utf)

    cdef unregisterAllFunctions(self, void* ctxt,
                                _register_function unreg_func):
        # Call unreg_func() for every function in the function cache.
        for ns_utf, functions in self._function_cache.iteritems():
            for name_utf in functions:
                unreg_func(ctxt, name_utf, ns_utf)

    cdef unregisterGlobalFunctions(self, void* ctxt,
                                   _register_function unreg_func):
        # Call unreg_func() for every cached function that is not part of
        # the local extension mapping.
        for ns_utf, functions in self._function_cache.items():
            for name_utf in functions:
                if self._extensions is None or \
                       (ns_utf, name_utf) not in self._extensions:
                    unreg_func(ctxt, name_utf, ns_utf)

    @cython.final
    cdef _find_cached_function(self, const_xmlChar* c_ns_uri, const_xmlChar* c_name):
        u"""Lookup an extension function in the cache and return it.

        Parameters: c_ns_uri may be NULL, c_name must not be NULL
        """
        cdef python.PyObject* c_dict
        cdef python.PyObject* dict_result
        c_dict = python.PyDict_GetItem(
            self._function_cache, None if c_ns_uri is NULL else c_ns_uri)
        if c_dict is not NULL:
            dict_result = python.PyDict_GetItem(
                <object>c_dict, <unsigned char*>c_name)
            if dict_result is not NULL:
                return <object>dict_result
        return None

    # Python access to the XPath context for extension functions

    @property
    def context_node(self):
        cdef xmlNode* c_node
        if self._xpathCtxt is NULL:
            raise XPathError, \
                u"XPath context is only usable during the evaluation"
        c_node = self._xpathCtxt.node
        if c_node is NULL:
            raise XPathError, u"no context node"
        if c_node.doc != self._xpathCtxt.doc:
            raise XPathError, \
                u"document-external context nodes are not supported"
        if self._doc is None:
            raise XPathError, u"document context is missing"
        return _elementFactory(self._doc, c_node)

    @property
    def eval_context(self):
        # created lazily, dropped again in _cleanup_context()
        if self._eval_context_dict is None:
            self._eval_context_dict = {}
        return self._eval_context_dict

    # Python reference keeping during XPath function evaluation

    @cython.final
    cdef _release_temp_refs(self):
        u"Free temporarily referenced objects from this context."
        self._temp_refs.clear()
        self._temp_documents.clear()

    @cython.final
    cdef _hold(self, obj):
        u"""A way to temporarily hold references to nodes in the evaluator.

        This is needed because otherwise nodes created in XPath extension
        functions would be reference counted too soon, during the XPath
        evaluation.  This is most important in the case of exceptions.
        """
        cdef _Element element
        if isinstance(obj, _Element):
            self._temp_refs.add(obj)
            self._temp_documents.add((<_Element>obj)._doc)
            return
        elif _isString(obj) or not python.PySequence_Check(obj):
            # strings and non-sequences cannot contain elements
            return
        for o in obj:
            if isinstance(o, _Element):
                #print "Holding element:", <int>element._c_node
                self._temp_refs.add(o)
                #print "Holding document:", <int>element._doc._c_doc
                self._temp_documents.add((<_Element>o)._doc)

    @cython.final
    cdef _Document _findDocumentForNode(self, xmlNode* c_node):
        u"""If an XPath expression returns an element from a different
        document than the current context document, we call this to
        see if it was possibly created by an extension and is a known
        document instance.
        """
        cdef _Document doc
        for doc in self._temp_documents:
            if doc is not None and doc._c_doc is c_node.doc:
                return doc
        return None


# libxml2 keeps these error messages in a static array in its code
# and doesn't give us access to them ...

cdef tuple LIBXML2_XPATH_ERROR_MESSAGES = (
    b"Ok",
    b"Number encoding",
    b"Unfinished literal",
    b"Start of literal",
    b"Expected $ for variable reference",
    b"Undefined variable",
    b"Invalid predicate",
    b"Invalid expression",
    b"Missing closing curly brace",
    b"Unregistered function",
    b"Invalid operand",
    b"Invalid type",
    b"Invalid number of arguments",
    b"Invalid context size",
    b"Invalid context position",
    b"Memory allocation error",
    b"Syntax error",
    b"Resource error",
    b"Sub resource error",
    b"Undefined namespace prefix",
    b"Encoding error",
    b"Char out of XML range",
    b"Invalid or incomplete context",
    b"Stack usage error",
    b"Forbidden variable\n",
    b"?? Unknown error ??\n",
)

cdef void _forwardXPathError(void* c_ctxt, xmlerror.xmlError* c_error) with gil:
    # Copy the relevant fields of the libxml2 error into a local struct,
    # filling in a message from the table above if libxml2 provided none,
    # and pass it to the error log of the context.
    cdef xmlerror.xmlError error
    cdef int xpath_code
    if c_error.message is not NULL:
        error.message = c_error.message
    else:
        xpath_code = c_error.code - xmlerror.XML_XPATH_EXPRESSION_OK
        if 0 <= xpath_code < len(LIBXML2_XPATH_ERROR_MESSAGES):
            error.message = _cstr(LIBXML2_XPATH_ERROR_MESSAGES[xpath_code])
        else:
            error.message = b"unknown error"
    error.domain = c_error.domain
    error.code = c_error.code
    error.level = c_error.level
    error.line = c_error.line
    error.int2 = c_error.int1 # column
    error.file = c_error.file
    error.node = NULL

    (<_BaseContext>c_ctxt)._error_log._receive(&error)

cdef void _receiveXPathError(void* c_context, xmlerror.xmlError* error) nogil:
    # Error callback installed by _BaseContext._set_xpath_context().
    if not __DEBUG:
        return
    if c_context is NULL:
        _forwardError(NULL, error)
    else:
        _forwardXPathError(c_context, error)


def Extension(module, function_mapping=None, *, ns=None):
    u"""Extension(module, function_mapping=None, ns=None)

    Build a dictionary of extension functions from the functions
    defined in a module or the methods of an object.

    As second argument, you can pass an additional mapping of
    attribute names to XPath function names, or a list of function
    names that should be taken.

    The ``ns`` keyword argument accepts a namespace URI for the XPath
    functions.
    """
    cdef dict functions = {}
    if isinstance(function_mapping, dict):
        for function_name, xpath_name in function_mapping.items():
            functions[(ns, xpath_name)] = getattr(module, function_name)
    else:
        if function_mapping is None:
            # default: all attributes that do not start with an underscore
            function_mapping = [ name for name in dir(module)
                                 if not name.startswith(u'_') ]
        for function_name in function_mapping:
            functions[(ns, function_name)] = getattr(module, function_name)
    return functions

################################################################################
# EXSLT regexp implementation

@cython.final
@cython.internal
cdef class _ExsltRegExp:
    # {(pattern string, ignore_case): compiled pattern}
    cdef dict _compile_map
    def __cinit__(self):
        self._compile_map = {}

    cdef _make_string(self, value):
        # Convert an XPath function argument into a string.
        if _isString(value):
            return value
        elif isinstance(value, list):
            # node set: take recursive text concatenation of first element
            if python.PyList_GET_SIZE(value) == 0:
                return u''
            firstnode = value[0]
            if _isString(firstnode):
                return firstnode
            elif isinstance(firstnode, _Element):
                c_text = tree.xmlNodeGetContent((<_Element>firstnode)._c_node)
                if c_text is NULL:
                    raise MemoryError()
                try:
                    return funicode(c_text)
                finally:
                    tree.xmlFree(c_text)
            else:
                return unicode(firstnode)
        else:
            return unicode(value)

    cdef _compile(self, rexp, ignore_case):
        # Return the compiled pattern for (rexp, ignore_case), compiling
        # and caching it on first use.
        cdef python.PyObject* c_result
        rexp = self._make_string(rexp)
        key = (rexp, ignore_case)
        c_result = python.PyDict_GetItem(self._compile_map, key)
        if c_result is not NULL:
            return <object>c_result
        py_flags = re.UNICODE
        if ignore_case:
            py_flags = py_flags | re.IGNORECASE
        rexp_compiled = re.compile(rexp, py_flags)
        self._compile_map[key] = rexp_compiled
        return rexp_compiled

    def test(self, ctxt, s, rexp, flags=u''):
        # True if the pattern is found anywhere in the string.
        flags = self._make_string(flags)
        s = self._make_string(s)
        rexpc = self._compile(rexp, u'i' in flags)
        if rexpc.search(s) is None:
            return False
        else:
            return True

    def match(self, ctxt, s, rexp, flags=u''):
        # Returns a list of new <match> elements (children of a new
        # <matches> root), or an empty tuple if nothing matched.
        cdef list result_list
        flags = self._make_string(flags)
        s = self._make_string(s)
        rexpc = self._compile(rexp, u'i' in flags)
        if u'g' in flags:
            results = rexpc.findall(s)
            if not results:
                return ()
        else:
            result = rexpc.search(s)
            if not result:
                return ()
            results = [ result.group() ]
            results.extend( result.groups(u'') )
        result_list = []
        root = Element(u'matches')
        join_groups = u''.join
        for s_match in results:
            if python.PyTuple_CheckExact(s_match):
                # tuple of groups => concatenate them
                s_match = join_groups(s_match)
            elem = SubElement(root, u'match')
            elem.text = s_match
            result_list.append(elem)
        return result_list

    def replace(self, ctxt, s, rexp, flags, replacement):
        replacement = self._make_string(replacement)
        flags = self._make_string(flags)
        s = self._make_string(s)
        rexpc = self._compile(rexp, u'i' in flags)
        # count == 0 means "replace all" in re.sub()
        if u'g' in flags:
            count = 0
        else:
            count = 1
        return rexpc.sub(replacement, s, count)

    cdef _register_in_context(self, _BaseContext context):
        # Add test(), match() and replace() as local extension functions.
        ns = b"http://exslt.org/regular-expressions"
        context._addLocalExtensionFunction(ns, b"test",    self.test)
        context._addLocalExtensionFunction(ns, b"match",   self.match)
        context._addLocalExtensionFunction(ns, b"replace", self.replace)


################################################################################
# helper functions

cdef xpath.xmlXPathObject* _wrapXPathObject(object obj, _Document doc,
                                            _BaseContext context) except NULL:
    # Convert a Python value into a new libxml2 XPath object.
    # Strings, booleans and numbers map to the corresponding XPath types;
    # None, an _Element or a sequence become a node set.
    # Raises XPathResultError for unsupported values.
    cdef xpath.xmlNodeSet* resultSet
    cdef _Element fake_node = None
    cdef xmlNode* c_node

    if isinstance(obj, unicode):
        obj = _utf8(obj)
    if isinstance(obj, bytes):
        # libxml2 copies the string value
        return xpath.xmlXPathNewCString(_cstr(obj))
    if isinstance(obj, bool):
        return xpath.xmlXPathNewBoolean(obj)
    if python.PyNumber_Check(obj):
        return xpath.xmlXPathNewFloat(obj)
    if obj is None:
        resultSet = xpath.xmlXPathNodeSetCreate(NULL)
    elif isinstance(obj, _Element):
        resultSet = xpath.xmlXPathNodeSetCreate((<_Element>obj)._c_node)
    elif python.PySequence_Check(obj):
        resultSet = xpath.xmlXPathNodeSetCreate(NULL)
        try:
            for value in obj:
                if isinstance(value, _Element):
                    if context is not None:
                        context._hold(value)
                    xpath.xmlXPathNodeSetAdd(resultSet, (<_Element>value)._c_node)
                else:
                    if context is None or doc is None:
                        raise XPathResultError, \
                              f"Non-Element values not supported at this point - got {value!r}"
                    # support strings by appending text nodes to an Element
                    if isinstance(value, unicode):
                        value = _utf8(value)
                    if isinstance(value, bytes):
                        if fake_node is None:
                            fake_node = _makeElement("text-root", NULL, doc, None,
                                                     None, None, None, None, None)
                            context._hold(fake_node)
                        else:
                            # append a comment node to keep the text nodes separate
                            c_node = tree.xmlNewDocComment(doc._c_doc, <unsigned char*>"")
                            if c_node is NULL:
                                raise MemoryError()
                            tree.xmlAddChild(fake_node._c_node, c_node)
                        context._hold(value)
                        c_node = tree.xmlNewDocText(doc._c_doc, _xcstr(value))
                        if c_node is NULL:
                            raise MemoryError()
                        tree.xmlAddChild(fake_node._c_node, c_node)
                        xpath.xmlXPathNodeSetAdd(resultSet, c_node)
                    else:
                        raise XPathResultError, \
                              f"This is not a supported node-set result: {value!r}"
        except:
            # don't leak the node set on errors
            xpath.xmlXPathFreeNodeSet(resultSet)
            raise
    else:
        raise XPathResultError, f"Unknown return type: {python._fqtypename(obj).decode('utf8')}"
    return xpath.xmlXPathWrapNodeSet(resultSet)

cdef object _unwrapXPathObject(xpath.xmlXPathObject* xpathObj,
                               _Document doc, _BaseContext context):
    # Convert a libxml2 XPath object into a Python value.
    # Raises XPathResultError for undefined/unknown result types and
    # NotImplementedError for the unsupported ones.
    if xpathObj.type == xpath.XPATH_UNDEFINED:
        raise XPathResultError, u"Undefined xpath result"
    elif xpathObj.type == xpath.XPATH_NODESET:
        return _createNodeSetResult(xpathObj, doc, context)
    elif xpathObj.type == xpath.XPATH_BOOLEAN:
        return xpathObj.boolval
    elif xpathObj.type == xpath.XPATH_NUMBER:
        return xpathObj.floatval
    elif xpathObj.type == xpath.XPATH_STRING:
        stringval = funicode(xpathObj.stringval)
        if context._build_smart_strings:
            stringval = _elementStringResultFactory(
                stringval, None, None, 0)
        return stringval
    elif xpathObj.type == xpath.XPATH_POINT:
        raise NotImplementedError, u"XPATH_POINT"
    elif xpathObj.type == xpath.XPATH_RANGE:
        raise NotImplementedError, u"XPATH_RANGE"
    elif xpathObj.type == xpath.XPATH_LOCATIONSET:
        raise NotImplementedError, u"XPATH_LOCATIONSET"
    elif xpathObj.type == xpath.XPATH_USERS:
        raise NotImplementedError, u"XPATH_USERS"
    elif xpathObj.type == xpath.XPATH_XSLT_TREE:
        return _createNodeSetResult(xpathObj, doc, context)
    else:
        raise XPathResultError, f"Unknown xpath result {xpathObj.type}"

cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc,
                                 _BaseContext context):
    # Build a Python list from the entries of the node set of xpathObj
    # (an empty list if there is no node set).
    cdef xmlNode* c_node
    cdef int i
    cdef list result
    result = []
    if xpathObj.nodesetval is NULL:
        return result
    for i in range(xpathObj.nodesetval.nodeNr):
        c_node = xpathObj.nodesetval.nodeTab[i]
        _unpackNodeSetEntry(result, c_node, doc, context,
                            xpathObj.type == xpath.XPATH_XSLT_TREE)
    return result

cdef _unpackNodeSetEntry(list results, xmlNode* c_node, _Document doc,
                         _BaseContext context, bint is_fragment):
    # Append the Python representation of a single node set entry to
    # 'results'.  Raises NotImplementedError for unsupported node types.
    cdef xmlNode* c_child
    if _isElement(c_node):
        if c_node.doc != doc._c_doc and c_node.doc._private is NULL:
            # XXX: works, but maybe not always the right thing to do?
            # XPath: only runs when extensions create or copy trees
            #        -> we store Python refs to these, so that is OK
            # XSLT: can it leak when merging trees from multiple sources?
            c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
            # FIXME: call _instantiateElementFromXPath() instead?
        results.append(
            _fakeDocElementFactory(doc, c_node))
    elif c_node.type == tree.XML_TEXT_NODE or \
             c_node.type == tree.XML_CDATA_SECTION_NODE or \
             c_node.type == tree.XML_ATTRIBUTE_NODE:
        results.append(
            _buildElementStringResult(doc, c_node, context))
    elif c_node.type == tree.XML_NAMESPACE_DECL:
        results.append( (funicodeOrNone((<xmlNs*>c_node).prefix),
                         funicodeOrNone((<xmlNs*>c_node).href)) )
    elif c_node.type == tree.XML_DOCUMENT_NODE or \
            c_node.type == tree.XML_HTML_DOCUMENT_NODE:
        # ignored for everything but result tree fragments
        if is_fragment:
            c_child = c_node.children
            while c_child is not NULL:
                _unpackNodeSetEntry(results, c_child, doc, context, 0)
                c_child = c_child.next
    elif c_node.type == tree.XML_XINCLUDE_START or \
            c_node.type == tree.XML_XINCLUDE_END:
        pass
    else:
        raise NotImplementedError, \
            f"Not yet implemented result node type: {c_node.type}"

cdef void _freeXPathObject(xpath.xmlXPathObject* xpathObj):
    u"""Free the XPath object, but *never* free the *content* of node sets.
    Python dealloc will do that for us.
    """
    if xpathObj.nodesetval is not NULL:
        xpath.xmlXPathFreeNodeSet(xpathObj.nodesetval)
        xpathObj.nodesetval = NULL
    xpath.xmlXPathFreeObject(xpathObj)

cdef _Element _instantiateElementFromXPath(xmlNode* c_node, _Document doc,
                                           _BaseContext context):
    # NOTE: this may copy the element - only call this when it can't leak
    if c_node.doc != doc._c_doc and c_node.doc._private is NULL:
        # not from the context document and not from a fake document
        # either => may still be from a known document, e.g. one
        # created by an extension function
        node_doc = context._findDocumentForNode(c_node)
        if node_doc is None:
            # not from a known document at all! => can only make a
            # safety copy here
            c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
        else:
            doc = node_doc
    return _fakeDocElementFactory(doc, c_node)

################################################################################
# special str/unicode subclasses

@cython.final
cdef class _ElementUnicodeResult(unicode):
    cdef _Element _parent
    cdef readonly object attrname
    cdef readonly bint is_tail
    cdef readonly bint is_text
    cdef readonly bint is_attribute

    def getparent(self):
        return self._parent

cdef object _PyElementUnicodeResult
if python.IS_PYPY:
    class _PyElementUnicodeResult(unicode):
        # we need to use a Python class here, or PyPy will crash on creation
        # https://bitbucket.org/pypy/pypy/issues/2021/pypy3-pytype_ready-crashes-for-extension
        def getparent(self):
            return self._parent

class _ElementStringResult(bytes):
    # we need to use a Python class here, bytes cannot be C-subclassed
    # in Pyrex/Cython
    def getparent(self):
        return self._parent

cdef object _elementStringResultFactory(string_value, _Element parent,
                                        attrname, bint is_tail):
    # Wrap a string value in the matching "smart string" class and set
    # its parent/attrname/is_* attributes.
    cdef _ElementUnicodeResult uresult
    cdef bint is_text
    cdef bint is_attribute = attrname is not None
    if parent is None:
        is_text = 0
    else:
        is_text = not (is_tail or is_attribute)

    if type(string_value) is bytes:
        result = _ElementStringResult(string_value)
        result._parent = parent
        result.is_attribute = is_attribute
        result.is_tail = is_tail
        result.is_text = is_text
        result.attrname = attrname
        return result
    elif python.IS_PYPY:
        result = _PyElementUnicodeResult(string_value)
        result._parent = parent
        result.is_attribute = is_attribute
        result.is_tail = is_tail
        result.is_text = is_text
        result.attrname = attrname
        return result
    else:
        uresult = _ElementUnicodeResult(string_value)
        uresult._parent = parent
        uresult.is_attribute = is_attribute
        uresult.is_tail = is_tail
        uresult.is_text = is_text
        uresult.attrname = attrname
        return uresult

cdef object _buildElementStringResult(_Document doc, xmlNode* c_node,
                                      _BaseContext context):
    # Build the string result for a text, CDATA or attribute node.
    # Returns a plain string if the context does not build smart strings,
    # otherwise the result of _elementStringResultFactory().
    # Raises MemoryError if libxml2 fails to return the attribute content.
    cdef _Element parent = None
    cdef object attrname = None
    cdef xmlNode* c_element
    cdef bint is_tail

    if c_node.type == tree.XML_ATTRIBUTE_NODE:
        attrname = _namespacedName(c_node)
        is_tail = 0
        s = tree.xmlNodeGetContent(c_node)
        if s is NULL:
            # same handling as in _ExsltRegExp._make_string():
            # never pass a NULL pointer into funicode()
            raise MemoryError()
        try:
            value = funicode(s)
        finally:
            tree.xmlFree(s)
        c_element = NULL
    else:
        #assert c_node.type == tree.XML_TEXT_NODE or c_node.type == tree.XML_CDATA_SECTION_NODE, "invalid node type"
        # may be tail text or normal text
        value = funicode(c_node.content)
        c_element = _previousElement(c_node)
        is_tail = c_element is not NULL

    if not context._build_smart_strings:
        return value

    if c_element is NULL:
        # non-tail text or attribute text
        c_element = c_node.parent
        while c_element is not NULL and not _isElement(c_element):
            c_element = c_element.parent

    if c_element is not NULL:
        parent = _instantiateElementFromXPath(c_element, doc, context)

    return _elementStringResultFactory(
        value, parent, attrname, is_tail)

################################################################################
# callbacks for XPath/XSLT extension functions

cdef void _extension_function_call(_BaseContext context, function,
                                   xpath.xmlXPathParserContext* ctxt, int nargs):
    # Pop 'nargs' arguments from the XPath value stack, call the Python
    # function with (context, *args) and push its wrapped result.
    # Exceptions are stored in the context and reported to libxml2 as
    # XPATH_EXPR_ERROR.
    cdef _Document doc
    cdef xpath.xmlXPathObject* obj
    cdef list args
    cdef int i
    doc = context._doc
    try:
        args = []
        for i in range(nargs):
            obj = xpath.valuePop(ctxt)
            o = _unwrapXPathObject(obj, doc, context)
            _freeXPathObject(obj)
            args.append(o)
        # arguments were popped last-to-first
        args.reverse()

        res = function(context, *args)
        # wrap result for XPath consumption
        obj = _wrapXPathObject(res, doc, context)
        # prevent Python from deallocating elements handed to libxml2
        context._hold(res)
        xpath.valuePush(ctxt, obj)
    except:
        xpath.xmlXPathErr(ctxt, xpath.XPATH_EXPR_ERROR)
        context._exc._store_raised()
    finally:
        return  # swallow any further exceptions

# lookup the function by name and call it

cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt,
                               int nargs) with gil:
    cdef _BaseContext context
    cdef xpath.xmlXPathContext* rctxt = ctxt.context
    context = <_BaseContext> rctxt.userData
    try:
        function = context._find_cached_function(rctxt.functionURI, rctxt.function)
        if function is not None:
            _extension_function_call(context, function, ctxt, nargs)
        else:
            xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
            context._exc._store_exception(XPathFunctionError(
                f"XPath function '{_namespacedNameFromNsName(rctxt.functionURI, rctxt.function)}' not found"))
    except:
        # may not be the right error, but we need to tell libxml2 *something*
        xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
        context._exc._store_raised()
    finally:
        return  # swallow any further exceptions