readonlytree.pxi
# read-only tree implementation

@cython.internal
cdef class _ReadOnlyProxy:
    u"A read-only proxy class suitable for PIs/Comments (for internal use only!)."
    # Whether _freeReadOnlyProxies() should free the wrapped node.
    cdef bint _free_after_use
    # The wrapped libxml2 node; reset to NULL when the proxy is invalidated.
    cdef xmlNode* _c_node
    # The proxy that owns the list of dependent proxies (may be the proxy itself).
    cdef _ReadOnlyProxy _source_proxy
    # Only set on the source proxy: all proxies to invalidate together with it.
    cdef list _dependent_proxies

    def __cinit__(self):
        self._c_node = NULL
        self._free_after_use = 0

    cdef int _assertNode(self) except -1:
        u"""This is our way of saying: this proxy is invalid!

        Raises ReferenceError if the wrapped node was already released.
        """
        if not self._c_node:
            raise ReferenceError("Proxy invalidated!")
        return 0

    cdef int _raise_unsupported_type(self) except -1:
        u"""Raise TypeError naming the unsupported type of the wrapped node.
        """
        raise TypeError(f"Unsupported node type: {self._c_node.type}")

    cdef void free_after_use(self):
        u"""Should the xmlNode* be freed when releasing the proxy?
        """
        self._free_after_use = 1

    @property
    def tag(self):
        """Element tag

        The qualified name for elements, or the factory function
        (ProcessingInstruction, Comment, Entity) for other node types.
        """
        self._assertNode()
        if self._c_node.type == tree.XML_ELEMENT_NODE:
            return _namespacedName(self._c_node)
        elif self._c_node.type == tree.XML_PI_NODE:
            return ProcessingInstruction
        elif self._c_node.type == tree.XML_COMMENT_NODE:
            return Comment
        elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
            return Entity
        else:
            self._raise_unsupported_type()

    @property
    def text(self):
        """Text before the first subelement.  This is either a string or
        the value None, if there was no text.

        For PIs and comments this is the node content ('' if there is none),
        for entity references it is the '&name;' form.
        """
        self._assertNode()
        if self._c_node.type == tree.XML_ELEMENT_NODE:
            return _collectText(self._c_node.children)
        elif self._c_node.type in (tree.XML_PI_NODE,
                                   tree.XML_COMMENT_NODE):
            if self._c_node.content is NULL:
                return ''
            else:
                return funicode(self._c_node.content)
        elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
            return f'&{funicode(self._c_node.name)};'
        else:
            self._raise_unsupported_type()

    @property
    def tail(self):
        """Text after this element's end tag, but before the next sibling
        element's start tag.  This is either a string or the value None, if
        there was no text.
        """
        self._assertNode()
        return _collectText(self._c_node.next)

    @property
    def sourceline(self):
        """Original line number as found by the parser or None if unknown.
        """
        cdef long line
        self._assertNode()
        line = tree.xmlGetLineNo(self._c_node)
        if line > 0:
            return line
        else:
            return None

    def __repr__(self):
        self._assertNode()
        if self._c_node.type == tree.XML_ELEMENT_NODE:
            return "<Element %s at 0x%x>" % (strrepr(self.tag), id(self))
        elif self._c_node.type == tree.XML_COMMENT_NODE:
            return "<!--%s-->" % strrepr(self.text)
        elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
            # Proxies wrap entity *references* (see tag/text and
            # _newReadOnlyProxy), not entity declarations.
            return "&%s;" % strrepr(funicode(self._c_node.name))
        elif self._c_node.type == tree.XML_PI_NODE:
            text = self.text
            if text:
                return "<?%s %s?>" % (strrepr(self.target), text)
            else:
                return "<?%s?>" % strrepr(self.target)
        else:
            self._raise_unsupported_type()

    def __getitem__(self, x):
        u"""Returns the subelement at the given position or the requested
        slice.

        All results are read-only proxies that depend on the same source
        proxy as this one.  Raises IndexError for an index out of range.
        """
        cdef xmlNode* c_node = NULL
        cdef Py_ssize_t step = 0, slicelength = 0
        cdef Py_ssize_t c, i
        cdef _node_to_node_function next_element
        cdef list result
        self._assertNode()
        if isinstance(x, slice):
            # slicing
            if _isFullSlice(<slice>x):
                # A full slice is simply the list of all child proxies.
                return self.getchildren()
            _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
            if c_node is NULL:
                return []
            if step > 0:
                next_element = _nextElement
            else:
                # Walk backwards using a positive step count.
                step = -step
                next_element = _previousElement
            result = []
            c = 0
            while c_node is not NULL and c < slicelength:
                # Exactly one proxy per selected node; never hand out
                # writable elements from a read-only tree.
                result.append(_newReadOnlyProxy(self._source_proxy, c_node))
                c = c + 1
                for i in range(step):
                    c_node = next_element(c_node)
            return result
        else:
            # indexing
            c_node = _findChild(self._c_node, x)
            if c_node is NULL:
                raise IndexError(u"list index out of range")
            return _newReadOnlyProxy(self._source_proxy, c_node)

    def __len__(self):
        u"""Returns the number of subelements.
        """
        cdef Py_ssize_t c
        cdef xmlNode* c_node
        self._assertNode()
        c = 0
        c_node = self._c_node.children
        while c_node is not NULL:
            if tree._isElement(c_node):
                c = c + 1
            c_node = c_node.next
        return c

    def __nonzero__(self):
        cdef xmlNode* c_node
        self._assertNode()
        # True if at least one child is found.
        c_node = _findChildBackwards(self._c_node, 0)
        return c_node != NULL

    def __deepcopy__(self, memo):
        u"__deepcopy__(self, memo)"
        return self.__copy__()

    cpdef __copy__(self):
        u"""__copy__(self)

        Copies the wrapped node into a new document and returns the copy.
        An invalidated proxy is returned unchanged.
        """
        cdef xmlDoc* c_doc
        cdef xmlNode* c_node
        cdef _Document new_doc
        if self._c_node is NULL:
            return self
        c_doc = _copyDocRoot(self._c_node.doc, self._c_node) # recursive
        new_doc = _documentFactory(c_doc, None)
        root = new_doc.getroot()
        if root is not None:
            return root
        # Comment/PI
        # No root element: look for the first top-level node of our own type.
        c_node = c_doc.children
        while c_node is not NULL and c_node.type != self._c_node.type:
            c_node = c_node.next
        if c_node is NULL:
            return None
        return _elementFactory(new_doc, c_node)

    def __iter__(self):
        return iter(self.getchildren())

    def iterchildren(self, tag=None, *, reversed=False):
        u"""iterchildren(self, tag=None, reversed=False)

        Iterate over the children of this element.

        If 'tag' is given and is not '*', only children with an equal tag
        are returned.  'reversed' iterates in reverse document order.
        """
        children = self.getchildren()
        if tag is not None and tag != '*':
            children = [ el for el in children if el.tag == tag ]
        if reversed:
            children = children[::-1]
        return iter(children)

    cpdef getchildren(self):
        u"""Returns all subelements. The elements are returned in document
        order.
        """
        cdef xmlNode* c_node
        cdef list result
        self._assertNode()
        result = []
        c_node = self._c_node.children
        while c_node is not NULL:
            if tree._isElement(c_node):
                result.append(_newReadOnlyProxy(self._source_proxy, c_node))
            c_node = c_node.next
        return result

    def getparent(self):
        u"""Returns the parent of this element or None for the root element.
        """
        cdef xmlNode* c_parent
        self._assertNode()
        c_parent = self._c_node.parent
        if c_parent is NULL or not tree._isElement(c_parent):
            return None
        else:
            return _newReadOnlyProxy(self._source_proxy, c_parent)

    def getnext(self):
        u"""Returns the following sibling of this element or None.
        """
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _nextElement(self._c_node)
        if c_node is not NULL:
            return _newReadOnlyProxy(self._source_proxy, c_node)
        return None

    def getprevious(self):
        u"""Returns the preceding sibling of this element or None.
        """
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _previousElement(self._c_node)
        if c_node is not NULL:
            return _newReadOnlyProxy(self._source_proxy, c_node)
        return None


@cython.final
@cython.internal
cdef class _ReadOnlyPIProxy(_ReadOnlyProxy):
    """A read-only proxy for processing instructions (for internal use only!)"""
    @property
    def target(self):
        """The target name of the processing instruction."""
        self._assertNode()
        return funicode(self._c_node.name)

@cython.final
@cython.internal
cdef class _ReadOnlyEntityProxy(_ReadOnlyProxy):
    """A read-only proxy for entity references (for internal use only!)"""
    property name:
        def __get__(self):
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            self._assertNode()
            value_utf = _utf8(value)
            if u'&' in value or u';' in value:
                raise ValueError(f"Invalid entity name '{value}'")
            tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))

    @property
    def text(self):
        """The entity reference in its '&name;' form."""
        self._assertNode()
        return f'&{funicode(self._c_node.name)};'


@cython.internal
cdef class _ReadOnlyElementProxy(_ReadOnlyProxy):
    """The main read-only Element proxy class (for internal use only!)."""

    @property
    def attrib(self):
        """A new dict built from the element's attributes.

        Note that changing the returned dict has no effect on the Element.
        """
        self._assertNode()
        return dict(_collectAttributes(self._c_node, 3))

    @property
    def prefix(self):
        """Namespace prefix or None.
        """
        self._assertNode()
        if self._c_node.ns is not NULL:
            if self._c_node.ns.prefix is not NULL:
                return funicode(self._c_node.ns.prefix)
        return None

    @property
    def nsmap(self):
        """Namespace prefix->URI mapping known in the context of this
        Element.  This includes all namespace declarations of the
        parents.

        Note that changing the returned dict has no effect on the Element.
        """
        self._assertNode()
        return _build_nsmap(self._c_node)

    def get(self, key, default=None):
        u"""Gets an element attribute.
        """
        self._assertNode()
        return _getNodeAttributeValue(self._c_node, key, default)

    def keys(self):
        u"""Gets a list of attribute names. The names are returned in an
        arbitrary order (just like for an ordinary Python dictionary).
        """
        self._assertNode()
        return _collectAttributes(self._c_node, 1)

    def values(self):
        u"""Gets element attributes, as a sequence. The attributes are returned
        in an arbitrary order.
        """
        self._assertNode()
        return _collectAttributes(self._c_node, 2)

    def items(self):
        u"""Gets element attributes, as a sequence. The attributes are returned
        in an arbitrary order.
        """
        self._assertNode()
        return _collectAttributes(self._c_node, 3)

cdef _ReadOnlyProxy _newReadOnlyProxy(
    _ReadOnlyProxy source_proxy, xmlNode* c_node):
    # Create the read-only proxy class that matches the node type and
    # register it with 'source_proxy' (or make it its own source if None).
    # Raises TypeError for node types that have no proxy class.
    cdef _ReadOnlyProxy el
    if c_node.type == tree.XML_ELEMENT_NODE:
        el = _ReadOnlyElementProxy.__new__(_ReadOnlyElementProxy)
    elif c_node.type == tree.XML_PI_NODE:
        el = _ReadOnlyPIProxy.__new__(_ReadOnlyPIProxy)
    elif c_node.type in (tree.XML_COMMENT_NODE,
                         tree.XML_ENTITY_REF_NODE):
        el = _ReadOnlyProxy.__new__(_ReadOnlyProxy)
    else:
        raise TypeError(f"Unsupported element type: {c_node.type}")
    el._c_node = c_node
    _initReadOnlyProxy(el, source_proxy)
    return el

cdef inline _initReadOnlyProxy(_ReadOnlyProxy el,
                               _ReadOnlyProxy source_proxy):
    # Link 'el' to its source proxy so that _freeReadOnlyProxies() on the
    # source also invalidates 'el'.  Without a source, 'el' becomes its own.
    if source_proxy is None:
        el._source_proxy = el
        el._dependent_proxies = [el]
    else:
        el._source_proxy = source_proxy
        source_proxy._dependent_proxies.append(el)

cdef _freeReadOnlyProxies(_ReadOnlyProxy sourceProxy):
    # Invalidate all proxies registered with 'sourceProxy' (setting their
    # node pointer to NULL) and free the nodes marked by free_after_use().
    cdef xmlNode* c_node
    cdef _ReadOnlyProxy el
    if sourceProxy is None:
        return
    if sourceProxy._dependent_proxies is None:
        return
    for el in sourceProxy._dependent_proxies:
        c_node = el._c_node
        # Clear the pointer first so the proxy can never see a freed node.
        el._c_node = NULL
        if el._free_after_use:
            tree.xmlFreeNode(c_node)
    del sourceProxy._dependent_proxies[:]

# opaque wrapper around non-element nodes, e.g. the document node
#
# This class does not imply any restrictions on modifiability or
# read-only status of the node, so use with caution.

@cython.internal
cdef class _OpaqueNodeWrapper:
    cdef tree.xmlNode* _c_node
    def __init__(self):
        raise TypeError(u"This type cannot be instantiated from Python")

@cython.final
@cython.internal
cdef class _OpaqueDocumentWrapper(_OpaqueNodeWrapper):
    cdef int _assertNode(self) except -1:
        u"""This is our way of saying: this proxy is invalid!
        """
        assert self._c_node is not NULL, u"Proxy invalidated!"
        return 0

    cpdef append(self, other_element):
        u"""Append a copy of an Element to the list of children.

        Raises ValueError if an element is appended while the document
        already has a root element, and TypeError for nodes other than
        elements, PIs and comments.
        """
        cdef xmlNode* c_next
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _roNodeOf(other_element)
        if c_node.type == tree.XML_ELEMENT_NODE:
            if tree.xmlDocGetRootElement(<tree.xmlDoc*>self._c_node) is not NULL:
                raise ValueError(u"cannot append, document already has a root element")
        elif c_node.type not in (tree.XML_PI_NODE, tree.XML_COMMENT_NODE):
            raise TypeError(f"unsupported element type for top-level node: {c_node.type}")
        c_node = _copyNodeToDoc(c_node, <tree.xmlDoc*>self._c_node)
        # Remember the sibling before linking the copy into the document.
        c_next = c_node.next
        tree.xmlAddChild(self._c_node, c_node)
        _moveTail(c_next, c_node)

    def extend(self, elements):
        u"""Append a copy of all Elements from a sequence to the list of
        children.
        """
        self._assertNode()
        for element in elements:
            self.append(element)

cdef _OpaqueNodeWrapper _newOpaqueAppendOnlyNodeWrapper(xmlNode* c_node):
    # Wrap 'c_node': document nodes get the append-capable document wrapper,
    # everything else the plain opaque wrapper.
    cdef _OpaqueNodeWrapper node
    if c_node.type in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE):
        node = _OpaqueDocumentWrapper.__new__(_OpaqueDocumentWrapper)
    else:
        node = _OpaqueNodeWrapper.__new__(_OpaqueNodeWrapper)
    node._c_node = c_node
    return node

# element proxies that allow restricted modification

@cython.internal
cdef class _ModifyContentOnlyProxy(_ReadOnlyProxy):
    u"""A read-only proxy that allows changing the text content.
    """
    property text:
        def __get__(self):
            self._assertNode()
            if self._c_node.content is NULL:
                return ''
            else:
                return funicode(self._c_node.content)

        def __set__(self, value):
            cdef const_xmlChar* c_text
            self._assertNode()
            if value is None:
                c_text = <const_xmlChar*>NULL
            else:
                # Keep the encoded object referenced by 'value' while its
                # buffer is in use.
                value = _utf8(value)
                c_text = _xcstr(value)
            tree.xmlNodeSetContent(self._c_node, c_text)

@cython.final
@cython.internal
cdef class _ModifyContentOnlyPIProxy(_ModifyContentOnlyProxy):
    """A read-only proxy that allows changing the text/target content of a
    processing instruction.
    """
    property target:
        def __get__(self):
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            self._assertNode()
            value = _utf8(value)
            c_text = _xcstr(value)
            tree.xmlNodeSetName(self._c_node, c_text)

@cython.final
@cython.internal
cdef class _ModifyContentOnlyEntityProxy(_ModifyContentOnlyProxy):
    "A read-only proxy for entity references (for internal use only!)"
    property name:
        def __get__(self):
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            self._assertNode()
            value_utf = _utf8(value)
            # Validate the original text value, not the encoded one, and
            # raise instead of asserting so that the check cannot be
            # compiled out.
            if u'&' in value or u';' in value:
                raise ValueError(f"Invalid entity name '{value}'")
            tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))


@cython.final
@cython.internal
cdef class _AppendOnlyElementProxy(_ReadOnlyElementProxy):
    u"""A read-only element that allows adding children and changing the
    text content (i.e. everything that adds to the subtree).
    """
    cpdef append(self, other_element):
        u"""Append a copy of an Element to the list of children.
        """
        cdef xmlNode* c_next
        cdef xmlNode* c_node
        self._assertNode()
        c_node = _roNodeOf(other_element)
        c_node = _copyNodeToDoc(c_node, self._c_node.doc)
        # Remember the sibling before linking the copy into the tree.
        c_next = c_node.next
        tree.xmlAddChild(self._c_node, c_node)
        _moveTail(c_next, c_node)

    def extend(self, elements):
        u"""Append a copy of all Elements from a sequence to the list of
        children.
        """
        self._assertNode()
        for element in elements:
            self.append(element)

    property text:
        """Text before the first subelement. This is either a string or the
        value None, if there was no text.
        """
        def __get__(self):
            self._assertNode()
            return _collectText(self._c_node.children)

        def __set__(self, value):
            self._assertNode()
            if isinstance(value, QName):
                value = _resolveQNameText(self, value).decode('utf8')
            _setNodeText(self._c_node, value)


cdef _ReadOnlyProxy _newAppendOnlyProxy(
    _ReadOnlyProxy source_proxy, xmlNode* c_node):
    # Create the restricted-modification proxy class that matches the node
    # type and register it with 'source_proxy'.
    # Raises TypeError for node types that have no such proxy class.
    cdef _ReadOnlyProxy el
    if c_node.type == tree.XML_ELEMENT_NODE:
        el = _AppendOnlyElementProxy.__new__(_AppendOnlyElementProxy)
    elif c_node.type == tree.XML_PI_NODE:
        el = _ModifyContentOnlyPIProxy.__new__(_ModifyContentOnlyPIProxy)
    elif c_node.type == tree.XML_COMMENT_NODE:
        el = _ModifyContentOnlyProxy.__new__(_ModifyContentOnlyProxy)
    else:
        raise TypeError(f"Unsupported element type: {c_node.type}")
    el._c_node = c_node
    _initReadOnlyProxy(el, source_proxy)
    return el

cdef xmlNode* _roNodeOf(element) except NULL:
    # Return the node behind an _Element, any read-only proxy or an opaque
    # wrapper.  Raises TypeError for other objects and for wrappers whose
    # node pointer is NULL.
    cdef xmlNode* c_node
    if isinstance(element, _Element):
        c_node = (<_Element>element)._c_node
    elif isinstance(element, _ReadOnlyProxy):
        c_node = (<_ReadOnlyProxy>element)._c_node
    elif isinstance(element, _OpaqueNodeWrapper):
        c_node = (<_OpaqueNodeWrapper>element)._c_node
    else:
        raise TypeError(f"invalid argument type {type(element)}")

    if c_node is NULL:
        raise TypeError(u"invalid element")
    return c_node

cdef xmlNode* _nonRoNodeOf(element) except NULL:
    # Like _roNodeOf(), but of the proxy classes only accepts
    # _AppendOnlyElementProxy; other proxies raise TypeError.
    cdef xmlNode* c_node
    if isinstance(element, _Element):
        c_node = (<_Element>element)._c_node
    elif isinstance(element, _AppendOnlyElementProxy):
        c_node = (<_AppendOnlyElementProxy>element)._c_node
    elif isinstance(element, _OpaqueNodeWrapper):
        c_node = (<_OpaqueNodeWrapper>element)._c_node
    else:
        raise TypeError(f"invalid argument type {type(element)}")

    if c_node is NULL:
        raise TypeError(u"invalid element")
    return c_node