/ lib / lxml / readonlytree.pxi
readonlytree.pxi
  1  # read-only tree implementation
  2  
  3  @cython.internal
  4  cdef class _ReadOnlyProxy:
  5      u"A read-only proxy class suitable for PIs/Comments (for internal use only!)."
  6      cdef bint _free_after_use
  7      cdef xmlNode* _c_node
  8      cdef _ReadOnlyProxy _source_proxy
  9      cdef list _dependent_proxies
 10      def __cinit__(self):
 11          self._c_node = NULL
 12          self._free_after_use = 0
 13  
 14      cdef int _assertNode(self) except -1:
 15          u"""This is our way of saying: this proxy is invalid!
 16          """
 17          if not self._c_node:
 18              raise ReferenceError("Proxy invalidated!")
 19          return 0
 20  
 21      cdef int _raise_unsupported_type(self) except -1:
 22          raise TypeError(f"Unsupported node type: {self._c_node.type}")
 23  
 24      cdef void free_after_use(self):
 25          u"""Should the xmlNode* be freed when releasing the proxy?
 26          """
 27          self._free_after_use = 1
 28  
 29      @property
 30      def tag(self):
 31          """Element tag
 32          """
 33          self._assertNode()
 34          if self._c_node.type == tree.XML_ELEMENT_NODE:
 35              return _namespacedName(self._c_node)
 36          elif self._c_node.type == tree.XML_PI_NODE:
 37              return ProcessingInstruction
 38          elif self._c_node.type == tree.XML_COMMENT_NODE:
 39              return Comment
 40          elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
 41              return Entity
 42          else:
 43              self._raise_unsupported_type()
 44  
 45      @property
 46      def text(self):
 47          """Text before the first subelement. This is either a string or
 48          the value None, if there was no text.
 49          """
 50          self._assertNode()
 51          if self._c_node.type == tree.XML_ELEMENT_NODE:
 52              return _collectText(self._c_node.children)
 53          elif self._c_node.type in (tree.XML_PI_NODE,
 54                                     tree.XML_COMMENT_NODE):
 55              if self._c_node.content is NULL:
 56                  return ''
 57              else:
 58                  return funicode(self._c_node.content)
 59          elif self._c_node.type == tree.XML_ENTITY_REF_NODE:
 60              return f'&{funicode(self._c_node.name)};'
 61          else:
 62              self._raise_unsupported_type()
 63          
 64      @property
 65      def tail(self):
 66          """Text after this element's end tag, but before the next sibling
 67          element's start tag. This is either a string or the value None, if
 68          there was no text.
 69          """
 70          self._assertNode()
 71          return _collectText(self._c_node.next)
 72  
 73      @property
 74      def sourceline(self):
 75          """Original line number as found by the parser or None if unknown.
 76          """
 77          cdef long line
 78          self._assertNode()
 79          line = tree.xmlGetLineNo(self._c_node)
 80          if line > 0:
 81              return line
 82          else:
 83              return None
 84  
 85      def __repr__(self):
 86          self._assertNode()
 87          if self._c_node.type == tree.XML_ELEMENT_NODE:
 88              return "<Element %s at 0x%x>" % (strrepr(self.tag), id(self))
 89          elif self._c_node.type == tree.XML_COMMENT_NODE:
 90              return "<!--%s-->" % strrepr(self.text)
 91          elif self._c_node.type == tree.XML_ENTITY_NODE:
 92              return "&%s;" % strrepr(funicode(self._c_node.name))
 93          elif self._c_node.type == tree.XML_PI_NODE:
 94              text = self.text
 95              if text:
 96                  return "<?%s %s?>" % (strrepr(self.target), text)
 97              else:
 98                  return "<?%s?>" % strrepr(self.target)
 99          else:
100              self._raise_unsupported_type()
101  
102      def __getitem__(self, x):
103          u"""Returns the subelement at the given position or the requested
104          slice.
105          """
106          cdef xmlNode* c_node = NULL
107          cdef Py_ssize_t step = 0, slicelength = 0
108          cdef Py_ssize_t c, i
109          cdef _node_to_node_function next_element
110          cdef list result
111          self._assertNode()
112          if isinstance(x, slice):
113              # slicing
114              if _isFullSlice(<slice>x):
115                  return _collectChildren(self)
116              _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
117              if c_node is NULL:
118                  return []
119              if step > 0:
120                  next_element = _nextElement
121              else:
122                  step = -step
123                  next_element = _previousElement
124              result = []
125              c = 0
126              while c_node is not NULL and c < slicelength:
127                  result.append(_newReadOnlyProxy(self._source_proxy, c_node))
128                  result.append(_elementFactory(self._doc, c_node))
129                  c = c + 1
130                  for i from 0 <= i < step:
131                      c_node = next_element(c_node)
132              return result
133          else:
134              # indexing
135              c_node = _findChild(self._c_node, x)
136              if c_node is NULL:
137                  raise IndexError, u"list index out of range"
138              return _newReadOnlyProxy(self._source_proxy, c_node)
139  
140      def __len__(self):
141          u"""Returns the number of subelements.
142          """
143          cdef Py_ssize_t c
144          cdef xmlNode* c_node
145          self._assertNode()
146          c = 0
147          c_node = self._c_node.children
148          while c_node is not NULL:
149              if tree._isElement(c_node):
150                  c = c + 1
151              c_node = c_node.next
152          return c
153  
154      def __nonzero__(self):
155          cdef xmlNode* c_node
156          self._assertNode()
157          c_node = _findChildBackwards(self._c_node, 0)
158          return c_node != NULL
159  
160      def __deepcopy__(self, memo):
161          u"__deepcopy__(self, memo)"
162          return self.__copy__()
163          
164      cpdef __copy__(self):
165          u"__copy__(self)"
166          cdef xmlDoc* c_doc
167          cdef xmlNode* c_node
168          cdef _Document new_doc
169          if self._c_node is NULL:
170              return self
171          c_doc = _copyDocRoot(self._c_node.doc, self._c_node) # recursive
172          new_doc = _documentFactory(c_doc, None)
173          root = new_doc.getroot()
174          if root is not None:
175              return root
176          # Comment/PI
177          c_node = c_doc.children
178          while c_node is not NULL and c_node.type != self._c_node.type:
179              c_node = c_node.next
180          if c_node is NULL:
181              return None
182          return _elementFactory(new_doc, c_node)
183  
184      def __iter__(self):
185          return iter(self.getchildren())
186  
187      def iterchildren(self, tag=None, *, reversed=False):
188          u"""iterchildren(self, tag=None, reversed=False)
189  
190          Iterate over the children of this element.
191          """
192          children = self.getchildren()
193          if tag is not None and tag != '*':
194              children = [ el for el in children if el.tag == tag ]
195          if reversed:
196              children = children[::-1]
197          return iter(children)
198  
199      cpdef getchildren(self):
200          u"""Returns all subelements. The elements are returned in document
201          order.
202          """
203          cdef xmlNode* c_node
204          cdef list result
205          self._assertNode()
206          result = []
207          c_node = self._c_node.children
208          while c_node is not NULL:
209              if tree._isElement(c_node):
210                  result.append(_newReadOnlyProxy(self._source_proxy, c_node))
211              c_node = c_node.next
212          return result
213  
214      def getparent(self):
215          u"""Returns the parent of this element or None for the root element.
216          """
217          cdef xmlNode* c_parent
218          self._assertNode()
219          c_parent = self._c_node.parent
220          if c_parent is NULL or not tree._isElement(c_parent):
221              return None
222          else:
223              return _newReadOnlyProxy(self._source_proxy, c_parent)
224  
225      def getnext(self):
226          u"""Returns the following sibling of this element or None.
227          """
228          cdef xmlNode* c_node
229          self._assertNode()
230          c_node = _nextElement(self._c_node)
231          if c_node is not NULL:
232              return _newReadOnlyProxy(self._source_proxy, c_node)
233          return None
234  
235      def getprevious(self):
236          u"""Returns the preceding sibling of this element or None.
237          """
238          cdef xmlNode* c_node
239          self._assertNode()
240          c_node = _previousElement(self._c_node)
241          if c_node is not NULL:
242              return _newReadOnlyProxy(self._source_proxy, c_node)
243          return None
244  
245  
@cython.final
@cython.internal
cdef class _ReadOnlyPIProxy(_ReadOnlyProxy):
    """Read-only view on a processing instruction node (internal use only!)"""
    @property
    def target(self):
        """The node name of the wrapped processing instruction, as text.

        Raises ReferenceError if the proxy has been invalidated.
        """
        self._assertNode()
        pi_target = funicode(self._c_node.name)
        return pi_target
254  
@cython.final
@cython.internal
cdef class _ReadOnlyEntityProxy(_ReadOnlyProxy):
    """A read-only proxy for entity references (for internal use only!)"""
    property name:
        # Name of the referenced entity, without the '&' and ';' delimiters.
        def __get__(self):
            # Guard against a NULL node pointer after invalidation.
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            # Raises ReferenceError for an invalidated proxy and ValueError
            # if the new name contains '&' or ';'.
            self._assertNode()
            value_utf = _utf8(value)
            if u'&' in value or u';' in value:
                raise ValueError(f"Invalid entity name '{value}'")
            tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))

    @property
    def text(self):
        """The entity reference in its '&name;' form."""
        # Guard against a NULL node pointer after invalidation.
        self._assertNode()
        return f'&{funicode(self._c_node.name)};'
272  
273  
@cython.internal
cdef class _ReadOnlyElementProxy(_ReadOnlyProxy):
    """Read-only proxy for element nodes (internal use only!)."""

    @property
    def attrib(self):
        """A new dict built from the element's attribute items."""
        self._assertNode()
        attribute_items = _collectAttributes(self._c_node, 3)
        return dict(attribute_items)

    @property
    def prefix(self):
        """Namespace prefix of the element, or None if it has none."""
        self._assertNode()
        if self._c_node.ns is NULL or self._c_node.ns.prefix is NULL:
            return None
        return funicode(self._c_node.ns.prefix)

    @property
    def nsmap(self):
        """Mapping of namespace prefixes to URIs that are known in the
        context of this Element, including the declarations of its
        parents.

        Modifying the returned dict does not change the Element.
        """
        self._assertNode()
        known_namespaces = _build_nsmap(self._c_node)
        return known_namespaces

    def get(self, key, default=None):
        u"""Look up the attribute 'key', falling back to 'default'.
        """
        self._assertNode()
        attribute_value = _getNodeAttributeValue(self._c_node, key, default)
        return attribute_value

    def keys(self):
        u"""Return the attribute names as a list.  Their order is
        arbitrary (just like for an ordinary Python dictionary).
        """
        self._assertNode()
        attribute_names = _collectAttributes(self._c_node, 1)
        return attribute_names

    def values(self):
        u"""Return the attribute values as a sequence, in arbitrary order.
        """
        self._assertNode()
        attribute_values = _collectAttributes(self._c_node, 2)
        return attribute_values

    def items(self):
        u"""Return the attributes as a sequence of items, in arbitrary
        order.
        """
        self._assertNode()
        attribute_items = _collectAttributes(self._c_node, 3)
        return attribute_items
330  
cdef _ReadOnlyProxy _newReadOnlyProxy(
    _ReadOnlyProxy source_proxy, xmlNode* c_node):
    # Create the read-only proxy class matching the type of 'c_node' and
    # register it with 'source_proxy' (or make it its own source proxy if
    # that is None).  Raises TypeError for unsupported node types.
    cdef _ReadOnlyProxy el
    if c_node.type == tree.XML_ELEMENT_NODE:
        el = _ReadOnlyElementProxy.__new__(_ReadOnlyElementProxy)
    elif c_node.type == tree.XML_PI_NODE:
        el = _ReadOnlyPIProxy.__new__(_ReadOnlyPIProxy)
    elif (c_node.type == tree.XML_COMMENT_NODE or
              c_node.type == tree.XML_ENTITY_REF_NODE):
        # comments and entity references share the plain base proxy
        el = _ReadOnlyProxy.__new__(_ReadOnlyProxy)
    else:
        raise TypeError(f"Unsupported element type: {c_node.type}")
    el._c_node = c_node
    _initReadOnlyProxy(el, source_proxy)
    return el
346  
cdef inline _initReadOnlyProxy(_ReadOnlyProxy el,
                               _ReadOnlyProxy source_proxy):
    # Link 'el' into a proxy family so that it can be invalidated together
    # with its source proxy.
    if source_proxy is not None:
        # dependent proxy: remember the source and register with it
        el._source_proxy = source_proxy
        source_proxy._dependent_proxies.append(el)
        return
    # no source given: 'el' becomes the source of its own family
    el._source_proxy = el
    el._dependent_proxies = [el]
355  
cdef _freeReadOnlyProxies(_ReadOnlyProxy sourceProxy):
    # Invalidate every proxy registered with 'sourceProxy' and free those
    # C nodes that were marked with free_after_use().
    cdef xmlNode* c_released
    cdef _ReadOnlyProxy proxy
    if sourceProxy is None or sourceProxy._dependent_proxies is None:
        return
    for proxy in sourceProxy._dependent_proxies:
        # detach the node pointer first so the proxy reads as invalidated
        c_released = proxy._c_node
        proxy._c_node = NULL
        if proxy._free_after_use:
            tree.xmlFreeNode(c_released)
    # empty the list in place
    del sourceProxy._dependent_proxies[:]
369  
370  # opaque wrapper around non-element nodes, e.g. the document node
371  #
372  # This class does not imply any restrictions on modifiability or
373  # read-only status of the node, so use with caution.
374  
@cython.internal
cdef class _OpaqueNodeWrapper:
    # the wrapped libxml2 node
    cdef tree.xmlNode* _c_node
    def __init__(self):
        # instances are created through __new__() by
        # _newOpaqueAppendOnlyNodeWrapper(), never by calling the type
        raise TypeError(u"This type cannot be instantiated from Python")
380  
@cython.final
@cython.internal
cdef class _OpaqueDocumentWrapper(_OpaqueNodeWrapper):
    cdef int _assertNode(self) except -1:
        u"""Fail with an AssertionError if the wrapped node pointer is NULL,
        i.e. if this proxy is invalid.
        """
        assert self._c_node is not NULL, u"Proxy invalidated!"
        return 0

    cpdef append(self, other_element):
        u"""Copy 'other_element' into the wrapped document and add the copy
        as its last top-level node.

        Raises ValueError when appending an element to a document that
        already has a root element, and TypeError for nodes that are
        neither elements, PIs nor comments.
        """
        cdef xmlNode* c_tail
        cdef xmlNode* c_node
        cdef tree.xmlDoc* c_doc
        self._assertNode()
        c_doc = <tree.xmlDoc*>self._c_node
        c_node = _roNodeOf(other_element)
        if c_node.type == tree.XML_ELEMENT_NODE:
            if tree.xmlDocGetRootElement(c_doc) is not NULL:
                raise ValueError(u"cannot append, document already has a root element")
        elif (c_node.type != tree.XML_PI_NODE and
                  c_node.type != tree.XML_COMMENT_NODE):
            raise TypeError(f"unsupported element type for top-level node: {c_node.type}")
        c_node = _copyNodeToDoc(c_node, c_doc)
        # remember the copy's sibling before the copy is linked in
        c_tail = c_node.next
        tree.xmlAddChild(self._c_node, c_node)
        _moveTail(c_tail, c_node)

    def extend(self, elements):
        u"""Append a copy of each Element in 'elements', in iteration order.
        """
        self._assertNode()
        for other_element in elements:
            self.append(other_element)
414  
cdef _OpaqueNodeWrapper _newOpaqueAppendOnlyNodeWrapper(xmlNode* c_node):
    # Wrap 'c_node' opaquely: (HTML) document nodes get the wrapper that
    # supports append()/extend(), all other nodes the plain wrapper.
    cdef _OpaqueNodeWrapper wrapper
    if (c_node.type == tree.XML_DOCUMENT_NODE or
            c_node.type == tree.XML_HTML_DOCUMENT_NODE):
        wrapper = _OpaqueDocumentWrapper.__new__(_OpaqueDocumentWrapper)
    else:
        wrapper = _OpaqueNodeWrapper.__new__(_OpaqueNodeWrapper)
    wrapper._c_node = c_node
    return wrapper
423  
424  # element proxies that allow restricted modification
425  
@cython.internal
cdef class _ModifyContentOnlyProxy(_ReadOnlyProxy):
    u"""A read-only proxy that allows changing the text content.
    """
    property text:
        # The content of the wrapped node: '' if the node has no content.
        def __get__(self):
            self._assertNode()
            if self._c_node.content is NULL:
                return ''
            else:
                return funicode(self._c_node.content)

        # Assigning None passes a NULL content pointer to libxml2; any
        # other value is encoded through _utf8() first.
        def __set__(self, value):
            cdef const_xmlChar* c_text
            self._assertNode()
            if value is None:
                c_text = <const_xmlChar*>NULL
            else:
                # rebinding 'value' keeps the encoded string alive while
                # 'c_text' points into it
                value = _utf8(value)
                c_text = _xcstr(value)
            tree.xmlNodeSetContent(self._c_node, c_text)
447  
@cython.final
@cython.internal
cdef class _ModifyContentOnlyPIProxy(_ModifyContentOnlyProxy):
    """Proxy for a processing instruction whose text and target may be
    changed, while everything else stays read-only.
    """
    property target:
        # The node name of the processing instruction.
        def __get__(self):
            self._assertNode()
            pi_target = funicode(self._c_node.name)
            return pi_target

        def __set__(self, value):
            self._assertNode()
            # keep the encoded string referenced for the duration of the call
            target_utf = _utf8(value)
            tree.xmlNodeSetName(self._c_node, _xcstr(target_utf))
464  
@cython.final
@cython.internal
cdef class _ModifyContentOnlyEntityProxy(_ModifyContentOnlyProxy):
    "A proxy for entity references that allows changing the entity name (for internal use only!)"
    property name:
        # Name of the referenced entity, without the '&' and ';' delimiters.
        def __get__(self):
            # Guard against a NULL node pointer after invalidation.
            self._assertNode()
            return funicode(self._c_node.name)

        def __set__(self, value):
            # Raises ReferenceError for an invalidated proxy and ValueError
            # if the new name contains '&' or ';'.
            self._assertNode()
            value_utf = _utf8(value)
            # Validate the value as passed in, not the encoded result, and
            # raise instead of asserting so that the check cannot be
            # compiled out (same behaviour as _ReadOnlyEntityProxy.name).
            if u'&' in value or u';' in value:
                raise ValueError(f"Invalid entity name '{value}'")
            tree.xmlNodeSetName(self._c_node, _xcstr(value_utf))
479  
480  
@cython.final
@cython.internal
cdef class _AppendOnlyElementProxy(_ReadOnlyElementProxy):
    u"""Element proxy that is read-only except for operations that add to
    the subtree: appending children and changing the text content.
    """
    cpdef append(self, other_element):
        u"""Copy 'other_element' into this element's document and add the
        copy as the last child of this element.
        """
        cdef xmlNode* c_tail
        cdef xmlNode* c_child
        self._assertNode()
        c_child = _copyNodeToDoc(_roNodeOf(other_element), self._c_node.doc)
        # remember the copy's sibling before the copy is linked in
        c_tail = c_child.next
        tree.xmlAddChild(self._c_node, c_child)
        _moveTail(c_tail, c_child)

    def extend(self, elements):
        u"""Append a copy of each Element in 'elements', in iteration order.
        """
        self._assertNode()
        for other_element in elements:
            self.append(other_element)

    property text:
        """The text that precedes the first subelement: either a string or
        None if there is no such text.
        """
        def __get__(self):
            self._assertNode()
            leading_text = _collectText(self._c_node.children)
            return leading_text

        def __set__(self, value):
            self._assertNode()
            if isinstance(value, QName):
                # QNames are resolved against this element first
                resolved = _resolveQNameText(self, value)
                value = resolved.decode('utf8')
            _setNodeText(self._c_node, value)
520  
521  
cdef _ReadOnlyProxy _newAppendOnlyProxy(
    _ReadOnlyProxy source_proxy, xmlNode* c_node):
    # Create the restricted-modification proxy class matching the type of
    # 'c_node' and register it with 'source_proxy' (or make it its own
    # source proxy if that is None).  Raises TypeError for node types other
    # than element, PI and comment.
    cdef _ReadOnlyProxy el
    if c_node.type == tree.XML_COMMENT_NODE:
        el = _ModifyContentOnlyProxy.__new__(_ModifyContentOnlyProxy)
    elif c_node.type == tree.XML_PI_NODE:
        el = _ModifyContentOnlyPIProxy.__new__(_ModifyContentOnlyPIProxy)
    elif c_node.type == tree.XML_ELEMENT_NODE:
        el = _AppendOnlyElementProxy.__new__(_AppendOnlyElementProxy)
    else:
        raise TypeError(f"Unsupported element type: {c_node.type}")
    el._c_node = c_node
    _initReadOnlyProxy(el, source_proxy)
    return el
536  
cdef xmlNode* _roNodeOf(element) except NULL:
    # Return the C node behind an _Element, a read-only proxy or an opaque
    # node wrapper.  Raises TypeError for any other argument type and for
    # objects whose node pointer is NULL.
    cdef xmlNode* c_node
    if isinstance(element, _Element):
        c_node = (<_Element>element)._c_node
    elif isinstance(element, _ReadOnlyProxy):
        c_node = (<_ReadOnlyProxy>element)._c_node
    elif isinstance(element, _OpaqueNodeWrapper):
        c_node = (<_OpaqueNodeWrapper>element)._c_node
    else:
        raise TypeError(f"invalid argument type {type(element)}")

    if c_node is not NULL:
        return c_node
    raise TypeError(u"invalid element")
551  
cdef xmlNode* _nonRoNodeOf(element) except NULL:
    # Return the C node behind an _Element, an append-only element proxy or
    # an opaque node wrapper; purely read-only proxies are rejected.
    # Raises TypeError for any other argument type and for objects whose
    # node pointer is NULL.
    cdef xmlNode* c_node
    if isinstance(element, _Element):
        c_node = (<_Element>element)._c_node
    elif isinstance(element, _AppendOnlyElementProxy):
        c_node = (<_AppendOnlyElementProxy>element)._c_node
    elif isinstance(element, _OpaqueNodeWrapper):
        c_node = (<_OpaqueNodeWrapper>element)._c_node
    else:
        raise TypeError(f"invalid argument type {type(element)}")

    if c_node is not NULL:
        return c_node
    raise TypeError(u"invalid element")