Cradicle Explorer

/ lib / lxml / etree.pyx
etree.pyx
   1  # cython: binding=True
   2  # cython: auto_pickle=False
   3  # cython: language_level=2
   4  
   5  """
   6  The ``lxml.etree`` module implements the extended ElementTree API for XML.
   7  """
   8  
   9  from __future__ import absolute_import
  10  
  11  __docformat__ = u"restructuredtext en"
  12  
  13  __all__ = [
  14      'AttributeBasedElementClassLookup', 'C14NError', 'C14NWriterTarget', 'CDATA',
  15      'Comment', 'CommentBase', 'CustomElementClassLookup', 'DEBUG',
  16      'DTD', 'DTDError', 'DTDParseError', 'DTDValidateError',
  17      'DocumentInvalid', 'ETCompatXMLParser', 'ETXPath', 'Element',
  18      'ElementBase', 'ElementClassLookup', 'ElementDefaultClassLookup',
  19      'ElementNamespaceClassLookup', 'ElementTree', 'Entity', 'EntityBase',
  20      'Error', 'ErrorDomains', 'ErrorLevels', 'ErrorTypes', 'Extension',
  21      'FallbackElementClassLookup', 'FunctionNamespace', 'HTML',
  22      'HTMLParser', 'LIBXML_COMPILED_VERSION', 'LIBXML_VERSION',
  23      'LIBXSLT_COMPILED_VERSION', 'LIBXSLT_VERSION', 'LXML_VERSION',
  24      'LxmlError', 'LxmlRegistryError', 'LxmlSyntaxError',
  25      'NamespaceRegistryError', 'PI', 'PIBase', 'ParseError',
  26      'ParserBasedElementClassLookup', 'ParserError', 'ProcessingInstruction',
  27      'PyErrorLog', 'PythonElementClassLookup', 'QName', 'RelaxNG',
  28      'RelaxNGError', 'RelaxNGErrorTypes', 'RelaxNGParseError',
  29      'RelaxNGValidateError', 'Resolver', 'Schematron', 'SchematronError',
  30      'SchematronParseError', 'SchematronValidateError', 'SerialisationError',
  31      'SubElement', 'TreeBuilder', 'XInclude', 'XIncludeError', 'XML',
  32      'XMLDTDID', 'XMLID', 'XMLParser', 'XMLSchema', 'XMLSchemaError',
  33      'XMLSchemaParseError', 'XMLSchemaValidateError', 'XMLSyntaxError',
  34      'XMLTreeBuilder', 'XPath', 'XPathDocumentEvaluator', 'XPathError',
  35      'XPathEvalError', 'XPathEvaluator', 'XPathFunctionError', 'XPathResultError',
  36      'XPathSyntaxError', 'XSLT', 'XSLTAccessControl', 'XSLTApplyError',
  37      'XSLTError', 'XSLTExtension', 'XSLTExtensionError', 'XSLTParseError',
  38      'XSLTSaveError', 'canonicalize',
  39      'cleanup_namespaces', 'clear_error_log', 'dump',
  40      'fromstring', 'fromstringlist', 'get_default_parser', 'iselement',
  41      'iterparse', 'iterwalk', 'parse', 'parseid', 'register_namespace',
  42      'set_default_parser', 'set_element_class_lookup', 'strip_attributes',
  43      'strip_elements', 'strip_tags', 'tostring', 'tostringlist', 'tounicode',
  44      'use_global_python_log'
  45      ]
  46  
  47  cimport cython
  48  
  49  from lxml cimport python
  50  from lxml.includes cimport tree, config
  51  from lxml.includes.tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs
  52  from lxml.includes.tree cimport const_xmlChar, xmlChar, _xcstr
  53  from lxml.python cimport _cstr, _isString
  54  from lxml.includes cimport xpath
  55  from lxml.includes cimport c14n
  56  
  57  # Cython's standard declarations
  58  cimport cpython.mem
  59  cimport cpython.ref
  60  from libc cimport limits, stdio, stdlib
  61  from libc cimport string as cstring_h   # not to be confused with stdlib 'string'
  62  from libc.string cimport const_char
  63  
  64  cdef object os_path_abspath
  65  from os.path import abspath as os_path_abspath
  66  
  67  cdef object BytesIO, StringIO
  68  from io import BytesIO, StringIO
  69  
  70  cdef object OrderedDict
  71  from collections import OrderedDict
  72  
  73  cdef object _elementpath
  74  from lxml import _elementpath
  75  
  76  cdef object sys
  77  import sys
  78  
  79  cdef object re
  80  import re
  81  
  82  cdef object partial
  83  from functools import partial
  84  
  85  cdef object islice
  86  from itertools import islice
  87  
  88  cdef object ITER_EMPTY = iter(())
  89  
  90  cdef object MutableMapping
  91  try:
  92      from collections.abc import MutableMapping  # Py3.3+
  93  except ImportError:
  94      from collections import MutableMapping  # Py2.7
  95  
  96  class _ImmutableMapping(MutableMapping):
  97      def __getitem__(self, key):
  98          raise KeyError, key
  99  
 100      def __setitem__(self, key, value):
 101          raise KeyError, key
 102  
 103      def __delitem__(self, key):
 104          raise KeyError, key
 105  
 106      def __contains__(self, key):
 107          return False
 108  
 109      def __len__(self):
 110          return 0
 111  
 112      def __iter__(self):
 113          return ITER_EMPTY
 114      iterkeys = itervalues = iteritems = __iter__
 115  
# Module-level instance of the always-empty mapping.  The class name is
# removed from the module namespace right after instantiation, which leaves
# this object as the only remaining handle on the class.
cdef object IMMUTABLE_EMPTY_MAPPING = _ImmutableMapping()
del _ImmutableMapping
 118  
 119  
 120  # the rules
 121  # ---------
 122  # any libxml C argument/variable is prefixed with c_
 123  # any non-public function/class is prefixed with an underscore
 124  # instance creation is always through factories
 125  
# Compile-time switch: what to do with libxml2/libxslt error messages?
# 0 : drop
# 1 : use log
DEF __DEBUG = 1

# Compile-time constant: maximum number of lines in the libxml2/xslt log
# if __DEBUG == 1
DEF __MAX_LOG_SIZE = 100

# make the compiled-in debug state publicly available as a module attribute
DEBUG = __DEBUG
 136  
# A struct to store a cached qualified tag name+href pair.
# While we can borrow the c_name from the document dict,
# PyPy requires us to store a Python reference for the
# namespace in order to keep the byte buffer alive.
cdef struct qname:
    # tag name as a C string (borrowed, see above)
    const_xmlChar* c_name
    # Python object holding the namespace href (see above)
    python.PyObject* href
 144  
# global per-thread setup: pass 1 to libxml2's thread defaults for
# "indent tree output" and "line numbers"
tree.xmlThrDefIndentTreeOutput(1)
tree.xmlThrDefLineNumbersDefaultValue(1)

_initThreadLogging()

# initialize parser (and threading)
xmlparser.xmlInitParser()

# filename encoding: the name reported by the interpreter (file system
# encoding, else default encoding, else 'ascii'), stored as a bytes object
cdef bytes _FILENAME_ENCODING = (sys.getfilesystemencoding() or sys.getdefaultencoding() or 'ascii').encode("UTF-8")
# C string for the same name, obtained from the bytes object above
# NOTE(review): presumably points into that object's buffer - confirm via _cstr()
cdef char* _C_FILENAME_ENCODING = _cstr(_FILENAME_ENCODING)
 157  
# set up some default namespace prefixes
# Maps namespace URI -> prefix, both as bytes.  register_namespace()
# modifies this dict at runtime.
cdef dict _DEFAULT_NAMESPACE_PREFIXES = {
    b"http://www.w3.org/XML/1998/namespace": b'xml',
    b"http://www.w3.org/1999/xhtml": b"html",
    b"http://www.w3.org/1999/XSL/Transform": b"xsl",
    b"http://www.w3.org/1999/02/22-rdf-syntax-ns#": b"rdf",
    b"http://schemas.xmlsoap.org/wsdl/": b"wsdl",
    # xml schema
    b"http://www.w3.org/2001/XMLSchema": b"xs",
    b"http://www.w3.org/2001/XMLSchema-instance": b"xsi",
    # dublin core
    b"http://purl.org/dc/elements/1.1/": b"dc",
    # objectify
    b"http://codespeak.net/lxml/objectify/pytype" : b"py",
}
 173  
 174  # To avoid runtime encoding overhead, we keep a Unicode copy
 175  # of the uri-prefix mapping as (str, str) items view (list in Py2).
 176  cdef object _DEFAULT_NAMESPACE_PREFIXES_ITEMS = []
 177  
 178  cdef _update_default_namespace_prefixes_items():
 179      cdef bytes ns, prefix
 180      global _DEFAULT_NAMESPACE_PREFIXES_ITEMS
 181      _DEFAULT_NAMESPACE_PREFIXES_ITEMS = {
 182          ns.decode('utf-8') : prefix.decode('utf-8')
 183          for ns, prefix in _DEFAULT_NAMESPACE_PREFIXES.items()
 184      }.items()
 185  
 186  _update_default_namespace_prefixes_items()
 187  
 188  cdef object _check_internal_prefix = re.compile(b"ns\d+$").match
 189  
 190  def register_namespace(prefix, uri):
 191      u"""Registers a namespace prefix that newly created Elements in that
 192      namespace will use.  The registry is global, and any existing
 193      mapping for either the given prefix or the namespace URI will be
 194      removed.
 195      """
 196      prefix_utf, uri_utf = _utf8(prefix), _utf8(uri)
 197      if _check_internal_prefix(prefix_utf):
 198          raise ValueError("Prefix format reserved for internal use")
 199      _tagValidOrRaise(prefix_utf)
 200      _uriValidOrRaise(uri_utf)
 201      if (uri_utf == b"http://www.w3.org/XML/1998/namespace" and prefix_utf != b'xml'
 202              or prefix_utf == b'xml' and uri_utf != b"http://www.w3.org/XML/1998/namespace"):
 203          raise ValueError("Cannot change the 'xml' prefix of the XML namespace")
 204      for k, v in list(_DEFAULT_NAMESPACE_PREFIXES.items()):
 205          if k == uri_utf or v == prefix_utf:
 206              del _DEFAULT_NAMESPACE_PREFIXES[k]
 207      _DEFAULT_NAMESPACE_PREFIXES[uri_utf] = prefix_utf
 208      _update_default_namespace_prefixes_items()
 209  
 210  
 211  # Error superclass for ElementTree compatibility
cdef class Error(Exception):
    """Error superclass, provided for ElementTree compatibility."""
    pass
 214  
 215  # module level superclass for all exceptions
 216  cdef class LxmlError(Error):
 217      """Main exception base class for lxml.  All other exceptions inherit from
 218      this one.
 219      """
 220      def __init__(self, message, error_log=None):
 221          super(_Error, self).__init__(message)
 222          if error_log is None:
 223              self.error_log = __copyGlobalErrorLog()
 224          else:
 225              self.error_log = error_log.copy()
 226  
 227  cdef object _Error = Error
 228  
 229  
 230  # superclass for all syntax errors
class LxmlSyntaxError(LxmlError, SyntaxError):
    """Base class for all syntax errors.

    Inherits from both ``LxmlError`` and the builtin ``SyntaxError``.
    """
 234  
cdef class C14NError(LxmlError):
    """Error during C14N serialisation.

    Subclass of ``LxmlError``.
    """
 238  
 239  # version information
 240  cdef __unpackDottedVersion(version):
 241      version_list = []
 242      l = (version.decode("ascii").replace(u'-', u'.').split(u'.') + [0]*4)[:4]
 243      for item in l:
 244          try:
 245              item = int(item)
 246          except ValueError:
 247              if item.startswith(u'dev'):
 248                  count = item[3:]
 249                  item = -300
 250              elif item.startswith(u'alpha'):
 251                  count = item[5:]
 252                  item = -200
 253              elif item.startswith(u'beta'):
 254                  count = item[4:]
 255                  item = -100
 256              else:
 257                  count = 0
 258              if count:
 259                  item += int(count)
 260          version_list.append(item)
 261      return tuple(version_list)
 262  
 263  cdef __unpackIntVersion(int c_version):
 264      return (
 265          ((c_version / (100*100)) % 100),
 266          ((c_version / 100)       % 100),
 267          (c_version               % 100)
 268          )
 269  
# Integer version taken from the leading digits of tree.xmlParserVersion.
# If that fails for any reason, a message is printed and 0 is used.
cdef int _LIBXML_VERSION_INT
try:
    _LIBXML_VERSION_INT = int(
        re.match(u'[0-9]+', (<unsigned char*>tree.xmlParserVersion).decode("ascii")).group(0))
except Exception:
    print u"Unknown libxml2 version: %s" % (<unsigned char*>tree.xmlParserVersion).decode("latin1")
    _LIBXML_VERSION_INT = 0

# public version tuples: LIBXML_VERSION comes from the value parsed above,
# LIBXML_COMPILED_VERSION from the tree.LIBXML_VERSION constant
LIBXML_VERSION = __unpackIntVersion(_LIBXML_VERSION_INT)
LIBXML_COMPILED_VERSION = __unpackIntVersion(tree.LIBXML_VERSION)
LXML_VERSION = __unpackDottedVersion(tree.LXML_VERSION_STRING)

# the lxml version as a text string
__version__ = tree.LXML_VERSION_STRING.decode("ascii")
 283  
 284  
 285  # class for temporary storage of Python references,
 286  # used e.g. for XPath results
@cython.final
@cython.internal
cdef class _TempStore:
    # list of the objects that are currently being kept referenced
    cdef list _storage
    def __init__(self):
        self._storage = []

    cdef int add(self, obj) except -1:
        # Append obj to the store.  Returns 0.
        self._storage.append(obj)
        return 0

    cdef int clear(self) except -1:
        # Empty the store in place (the list object is kept).  Returns 0.
        del self._storage[:]
        return 0
 301  
 302  
 303  # class for temporarily storing exceptions raised in extensions
 304  @cython.internal
 305  cdef class _ExceptionContext:
 306      cdef object _exc_info
 307      cdef int clear(self) except -1:
 308          self._exc_info = None
 309          return 0
 310  
 311      cdef void _store_raised(self):
 312          try:
 313              self._exc_info = sys.exc_info()
 314          except BaseException as e:
 315              self._store_exception(e)
 316          finally:
 317              return  # and swallow any further exceptions
 318  
 319      cdef int _store_exception(self, exception) except -1:
 320          self._exc_info = (exception, None, None)
 321          return 0
 322  
 323      cdef bint _has_raised(self) except -1:
 324          return self._exc_info is not None
 325  
 326      cdef int _raise_if_stored(self) except -1:
 327          if self._exc_info is None:
 328              return 0
 329          type, value, traceback = self._exc_info
 330          self._exc_info = None
 331          if value is None and traceback is None:
 332              raise type
 333          else:
 334              raise type, value, traceback
 335  
 336  
 337  # type of a function that steps from node to node
 338  ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*)
 339  
 340  
 341  ################################################################################
 342  # Include submodules
 343  
 344  include "proxy.pxi"        # Proxy handling (element backpointers/memory/etc.)
 345  include "apihelpers.pxi"   # Private helper functions
 346  include "xmlerror.pxi"     # Error and log handling
 347  
 348  
 349  ################################################################################
 350  # Public Python API
 351  
 352  @cython.final
 353  @cython.freelist(8)
 354  cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
 355      u"""Internal base class to reference a libxml document.
 356  
 357      When instances of this class are garbage collected, the libxml
 358      document is cleaned up.
 359      """
 360      cdef int _ns_counter
 361      cdef bytes _prefix_tail
 362      cdef xmlDoc* _c_doc
 363      cdef _BaseParser _parser
 364  
 365      def __dealloc__(self):
 366          # if there are no more references to the document, it is safe
 367          # to clean the whole thing up, as all nodes have a reference to
 368          # the document
 369          tree.xmlFreeDoc(self._c_doc)
 370  
 371      @cython.final
 372      cdef getroot(self):
 373          # return an element proxy for the document root
 374          cdef xmlNode* c_node
 375          c_node = tree.xmlDocGetRootElement(self._c_doc)
 376          if c_node is NULL:
 377              return None
 378          return _elementFactory(self, c_node)
 379  
 380      @cython.final
 381      cdef bint hasdoctype(self):
 382          # DOCTYPE gets parsed into internal subset (xmlDTD*)
 383          return self._c_doc is not NULL and self._c_doc.intSubset is not NULL
 384  
 385      @cython.final
 386      cdef getdoctype(self):
 387          # get doctype info: root tag, public/system ID (or None if not known)
 388          cdef tree.xmlDtd* c_dtd
 389          cdef xmlNode* c_root_node
 390          public_id = None
 391          sys_url   = None
 392          c_dtd = self._c_doc.intSubset
 393          if c_dtd is not NULL:
 394              if c_dtd.ExternalID is not NULL:
 395                  public_id = funicode(c_dtd.ExternalID)
 396              if c_dtd.SystemID is not NULL:
 397                  sys_url = funicode(c_dtd.SystemID)
 398          c_dtd = self._c_doc.extSubset
 399          if c_dtd is not NULL:
 400              if not public_id and c_dtd.ExternalID is not NULL:
 401                  public_id = funicode(c_dtd.ExternalID)
 402              if not sys_url and c_dtd.SystemID is not NULL:
 403                  sys_url = funicode(c_dtd.SystemID)
 404          c_root_node = tree.xmlDocGetRootElement(self._c_doc)
 405          if c_root_node is NULL:
 406              root_name = None
 407          else:
 408              root_name = funicode(c_root_node.name)
 409          return root_name, public_id, sys_url
 410  
 411      @cython.final
 412      cdef getxmlinfo(self):
 413          # return XML version and encoding (or None if not known)
 414          cdef xmlDoc* c_doc = self._c_doc
 415          if c_doc.version is NULL:
 416              version = None
 417          else:
 418              version = funicode(c_doc.version)
 419          if c_doc.encoding is NULL:
 420              encoding = None
 421          else:
 422              encoding = funicode(c_doc.encoding)
 423          return version, encoding
 424  
 425      @cython.final
 426      cdef isstandalone(self):
 427          # returns True for "standalone=true",
 428          # False for "standalone=false", None if not provided
 429          if self._c_doc.standalone == -1:
 430              return None
 431          else:
 432              return <bint>(self._c_doc.standalone == 1)
 433  
 434      @cython.final
 435      cdef bytes buildNewPrefix(self):
 436          # get a new unique prefix ("nsX") for this document
 437          cdef bytes ns
 438          if self._ns_counter < len(_PREFIX_CACHE):
 439              ns = _PREFIX_CACHE[self._ns_counter]
 440          else:
 441              ns = python.PyBytes_FromFormat("ns%d", self._ns_counter)
 442          if self._prefix_tail is not None:
 443              ns += self._prefix_tail
 444          self._ns_counter += 1
 445          if self._ns_counter < 0:
 446              # overflow!
 447              self._ns_counter = 0
 448              if self._prefix_tail is None:
 449                  self._prefix_tail = b"A"
 450              else:
 451                  self._prefix_tail += b"A"
 452          return ns
 453  
 454      @cython.final
 455      cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node,
 456                                     const_xmlChar* c_href, const_xmlChar* c_prefix,
 457                                     bint is_attribute) except NULL:
 458          u"""Get or create namespace structure for a node.  Reuses the prefix if
 459          possible.
 460          """
 461          cdef xmlNs* c_ns
 462          cdef xmlNs* c_doc_ns
 463          cdef python.PyObject* dict_result
 464          if c_node.type != tree.XML_ELEMENT_NODE:
 465              assert c_node.type == tree.XML_ELEMENT_NODE, \
 466                  u"invalid node type %d, expected %d" % (
 467                  c_node.type, tree.XML_ELEMENT_NODE)
 468          # look for existing ns declaration
 469          c_ns = _searchNsByHref(c_node, c_href, is_attribute)
 470          if c_ns is not NULL:
 471              if is_attribute and c_ns.prefix is NULL:
 472                  # do not put namespaced attributes into the default
 473                  # namespace as this would break serialisation
 474                  pass
 475              else:
 476                  return c_ns
 477  
 478          # none found => determine a suitable new prefix
 479          if c_prefix is NULL:
 480              dict_result = python.PyDict_GetItem(
 481                  _DEFAULT_NAMESPACE_PREFIXES, <unsigned char*>c_href)
 482              if dict_result is not NULL:
 483                  prefix = <object>dict_result
 484              else:
 485                  prefix = self.buildNewPrefix()
 486              c_prefix = _xcstr(prefix)
 487  
 488          # make sure the prefix is not in use already
 489          while tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL:
 490              prefix = self.buildNewPrefix()
 491              c_prefix = _xcstr(prefix)
 492  
 493          # declare the namespace and return it
 494          c_ns = tree.xmlNewNs(c_node, c_href, c_prefix)
 495          if c_ns is NULL:
 496              raise MemoryError()
 497          return c_ns
 498  
 499      @cython.final
 500      cdef int _setNodeNs(self, xmlNode* c_node, const_xmlChar* c_href) except -1:
 501          u"Lookup namespace structure and set it for the node."
 502          c_ns = self._findOrBuildNodeNs(c_node, c_href, NULL, 0)
 503          tree.xmlSetNs(c_node, c_ns)
 504  
 505  cdef tuple __initPrefixCache():
 506      cdef int i
 507      return tuple([ python.PyBytes_FromFormat("ns%d", i)
 508                     for i in range(30) ])
 509  
 510  cdef tuple _PREFIX_CACHE = __initPrefixCache()
 511  
 512  cdef _Document _documentFactory(xmlDoc* c_doc, _BaseParser parser):
 513      cdef _Document result
 514      result = _Document.__new__(_Document)
 515      result._c_doc = c_doc
 516      result._ns_counter = 0
 517      result._prefix_tail = None
 518      if parser is None:
 519          parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
 520      result._parser = parser
 521      return result
 522  
 523  
# Searches for the first run of characters outside the set listed below
# (space, CR, LF, ASCII letters and digits, and some punctuation).
# NOTE(review): the set looks like the PubidChar production of XML 1.0 - confirm.
cdef object _find_invalid_public_id_characters = re.compile(
    ur"[^\x20\x0D\x0Aa-zA-Z0-9'()+,./:=?;!*#@$_%-]+").search
 526  
 527  
 528  cdef class DocInfo:
 529      u"Document information provided by parser and DTD."
 530      cdef _Document _doc
 531      def __cinit__(self, tree):
 532          u"Create a DocInfo object for an ElementTree object or root Element."
 533          self._doc = _documentOrRaise(tree)
 534          root_name, public_id, system_url = self._doc.getdoctype()
 535          if not root_name and (public_id or system_url):
 536              raise ValueError, u"Could not find root node"
 537  
 538      @property
 539      def root_name(self):
 540          """Returns the name of the root node as defined by the DOCTYPE."""
 541          root_name, public_id, system_url = self._doc.getdoctype()
 542          return root_name
 543  
 544      @cython.final
 545      cdef tree.xmlDtd* _get_c_dtd(self):
 546          """"Return the DTD. Create it if it does not yet exist."""
 547          cdef xmlDoc* c_doc = self._doc._c_doc
 548          cdef xmlNode* c_root_node
 549          cdef const_xmlChar* c_name
 550  
 551          if c_doc.intSubset:
 552              return c_doc.intSubset
 553  
 554          c_root_node = tree.xmlDocGetRootElement(c_doc)
 555          c_name = c_root_node.name if c_root_node else NULL
 556          return  tree.xmlCreateIntSubset(c_doc, c_name, NULL, NULL)
 557  
 558      def clear(self):
 559          u"""Removes DOCTYPE and internal subset from the document."""
 560          cdef xmlDoc* c_doc = self._doc._c_doc
 561          cdef tree.xmlNode* c_dtd = <xmlNode*>c_doc.intSubset
 562          if c_dtd is NULL:
 563              return
 564          tree.xmlUnlinkNode(c_dtd)
 565          tree.xmlFreeNode(c_dtd)
 566  
 567      property public_id:
 568          u"""Public ID of the DOCTYPE.
 569  
 570          Mutable.  May be set to a valid string or None.  If a DTD does not
 571          exist, setting this variable (even to None) will create one.
 572          """
 573          def __get__(self):
 574              root_name, public_id, system_url = self._doc.getdoctype()
 575              return public_id
 576  
 577          def __set__(self, value):
 578              cdef xmlChar* c_value = NULL
 579              if value is not None:
 580                  match = _find_invalid_public_id_characters(value)
 581                  if match:
 582                      raise ValueError, f'Invalid character(s) {match.group(0)!r} in public_id.'
 583                  value = _utf8(value)
 584                  c_value = tree.xmlStrdup(_xcstr(value))
 585                  if not c_value:
 586                      raise MemoryError()
 587  
 588              c_dtd = self._get_c_dtd()
 589              if not c_dtd:
 590                  tree.xmlFree(c_value)
 591                  raise MemoryError()
 592              if c_dtd.ExternalID:
 593                  tree.xmlFree(<void*>c_dtd.ExternalID)
 594              c_dtd.ExternalID = c_value
 595  
 596      property system_url:
 597          u"""System ID of the DOCTYPE.
 598  
 599          Mutable.  May be set to a valid string or None.  If a DTD does not
 600          exist, setting this variable (even to None) will create one.
 601          """
 602          def __get__(self):
 603              root_name, public_id, system_url = self._doc.getdoctype()
 604              return system_url
 605  
 606          def __set__(self, value):
 607              cdef xmlChar* c_value = NULL
 608              if value is not None:
 609                  bvalue = _utf8(value)
 610                  # sys_url may be any valid unicode string that can be
 611                  # enclosed in single quotes or quotes.
 612                  if b"'" in bvalue and b'"' in bvalue:
 613                      raise ValueError(
 614                          'System URL may not contain both single (\') and double quotes (").')
 615                  c_value = tree.xmlStrdup(_xcstr(bvalue))
 616                  if not c_value:
 617                      raise MemoryError()
 618  
 619              c_dtd = self._get_c_dtd()
 620              if not c_dtd:
 621                  tree.xmlFree(c_value)
 622                  raise MemoryError()
 623              if c_dtd.SystemID:
 624                  tree.xmlFree(<void*>c_dtd.SystemID)
 625              c_dtd.SystemID = c_value
 626  
 627      @property
 628      def xml_version(self):
 629          """Returns the XML version as declared by the document."""
 630          xml_version, encoding = self._doc.getxmlinfo()
 631          return xml_version
 632  
 633      @property
 634      def encoding(self):
 635          """Returns the encoding name as declared by the document."""
 636          xml_version, encoding = self._doc.getxmlinfo()
 637          return encoding
 638  
 639      @property
 640      def standalone(self):
 641          """Returns the standalone flag as declared by the document.  The possible
 642          values are True (``standalone='yes'``), False
 643          (``standalone='no'`` or flag not provided in the declaration),
 644          and None (unknown or no declaration found).  Note that a
 645          normal truth test on this value will always tell if the
 646          ``standalone`` flag was set to ``'yes'`` or not.
 647          """
 648          return self._doc.isstandalone()
 649  
 650      property URL:
 651          u"The source URL of the document (or None if unknown)."
 652          def __get__(self):
 653              if self._doc._c_doc.URL is NULL:
 654                  return None
 655              return _decodeFilename(self._doc._c_doc.URL)
 656          def __set__(self, url):
 657              url = _encodeFilename(url)
 658              c_oldurl = self._doc._c_doc.URL
 659              if url is None:
 660                  self._doc._c_doc.URL = NULL
 661              else:
 662                  self._doc._c_doc.URL = tree.xmlStrdup(_xcstr(url))
 663              if c_oldurl is not NULL:
 664                  tree.xmlFree(<void*>c_oldurl)
 665  
 666      @property
 667      def doctype(self):
 668          """Returns a DOCTYPE declaration string for the document."""
 669          root_name, public_id, system_url = self._doc.getdoctype()
 670          if system_url:
 671              # If '"' in system_url, we must escape it with single
 672              # quotes, otherwise escape with double quotes. If url
 673              # contains both a single quote and a double quote, XML
 674              # standard is being violated.
 675              if '"' in system_url:
 676                  quoted_system_url = f"'{system_url}'"
 677              else:
 678                  quoted_system_url = f'"{system_url}"'
 679          if public_id:
 680              if system_url:
 681                  return f'<!DOCTYPE {root_name} PUBLIC "{public_id}" {quoted_system_url}>'
 682              else:
 683                  return f'<!DOCTYPE {root_name} PUBLIC "{public_id}">'
 684          elif system_url:
 685              return f'<!DOCTYPE {root_name} SYSTEM {quoted_system_url}>'
 686          elif self._doc.hasdoctype():
 687              return f'<!DOCTYPE {root_name}>'
 688          else:
 689              return u''
 690  
 691      @property
 692      def internalDTD(self):
 693          """Returns a DTD validator based on the internal subset of the document."""
 694          return _dtdFactory(self._doc._c_doc.intSubset)
 695  
 696      @property
 697      def externalDTD(self):
 698          """Returns a DTD validator based on the external subset of the document."""
 699          return _dtdFactory(self._doc._c_doc.extSubset)
 700  
 701  
 702  @cython.no_gc_clear
 703  cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
 704      u"""Element class.
 705  
 706      References a document object and a libxml node.
 707  
 708      By pointing to a Document instance, a reference is kept to
 709      _Document as long as there is some pointer to a node in it.
 710      """
 711      cdef _Document _doc
 712      cdef xmlNode* _c_node
 713      cdef object _tag
 714  
    def _init(self):
        u"""_init(self)

        Called after object initialisation.  Custom subclasses may override
        this if they recursively call _init() in the superclasses.

        This base implementation does nothing.
        """
 721  
    @cython.linetrace(False)
    @cython.profile(False)
    def __dealloc__(self):
        # Only proxies that are still attached to a C node need cleanup:
        # unregister this proxy, then let attemptDeallocation() decide
        # about the node itself.
        # NOTE(review): presumably the node is only freed when nothing else
        # still uses it - confirm in proxy.pxi.
        if self._c_node is not NULL:
            _unregisterProxy(self)
            attemptDeallocation(self._c_node)
 730  
 731      # MANIPULATORS
 732  
 733      def __setitem__(self, x, value):
 734          u"""__setitem__(self, x, value)
 735  
 736          Replaces the given subelement index or slice.
 737          """
 738          cdef xmlNode* c_node = NULL
 739          cdef xmlNode* c_next
 740          cdef xmlDoc* c_source_doc
 741          cdef _Element element
 742          cdef bint left_to_right
 743          cdef Py_ssize_t slicelength = 0, step = 0
 744          _assertValidNode(self)
 745          if value is None:
 746              raise ValueError, u"cannot assign None"
 747          if isinstance(x, slice):
 748              # slice assignment
 749              _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
 750              if step > 0:
 751                  left_to_right = 1
 752              else:
 753                  left_to_right = 0
 754                  step = -step
 755              _replaceSlice(self, c_node, slicelength, step, left_to_right, value)
 756              return
 757          else:
 758              # otherwise: normal item assignment
 759              element = value
 760              _assertValidNode(element)
 761              c_node = _findChild(self._c_node, x)
 762              if c_node is NULL:
 763                  raise IndexError, u"list index out of range"
 764              c_source_doc = element._c_node.doc
 765              c_next = element._c_node.next
 766              _removeText(c_node.next)
 767              tree.xmlReplaceNode(c_node, element._c_node)
 768              _moveTail(c_next, element._c_node)
 769              moveNodeToDocument(self._doc, c_source_doc, element._c_node)
 770              if not attemptDeallocation(c_node):
 771                  moveNodeToDocument(self._doc, c_node.doc, c_node)
 772  
 773      def __delitem__(self, x):
 774          u"""__delitem__(self, x)
 775  
 776          Deletes the given subelement or a slice.
 777          """
 778          cdef xmlNode* c_node = NULL
 779          cdef xmlNode* c_next
 780          cdef Py_ssize_t step = 0, slicelength = 0
 781          _assertValidNode(self)
 782          if isinstance(x, slice):
 783              # slice deletion
 784              if _isFullSlice(<slice>x):
 785                  c_node = self._c_node.children
 786                  if c_node is not NULL:
 787                      if not _isElement(c_node):
 788                          c_node = _nextElement(c_node)
 789                      while c_node is not NULL:
 790                          c_next = _nextElement(c_node)
 791                          _removeNode(self._doc, c_node)
 792                          c_node = c_next
 793              else:
 794                  _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
 795                  _deleteSlice(self._doc, c_node, slicelength, step)
 796          else:
 797              # item deletion
 798              c_node = _findChild(self._c_node, x)
 799              if c_node is NULL:
 800                  raise IndexError, f"index out of range: {x}"
 801              _removeNode(self._doc, c_node)
 802  
 803      def __deepcopy__(self, memo):
 804          u"__deepcopy__(self, memo)"
 805          return self.__copy__()
 806  
 807      def __copy__(self):
 808          u"__copy__(self)"
 809          cdef xmlDoc* c_doc
 810          cdef xmlNode* c_node
 811          cdef _Document new_doc
 812          _assertValidNode(self)
 813          c_doc = _copyDocRoot(self._doc._c_doc, self._c_node) # recursive
 814          new_doc = _documentFactory(c_doc, self._doc._parser)
 815          root = new_doc.getroot()
 816          if root is not None:
 817              return root
 818          # Comment/PI
 819          c_node = c_doc.children
 820          while c_node is not NULL and c_node.type != self._c_node.type:
 821              c_node = c_node.next
 822          if c_node is NULL:
 823              return None
 824          return _elementFactory(new_doc, c_node)
 825  
 826      def set(self, key, value):
 827          u"""set(self, key, value)
 828  
 829          Sets an element attribute.
 830          In HTML documents (not XML or XHTML), the value None is allowed and creates
 831          an attribute without value (just the attribute name).
 832          """
 833          _assertValidNode(self)
 834          _setAttributeValue(self, key, value)
 835  
 836      def append(self, _Element element not None):
 837          u"""append(self, element)
 838  
 839          Adds a subelement to the end of this element.
 840          """
 841          _assertValidNode(self)
 842          _assertValidNode(element)
 843          _appendChild(self, element)
 844  
 845      def addnext(self, _Element element not None):
 846          u"""addnext(self, element)
 847  
 848          Adds the element as a following sibling directly after this
 849          element.
 850  
 851          This is normally used to set a processing instruction or comment after
 852          the root node of a document.  Note that tail text is automatically
 853          discarded when adding at the root level.
 854          """
 855          _assertValidNode(self)
 856          _assertValidNode(element)
 857          if self._c_node.parent != NULL and not _isElement(self._c_node.parent):
 858              if element._c_node.type != tree.XML_PI_NODE:
 859                  if element._c_node.type != tree.XML_COMMENT_NODE:
 860                      raise TypeError, u"Only processing instructions and comments can be siblings of the root element"
 861              element.tail = None
 862          _appendSibling(self, element)
 863  
 864      def addprevious(self, _Element element not None):
 865          u"""addprevious(self, element)
 866  
 867          Adds the element as a preceding sibling directly before this
 868          element.
 869  
 870          This is normally used to set a processing instruction or comment
 871          before the root node of a document.  Note that tail text is
 872          automatically discarded when adding at the root level.
 873          """
 874          _assertValidNode(self)
 875          _assertValidNode(element)
 876          if self._c_node.parent != NULL and not _isElement(self._c_node.parent):
 877              if element._c_node.type != tree.XML_PI_NODE:
 878                  if element._c_node.type != tree.XML_COMMENT_NODE:
 879                      raise TypeError, u"Only processing instructions and comments can be siblings of the root element"
 880              element.tail = None
 881          _prependSibling(self, element)
 882  
 883      def extend(self, elements):
 884          u"""extend(self, elements)
 885  
 886          Extends the current children by the elements in the iterable.
 887          """
 888          cdef _Element element
 889          _assertValidNode(self)
 890          for element in elements:
 891              if element is None:
 892                  raise TypeError, u"Node must not be None"
 893              _assertValidNode(element)
 894              _appendChild(self, element)
 895  
 896      def clear(self, bint keep_tail=False):
 897          u"""clear(self, keep_tail=False)
 898  
 899          Resets an element.  This function removes all subelements, clears
 900          all attributes and sets the text and tail properties to None.
 901  
 902          Pass ``keep_tail=True`` to leave the tail text untouched.
 903          """
 904          cdef xmlAttr* c_attr
 905          cdef xmlAttr* c_attr_next
 906          cdef xmlNode* c_node
 907          cdef xmlNode* c_node_next
 908          _assertValidNode(self)
 909          c_node = self._c_node
 910          # remove self.text and self.tail
 911          _removeText(c_node.children)
 912          if not keep_tail:
 913              _removeText(c_node.next)
 914          # remove all attributes
 915          c_attr = c_node.properties
 916          if c_attr:
 917              c_node.properties = NULL
 918              tree.xmlFreePropList(c_attr)
 919          # remove all subelements
 920          c_node = c_node.children
 921          if c_node and not _isElement(c_node):
 922              c_node = _nextElement(c_node)
 923          while c_node is not NULL:
 924              c_node_next = _nextElement(c_node)
 925              _removeNode(self._doc, c_node)
 926              c_node = c_node_next
 927  
 928      def insert(self, index: int, _Element element not None):
 929          u"""insert(self, index, element)
 930  
 931          Inserts a subelement at the given position in this element
 932          """
 933          cdef xmlNode* c_node
 934          cdef xmlNode* c_next
 935          cdef xmlDoc* c_source_doc
 936          _assertValidNode(self)
 937          _assertValidNode(element)
 938          c_node = _findChild(self._c_node, index)
 939          if c_node is NULL:
 940              _appendChild(self, element)
 941              return
 942          c_source_doc = element._c_node.doc
 943          c_next = element._c_node.next
 944          tree.xmlAddPrevSibling(c_node, element._c_node)
 945          _moveTail(c_next, element._c_node)
 946          moveNodeToDocument(self._doc, c_source_doc, element._c_node)
 947  
 948      def remove(self, _Element element not None):
 949          u"""remove(self, element)
 950  
 951          Removes a matching subelement. Unlike the find methods, this
 952          method compares elements based on identity, not on tag value
 953          or contents.
 954          """
 955          cdef xmlNode* c_node
 956          cdef xmlNode* c_next
 957          _assertValidNode(self)
 958          _assertValidNode(element)
 959          c_node = element._c_node
 960          if c_node.parent is not self._c_node:
 961              raise ValueError, u"Element is not a child of this node."
 962          c_next = element._c_node.next
 963          tree.xmlUnlinkNode(c_node)
 964          _moveTail(c_next, c_node)
 965          # fix namespace declarations
 966          moveNodeToDocument(self._doc, c_node.doc, c_node)
 967  
 968      def replace(self, _Element old_element not None,
 969                  _Element new_element not None):
 970          u"""replace(self, old_element, new_element)
 971  
 972          Replaces a subelement with the element passed as second argument.
 973          """
 974          cdef xmlNode* c_old_node
 975          cdef xmlNode* c_old_next
 976          cdef xmlNode* c_new_node
 977          cdef xmlNode* c_new_next
 978          cdef xmlDoc* c_source_doc
 979          _assertValidNode(self)
 980          _assertValidNode(old_element)
 981          _assertValidNode(new_element)
 982          c_old_node = old_element._c_node
 983          if c_old_node.parent is not self._c_node:
 984              raise ValueError, u"Element is not a child of this node."
 985          c_old_next = c_old_node.next
 986          c_new_node = new_element._c_node
 987          c_new_next = c_new_node.next
 988          c_source_doc = c_new_node.doc
 989          tree.xmlReplaceNode(c_old_node, c_new_node)
 990          _moveTail(c_new_next, c_new_node)
 991          _moveTail(c_old_next, c_old_node)
 992          moveNodeToDocument(self._doc, c_source_doc, c_new_node)
 993          # fix namespace declarations
 994          moveNodeToDocument(self._doc, c_old_node.doc, c_old_node)
 995  
 996      # PROPERTIES
 997      property tag:
 998          u"""Element tag
 999          """
1000          def __get__(self):
1001              if self._tag is not None:
1002                  return self._tag
1003              _assertValidNode(self)
1004              self._tag = _namespacedName(self._c_node)
1005              return self._tag
1006  
1007          def __set__(self, value):
1008              cdef _BaseParser parser
1009              _assertValidNode(self)
1010              ns, name = _getNsTag(value)
1011              parser = self._doc._parser
1012              if parser is not None and parser._for_html:
1013                  _htmlTagValidOrRaise(name)
1014              else:
1015                  _tagValidOrRaise(name)
1016              self._tag = value
1017              tree.xmlNodeSetName(self._c_node, _xcstr(name))
1018              if ns is None:
1019                  self._c_node.ns = NULL
1020              else:
1021                  self._doc._setNodeNs(self._c_node, _xcstr(ns))
1022  
1023      @property
1024      def attrib(self):
1025          """Element attribute dictionary. Where possible, use get(), set(),
1026          keys(), values() and items() to access element attributes.
1027          """
1028          return _Attrib.__new__(_Attrib, self)
1029  
1030      property text:
1031          u"""Text before the first subelement. This is either a string or
1032          the value None, if there was no text.
1033          """
1034          def __get__(self):
1035              _assertValidNode(self)
1036              return _collectText(self._c_node.children)
1037  
1038          def __set__(self, value):
1039              _assertValidNode(self)
1040              if isinstance(value, QName):
1041                  value = _resolveQNameText(self, value).decode('utf8')
1042              _setNodeText(self._c_node, value)
1043  
1044          # using 'del el.text' is the wrong thing to do
1045          #def __del__(self):
1046          #    _setNodeText(self._c_node, None)
1047  
1048      property tail:
1049          u"""Text after this element's end tag, but before the next sibling
1050          element's start tag. This is either a string or the value None, if
1051          there was no text.
1052          """
1053          def __get__(self):
1054              _assertValidNode(self)
1055              return _collectText(self._c_node.next)
1056  
1057          def __set__(self, value):
1058              _assertValidNode(self)
1059              _setTailText(self._c_node, value)
1060  
1061          # using 'del el.tail' is the wrong thing to do
1062          #def __del__(self):
1063          #    _setTailText(self._c_node, None)
1064  
1065      # not in ElementTree, read-only
1066      @property
1067      def prefix(self):
1068          """Namespace prefix or None.
1069          """
1070          if self._c_node.ns is not NULL:
1071              if self._c_node.ns.prefix is not NULL:
1072                  return funicode(self._c_node.ns.prefix)
1073          return None
1074  
    # not in ElementTree, writable (the setter stores the line number on the node)
1076      property sourceline:
1077          u"""Original line number as found by the parser or None if unknown.
1078          """
1079          def __get__(self):
1080              cdef long line
1081              _assertValidNode(self)
1082              line = tree.xmlGetLineNo(self._c_node)
1083              return line if line > 0 else None
1084  
1085          def __set__(self, line):
1086              _assertValidNode(self)
1087              if line <= 0:
1088                  self._c_node.line = 0
1089              else:
1090                  self._c_node.line = line
1091  
1092      # not in ElementTree, read-only
1093      @property
1094      def nsmap(self):
1095          """Namespace prefix->URI mapping known in the context of this
1096          Element.  This includes all namespace declarations of the
1097          parents.
1098  
1099          Note that changing the returned dict has no effect on the Element.
1100          """
1101          _assertValidNode(self)
1102          return _build_nsmap(self._c_node)
1103  
    # not in ElementTree, writable (the setter sets an xml:base attribute)
1105      property base:
1106          u"""The base URI of the Element (xml:base or HTML base URL).
1107          None if the base URI is unknown.
1108  
1109          Note that the value depends on the URL of the document that
1110          holds the Element if there is no xml:base attribute on the
1111          Element or its ancestors.
1112  
1113          Setting this property will set an xml:base attribute on the
1114          Element, regardless of the document type (XML or HTML).
1115          """
1116          def __get__(self):
1117              _assertValidNode(self)
1118              c_base = tree.xmlNodeGetBase(self._doc._c_doc, self._c_node)
1119              if c_base is NULL:
1120                  if self._doc._c_doc.URL is NULL:
1121                      return None
1122                  return _decodeFilename(self._doc._c_doc.URL)
1123              try:
1124                  base = _decodeFilename(c_base)
1125              finally:
1126                  tree.xmlFree(c_base)
1127              return base
1128  
1129          def __set__(self, url):
1130              _assertValidNode(self)
1131              if url is None:
1132                  c_base = <const_xmlChar*>NULL
1133              else:
1134                  url = _encodeFilename(url)
1135                  c_base = _xcstr(url)
1136              tree.xmlNodeSetBase(self._c_node, c_base)
1137  
1138      # ACCESSORS
1139      def __repr__(self):
1140          u"__repr__(self)"
1141          return "<Element %s at 0x%x>" % (strrepr(self.tag), id(self))
1142  
1143      def __getitem__(self, x):
1144          u"""Returns the subelement at the given position or the requested
1145          slice.
1146          """
1147          cdef xmlNode* c_node = NULL
1148          cdef Py_ssize_t step = 0, slicelength = 0
1149          cdef Py_ssize_t c, i
1150          cdef _node_to_node_function next_element
1151          cdef list result
1152          _assertValidNode(self)
1153          if isinstance(x, slice):
1154              # slicing
1155              if _isFullSlice(<slice>x):
1156                  return _collectChildren(self)
1157              _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength)
1158              if c_node is NULL:
1159                  return []
1160              if step > 0:
1161                  next_element = _nextElement
1162              else:
1163                  step = -step
1164                  next_element = _previousElement
1165              result = []
1166              c = 0
1167              while c_node is not NULL and c < slicelength:
1168                  result.append(_elementFactory(self._doc, c_node))
1169                  c += 1
1170                  for i in range(step):
1171                      c_node = next_element(c_node)
1172                      if c_node is NULL:
1173                          break
1174              return result
1175          else:
1176              # indexing
1177              c_node = _findChild(self._c_node, x)
1178              if c_node is NULL:
1179                  raise IndexError, u"list index out of range"
1180              return _elementFactory(self._doc, c_node)
1181  
1182      def __len__(self):
1183          u"""__len__(self)
1184  
1185          Returns the number of subelements.
1186          """
1187          _assertValidNode(self)
1188          return _countElements(self._c_node.children)
1189  
1190      def __nonzero__(self):
1191          #u"__nonzero__(self)" # currently fails in Py3.1
1192          import warnings
1193          warnings.warn(
1194              u"The behavior of this method will change in future versions. "
1195              u"Use specific 'len(elem)' or 'elem is not None' test instead.",
1196              FutureWarning
1197              )
1198          # emulate old behaviour
1199          _assertValidNode(self)
1200          return _hasChild(self._c_node)
1201  
1202      def __contains__(self, element):
1203          u"__contains__(self, element)"
1204          cdef xmlNode* c_node
1205          _assertValidNode(self)
1206          if not isinstance(element, _Element):
1207              return 0
1208          c_node = (<_Element>element)._c_node
1209          return c_node is not NULL and c_node.parent is self._c_node
1210  
1211      def __iter__(self):
1212          u"__iter__(self)"
1213          return ElementChildIterator(self)
1214  
1215      def __reversed__(self):
1216          u"__reversed__(self)"
1217          return ElementChildIterator(self, reversed=True)
1218  
1219      def index(self, _Element child not None, start: int = None, stop: int = None):
1220          u"""index(self, child, start=None, stop=None)
1221  
1222          Find the position of the child within the parent.
1223  
1224          This method is not part of the original ElementTree API.
1225          """
1226          cdef Py_ssize_t k, l
1227          cdef Py_ssize_t c_start, c_stop
1228          cdef xmlNode* c_child
1229          cdef xmlNode* c_start_node
1230          _assertValidNode(self)
1231          _assertValidNode(child)
1232          c_child = child._c_node
1233          if c_child.parent is not self._c_node:
1234              raise ValueError, u"Element is not a child of this node."
1235  
1236          # handle the unbounded search straight away (normal case)
1237          if stop is None and (start is None or start == 0):
1238              k = 0
1239              c_child = c_child.prev
1240              while c_child is not NULL:
1241                  if _isElement(c_child):
1242                      k += 1
1243                  c_child = c_child.prev
1244              return k
1245  
1246          # check indices
1247          if start is None:
1248              c_start = 0
1249          else:
1250              c_start = start
1251          if stop is None:
1252              c_stop = 0
1253          else:
1254              c_stop = stop
1255              if c_stop == 0 or \
1256                     c_start >= c_stop and (c_stop > 0 or c_start < 0):
1257                  raise ValueError, u"list.index(x): x not in slice"
1258  
1259          # for negative slice indices, check slice before searching index
1260          if c_start < 0 or c_stop < 0:
1261              # start from right, at most up to leftmost(c_start, c_stop)
1262              if c_start < c_stop:
1263                  k = -c_start
1264              else:
1265                  k = -c_stop
1266              c_start_node = self._c_node.last
1267              l = 1
1268              while c_start_node != c_child and l < k:
1269                  if _isElement(c_start_node):
1270                      l += 1
1271                  c_start_node = c_start_node.prev
1272              if c_start_node == c_child:
1273                  # found! before slice end?
1274                  if c_stop < 0 and l <= -c_stop:
1275                      raise ValueError, u"list.index(x): x not in slice"
1276              elif c_start < 0:
1277                  raise ValueError, u"list.index(x): x not in slice"
1278  
1279          # now determine the index backwards from child
1280          c_child = c_child.prev
1281          k = 0
1282          if c_stop > 0:
1283              # we can optimize: stop after c_stop elements if not found
1284              while c_child != NULL and k < c_stop:
1285                  if _isElement(c_child):
1286                      k += 1
1287                  c_child = c_child.prev
1288              if k < c_stop:
1289                  return k
1290          else:
1291              # traverse all
1292              while c_child != NULL:
1293                  if _isElement(c_child):
1294                      k = k + 1
1295                  c_child = c_child.prev
1296              if c_start > 0:
1297                  if k >= c_start:
1298                      return k
1299              else:
1300                  return k
1301          if c_start != 0 or c_stop != 0:
1302              raise ValueError, u"list.index(x): x not in slice"
1303          else:
1304              raise ValueError, u"list.index(x): x not in list"
1305  
1306      def get(self, key, default=None):
1307          u"""get(self, key, default=None)
1308  
1309          Gets an element attribute.
1310          """
1311          _assertValidNode(self)
1312          return _getAttributeValue(self, key, default)
1313  
1314      def keys(self):
1315          u"""keys(self)
1316  
1317          Gets a list of attribute names.  The names are returned in an
1318          arbitrary order (just like for an ordinary Python dictionary).
1319          """
1320          _assertValidNode(self)
1321          return _collectAttributes(self._c_node, 1)
1322  
1323      def values(self):
1324          u"""values(self)
1325  
1326          Gets element attribute values as a sequence of strings.  The
1327          attributes are returned in an arbitrary order.
1328          """
1329          _assertValidNode(self)
1330          return _collectAttributes(self._c_node, 2)
1331  
1332      def items(self):
1333          u"""items(self)
1334  
1335          Gets element attributes, as a sequence. The attributes are returned in
1336          an arbitrary order.
1337          """
1338          _assertValidNode(self)
1339          return _collectAttributes(self._c_node, 3)
1340  
1341      def getchildren(self):
1342          u"""getchildren(self)
1343  
1344          Returns all direct children.  The elements are returned in document
1345          order.
1346  
1347          :deprecated: Note that this method has been deprecated as of
1348            ElementTree 1.3 and lxml 2.0.  New code should use
1349            ``list(element)`` or simply iterate over elements.
1350          """
1351          _assertValidNode(self)
1352          return _collectChildren(self)
1353  
1354      def getparent(self):
1355          u"""getparent(self)
1356  
1357          Returns the parent of this element or None for the root element.
1358          """
1359          cdef xmlNode* c_node
1360          #_assertValidNode(self) # not needed
1361          c_node = _parentElement(self._c_node)
1362          if c_node is NULL:
1363              return None
1364          return _elementFactory(self._doc, c_node)
1365  
1366      def getnext(self):
1367          u"""getnext(self)
1368  
1369          Returns the following sibling of this element or None.
1370          """
1371          cdef xmlNode* c_node
1372          #_assertValidNode(self) # not needed
1373          c_node = _nextElement(self._c_node)
1374          if c_node is NULL:
1375              return None
1376          return _elementFactory(self._doc, c_node)
1377  
1378      def getprevious(self):
1379          u"""getprevious(self)
1380  
1381          Returns the preceding sibling of this element or None.
1382          """
1383          cdef xmlNode* c_node
1384          #_assertValidNode(self) # not needed
1385          c_node = _previousElement(self._c_node)
1386          if c_node is NULL:
1387              return None
1388          return _elementFactory(self._doc, c_node)
1389  
1390      def itersiblings(self, tag=None, *tags, preceding=False):
1391          u"""itersiblings(self, tag=None, *tags, preceding=False)
1392  
1393          Iterate over the following or preceding siblings of this element.
1394  
1395          The direction is determined by the 'preceding' keyword which
1396          defaults to False, i.e. forward iteration over the following
1397          siblings.  When True, the iterator yields the preceding
1398          siblings in reverse document order, i.e. starting right before
1399          the current element and going backwards.
1400  
1401          Can be restricted to find only elements with specific tags,
1402          see `iter`.
1403          """
1404          if preceding:
1405              if self._c_node and not self._c_node.prev:
1406                  return ITER_EMPTY
1407          elif self._c_node and not self._c_node.next:
1408              return ITER_EMPTY
1409          if tag is not None:
1410              tags += (tag,)
1411          return SiblingsIterator(self, tags, preceding=preceding)
1412  
1413      def iterancestors(self, tag=None, *tags):
1414          u"""iterancestors(self, tag=None, *tags)
1415  
1416          Iterate over the ancestors of this element (from parent to parent).
1417  
1418          Can be restricted to find only elements with specific tags,
1419          see `iter`.
1420          """
1421          if self._c_node and not self._c_node.parent:
1422              return ITER_EMPTY
1423          if tag is not None:
1424              tags += (tag,)
1425          return AncestorsIterator(self, tags)
1426  
1427      def iterdescendants(self, tag=None, *tags):
1428          u"""iterdescendants(self, tag=None, *tags)
1429  
1430          Iterate over the descendants of this element in document order.
1431  
1432          As opposed to ``el.iter()``, this iterator does not yield the element
1433          itself.  The returned elements can be restricted to find only elements
1434          with specific tags, see `iter`.
1435          """
1436          if self._c_node and not self._c_node.children:
1437              return ITER_EMPTY
1438          if tag is not None:
1439              tags += (tag,)
1440          return ElementDepthFirstIterator(self, tags, inclusive=False)
1441  
1442      def iterchildren(self, tag=None, *tags, reversed=False):
1443          u"""iterchildren(self, tag=None, *tags, reversed=False)
1444  
1445          Iterate over the children of this element.
1446  
1447          As opposed to using normal iteration on this element, the returned
1448          elements can be reversed with the 'reversed' keyword and restricted
1449          to find only elements with specific tags, see `iter`.
1450          """
1451          if self._c_node and not self._c_node.children:
1452              return ITER_EMPTY
1453          if tag is not None:
1454              tags += (tag,)
1455          return ElementChildIterator(self, tags, reversed=reversed)
1456  
1457      def getroottree(self):
1458          u"""getroottree(self)
1459  
1460          Return an ElementTree for the root node of the document that
1461          contains this element.
1462  
1463          This is the same as following element.getparent() up the tree until it
1464          returns None (for the root element) and then build an ElementTree for
1465          the last parent that was returned."""
1466          _assertValidDoc(self._doc)
1467          return _elementTreeFactory(self._doc, None)
1468  
1469      def getiterator(self, tag=None, *tags):
1470          u"""getiterator(self, tag=None, *tags)
1471  
1472          Returns a sequence or iterator of all elements in the subtree in
1473          document order (depth first pre-order), starting with this
1474          element.
1475  
1476          Can be restricted to find only elements with specific tags,
1477          see `iter`.
1478  
1479          :deprecated: Note that this method is deprecated as of
1480            ElementTree 1.3 and lxml 2.0.  It returns an iterator in
1481            lxml, which diverges from the original ElementTree
1482            behaviour.  If you want an efficient iterator, use the
1483            ``element.iter()`` method instead.  You should only use this
1484            method in new code if you require backwards compatibility
1485            with older versions of lxml or ElementTree.
1486          """
1487          if tag is not None:
1488              tags += (tag,)
1489          return ElementDepthFirstIterator(self, tags)
1490  
1491      def iter(self, tag=None, *tags):
1492          u"""iter(self, tag=None, *tags)
1493  
1494          Iterate over all elements in the subtree in document order (depth
1495          first pre-order), starting with this element.
1496  
1497          Can be restricted to find only elements with specific tags:
1498          pass ``"{ns}localname"`` as tag. Either or both of ``ns`` and
1499          ``localname`` can be ``*`` for a wildcard; ``ns`` can be empty
1500          for no namespace. ``"localname"`` is equivalent to ``"{}localname"``
1501          (i.e. no namespace) but ``"*"`` is ``"{*}*"`` (any or no namespace),
1502          not ``"{}*"``.
1503  
1504          You can also pass the Element, Comment, ProcessingInstruction and
1505          Entity factory functions to look only for the specific element type.
1506  
1507          Passing multiple tags (or a sequence of tags) instead of a single tag
1508          will let the iterator return all elements matching any of these tags,
1509          in document order.
1510          """
1511          if tag is not None:
1512              tags += (tag,)
1513          return ElementDepthFirstIterator(self, tags)
1514  
1515      def itertext(self, tag=None, *tags, with_tail=True):
1516          u"""itertext(self, tag=None, *tags, with_tail=True)
1517  
1518          Iterates over the text content of a subtree.
1519  
1520          You can pass tag names to restrict text content to specific elements,
1521          see `iter`.
1522  
1523          You can set the ``with_tail`` keyword argument to ``False`` to skip
1524          over tail text.
1525          """
1526          if tag is not None:
1527              tags += (tag,)
1528          return ElementTextIterator(self, tags, with_tail=with_tail)
1529  
1530      def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
1531          u"""makeelement(self, _tag, attrib=None, nsmap=None, **_extra)
1532  
1533          Creates a new element associated with the same document.
1534          """
1535          _assertValidDoc(self._doc)
1536          return _makeElement(_tag, NULL, self._doc, None, None, None,
1537                              attrib, nsmap, _extra)
1538  
1539      def find(self, path, namespaces=None):
1540          u"""find(self, path, namespaces=None)
1541  
1542          Finds the first matching subelement, by tag name or path.
1543  
1544          The optional ``namespaces`` argument accepts a
1545          prefix-to-namespace mapping that allows the usage of XPath
1546          prefixes in the path expression.
1547          """
1548          if isinstance(path, QName):
1549              path = (<QName>path).text
1550          return _elementpath.find(self, path, namespaces)
1551  
1552      def findtext(self, path, default=None, namespaces=None):
1553          u"""findtext(self, path, default=None, namespaces=None)
1554  
1555          Finds text for the first matching subelement, by tag name or path.
1556  
1557          The optional ``namespaces`` argument accepts a
1558          prefix-to-namespace mapping that allows the usage of XPath
1559          prefixes in the path expression.
1560          """
1561          if isinstance(path, QName):
1562              path = (<QName>path).text
1563          return _elementpath.findtext(self, path, default, namespaces)
1564  
1565      def findall(self, path, namespaces=None):
1566          u"""findall(self, path, namespaces=None)
1567  
1568          Finds all matching subelements, by tag name or path.
1569  
1570          The optional ``namespaces`` argument accepts a
1571          prefix-to-namespace mapping that allows the usage of XPath
1572          prefixes in the path expression.
1573          """
1574          if isinstance(path, QName):
1575              path = (<QName>path).text
1576          return _elementpath.findall(self, path, namespaces)
1577  
1578      def iterfind(self, path, namespaces=None):
1579          u"""iterfind(self, path, namespaces=None)
1580  
1581          Iterates over all matching subelements, by tag name or path.
1582  
1583          The optional ``namespaces`` argument accepts a
1584          prefix-to-namespace mapping that allows the usage of XPath
1585          prefixes in the path expression.
1586          """
1587          if isinstance(path, QName):
1588              path = (<QName>path).text
1589          return _elementpath.iterfind(self, path, namespaces)
1590  
1591      def xpath(self, _path, *, namespaces=None, extensions=None,
1592                smart_strings=True, **_variables):
1593          u"""xpath(self, _path, namespaces=None, extensions=None, smart_strings=True, **_variables)
1594  
1595          Evaluate an xpath expression using the element as context node.
1596          """
1597          evaluator = XPathElementEvaluator(self, namespaces=namespaces,
1598                                            extensions=extensions,
1599                                            smart_strings=smart_strings)
1600          return evaluator(_path, **_variables)
1601  
1602      def cssselect(self, expr, *, translator='xml'):
1603          """
1604          Run the CSS expression on this element and its children,
1605          returning a list of the results.
1606  
1607          Equivalent to lxml.cssselect.CSSSelect(expr)(self) -- note
1608          that pre-compiling the expression can provide a substantial
1609          speedup.
1610          """
1611          # Do the import here to make the dependency optional.
1612          from lxml.cssselect import CSSSelector
1613          return CSSSelector(expr, translator=translator)(self)
1614  
1615  
cdef extern from "includes/etree_defs.h":
    # NEW_ELEMENT(t) is the Cython-side name of the C macro "PY_NEW":
    # a macro call to 't->tp_new()' for fast instantiation of type 't'.
    cdef object NEW_ELEMENT "PY_NEW" (object t)
1619  
1620  
@cython.linetrace(False)
cdef _Element _elementFactory(_Document doc, xmlNode* c_node):
    # Return the Python proxy for the C node 'c_node', creating and
    # registering a new one if the node has none yet.  Returns None if no
    # proxy exists and 'c_node' is NULL.
    cdef _Element result
    # Fast path: the node already has a proxy.
    # NOTE(review): getProxy() is called before the NULL check below, so it
    # is presumably safe for a NULL node -- confirm in its definition.
    result = getProxy(c_node)
    if result is not None:
        return result
    if c_node is NULL:
        return None

    # Ask the configured class lookup which Python class to instantiate.
    element_class = LOOKUP_ELEMENT_CLASS(
        ELEMENT_CLASS_LOOKUP_STATE, doc, c_node)
    if hasProxy(c_node):
        # prevent re-entry race condition - we just called into Python
        return getProxy(c_node)
    result = NEW_ELEMENT(element_class)
    if hasProxy(c_node):
        # prevent re-entry race condition - we just called into Python
        # Detach the instance we just created from any C node before
        # discarding it in favour of the proxy that was registered meanwhile.
        result._c_node = NULL
        return getProxy(c_node)

    _registerProxy(result, doc, c_node)
    # _init() is only called for classes other than the plain _Element.
    if element_class is not _Element:
        result._init()
    return result
1645  
1646  
@cython.internal
cdef class __ContentOnlyElement(_Element):
    u"""Internal base class of the content-only node proxies
    (`_Comment`, `_ProcessingInstruction` and `_Entity`).

    These elements carry text content but neither children nor
    attributes.  Mutating container/attribute operations raise
    ``TypeError``; read accessors behave like those of an empty element.
    """
    cdef int _raiseImmutable(self) except -1:
        # Always raises.  Declared 'except -1' so that the exception
        # propagates through this C-level method.
        raise TypeError, u"this element does not have children or attributes"

    def set(self, key, value):
        u"""set(self, key, value)

        Not supported: always raises ``TypeError``.
        """
        self._raiseImmutable()

    def append(self, value):
        u"""append(self, value)

        Not supported: always raises ``TypeError``.
        """
        self._raiseImmutable()

    def insert(self, index, value):
        u"""insert(self, index, value)

        Not supported: always raises ``TypeError``.
        """
        self._raiseImmutable()

    def __setitem__(self, index, value):
        u"__setitem__(self, index, value)"
        self._raiseImmutable()

    @property
    def attrib(self):
        # A shared, immutable, empty mapping: there are no attributes.
        return IMMUTABLE_EMPTY_MAPPING

    property text:
        def __get__(self):
            _assertValidNode(self)
            return funicodeOrEmpty(self._c_node.content)

        def __set__(self, value):
            _assertValidNode(self)
            if value is None:
                # None clears the node content.
                c_text = <const_xmlChar*>NULL
            else:
                # Keep the encoded bytes object referenced in 'value' while
                # the C pointer into it is in use.
                value = _utf8(value)
                c_text = _xcstr(value)
            tree.xmlNodeSetContent(self._c_node, c_text)

    # ACCESSORS
    def __getitem__(self, x):
        u"__getitem__(self, x)"
        # Behave like an empty sequence: slices are empty, indices fail.
        if isinstance(x, slice):
            return []
        else:
            raise IndexError, u"list index out of range"

    def __len__(self):
        u"__len__(self)"
        return 0

    def get(self, key, default=None):
        u"""get(self, key, default=None)

        There are no attributes, so no key is ever found and the
        ``default`` value is returned.
        """
        # Honour the caller-provided default instead of discarding it.
        return default

    def keys(self):
        u"""keys(self)

        Always returns an empty list.
        """
        return []

    def items(self):
        u"""items(self)

        Always returns an empty list.
        """
        return []

    def values(self):
        u"""values(self)

        Always returns an empty list.
        """
        return []
1714  
cdef class _Comment(__ContentOnlyElement):
    u"""Element proxy whose ``tag`` is the module-level ``Comment`` object
    and whose repr looks like an XML comment.
    """
    @property
    def tag(self):
        return Comment

    def __repr__(self):
        comment_text = strrepr(self.text)
        return "<!--%s-->" % comment_text
1722  
cdef class _ProcessingInstruction(__ContentOnlyElement):
    u"""Element proxy whose ``tag`` is the module-level
    ``ProcessingInstruction`` object.  Adds the ``target`` property and
    read access to pseudo-attributes parsed from the text content.
    """
    @property
    def tag(self):
        return ProcessingInstruction

    property target:
        # not in ElementTree
        def __get__(self):
            _assertValidNode(self)
            return funicode(self._c_node.name)

        def __set__(self, value):
            _assertValidNode(self)
            # keep the encoded bytes alive while its C pointer is in use
            target_utf = _utf8(value)
            tree.xmlNodeSetName(self._c_node, _xcstr(target_utf))

    def __repr__(self):
        content = self.text
        target = strrepr(self.target)
        if not content:
            return "<?%s?>" % target
        return "<?%s %s?>" % (target, strrepr(content))

    def get(self, key, default=None):
        u"""get(self, key, default=None)

        Looks up ``key`` among the pseudo-attributes parsed from the text
        content of the processing instruction (see ``attrib``) and returns
        its value, or ``default`` if there is no such pseudo-attribute.

        This is only a convenience method for the most common case that
        all text content is structured in attribute-like name-value pairs
        with properly quoted values.  It is not guaranteed to work for all
        possible text content.
        """
        return self.attrib.get(key, default)

    @property
    def attrib(self):
        """Returns a dict containing all pseudo-attributes that can be
        parsed from the text content of this processing instruction.
        Note that modifying the dict currently has no effect on the
        XML node, although this is not guaranteed to stay this way.
        """
        # The pattern requires leading whitespace before each name, so a
        # space is prepended to let the very first pair match as well.
        pseudo_attributes = {}
        for name, single_quoted, double_quoted in _FIND_PI_ATTRIBUTES(u' ' + self.text):
            pseudo_attributes[name] = single_quoted or double_quoted
        return pseudo_attributes
1771  
# Bound 'findall' of a compiled regex that matches whitespace-preceded
# pseudo-attributes of the form name='value' or name="value".  Each match
# is a 3-tuple (name, single_quoted_value, double_quoted_value).
cdef object _FIND_PI_ATTRIBUTES = re.compile(ur'\s+(\w+)\s*=\s*(?:\'([^\']*)\'|"([^"]*)")', re.U).findall
1773  
cdef class _Entity(__ContentOnlyElement):
    u"""Element proxy whose ``tag`` is the module-level ``Entity`` object.
    Exposes the entity name and renders itself as ``&name;``.
    """
    @property
    def tag(self):
        return Entity

    property name:
        # not in ElementTree
        def __get__(self):
            _assertValidNode(self)
            return funicode(self._c_node.name)

        def __set__(self, value):
            _assertValidNode(self)
            name_utf = _utf8(value)
            # the reference delimiters must not be part of the name itself
            for delimiter in (b'&', b';'):
                if delimiter in name_utf:
                    raise ValueError, f"Invalid entity name '{value}'"
            tree.xmlNodeSetName(self._c_node, _xcstr(name_utf))

    @property
    def text(self):
        # FIXME: should this be None or '&[VALUE];' or the resolved
        # entity value ?
        _assertValidNode(self)
        entity_name = funicode(self._c_node.name)
        return f'&{entity_name};'

    def __repr__(self):
        entity_name = strrepr(self.name)
        return "&%s;" % entity_name
1801  
1802  
cdef class QName:
    u"""QName(text_or_uri_or_element, tag=None)

    QName wrapper for qualified XML names.

    Pass a tag name by itself or a namespace URI and a tag name to
    create a qualified name.  Alternatively, pass an Element to
    extract its tag name.  ``None`` as first argument is ignored in
    order to allow for generic 2-argument usage.

    The ``text`` property holds the qualified name in
    ``{namespace}tagname`` notation.  The ``namespace`` and
    ``localname`` properties hold the respective parts of the tag
    name.

    You can pass QName objects wherever a tag name is expected.  Also,
    setting Element text from a QName will resolve the namespace prefix
    on assignment and set a qualified text value.  This is helpful in XML
    languages like SOAP or XML-Schema that use prefixed tag names in
    their text content.
    """
    cdef readonly unicode text
    cdef readonly unicode localname
    cdef readonly unicode namespace
    def __init__(self, text_or_uri_or_element, tag=None):
        source = text_or_uri_or_element
        if source is None:
            # Allow None as no namespace.
            source, tag = tag, None

        # Normalise the first argument to a string.
        if _isString(source):
            pass  # already a string, nothing to convert
        elif isinstance(source, _Element):
            source = (<_Element>source).tag
            if not _isString(source):
                raise ValueError, f"Invalid input tag of type {type(source)!r}"
        elif isinstance(source, QName):
            source = (<QName>source).text
        elif source is None:
            raise ValueError, f"Invalid input tag of type {type(source)!r}"
        else:
            source = unicode(source)

        ns_utf, tag_utf = _getNsTag(source)
        if tag is not None:
            # either ('ns', 'tag') or ('{ns}oldtag', 'newtag')
            if ns_utf is None:
                ns_utf = tag_utf # case 1: namespace ended up as tag name
            tag_utf = _utf8(tag)
        _tagValidOrRaise(tag_utf)

        self.localname = (<bytes>tag_utf).decode('utf8')
        if ns_utf is not None:
            self.namespace = (<bytes>ns_utf).decode('utf8')
            self.text = u"{%s}%s" % (self.namespace, self.localname)
        else:
            self.namespace = None
            self.text = self.localname
    def __str__(self):
        return self.text
    def __hash__(self):
        # hashes like its qualified text
        return hash(self.text)
    def __richcmp__(self, other, int op):
        # Compare by qualified text; other operands are converted to text.
        try:
            if type(other) is QName:
                other_text = (<QName>other).text
            elif isinstance(other, unicode):
                other_text = other
            else:
                other_text = unicode(other)
        except (ValueError, UnicodeDecodeError):
            return NotImplemented
        return python.PyObject_RichCompare(self.text, other_text, op)
1870  
1871  
1872  cdef public class _ElementTree [ type LxmlElementTreeType,
1873                                   object LxmlElementTree ]:
1874      cdef _Document _doc
1875      cdef _Element _context_node
1876  
1877      # Note that _doc is only used to store the original document if we do not
1878      # have a _context_node.  All methods should prefer self._context_node._doc
1879      # to honour tree restructuring.  _doc can happily be None!
1880  
    @cython.final
    cdef int _assertHasRoot(self) except -1:
        u"""We have to take care here: the document may not have a root node!
        This can happen if ElementTree() is called without any argument and
        the caller 'forgets' to call parse() afterwards, so this is a bug in
        the caller program.

        Fails with an AssertionError if no context node is set; returns 0
        otherwise.
        """
        assert self._context_node is not None, \
               u"ElementTree not initialized, missing root"
        return 0
1891  
1892      def parse(self, source, _BaseParser parser=None, *, base_url=None):
1893          u"""parse(self, source, parser=None, base_url=None)
1894  
1895          Updates self with the content of source and returns its root.
1896          """
1897          cdef _Document doc = None
1898          try:
1899              doc = _parseDocument(source, parser, base_url)
1900          except _TargetParserResult as result_container:
1901              # raises a TypeError if we don't get an _Element
1902              self._context_node = result_container.result
1903          else:
1904              self._context_node = doc.getroot()
1905          self._doc = None if self._context_node is not None else doc
1906          return self._context_node
1907  
1908      def _setroot(self, _Element root not None):
1909          u"""_setroot(self, root)
1910  
1911          Relocate the ElementTree to a new root node.
1912          """
1913          _assertValidNode(root)
1914          if root._c_node.type != tree.XML_ELEMENT_NODE:
1915              raise TypeError, u"Only elements can be the root of an ElementTree"
1916          self._context_node = root
1917          self._doc = None
1918  
    def getroot(self):
        u"""getroot(self)

        Gets the root element for this tree.

        Returns the current context node, which is ``None`` if no root
        has been set.
        """
        return self._context_node
1925  
    def __copy__(self):
        # Shallow copy: the new tree is built from the same document and
        # the same context node, nothing is duplicated here.
        return _elementTreeFactory(self._doc, self._context_node)
1928  
    def __deepcopy__(self, memo):
        # Deep copy of the tree.  'memo' is accepted for the copy protocol
        # but not used by this implementation.
        cdef _Element root
        cdef _Document doc
        cdef xmlDoc* c_doc
        if self._context_node is not None:
            # Copy the root element, then also copy the non-element
            # siblings of the original root over to the copy.
            root = self._context_node.__copy__()
            assert root is not None
            _assertValidNode(root)
            _copyNonElementSiblings(self._context_node._c_node, root._c_node)
            return _elementTreeFactory(None, root)
        elif self._doc is not None:
            # No root element: copy the stored document instead.
            _assertValidDoc(self._doc)
            c_doc = tree.xmlCopyDoc(self._doc._c_doc, 1)
            if c_doc is NULL:
                raise MemoryError()
            doc = _documentFactory(c_doc, self._doc._parser)
            return _elementTreeFactory(doc, None)
        else:
            # so what ...
            # Neither a root nor a document: the tree itself is returned,
            # i.e. no new object is created in this case.
            return self
1949  
    # not in ElementTree
    @property
    def docinfo(self) -> DocInfo:
        """Information about the document provided by parser and DTD.

        Requires a root element (checked by ``_assertHasRoot()``); the
        information is built from the document of the context node.
        """
        self._assertHasRoot()
        return DocInfo(self._context_node._doc)
1956  
1957      # not in ElementTree, read-only
1958      @property
1959      def parser(self):
1960          """The parser that was used to parse the document in this ElementTree.
1961          """
1962          if self._context_node is not None and \
1963                 self._context_node._doc is not None:
1964              return self._context_node._doc._parser
1965          if self._doc is not None:
1966              return self._doc._parser
1967          return None
1968  
    def write(self, file, *, encoding=None, method="xml",
              bint pretty_print=False, xml_declaration=None, bint with_tail=True,
              standalone=None, doctype=None, compression=0,
              bint exclusive=False, inclusive_ns_prefixes=None,
              bint with_comments=True, bint strip_text=False,
              docstring=None):
        u"""write(self, file, encoding=None, method="xml",
                  pretty_print=False, xml_declaration=None, with_tail=True,
                  standalone=None, doctype=None, compression=0,
                  exclusive=False, inclusive_ns_prefixes=None,
                  with_comments=True, strip_text=False)

        Write the tree to a filename, file or file-like object.

        Defaults to ASCII encoding and writing a declaration as needed.

        The keyword argument 'method' selects the output method:
        'xml', 'html', 'text', 'c14n' or 'c14n2'.  Default is 'xml'.

        With ``method="c14n"`` (C14N version 1), the options ``exclusive``,
        ``with_comments`` and ``inclusive_ns_prefixes`` request exclusive
        C14N, include comments, and list the inclusive prefixes respectively.

        With ``method="c14n2"`` (C14N version 2), the ``with_comments`` and
        ``strip_text`` options control the output of comments and text space
        according to C14N 2.0.

        For both C14N methods, passing an ``encoding`` or a true
        ``xml_declaration`` raises a ValueError.  For all other methods,
        ``with_comments=False`` raises a ValueError.

        Passing a boolean value to the ``standalone`` option will
        output an XML declaration with the corresponding
        ``standalone`` flag.

        The ``doctype`` option allows passing in a plain string that will
        be serialised before the XML tree.  Note that passing in non
        well-formed content here will make the XML output non well-formed.
        Also, an existing doctype in the document tree will not be removed
        when serialising an ElementTree instance.

        The ``docstring`` option is a deprecated alias of ``doctype``.  It
        is only used (with a DeprecationWarning) if ``doctype`` is not given.

        The ``compression`` option enables GZip compression level 1-9.
        ``None`` and negative values are treated as 0.

        The ``inclusive_ns_prefixes`` should be a list of namespace strings
        (i.e. ['xs', 'xsi']) that will be promoted to the top-level element
        during exclusive C14N serialisation.  This parameter is ignored if
        exclusive mode=False.

        If exclusive=True and no list is provided, a namespace will only be
        rendered if it is used by the immediate parent or one of its attributes
        and its prefix and values have not already been rendered by an ancestor
        of the namespace node's parent element.
        """
        cdef bint write_declaration
        cdef int is_standalone

        self._assertHasRoot()
        _assertValidNode(self._context_node)
        if compression is None or compression < 0:
            compression = 0

        # C14N serialisation
        if method in ('c14n', 'c14n2'):
            if encoding is not None:
                raise ValueError("Cannot specify encoding with C14N")
            if xml_declaration:
                raise ValueError("Cannot enable XML declaration in C14N")

            if method == 'c14n':
                _tofilelikeC14N(file, self._context_node, exclusive, with_comments,
                                compression, inclusive_ns_prefixes)
            else:  # c14n2
                with _open_utf8_file(file, compression=compression) as f:
                    target = C14NWriterTarget(
                        f.write, with_comments=with_comments, strip_text=strip_text)
                    _tree_to_target(self, target)
            return

        if not with_comments:
            raise ValueError("Can only discard comments in C14N serialisation")
        # suppress decl. in default case (purely for ElementTree compatibility)
        if xml_declaration is not None:
            # explicit request wins; only the encoding needs normalising
            write_declaration = xml_declaration
            if encoding is None:
                encoding = 'ASCII'
            else:
                encoding = encoding.upper()
        elif encoding is None:
            encoding = 'ASCII'
            write_declaration = 0
        else:
            # no explicit request: write a declaration only for encodings
            # other than ASCII and UTF-8
            encoding = encoding.upper()
            write_declaration = encoding not in (
                'US-ASCII', 'ASCII', 'UTF8', 'UTF-8')
        # is_standalone: -1 = not specified, 1 = yes, 0 = no.  Any explicit
        # value forces the declaration to be written.
        if standalone is None:
            is_standalone = -1
        elif standalone:
            write_declaration = 1
            is_standalone = 1
        else:
            write_declaration = 1
            is_standalone = 0

        # deprecated alias: only honoured if 'doctype' was not passed
        if docstring is not None and doctype is None:
            import warnings
            warnings.warn(
                "The 'docstring' option is deprecated. Use 'doctype' instead.",
                DeprecationWarning)
            doctype = docstring

        _tofilelike(file, self._context_node, encoding, doctype, method,
                    write_declaration, 1, pretty_print, with_tail,
                    is_standalone, compression)
2078  
2079      def getpath(self, _Element element not None):
2080          u"""getpath(self, element)
2081  
2082          Returns a structural, absolute XPath expression to find the element.
2083  
2084          For namespaced elements, the expression uses prefixes from the
2085          document, which therefore need to be provided in order to make any
2086          use of the expression in XPath.
2087  
2088          Also see the method getelementpath(self, element), which returns a
2089          self-contained ElementPath expression.
2090          """
2091          cdef _Document doc
2092          cdef _Element root
2093          cdef xmlDoc* c_doc
2094          _assertValidNode(element)
2095          if self._context_node is not None:
2096              root = self._context_node
2097              doc = root._doc
2098          elif self._doc is not None:
2099              doc = self._doc
2100              root = doc.getroot()
2101          else:
2102              raise ValueError, u"Element is not in this tree."
2103          _assertValidDoc(doc)
2104          _assertValidNode(root)
2105          if element._doc is not doc:
2106              raise ValueError, u"Element is not in this tree."
2107  
2108          c_doc = _fakeRootDoc(doc._c_doc, root._c_node)
2109          c_path = tree.xmlGetNodePath(element._c_node)
2110          _destroyFakeDoc(doc._c_doc, c_doc)
2111          if c_path is NULL:
2112              raise MemoryError()
2113          path = funicode(c_path)
2114          tree.xmlFree(c_path)
2115          return path
2116  
    def getelementpath(self, _Element element not None):
        u"""getelementpath(self, element)

        Returns a structural, absolute ElementPath expression to find the
        element.  This path can be used in the .find() method to look up
        the element, provided that the elements along the path and their
        list of immediate children were not modified in between.

        ElementPath has the advantage over an XPath expression (as returned
        by the .getpath() method) that it does not require additional prefix
        declarations.  It is always self-contained.

        Returns ``'.'`` if the element is the root of this tree.  Raises
        ``ValueError`` if the input is not an element node or if it cannot
        be found below the root of this tree.
        """
        cdef _Element root
        cdef Py_ssize_t count
        _assertValidNode(element)
        if element._c_node.type != tree.XML_ELEMENT_NODE:
            raise ValueError, u"input is not an Element"
        if self._context_node is not None:
            root = self._context_node
        elif self._doc is not None:
            root = self._doc.getroot()
        else:
            raise ValueError, u"Element is not in this tree"
        _assertValidNode(root)
        if element._doc is not root._doc:
            raise ValueError, u"Element is not in this tree"

        # Walk from the element up to the root, collecting one path step
        # per level (innermost first, reversed at the end).
        path = []
        c_element = element._c_node
        while c_element is not root._c_node:
            c_name = c_element.name
            c_href = _getNs(c_element)
            tag = _namespacedNameFromNsName(c_href, c_name)
            if c_href is NULL:
                c_href = <const_xmlChar*>b''  # no namespace (NULL is wildcard)
            # use tag[N] if there are preceding siblings with the same tag
            count = 0
            c_node = c_element.prev
            while c_node is not NULL:
                if c_node.type == tree.XML_ELEMENT_NODE:
                    if _tagMatches(c_node, c_href, c_name):
                        count += 1
                c_node = c_node.prev
            if count:
                # 'count' same-tag siblings precede this one, so its
                # 1-based position among them is count+1
                tag = f'{tag}[{count+1}]'
            else:
                # use tag[1] if there are following siblings with the same tag
                c_node = c_element.next
                while c_node is not NULL:
                    if c_node.type == tree.XML_ELEMENT_NODE:
                        if _tagMatches(c_node, c_href, c_name):
                            tag += '[1]'
                            break
                    c_node = c_node.next

            path.append(tag)
            c_element = c_element.parent
            # Ran out of element ancestors without reaching the root.
            if c_element is NULL or c_element.type != tree.XML_ELEMENT_NODE:
                raise ValueError, u"Element is not in this tree."
        if not path:
            # the element is the root itself
            return '.'
        path.reverse()
        return '/'.join(path)
2180  
2181      def getiterator(self, tag=None, *tags):
2182          u"""getiterator(self, *tags, tag=None)
2183  
2184          Returns a sequence or iterator of all elements in document order
2185          (depth first pre-order), starting with the root element.
2186  
2187          Can be restricted to find only elements with specific tags,
2188          see `_Element.iter`.
2189  
2190          :deprecated: Note that this method is deprecated as of
2191            ElementTree 1.3 and lxml 2.0.  It returns an iterator in
2192            lxml, which diverges from the original ElementTree
2193            behaviour.  If you want an efficient iterator, use the
2194            ``tree.iter()`` method instead.  You should only use this
2195            method in new code if you require backwards compatibility
2196            with older versions of lxml or ElementTree.
2197          """
2198          root = self.getroot()
2199          if root is None:
2200              return ITER_EMPTY
2201          if tag is not None:
2202              tags += (tag,)
2203          return root.getiterator(*tags)
2204  
2205      def iter(self, tag=None, *tags):
2206          u"""iter(self, tag=None, *tags)
2207  
2208          Creates an iterator for the root element.  The iterator loops over
2209          all elements in this tree, in document order.  Note that siblings
2210          of the root element (comments or processing instructions) are not
2211          returned by the iterator.
2212  
2213          Can be restricted to find only elements with specific tags,
2214          see `_Element.iter`.
2215          """
2216          root = self.getroot()
2217          if root is None:
2218              return ITER_EMPTY
2219          if tag is not None:
2220              tags += (tag,)
2221          return root.iter(*tags)
2222  
2223      def find(self, path, namespaces=None):
2224          u"""find(self, path, namespaces=None)
2225  
2226          Finds the first toplevel element with given tag.  Same as
2227          ``tree.getroot().find(path)``.
2228  
2229          The optional ``namespaces`` argument accepts a
2230          prefix-to-namespace mapping that allows the usage of XPath
2231          prefixes in the path expression.
2232          """
2233          self._assertHasRoot()
2234          root = self.getroot()
2235          if _isString(path):
2236              if path[:1] == "/":
2237                  path = "." + path
2238          return root.find(path, namespaces)
2239  
2240      def findtext(self, path, default=None, namespaces=None):
2241          u"""findtext(self, path, default=None, namespaces=None)
2242  
2243          Finds the text for the first element matching the ElementPath
2244          expression.  Same as getroot().findtext(path)
2245  
2246          The optional ``namespaces`` argument accepts a
2247          prefix-to-namespace mapping that allows the usage of XPath
2248          prefixes in the path expression.
2249          """
2250          self._assertHasRoot()
2251          root = self.getroot()
2252          if _isString(path):
2253              if path[:1] == "/":
2254                  path = "." + path
2255          return root.findtext(path, default, namespaces)
2256  
2257      def findall(self, path, namespaces=None):
2258          u"""findall(self, path, namespaces=None)
2259  
2260          Finds all elements matching the ElementPath expression.  Same as
2261          getroot().findall(path).
2262  
2263          The optional ``namespaces`` argument accepts a
2264          prefix-to-namespace mapping that allows the usage of XPath
2265          prefixes in the path expression.
2266          """
2267          self._assertHasRoot()
2268          root = self.getroot()
2269          if _isString(path):
2270              if path[:1] == "/":
2271                  path = "." + path
2272          return root.findall(path, namespaces)
2273  
2274      def iterfind(self, path, namespaces=None):
2275          u"""iterfind(self, path, namespaces=None)
2276  
2277          Iterates over all elements matching the ElementPath expression.
2278          Same as getroot().iterfind(path).
2279  
2280          The optional ``namespaces`` argument accepts a
2281          prefix-to-namespace mapping that allows the usage of XPath
2282          prefixes in the path expression.
2283          """
2284          self._assertHasRoot()
2285          root = self.getroot()
2286          if _isString(path):
2287              if path[:1] == "/":
2288                  path = "." + path
2289          return root.iterfind(path, namespaces)
2290  
2291      def xpath(self, _path, *, namespaces=None, extensions=None,
2292                smart_strings=True, **_variables):
2293          u"""xpath(self, _path, namespaces=None, extensions=None, smart_strings=True, **_variables)
2294  
2295          XPath evaluate in context of document.
2296  
2297          ``namespaces`` is an optional dictionary with prefix to namespace URI
2298          mappings, used by XPath.  ``extensions`` defines additional extension
2299          functions.
2300  
2301          Returns a list (nodeset), or bool, float or string.
2302  
2303          In case of a list result, return Element for element nodes,
2304          string for text and attribute values.
2305  
2306          Note: if you are going to apply multiple XPath expressions
2307          against the same document, it is more efficient to use
2308          XPathEvaluator directly.
2309          """
2310          self._assertHasRoot()
2311          evaluator = XPathDocumentEvaluator(self, namespaces=namespaces,
2312                                             extensions=extensions,
2313                                             smart_strings=smart_strings)
2314          return evaluator(_path, **_variables)
2315  
2316      def xslt(self, _xslt, extensions=None, access_control=None, **_kw):
2317          u"""xslt(self, _xslt, extensions=None, access_control=None, **_kw)
2318  
2319          Transform this document using other document.
2320  
2321          xslt is a tree that should be XSLT
2322          keyword parameters are XSLT transformation parameters.
2323  
2324          Returns the transformed tree.
2325  
2326          Note: if you are going to apply the same XSLT stylesheet against
2327          multiple documents, it is more efficient to use the XSLT
2328          class directly.
2329          """
2330          self._assertHasRoot()
2331          style = XSLT(_xslt, extensions=extensions,
2332                       access_control=access_control)
2333          return style(self, **_kw)
2334  
2335      def relaxng(self, relaxng):
2336          u"""relaxng(self, relaxng)
2337  
2338          Validate this document using other document.
2339  
2340          The relaxng argument is a tree that should contain a Relax NG schema.
2341  
2342          Returns True or False, depending on whether validation
2343          succeeded.
2344  
2345          Note: if you are going to apply the same Relax NG schema against
2346          multiple documents, it is more efficient to use the RelaxNG
2347          class directly.
2348          """
2349          self._assertHasRoot()
2350          schema = RelaxNG(relaxng)
2351          return schema.validate(self)
2352  
2353      def xmlschema(self, xmlschema):
2354          u"""xmlschema(self, xmlschema)
2355  
2356          Validate this document using other document.
2357  
2358          The xmlschema argument is a tree that should contain an XML Schema.
2359  
2360          Returns True or False, depending on whether validation
2361          succeeded.
2362  
2363          Note: If you are going to apply the same XML Schema against
2364          multiple documents, it is more efficient to use the XMLSchema
2365          class directly.
2366          """
2367          self._assertHasRoot()
2368          schema = XMLSchema(xmlschema)
2369          return schema.validate(self)
2370  
2371      def xinclude(self):
2372          u"""xinclude(self)
2373  
2374          Process the XInclude nodes in this document and include the
2375          referenced XML fragments.
2376  
2377          There is support for loading files through the file system, HTTP and
2378          FTP.
2379  
2380          Note that XInclude does not support custom resolvers in Python space
2381          due to restrictions of libxml2 <= 2.6.29.
2382          """
2383          self._assertHasRoot()
2384          XInclude()(self._context_node)
2385  
2386      def write_c14n(self, file, *, bint exclusive=False, bint with_comments=True,
2387                     compression=0, inclusive_ns_prefixes=None):
2388          u"""write_c14n(self, file, exclusive=False, with_comments=True,
2389                         compression=0, inclusive_ns_prefixes=None)
2390  
2391          C14N write of document. Always writes UTF-8.
2392  
2393          The ``compression`` option enables GZip compression level 1-9.
2394  
2395          The ``inclusive_ns_prefixes`` should be a list of namespace strings
2396          (i.e. ['xs', 'xsi']) that will be promoted to the top-level element
2397          during exclusive C14N serialisation.  This parameter is ignored if
2398          exclusive mode=False.
2399  
2400          If exclusive=True and no list is provided, a namespace will only be
2401          rendered if it is used by the immediate parent or one of its attributes
2402          and its prefix and values have not already been rendered by an ancestor
2403          of the namespace node's parent element.
2404  
2405          NOTE: This method is deprecated as of lxml 4.4 and will be removed in a
2406          future release.  Use ``.write(f, method="c14n")`` instead.
2407          """
2408          self._assertHasRoot()
2409          _assertValidNode(self._context_node)
2410          if compression is None or compression < 0:
2411              compression = 0
2412  
2413          _tofilelikeC14N(file, self._context_node, exclusive, with_comments,
2414                          compression, inclusive_ns_prefixes)
2415  
cdef _ElementTree _elementTreeFactory(_Document doc, _Element context_node):
    # Shortcut for _newElementTree() that always uses the plain
    # _ElementTree class as the base class.
    cdef _ElementTree result = _newElementTree(doc, context_node, _ElementTree)
    return result
2418  
cdef _ElementTree _newElementTree(_Document doc, _Element context_node,
                                  object baseclass):
    # Instantiate 'baseclass' and bind it to the given document/context node.
    #
    # Without an explicit context node, the root of 'doc' (if any) is used.
    # Only if there is still no context node afterwards, the document itself
    # is validated and stored in the new tree.
    cdef _ElementTree result = baseclass()
    if context_node is None and doc is not None:
        context_node = doc.getroot()
    if context_node is not None:
        _assertValidNode(context_node)
    else:
        _assertValidDoc(doc)
        result._doc = doc
    result._context_node = context_node
    return result
2432  
2433  
@cython.final
@cython.freelist(16)
cdef class _Attrib:
    u"""A dict-like proxy for the ``Element.attrib`` property.

    All operations read from and write to the attributes of the wrapped
    element; the proxy itself only stores a reference to that element.
    """
    # the element whose attributes this proxy exposes
    cdef _Element _element
    def __cinit__(self, _Element element not None):
        # validate the element once on creation; every method below
        # re-validates it before touching the underlying node
        _assertValidNode(element)
        self._element = element

    # MANIPULATORS
    def __setitem__(self, key, value):
        # attrib[key] = value
        _assertValidNode(self._element)
        _setAttributeValue(self._element, key, value)

    def __delitem__(self, key):
        # del attrib[key]
        _assertValidNode(self._element)
        _delAttribute(self._element, key)

    def update(self, sequence_or_dict):
        # Set several attributes at once.  Accepts a dict, another _Attrib
        # or any other iterable of (key, value) pairs.
        _assertValidNode(self._element)
        if isinstance(sequence_or_dict, (dict, _Attrib)):
            # mappings are reduced to their (key, value) pairs first
            sequence_or_dict = sequence_or_dict.items()
        for key, value in sequence_or_dict:
            _setAttributeValue(self._element, key, value)

    def pop(self, key, *default):
        # Remove the attribute 'key' and return its value.  If it is missing,
        # return the single optional default, or raise KeyError if no
        # default was passed.  '*default' is used to tell "no default" apart
        # from an explicit default of None.
        if len(default) > 1:
            raise TypeError, f"pop expected at most 2 arguments, got {len(default)+1}"
        _assertValidNode(self._element)
        # None is passed as lookup default and serves as the "missing" marker
        result = _getAttributeValue(self._element, key, None)
        if result is None:
            if not default:
                raise KeyError, key
            result = default[0]
        else:
            _delAttribute(self._element, key)
        return result

    def clear(self):
        # Remove all attributes from the element.
        _assertValidNode(self._element)
        c_attrs = self._element._c_node.properties
        if c_attrs:
            # unlink the property list from the node before freeing it
            self._element._c_node.properties = NULL
            tree.xmlFreePropList(c_attrs)

    # ACCESSORS
    # NOTE(review): the second argument of _collectAttributes() (1, 2 or 3)
    # presumably selects keys, values or (key, value) items, as the names of
    # the calling methods suggest -- confirm against its definition.
    def __repr__(self):
        # same representation as a plain dict of the current attributes
        _assertValidNode(self._element)
        return repr(dict( _collectAttributes(self._element._c_node, 3) ))

    def __copy__(self):
        # copying yields a plain dict, not another _Attrib proxy
        _assertValidNode(self._element)
        return dict(_collectAttributes(self._element._c_node, 3))

    def __deepcopy__(self, memo):
        # same result as __copy__(); 'memo' is not used
        _assertValidNode(self._element)
        return dict(_collectAttributes(self._element._c_node, 3))

    def __getitem__(self, key):
        # attrib[key]; a lookup result of None is reported as KeyError
        _assertValidNode(self._element)
        result = _getAttributeValue(self._element, key, None)
        if result is None:
            raise KeyError, key
        return result

    def __bool__(self):
        # true as soon as one entry of the property list is an attribute node
        _assertValidNode(self._element)
        cdef xmlAttr* c_attr = self._element._c_node.properties
        while c_attr is not NULL:
            if c_attr.type == tree.XML_ATTRIBUTE_NODE:
                return 1
            c_attr = c_attr.next
        return 0

    def __len__(self):
        # count the attribute nodes in the property list, skipping any
        # entries of a different node type
        _assertValidNode(self._element)
        cdef xmlAttr* c_attr = self._element._c_node.properties
        cdef Py_ssize_t c = 0
        while c_attr is not NULL:
            if c_attr.type == tree.XML_ATTRIBUTE_NODE:
                c += 1
            c_attr = c_attr.next
        return c

    def get(self, key, default=None):
        # like attrib[key], but returns 'default' instead of raising KeyError
        _assertValidNode(self._element)
        return _getAttributeValue(self._element, key, default)

    def keys(self):
        _assertValidNode(self._element)
        return _collectAttributes(self._element._c_node, 1)

    def __iter__(self):
        # iterates over the result that keys() returns at this point in time
        _assertValidNode(self._element)
        return iter(_collectAttributes(self._element._c_node, 1))

    def iterkeys(self):
        _assertValidNode(self._element)
        return iter(_collectAttributes(self._element._c_node, 1))

    def values(self):
        _assertValidNode(self._element)
        return _collectAttributes(self._element._c_node, 2)

    def itervalues(self):
        _assertValidNode(self._element)
        return iter(_collectAttributes(self._element._c_node, 2))

    def items(self):
        _assertValidNode(self._element)
        return _collectAttributes(self._element._c_node, 3)

    def iteritems(self):
        _assertValidNode(self._element)
        return iter(_collectAttributes(self._element._c_node, 3))

    def has_key(self, key):
        # alias for "key in attrib", implemented by __contains__() below
        _assertValidNode(self._element)
        return key in self

    def __contains__(self, key):
        # Split the key into namespace and local name and ask libxml2
        # whether the node has such a property.
        _assertValidNode(self._element)
        cdef xmlNode* c_node
        ns, tag = _getNsTag(key)
        c_node = self._element._c_node
        # a key without namespace is looked up with a NULL href
        c_href = <const_xmlChar*>NULL if ns is None else _xcstr(ns)
        return 1 if tree.xmlHasNsProp(c_node, _xcstr(tag), c_href) else 0

    def __richcmp__(self, other, int op):
        # Compare as dicts: both sides are converted to plain dicts and the
        # comparison 'op' is delegated to them.  If the conversion fails
        # with TypeError or ValueError, the comparison is not supported.
        try:
            one = dict(self.items())
            if not isinstance(other, dict):
                other = dict(other)
        except (TypeError, ValueError):
            return NotImplemented
        return python.PyObject_RichCompare(one, other, op)

# register _Attrib as a virtual subclass of the MutableMapping ABC
MutableMapping.register(_Attrib)
2573  
2574  
@cython.final
@cython.internal
cdef class _AttribIterator:
    u"""Attribute iterator - for internal use only!

    Walks the property list of an element and returns keys, values or
    (key, value) items, depending on ``_keysvalues``.
    """
    # XML attributes must not be removed while running!
    # element that owns the attributes; reset to None once exhausted
    cdef _Element _node
    # next entry of the property list to look at
    cdef xmlAttr* _c_attr
    cdef int _keysvalues # 1 - keys, 2 - values, 3 - items (key, value)
    def __iter__(self):
        return self

    def __next__(self):
        cdef xmlAttr* c_attr
        # _node is None after the end of the list was reached before
        if self._node is None:
            raise StopIteration
        c_attr = self._c_attr
        # skip over entries that are not attribute nodes
        while c_attr is not NULL and c_attr.type != tree.XML_ATTRIBUTE_NODE:
            c_attr = c_attr.next
        if c_attr is NULL:
            # end of list: drop the element reference and stay exhausted
            self._node = None
            raise StopIteration

        # remember where to continue before building the result
        self._c_attr = c_attr.next
        if self._keysvalues == 1:
            return _namespacedName(<xmlNode*>c_attr)
        elif self._keysvalues == 2:
            return _attributeValue(self._node._c_node, c_attr)
        else:
            return (_namespacedName(<xmlNode*>c_attr),
                    _attributeValue(self._node._c_node, c_attr))
2606  
cdef object _attributeIteratorFactory(_Element element, int keysvalues):
    # Create an attribute iterator for 'element'.  'keysvalues' selects the
    # kind of result: 1 - keys, 2 - values, 3 - items (key, value).
    # Elements without properties share the ITER_EMPTY object instead.
    cdef _AttribIterator attribs
    cdef xmlAttr* c_first_attr = element._c_node.properties
    if c_first_attr is NULL:
        return ITER_EMPTY
    attribs = _AttribIterator()
    attribs._keysvalues = keysvalues
    attribs._c_attr = c_first_attr
    attribs._node = element
    return attribs
2616  
2617  
cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher,
                                       type LxmlElementTagMatcherType ]:
    """
    Dead but public. :)

    Holds a single tag selection (a node type plus an optional namespace
    and name) that ``_initTagMatch()`` derives from a ``tag`` argument.
    """
    # result of _getNsTag(tag); _href and _name are derived from its two
    # items, so it is stored on the instance alongside them
    # NOTE(review): presumably this keeps the string buffers behind the
    # char* pointers alive -- confirm against _cstr()
    cdef object _pystrings
    # libxml2 node type to select; 0 if no tag was given
    cdef int _node_type
    # namespace and name as C strings; NULL if not restricted
    cdef char* _href
    cdef char* _name
    cdef _initTagMatch(self, tag):
        # Set up the selection from 'tag', which may be None, one of the
        # factories Comment / ProcessingInstruction / Entity / Element,
        # or anything that _getNsTag() accepts as a tag name.
        self._href = NULL
        self._name = NULL
        if tag is None:
            self._node_type = 0
        elif tag is Comment:
            self._node_type = tree.XML_COMMENT_NODE
        elif tag is ProcessingInstruction:
            self._node_type = tree.XML_PI_NODE
        elif tag is Entity:
            self._node_type = tree.XML_ENTITY_REF_NODE
        elif tag is Element:
            self._node_type = tree.XML_ELEMENT_NODE
        else:
            # a tag name: select elements, restricted by namespace and name
            self._node_type = tree.XML_ELEMENT_NODE
            self._pystrings = _getNsTag(tag)
            if self._pystrings[0] is not None:
                self._href = _cstr(self._pystrings[0])
            self._name = _cstr(self._pystrings[1])
            # the name "*" is a wildcard and stored as "no name" (NULL)
            if self._name[0] == c'*' and self._name[1] == c'\0':
                self._name = NULL
2648  
cdef public class _ElementIterator(_ElementTagMatcher) [
    object LxmlElementIterator, type LxmlElementIteratorType ]:
    """
    Dead but public. :)

    Iterator that keeps the next element to return in ``_node`` and uses
    the ``_next_element`` function to step from one C node to the next.
    """
    # we keep Python references here to control GC
    # next element to be returned by __next__(); None when exhausted
    cdef _Element _node
    # C function that maps a node to its successor in iteration order
    cdef _node_to_node_function _next_element
    def __iter__(self):
        return self

    cdef void _storeNext(self, _Element node):
        # Find the next node after 'node' that passes the tag selection
        # and store its element proxy in self._node (None if there is none).
        cdef xmlNode* c_node
        c_node = self._next_element(node._c_node)
        # keep stepping while a node type is selected (_node_type != 0) and
        # the current node has a different type or fails the tag match
        while c_node is not NULL and \
                  self._node_type != 0 and \
                  (<tree.xmlElementType>self._node_type != c_node.type or
                   not _tagMatches(c_node, <const_xmlChar*>self._href, <const_xmlChar*>self._name)):
            c_node = self._next_element(c_node)
        if c_node is NULL:
            self._node = None
        else:
            # Python ref:
            self._node = _elementFactory(node._doc, c_node)

    def __next__(self):
        cdef xmlNode* c_node
        cdef _Element current_node
        if self._node is None:
            raise StopIteration
        # Python ref:
        current_node = self._node
        # look ahead for the following element before returning this one
        self._storeNext(current_node)
        return current_node
2683  
@cython.final
@cython.internal
cdef class _MultiTagMatcher:
    """
    Match an xmlNode against a list of tags.

    The selection combines a bit mask of node types (``_node_types``) with
    a list of (href, name) pairs (``_py_tags``).  The ``matches*()``
    methods only look at the ``_cached_tags`` array, which is filled by
    ``cacheTags()`` for a specific document.
    """
    # (href, name) pairs collected by _storeTags()
    cdef list _py_tags
    # C array that cacheTags() fills from _py_tags
    cdef qname* _cached_tags
    # number of entries of _cached_tags that the match loops look at
    cdef size_t _tag_count
    # xmlDictSize() of the document dict at the time of the last caching
    cdef size_t _cached_size
    # document for which the tags were last cached
    cdef _Document _cached_doc
    # bit mask of accepted node types, one bit per type (1 << node type)
    cdef int _node_types

    def __cinit__(self, tags):
        self._py_tags = []
        self.initTagMatch(tags)

    def __dealloc__(self):
        # release the cached C array and the references it holds
        self._clear()

    cdef bint rejectsAll(self):
        # true if neither a cached tag nor a node type is selected
        return not self._tag_count and not self._node_types

    cdef bint rejectsAllAttributes(self):
        # attributes are only matched by tag, not by node type
        return not self._tag_count

    cdef bint matchesType(self, int node_type):
        # element nodes may match as soon as there is any cached tag;
        # otherwise the node type must be set in the type bit mask
        if node_type == tree.XML_ELEMENT_NODE and self._tag_count:
            return True
        return self._node_types & (1 << node_type)

    cdef void _clear(self):
        # Free the cached tag array and reset the tag count.
        cdef size_t i, count
        count = self._tag_count
        self._tag_count = 0
        if self._cached_tags:
            # drop the reference held by each used entry before freeing
            for i in xrange(count):
                cpython.ref.Py_XDECREF(self._cached_tags[i].href)
            python.lxml_free(self._cached_tags)
            self._cached_tags = NULL

    cdef initTagMatch(self, tags):
        # (Re-)initialise the selection from 'tags', discarding the
        # previously collected tags and any cached state.
        self._cached_doc = None
        del self._py_tags[:]
        self._clear()
        if tags is None or tags == ():
            # no selection in tags argument => match anything
            self._node_types = (
                1 << tree.XML_COMMENT_NODE |
                1 << tree.XML_PI_NODE |
                1 << tree.XML_ENTITY_REF_NODE |
                1 << tree.XML_ELEMENT_NODE)
        else:
            self._node_types = 0
            self._storeTags(tags, set())

    cdef _storeTags(self, tag, set seen):
        # Record a single selector or, recursively, a sequence of them.
        # 'seen' holds the tag strings handled so far to skip duplicates.
        if tag is Comment:
            self._node_types |= 1 << tree.XML_COMMENT_NODE
        elif tag is ProcessingInstruction:
            self._node_types |= 1 << tree.XML_PI_NODE
        elif tag is Entity:
            self._node_types |= 1 << tree.XML_ENTITY_REF_NODE
        elif tag is Element:
            self._node_types |= 1 << tree.XML_ELEMENT_NODE
        elif python._isString(tag):
            if tag in seen:
                return
            seen.add(tag)
            if tag in ('*', '{*}*'):
                # full wildcard: accept all element nodes by type
                self._node_types |= 1 << tree.XML_ELEMENT_NODE
            else:
                href, name = _getNsTag(tag)
                if name == b'*':
                    # name wildcard: stored as None
                    name = None
                if href is None:
                    href = b''  # no namespace
                elif href == b'*':
                    href = None  # wildcard: any namespace, including none
                self._py_tags.append((href, name))
        elif isinstance(tag, QName):
            # QName objects are handled through their text form
            self._storeTags(tag.text, seen)
        else:
            # support a sequence of tags
            for item in tag:
                self._storeTags(item, seen)

    cdef inline int cacheTags(self, _Document doc, bint force_into_dict=False) except -1:
        """
        Look up the tag names in the doc dict to enable string pointer comparisons.
        """
        cdef size_t dict_size = tree.xmlDictSize(doc._c_doc.dict)
        if doc is self._cached_doc and dict_size == self._cached_size:
            # doc and dict didn't change => names already cached
            return 0
        self._tag_count = 0
        if not self._py_tags:
            # no tag names to look up: only remember the cache state
            self._cached_doc = doc
            self._cached_size = dict_size
            return 0
        if not self._cached_tags:
            # allocate the C array once, with one slot per collected tag
            self._cached_tags = <qname*>python.lxml_malloc(len(self._py_tags), sizeof(qname))
            if not self._cached_tags:
                self._cached_doc = None
                raise MemoryError()
        # the mapping function returns the number of array entries to use
        self._tag_count = <size_t>_mapTagsToQnameMatchArray(
            doc._c_doc, self._py_tags, self._cached_tags, force_into_dict)
        self._cached_doc = doc
        self._cached_size = dict_size
        return 0

    cdef inline bint matches(self, xmlNode* c_node):
        # A node matches if its type is selected in the bit mask, or if it
        # is an element that matches one of the cached tags.
        cdef qname* c_qname
        if self._node_types & (1 << c_node.type):
            return True
        elif c_node.type == tree.XML_ELEMENT_NODE:
            for c_qname in self._cached_tags[:self._tag_count]:
                if _tagMatchesExactly(c_node, c_qname):
                    return True
        return False

    cdef inline bint matchesNsTag(self, const_xmlChar* c_href,
                                  const_xmlChar* c_name):
        # Same as matches() for elements, but for a namespace/name pair
        # given as C strings instead of a node.
        cdef qname* c_qname
        if self._node_types & (1 << tree.XML_ELEMENT_NODE):
            return True
        for c_qname in self._cached_tags[:self._tag_count]:
            if _nsTagMatchesExactly(c_href, c_name, c_qname):
                return True
        return False

    cdef inline bint matchesAttribute(self, xmlAttr* c_attr):
        """Attribute matches differ from Element matches in that they do
        not care about node types.
        """
        cdef qname* c_qname
        for c_qname in self._cached_tags[:self._tag_count]:
            if _tagMatchesExactly(<xmlNode*>c_attr, c_qname):
                return True
        return False
2824  
cdef class _ElementMatchIterator:
    # Common base of the tag matching iterators below.  Subclasses set up
    # the matcher and the stepping function and store the first element.

    # next element to return; None when the iteration is over
    cdef _Element _node
    # C function that steps from one node to the next candidate
    cdef _node_to_node_function _next_element
    # tag selection that candidates are tested against
    cdef _MultiTagMatcher _matcher

    @cython.final
    cdef _initTagMatcher(self, tags):
        # create the matcher for the given tag selection
        self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tags)

    def __iter__(self):
        return self

    @cython.final
    cdef int _storeNext(self, _Element node) except -1:
        # Look up the next matching node after 'node' and keep its element
        # in self._node, or None if no further node matches.
        cdef xmlNode* c_next
        self._matcher.cacheTags(node._doc)
        c_next = self._next_element(node._c_node)
        while c_next is not NULL:
            if self._matcher.matches(c_next):
                break
            c_next = self._next_element(c_next)
        # store Python ref to next node to make sure it's kept alive
        if c_next is NULL:
            self._node = None
        else:
            self._node = _elementFactory(node._doc, c_next)
        return 0

    def __next__(self):
        cdef _Element current_node = self._node
        if current_node is None:
            raise StopIteration
        # look ahead for the following match before returning this one
        self._storeNext(current_node)
        return current_node
2853  
cdef class ElementChildIterator(_ElementMatchIterator):
    u"""ElementChildIterator(self, node, tag=None, reversed=False)
    Iterates over the children of an element.

    The keyword-only ``reversed`` flag starts the search for children from
    the opposite end and steps backwards through them.
    """
    def __cinit__(self, _Element node not None, tag=None, *, bint reversed=False):
        cdef xmlNode* c_child
        _assertValidNode(node)
        self._initTagMatcher(tag)
        # pick the stepping function and the child to start from
        if not reversed:
            self._next_element = _nextElement
            c_child = _findChildForwards(node._c_node, 0)
        else:
            self._next_element = _previousElement
            c_child = _findChildBackwards(node._c_node, 0)
        self._matcher.cacheTags(node._doc)
        # skip ahead to the first child that matches the tag selection
        while c_child is not NULL:
            if self._matcher.matches(c_child):
                break
            c_child = self._next_element(c_child)
        # store Python ref to next node to make sure it's kept alive
        if c_child is NULL:
            self._node = None
        else:
            self._node = _elementFactory(node._doc, c_child)
2873  
cdef class SiblingsIterator(_ElementMatchIterator):
    u"""SiblingsIterator(self, node, tag=None, preceding=False)
    Iterates over the siblings of an element.

    The direction is chosen with the boolean keyword ``preceding``.
    """
    def __cinit__(self, _Element node not None, tag=None, *, bint preceding=False):
        _assertValidNode(node)
        self._initTagMatcher(tag)
        if not preceding:
            self._next_element = _nextElement
        else:
            self._next_element = _previousElement
        # the start node itself is not returned, only what follows it
        self._storeNext(node)
2888  
cdef class AncestorsIterator(_ElementMatchIterator):
    u"""AncestorsIterator(self, node, tag=None)
    Iterates over the ancestors of an element (from parent to parent).
    """
    def __cinit__(self, _Element node not None, tag=None):
        _assertValidNode(node)
        # step from each node to its parent
        self._next_element = _parentElement
        self._initTagMatcher(tag)
        # the start node itself is not returned, only what follows it
        self._storeNext(node)
2898  
cdef class ElementDepthFirstIterator:
    u"""ElementDepthFirstIterator(self, node, tag=None, inclusive=True)
    Iterates over an element and its sub-elements in document order (depth
    first pre-order).

    Note that this also includes comments, entities and processing
    instructions.  To filter them out, check if the ``tag`` property
    of the returned element is a string (i.e. not None and not a
    factory function), or pass the ``Element`` factory for the ``tag``
    argument to receive only Elements.

    If the optional ``tag`` argument is not None, the iterator returns only
    the elements that match the respective name and namespace.

    The optional boolean argument 'inclusive' defaults to True and can be set
    to False to exclude the start element itself.

    Note that the behaviour of this iterator is completely undefined if the
    tree it traverses is modified during iteration.
    """
    # we keep Python references here to control GC
    # keep the next Element after the one we return, and the (s)top node
    cdef _Element _next_node
    cdef _Element _top_node
    # tag selection that candidate nodes are tested against
    cdef _MultiTagMatcher _matcher
    def __cinit__(self, _Element node not None, tag=None, *, bint inclusive=True):
        _assertValidNode(node)
        self._top_node  = node
        self._next_node = node
        self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag)
        self._matcher.cacheTags(node._doc)
        # the start node is the first result only if it is wanted and matches;
        # otherwise advance once and discard it
        if not inclusive or not self._matcher.matches(node._c_node):
            # find start node (this cannot raise StopIteration, self._next_node != None)
            next(self)

    def __iter__(self):
        return self

    def __next__(self):
        # Return the stored node and look ahead for the one after it.
        cdef xmlNode* c_node
        cdef _Element current_node = self._next_node
        if current_node is None:
            raise StopIteration
        c_node = current_node._c_node
        # refresh the cached tag names in case the document dict changed
        self._matcher.cacheTags(current_node._doc)
        if not self._matcher._tag_count:
            # no tag name was found in the dict => not in document either
            # try to match by node type
            c_node = self._nextNodeAnyTag(c_node)
        else:
            c_node = self._nextNodeMatchTag(c_node)
        if c_node is NULL:
            self._next_node = None
        else:
            self._next_node = _elementFactory(current_node._doc, c_node)
        return current_node

    # NOTE(review): BEGIN/END_FOR_EACH_ELEMENT_FROM is a C macro pair that
    # presumably walks the subtree below _top_node in document order,
    # continuing after c_node (last argument 0 = not including it) --
    # confirm against the macro definition.
    @cython.final
    cdef xmlNode* _nextNodeAnyTag(self, xmlNode* c_node):
        # Find the next node whose type is selected in the matcher's
        # node type bit mask; NULL if there is none.
        cdef int node_types = self._matcher._node_types
        if not node_types:
            # no node type selected => nothing can match
            return NULL
        tree.BEGIN_FOR_EACH_ELEMENT_FROM(self._top_node._c_node, c_node, 0)
        if node_types & (1 << c_node.type):
            return c_node
        tree.END_FOR_EACH_ELEMENT_FROM(c_node)
        return NULL

    @cython.final
    cdef xmlNode* _nextNodeMatchTag(self, xmlNode* c_node):
        # Find the next node that the matcher accepts by type or by tag;
        # NULL if there is none.
        tree.BEGIN_FOR_EACH_ELEMENT_FROM(self._top_node._c_node, c_node, 0)
        if self._matcher.matches(c_node):
            return c_node
        tree.END_FOR_EACH_ELEMENT_FROM(c_node)
        return NULL
2974  
cdef class ElementTextIterator:
    u"""ElementTextIterator(self, element, tag=None, with_tail=True)
    Iterates over the text content of a subtree.

    Pass the ``tag`` keyword argument to restrict the text content to a
    specific tag name.

    Set the ``with_tail`` keyword argument to ``False`` to skip over tail
    text (e.g. if you know that it's only whitespace from pretty-printing).
    """
    # iterator of (event, element) pairs that drives the text extraction
    cdef object _events
    # element at which the iteration started
    cdef _Element _start_element
    def __cinit__(self, _Element element not None, tag=None, *, bint with_tail=True):
        _assertValidNode(element)
        # "end" events are only requested if tail text is wanted
        events = ((u"start", u"comment", u"pi", u"end") if with_tail
                  else (u"start", u"comment", u"pi"))
        self._start_element = element
        self._events = iterwalk(element, events=events, tag=tag)

    def __iter__(self):
        return self

    def __next__(self):
        cdef _Element element
        while True:
            # next() raises StopIteration when the walk is exhausted
            event, element = next(self._events)
            if event == u"start":
                text = element.text
            elif element is not self._start_element:
                text = element.tail
            else:
                # the tail of the start element is never returned
                continue
            if text is not None:
                return text
3009  
cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf) except NULL:
    # Create a new node named 'name_utf' in 'c_doc' using xmlNewDocNode(),
    # without namespace and without content.
    return tree.xmlNewDocNode(c_doc, NULL, _xcstr(name_utf), NULL)
3014  
cdef xmlNode* _createComment(xmlDoc* c_doc, const_xmlChar* text):
    # Create a new comment node with the given text in 'c_doc'.
    return tree.xmlNewDocComment(c_doc, text)
3019  
cdef xmlNode* _createPI(xmlDoc* c_doc, const_xmlChar* target, const_xmlChar* text):
    # Create a new processing instruction node with the given target and
    # text in 'c_doc'.
    return tree.xmlNewDocPI(c_doc, target, text)
3024  
cdef xmlNode* _createEntity(xmlDoc* c_doc, const_xmlChar* name):
    # Create a new reference node for the entity 'name' in 'c_doc'.
    return tree.xmlNewReference(c_doc, name)
3029  
3030  # module-level API for ElementTree
3031  
def Element(_tag, attrib=None, nsmap=None, **_extra):
    u"""Element(_tag, attrib=None, nsmap=None, **_extra)

    Element factory.  Returns an object that implements the Element
    interface.

    See also the `_Element.makeelement()` and
    `_BaseParser.makeelement()` methods, which provide a faster way to
    create an Element within a specific document or parser context.
    """
    # no C document and no existing document/parser context is passed here
    return _makeElement(
        _tag, NULL, None, None, None, None, attrib, nsmap, _extra)
3044  
3045  
def Comment(text=None):
    u"""Comment(text=None)

    Comment element factory.  Creates a special element that will be
    serialized as an XML comment.

    Raises ValueError if the text contains '--' or ends with '-'.
    """
    cdef _Document doc
    cdef xmlNode*  c_node
    cdef xmlDoc*   c_doc

    if text is not None:
        text = _utf8(text)
        if b'--' in text or text.endswith(b'-'):
            raise ValueError("Comment may not contain '--' or end with '-'")
    else:
        text = b''

    # the comment node becomes a child of a newly created document
    c_doc = _newXMLDoc()
    doc = _documentFactory(c_doc, None)
    c_node = _createComment(c_doc, _xcstr(text))
    tree.xmlAddChild(<xmlNode*>c_doc, c_node)
    return _elementFactory(doc, c_node)
3068  
3069  
def ProcessingInstruction(target, text=None):
    u"""ProcessingInstruction(target, text=None)

    ProcessingInstruction element factory. This factory function creates a
    special element that will be serialized as an XML processing instruction.

    Raises ValueError if the target is "xml" (in any letter case) or if
    the text contains '?>'.
    """
    cdef _Document doc
    cdef xmlNode*  c_node
    cdef xmlDoc*   c_doc

    target = _utf8(target)
    _tagValidOrRaise(target)
    if target.lower() == b'xml':
        # 'target' is a byte string at this point.  Decode it for the error
        # message so that it shows the plain name instead of a bytes repr
        # (b'...') on Python 3.  The name is pure ASCII here since it
        # case-insensitively equals b'xml'.
        target_name = target.decode('utf8')
        raise ValueError, f"Invalid PI name '{target_name}'"

    if text is None:
        text = b''
    else:
        text = _utf8(text)
        if b'?>' in text:
            raise ValueError, "PI text must not contain '?>'"

    # the PI node becomes a child of a newly created document
    c_doc = _newXMLDoc()
    doc = _documentFactory(c_doc, None)
    c_node = _createPI(c_doc, _xcstr(target), _xcstr(text))
    tree.xmlAddChild(<xmlNode*>c_doc, c_node)
    return _elementFactory(doc, c_node)

# short alias for the factory function
PI = ProcessingInstruction
3099  
3100  
cdef class CDATA:
    u"""CDATA(data)

    Opaque wrapper around text that should be stored as a CDATA section
    when it is assigned as Element text.  Typical usage::

        >>> el = Element('content')
        >>> el.text = CDATA('a string')

        >>> print(el.text)
        a string
        >>> print(tostring(el, encoding="unicode"))
        <content><![CDATA[a string]]></content>

    Raises ``ValueError`` if the data contains the terminator ``]]>``.
    """
    # UTF-8 encoded payload, set once in __cinit__.
    cdef bytes _utf8_data
    def __cinit__(self, data):
        encoded = _utf8(data)
        if b']]>' in encoded:
            raise ValueError("']]>' not allowed inside CDATA")
        self._utf8_data = encoded
3121  
3122  
def Entity(name):
    u"""Entity(name)

    Factory for entity nodes.  The returned special element is serialized
    as an XML entity reference or, when ``name`` starts with ``#``, as a
    character reference.

    Entities are not declared in the document automatically; a document
    that uses entity references needs a DTD that defines them.

    Raises ``ValueError`` for an invalid character reference or an invalid
    entity name.
    """
    cdef _Document doc
    cdef xmlNode*  c_node
    cdef xmlDoc*   c_doc
    # Keep 'name_utf' referenced: 'c_name' points into its buffer.
    name_utf = _utf8(name)
    c_name = _xcstr(name_utf)
    if c_name[0] != c'#':
        if not _xmlNameIsValid(c_name):
            raise ValueError(f"Invalid entity reference: '{name}'")
    elif not _characterReferenceIsValid(c_name + 1):
        # Everything after the leading '#' must be a valid character reference.
        raise ValueError(f"Invalid character reference: '{name}'")

    # The entity node lives in a fresh document of its own.
    c_doc = _newXMLDoc()
    doc = _documentFactory(c_doc, None)
    c_node = _createEntity(c_doc, c_name)
    tree.xmlAddChild(<xmlNode*>c_doc, c_node)
    return _elementFactory(doc, c_node)
3147  
3148  
def SubElement(_Element _parent not None, _tag,
               attrib=None, nsmap=None, **_extra):
    u"""SubElement(_parent, _tag, attrib=None, nsmap=None, **_extra)

    Factory for child elements: creates a new element instance and appends
    it to the existing element ``_parent``, which must not be ``None``.
    """
    return _makeSubElement(
        _parent, _tag, None, None, attrib, nsmap, _extra)
3157  
3158  
def ElementTree(_Element element=None, *, file=None, _BaseParser parser=None):
    u"""ElementTree(element=None, file=None, parser=None)

    ElementTree wrapper class.

    The underlying document is chosen as follows:

    - if ``element`` is given, its document is used (``file`` is not read);
    - otherwise, if ``file`` is given, it is parsed with ``parser``;  if the
      parser has a target, the target's result is returned directly instead
      of an ElementTree;
    - otherwise, a new empty document is created and configured with
      ``parser``.
    """
    cdef xmlDoc*  c_doc
    cdef _Document doc

    if element is not None:
        doc  = element._doc
    elif file is not None:
        try:
            doc = _parseDocument(file, parser, None)
        except _TargetParserResult as result_container:
            # Parser targets deliver their own result object.
            return result_container.result
    else:
        c_doc = _newXMLDoc()
        doc = _documentFactory(c_doc, parser)

    return _elementTreeFactory(doc, element)
3183  
3184  
def HTML(text, _BaseParser parser=None, *, base_url=None):
    u"""HTML(text, parser=None, base_url=None)

    Parse an HTML document from a string constant and return its root
    node, or the result returned by a parser target.  Useful for embedding
    "HTML literals" in Python code.

    Pass a different ``HTMLParser`` as ``parser`` to override the parser.

    ``base_url`` sets the original base URL of the document, which supports
    relative paths when looking up external entities (DTD, XInclude, ...).
    """
    cdef _Document doc
    if parser is None:
        # Use the configured default parser only if it is an HTML parser,
        # otherwise fall back to the module's default HTML parser.
        parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
        if not isinstance(parser, HTMLParser):
            parser = __DEFAULT_HTML_PARSER
    try:
        doc = _parseMemoryDocument(text, base_url, parser)
        return doc.getroot()
    except _TargetParserResult as target_result:
        # Parser targets deliver their own result object.
        return target_result.result
3209  
3210  
def XML(text, _BaseParser parser=None, *, base_url=None):
    u"""XML(text, parser=None, base_url=None)

    Parse an XML document or fragment from a string constant and return
    its root node, or the result returned by a parser target.  Useful for
    embedding "XML literals" in Python code, as in

       >>> root = XML("<root><test/></root>")
       >>> print(root.tag)
       root

    Pass a different ``XMLParser`` as ``parser`` to override the parser.

    ``base_url`` sets the original base URL of the document, which supports
    relative paths when looking up external entities (DTD, XInclude, ...).
    """
    cdef _Document doc
    if parser is None:
        # Use the configured default parser only if it is an XML parser,
        # otherwise fall back to the module's default XML parser.
        parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
        if not isinstance(parser, XMLParser):
            parser = __DEFAULT_XML_PARSER
    try:
        doc = _parseMemoryDocument(text, base_url, parser)
        return doc.getroot()
    except _TargetParserResult as target_result:
        # Parser targets deliver their own result object.
        return target_result.result
3240  
3241  
def fromstring(text, _BaseParser parser=None, *, base_url=None):
    u"""fromstring(text, parser=None, base_url=None)

    Parse an XML document or fragment from a string and return its root
    node, or the result returned by a parser target.

    Pass a parser as ``parser`` to override the default parser.

    ``base_url`` sets the original base URL of the document, which supports
    relative paths when looking up external entities (DTD, XInclude, ...).
    """
    cdef _Document doc
    try:
        # 'parser' is passed through as given, including None.
        doc = _parseMemoryDocument(text, base_url, parser)
        return doc.getroot()
    except _TargetParserResult as target_result:
        # Parser targets deliver their own result object.
        return target_result.result
3261  
3262  
def fromstringlist(strings, _BaseParser parser=None):
    u"""fromstringlist(strings, parser=None)

    Parses an XML document from a sequence of strings.  Returns the
    root node (or the result returned by a parser target).

    To override the default parser with a different parser you can pass it to
    the ``parser`` keyword argument.

    Raises ``ValueError`` when a single byte or unicode string is passed
    instead of a sequence of strings.
    """
    if isinstance(strings, (bytes, unicode)):
        raise ValueError("passing a single string into fromstringlist() is not"
                         " efficient, use fromstring() instead")
    if parser is None:
        parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
    # Look up the bound method once, then feed the chunks in order.
    feed = parser.feed
    for data in strings:
        feed(data)
    return parser.close()
3282  
3283  
def iselement(element):
    u"""iselement(element)

    Return True if the object appears to be a valid element object, i.e.
    it is an ``_Element`` instance whose underlying C node is set.
    """
    if not isinstance(element, _Element):
        return False
    return (<_Element>element)._c_node is not NULL
3290  
3291  
def indent(tree, space="  ", *, Py_ssize_t level=0):
    """indent(tree, space="  ", level=0)

    Indent an XML document in place by inserting newlines and indentation
    whitespace after elements.

    *tree* is the ElementTree or Element to modify.  The (root) element
    itself is left unchanged, but the tail text of all elements in its
    subtree is adapted.

    *space* is the whitespace inserted per indentation level; the default
    is two space characters.

    *level* is the initial indentation level.  Values above 0 are useful
    for indenting subtrees that are nested more deeply inside of a
    document.  Negative values raise ``ValueError``.
    """
    root = _rootNodeOrRaise(tree)
    if level < 0:
        raise ValueError(f"Initial indentation level must be >= 0, got {level}")
    if not _hasChild(root._c_node):
        # Nothing to indent below a childless root.
        return
    unit = _utf8(space)
    base_indentation = b"\n" + level * unit
    # Seed the cache with the indentation strings of the first two levels.
    _indent_children(
        root._c_node, 1, unit, [base_indentation, base_indentation + unit])
3316  
3317  
cdef int _indent_children(xmlNode* c_node, Py_ssize_t level, bytes one_space, list indentations) except -1:
    # Indent the children of 'c_node', which sit at nesting depth 'level'.
    # 'indentations' caches one indentation string per level and is
    # extended on demand, so the strings are shared across the recursion.
    cdef xmlNode* c_child
    cdef xmlNode* c_following

    if level >= len(indentations):
        indentations.append(indentations[-1] + one_space)

    # The first child starts on a new line at this level's indentation,
    # unless the parent carries real (non-whitespace) text.
    inner_indentation = indentations[level]
    if not _hasNonWhitespaceText(c_node):
        _setNodeText(c_node, inner_indentation)

    # Walk the children, descending into those that have children of their own.
    c_child = _findChildForwards(c_node, 0)
    while c_child is not NULL:
        if _hasChild(c_child):
            _indent_children(c_child, level+1, one_space, indentations)
        c_following = _nextElement(c_child)
        if not _hasNonWhitespaceTail(c_child):
            if c_following is NULL:
                # Dedent after the last child.
                _setTailText(c_child, indentations[level-1])
            else:
                _setTailText(c_child, inner_indentation)
        c_child = c_following
    return 0
3341  
3342  
def dump(_Element elem not None, *, bint pretty_print=True, with_tail=True):
    u"""dump(elem, pretty_print=True, with_tail=True)

    Write the serialized element to sys.stdout.  Intended for debugging
    only.
    """
    # Serialize to a native string: the default encoding on Python 2,
    # 'unicode' otherwise.
    target_encoding = None if python.IS_PYTHON2 else 'unicode'
    xml = tostring(elem, encoding=target_encoding,
                   pretty_print=pretty_print, with_tail=with_tail)
    if not pretty_print:
        # Without pretty printing, terminate the line explicitly.
        xml += '\n'
    sys.stdout.write(xml)
3354  
3355  
def tostring(element_or_tree, *, encoding=None, method="xml",
             xml_declaration=None, bint pretty_print=False, bint with_tail=True,
             standalone=None, doctype=None,
             # method='c14n'
             bint exclusive=False, inclusive_ns_prefixes=None,
             # method='c14n2'
             bint with_comments=True, bint strip_text=False,
             ):
    u"""tostring(element_or_tree, encoding=None, method="xml",
                 xml_declaration=None, pretty_print=False, with_tail=True,
                 standalone=None, doctype=None,
                 exclusive=False, inclusive_ns_prefixes=None,
                 with_comments=True, strip_text=False,
                 )

    Serialize an element (or ElementTree) to an encoded string
    representation of its XML tree.

    The default output is ASCII encoded and has no XML declaration.  Use
    the keyword arguments 'encoding' (string) and 'xml_declaration' (bool)
    to change that.  Note that selecting an encoding that is not UTF-8
    compatible enables the declaration by default.

    To get a Unicode string without declaration instead of a byte string,
    pass the name ``'unicode'`` as encoding (or the ``str`` function in Py3
    or ``unicode`` in Py2).  Requesting an XML declaration together with
    unicode output raises ``ValueError``.

    'pretty_print' (bool) enables formatted XML.

    'method' selects the output method: 'xml' (the default), 'html',
    plain 'text' (text content without tags), 'c14n' or 'c14n2'.

    For ``method="c14n"`` (C14N version 1), ``exclusive`` requests
    exclusive C14N, ``with_comments`` includes comments, and
    ``inclusive_ns_prefixes`` lists the inclusive prefixes.

    For ``method="c14n2"`` (C14N version 2), ``with_comments`` and
    ``strip_text`` control the output of comments and text space according
    to C14N 2.0.

    Both C14N methods reject an explicit ``encoding`` and a true
    ``xml_declaration`` with ``ValueError``.  Conversely, disabling
    ``with_comments`` or enabling ``strip_text`` with any other method
    raises ``ValueError``.

    A boolean ``standalone`` value outputs an XML declaration with the
    corresponding ``standalone`` flag.

    ``doctype`` is a plain string that is serialised before the XML tree.
    Passing non well-formed content here makes the XML output non
    well-formed.  An existing doctype in the document tree is not removed
    when serialising an ElementTree instance.

    Pass ``with_tail=False`` to prevent the tail text of the element from
    being serialised.  The tail text of children is always serialised.

    Raises ``TypeError`` if ``element_or_tree`` is neither an element nor
    an ElementTree.
    """
    cdef bint write_declaration
    cdef int is_standalone

    # Canonical XML is handled separately and returns early.
    if method in ('c14n', 'c14n2'):
        if encoding is not None:
            raise ValueError("Cannot specify encoding with C14N")
        if xml_declaration:
            raise ValueError("Cannot enable XML declaration in C14N")
        if method == 'c14n':
            return _tostringC14N(element_or_tree, exclusive, with_comments, inclusive_ns_prefixes)
        # C14N 2.0: stream the tree through a writer target into a buffer.
        c14n2_buffer = BytesIO()
        c14n2_target = C14NWriterTarget(
            utf8_writer(c14n2_buffer).write,
            with_comments=with_comments, strip_text=strip_text)
        _tree_to_target(element_or_tree, c14n2_target)
        return c14n2_buffer.getvalue()

    # The C14N-only options must be left at their defaults from here on.
    if not with_comments:
        raise ValueError("Can only discard comments in C14N serialisation")
    if strip_text:
        raise ValueError("Can only strip text in C14N 2.0 serialisation")

    # Decide on the output encoding and whether to write a declaration.
    if encoding is unicode or (encoding is not None and encoding.lower() == 'unicode'):
        if xml_declaration:
            raise ValueError(
                u"Serialisation to unicode must not request an XML declaration")
        write_declaration = 0
        encoding = unicode
    elif xml_declaration is not None:
        write_declaration = xml_declaration
    else:
        # by default, write an XML declaration only for non-standard encodings
        write_declaration = encoding is not None and encoding.upper() not in \
                            (u'ASCII', u'UTF-8', u'UTF8', u'US-ASCII')
    if encoding is None:
        encoding = u'ASCII'

    # An explicit standalone flag always forces the declaration.
    if standalone is None:
        is_standalone = -1
    else:
        write_declaration = 1
        is_standalone = 1 if standalone else 0

    if isinstance(element_or_tree, _Element):
        return _tostring(<_Element>element_or_tree, encoding, doctype, method,
                         write_declaration, 0, pretty_print, with_tail,
                         is_standalone)
    if isinstance(element_or_tree, _ElementTree):
        return _tostring((<_ElementTree>element_or_tree)._context_node,
                         encoding, doctype, method, write_declaration, 1,
                         pretty_print, with_tail, is_standalone)
    raise TypeError(f"Type '{python._fqtypename(element_or_tree).decode('utf8')}' cannot be serialized.")
3466  
3467  
3468  
def tostringlist(element_or_tree, *args, **kwargs):
    u"""tostringlist(element_or_tree, *args, **kwargs)

    Serialize an element like ``tostring()`` does, but return the result
    as a list of partial strings.

    This exists purely for ElementTree 1.3 compatibility: the list always
    holds exactly one item, the complete serialized string.
    """
    serialized = tostring(element_or_tree, *args, **kwargs)
    return [serialized]
3479  
3480  
def tounicode(element_or_tree, *, method=u"xml", bint pretty_print=False,
              bint with_tail=True, doctype=None):
    u"""tounicode(element_or_tree, method="xml", pretty_print=False,
                  with_tail=True, doctype=None)

    Serialize an element to the Python unicode representation of its XML
    tree.

    :deprecated: use ``tostring(el, encoding='unicode')`` instead.

    Note that the result does not carry an XML encoding declaration and is
    therefore not necessarily suited for serialization to byte streams without
    further treatment.

    The boolean keyword argument 'pretty_print' enables formatted XML.

    The keyword argument 'method' selects the output method: 'xml',
    'html' or plain 'text'.

    You can prevent the tail text of the element from being serialised
    by passing the boolean ``with_tail`` option.  This has no impact
    on the tail text of children, which will always be serialised.

    Raises ``TypeError`` if ``element_or_tree`` is neither an element nor
    an ElementTree.
    """
    if isinstance(element_or_tree, _Element):
        return _tostring(<_Element>element_or_tree, unicode, doctype, method,
                          0, 0, pretty_print, with_tail, -1)
    elif isinstance(element_or_tree, _ElementTree):
        return _tostring((<_ElementTree>element_or_tree)._context_node,
                         unicode, doctype, method, 0, 1, pretty_print,
                         with_tail, -1)
    else:
        # Report the type name the same way tostring() does, instead of
        # embedding the repr of the type object ("<class '...'>").
        raise TypeError(f"Type '{python._fqtypename(element_or_tree).decode('utf8')}' cannot be serialized.")
3513  
3514  
def parse(source, _BaseParser parser=None, *, base_url=None):
    u"""parse(source, parser=None, base_url=None)

    Load ``source`` and return an ElementTree object holding its
    elements.  The default parser is used unless one is passed as second
    argument.  If the parser has a target, the result returned by the
    target is returned instead.

    ``source`` can be any of the following:

    - a file name/path
    - a file object
    - a file-like object
    - a URL using the HTTP or FTP protocol

    Use the ``fromstring()`` function to parse from a string.

    Parsing from a file path or URL is generally faster than parsing from
    an open file object or file-like object.  Transparent decompression
    from gzip compressed sources is supported (unless explicitly disabled
    in libxml2).

    ``base_url`` sets a URL for the document when parsing from a file-like
    object.  It is needed when looking up external entities (DTD,
    XInclude, ...) with relative paths.
    """
    cdef _Document doc
    try:
        doc = _parseDocument(source, parser, base_url)
        return _elementTreeFactory(doc, None)
    except _TargetParserResult as target_result:
        # Parser targets deliver their own result object.
        return target_result.result
3545  
3546  
def adopt_external_document(capsule, _BaseParser parser=None):
    """adopt_external_document(capsule, parser=None)

    Extract a libxml2 document pointer from a PyCapsule and wrap it in an
    lxml ElementTree object.

    External libraries can use this to build XML/HTML trees with libxml2
    and pass them efficiently into lxml for further processing.

    A given ``parser`` is only used for configuring the lxml document.
    No parsing is done.

    The capsule must be named ``"libxml2:xmlDoc"`` and its pointer value
    must reference a correct libxml2 document of type ``xmlDoc*``.  The
    creator of the capsule is responsible for cleaning up the document
    with an appropriate capsule destructor.  By default, the libxml2
    document is copied so that lxml can safely own the memory of the
    internal tree that it uses.

    A non-NULL capsule context must point to a C string that can be
    compared using ``strcmp()``.  If that string equals
    ``"destructor:xmlFreeDoc"``, the document is not copied.  Instead, the
    capsule is invalidated by clearing its destructor and name, so that
    lxml takes ownership of the document in memory without copying it and
    the capsule destructor is never called.  lxml then eventually frees
    the document with the libxml2 API function ``xmlFreeDoc()`` once it is
    no longer used.

    If no copy is made, do not modify the tree outside of lxml after
    transferring the ownership.
    """
    cdef xmlDoc* c_doc
    cdef bint is_owned = False
    # 'is_owned' is passed by address so that the unpacking helper can set it.
    c_doc = <xmlDoc*> python.lxml_unpack_xmldoc_capsule(capsule, &is_owned)
    adopted_doc = _adoptForeignDoc(c_doc, parser, is_owned)
    return _elementTreeFactory(adopted_doc, None)
3583  
3584  
3585  ################################################################################
3586  # Include submodules
3587  
3588  include "readonlytree.pxi" # Read-only implementation of Element proxies
3589  include "classlookup.pxi"  # Element class lookup mechanisms
3590  include "nsclasses.pxi"    # Namespace implementation and registry
3591  include "docloader.pxi"    # Support for custom document loaders
3592  include "parser.pxi"       # XML and HTML parsers
3593  include "saxparser.pxi"    # SAX-like Parser interface and tree builder
3594  include "parsertarget.pxi" # ET Parser target
3595  include "serializer.pxi"   # XML output functions
3596  include "iterparse.pxi"    # incremental XML parsing
3597  include "xmlid.pxi"        # XMLID and IDDict
3598  include "xinclude.pxi"     # XInclude
3599  include "cleanup.pxi"      # Cleanup and recursive element removal functions
3600  
3601  
3602  ################################################################################
3603  # Include submodules for XPath and XSLT
3604  
3605  include "extensions.pxi"   # XPath/XSLT extension functions
3606  include "xpath.pxi"        # XPath evaluation
3607  include "xslt.pxi"         # XSL transformations
3608  include "xsltext.pxi"      # XSL extension elements
3609  
3610  
3611  ################################################################################
3612  # Validation
3613  
cdef class DocumentInvalid(LxmlError):
    """Validation error.

    All document validators raise this exception when a call to their
    ``assertValid(tree)`` method fails.
    """
3620  
3621  
cdef class _Validator:
    u"""Base class for XML validators.

    The validation itself is done by calling the instance (``self(etree)``);
    no ``__call__`` is defined in this base class, so subclasses presumably
    provide it.  This class holds the error log and the convenience methods
    built on top of that call.
    """
    # Collects the errors and warnings reported during validation.
    cdef _ErrorLog _error_log
    def __cinit__(self):
        self._error_log = _ErrorLog()

    def validate(self, etree):
        u"""validate(self, etree)

        Validate the document using this schema.

        Returns true if document is valid, false if not.
        """
        return self(etree)

    def assertValid(self, etree):
        u"""assertValid(self, etree)

        Raises `DocumentInvalid` if the document does not comply with the schema.
        """
        if not self(etree):
            # The exception carries both the built message and the log.
            raise DocumentInvalid(self._error_log._buildExceptionMessage(
                    u"Document does not comply with schema"),
                                  self._error_log)

    def assert_(self, etree):
        u"""assert_(self, etree)

        Raises `AssertionError` if the document does not comply with the schema.
        """
        if not self(etree):
            raise AssertionError(self._error_log._buildExceptionMessage(
                u"Document does not comply with schema"))

    cpdef _append_log_message(self, int domain, int type, int level, int line,
                              message, filename):
        # Forward a single log entry to the error log of this validator.
        self._error_log._receiveGeneric(domain, type, level, line, message,
                                        filename)

    cpdef _clear_error_log(self):
        # Discard all entries collected so far.
        self._error_log.clear()

    @property
    def error_log(self):
        """The log of validation errors and warnings.

        A copy of the internal log is returned.
        """
        assert self._error_log is not None, "Validator not initialised"
        return self._error_log.copy()
3669  
3670  include "dtd.pxi"        # DTD
3671  include "relaxng.pxi"    # RelaxNG
3672  include "xmlschema.pxi"  # XMLSchema
3673  include "schematron.pxi" # Schematron (requires libxml2 2.6.21+)
3674  
3675  ################################################################################
3676  # Public C API
3677  
3678  include "public-api.pxi"
3679  
3680  ################################################################################
3681  # Other stuff
3682  
3683  include "debug.pxi"