Cradicle Explorer

/ lib / lxml / apihelpers.pxi
apihelpers.pxi
   1  # Private/public helper functions for API functions
   2  
   3  from lxml.includes cimport uri
   4  
   5  
   6  cdef void displayNode(xmlNode* c_node, indent):
   7      # to help with debugging
   8      cdef xmlNode* c_child
   9      try:
  10          print indent * u' ', <long>c_node
  11          c_child = c_node.children
  12          while c_child is not NULL:
  13              displayNode(c_child, indent + 1)
  14              c_child = c_child.next
  15      finally:
  16          return  # swallow any exceptions
  17  
  18  cdef inline int _assertValidNode(_Element element) except -1:
  19      assert element._c_node is not NULL, u"invalid Element proxy at %s" % id(element)
  20  
  21  cdef inline int _assertValidDoc(_Document doc) except -1:
  22      assert doc._c_doc is not NULL, u"invalid Document proxy at %s" % id(doc)
  23  
  24  cdef _Document _documentOrRaise(object input):
  25      u"""Call this to get the document of a _Document, _ElementTree or _Element
  26      object, or to raise an exception if it can't be determined.
  27  
  28      Should be used in all API functions for consistency.
  29      """
  30      cdef _Document doc
  31      if isinstance(input, _ElementTree):
  32          if (<_ElementTree>input)._context_node is not None:
  33              doc = (<_ElementTree>input)._context_node._doc
  34          else:
  35              doc = None
  36      elif isinstance(input, _Element):
  37          doc = (<_Element>input)._doc
  38      elif isinstance(input, _Document):
  39          doc = <_Document>input
  40      else:
  41          raise TypeError, f"Invalid input object: {python._fqtypename(input).decode('utf8')}"
  42      if doc is None:
  43          raise ValueError, f"Input object has no document: {python._fqtypename(input).decode('utf8')}"
  44      _assertValidDoc(doc)
  45      return doc
  46  
  47  cdef _Element _rootNodeOrRaise(object input):
  48      u"""Call this to get the root node of a _Document, _ElementTree or
  49       _Element object, or to raise an exception if it can't be determined.
  50  
  51      Should be used in all API functions for consistency.
  52       """
  53      cdef _Element node
  54      if isinstance(input, _ElementTree):
  55          node = (<_ElementTree>input)._context_node
  56      elif isinstance(input, _Element):
  57          node = <_Element>input
  58      elif isinstance(input, _Document):
  59          node = (<_Document>input).getroot()
  60      else:
  61          raise TypeError, f"Invalid input object: {python._fqtypename(input).decode('utf8')}"
  62      if (node is None or not node._c_node or
  63              node._c_node.type != tree.XML_ELEMENT_NODE):
  64          raise ValueError, f"Input object is not an XML element: {python._fqtypename(input).decode('utf8')}"
  65      _assertValidNode(node)
  66      return node
  67  
  68  cdef bint _isAncestorOrSame(xmlNode* c_ancestor, xmlNode* c_node):
  69      while c_node:
  70          if c_node is c_ancestor:
  71              return True
  72          c_node = c_node.parent
  73      return False
  74  
cdef _Element _makeElement(tag, xmlDoc* c_doc, _Document doc,
                           _BaseParser parser, text, tail, attrib, nsmap,
                           dict extra_attrs):
    u"""Create a new element and initialize text content, namespaces and
    attributes.

    This helper function will reuse as much of the existing document as
    possible:

    If 'parser' is None, the parser will be inherited from 'doc' or the
    default parser will be used.

    If 'doc' is None, 'c_doc' is used to create a new _Document and the new
    element is made its root node.

    If 'c_doc' is also NULL, a new xmlDoc will be created.

    The tag name is checked with the HTML validator if 'parser' is an HTML
    parser and with the XML validator otherwise.  Raises MemoryError if the
    C node cannot be created.  Any exception raised while the new node is
    being set up is re-raised after the cleanup in the 'except' block.
    """
    cdef xmlNode* c_node
    if doc is not None:
        # an existing document always takes precedence over the 'c_doc' argument
        c_doc = doc._c_doc
    ns_utf, name_utf = _getNsTag(tag)
    # validate the tag name first, then create a C document only if needed
    if parser is not None and parser._for_html:
        _htmlTagValidOrRaise(name_utf)
        if c_doc is NULL:
            c_doc = _newHTMLDoc()
    else:
        _tagValidOrRaise(name_utf)
        if c_doc is NULL:
            c_doc = _newXMLDoc()
    c_node = _createElement(c_doc, name_utf)
    if c_node is NULL:
        # no _Document proxy owns c_doc in this case => free it ourselves
        if doc is None and c_doc is not NULL:
            tree.xmlFreeDoc(c_doc)
        raise MemoryError()
    try:
        if doc is None:
            # the new node becomes the root of the document before the
            # _Document proxy is created for it
            tree.xmlDocSetRootElement(c_doc, c_node)
            doc = _documentFactory(c_doc, parser)
        if text is not None:
            _setNodeText(c_node, text)
        if tail is not None:
            _setTailText(c_node, tail)
        # add namespaces to node if necessary
        _setNodeNamespaces(c_node, doc, ns_utf, nsmap)
        _initNodeAttributes(c_node, doc, attrib, extra_attrs)
        return _elementFactory(doc, c_node)
    except:
        # free allocated c_node/c_doc unless Python does it for us
        if c_node.doc is not c_doc:
            # node not yet in document => will not be freed by document
            if tail is not None:
                _removeText(c_node.next) # tail
            tree.xmlFreeNode(c_node)
        if doc is None:
            # c_doc will not be freed by doc
            tree.xmlFreeDoc(c_doc)
        raise
 132  
 133  cdef int _initNewElement(_Element element, bint is_html, name_utf, ns_utf,
 134                           _BaseParser parser, attrib, nsmap, dict extra_attrs) except -1:
 135      u"""Initialise a new Element object.
 136  
 137      This is used when users instantiate a Python Element subclass
 138      directly, without it being mapped to an existing XML node.
 139      """
 140      cdef xmlDoc* c_doc
 141      cdef xmlNode* c_node
 142      cdef _Document doc
 143      if is_html:
 144          _htmlTagValidOrRaise(name_utf)
 145          c_doc = _newHTMLDoc()
 146      else:
 147          _tagValidOrRaise(name_utf)
 148          c_doc = _newXMLDoc()
 149      c_node = _createElement(c_doc, name_utf)
 150      if c_node is NULL:
 151          if c_doc is not NULL:
 152              tree.xmlFreeDoc(c_doc)
 153          raise MemoryError()
 154      tree.xmlDocSetRootElement(c_doc, c_node)
 155      doc = _documentFactory(c_doc, parser)
 156      # add namespaces to node if necessary
 157      _setNodeNamespaces(c_node, doc, ns_utf, nsmap)
 158      _initNodeAttributes(c_node, doc, attrib, extra_attrs)
 159      _registerProxy(element, doc, c_node)
 160      element._init()
 161      return 0
 162  
 163  cdef _Element _makeSubElement(_Element parent, tag, text, tail,
 164                                attrib, nsmap, dict extra_attrs):
 165      u"""Create a new child element and initialize text content, namespaces and
 166      attributes.
 167      """
 168      cdef xmlNode* c_node
 169      cdef xmlDoc* c_doc
 170      if parent is None or parent._doc is None:
 171          return None
 172      _assertValidNode(parent)
 173      ns_utf, name_utf = _getNsTag(tag)
 174      c_doc = parent._doc._c_doc
 175  
 176      if parent._doc._parser is not None and parent._doc._parser._for_html:
 177          _htmlTagValidOrRaise(name_utf)
 178      else:
 179          _tagValidOrRaise(name_utf)
 180  
 181      c_node = _createElement(c_doc, name_utf)
 182      if c_node is NULL:
 183          raise MemoryError()
 184      tree.xmlAddChild(parent._c_node, c_node)
 185  
 186      try:
 187          if text is not None:
 188              _setNodeText(c_node, text)
 189          if tail is not None:
 190              _setTailText(c_node, tail)
 191  
 192          # add namespaces to node if necessary
 193          _setNodeNamespaces(c_node, parent._doc, ns_utf, nsmap)
 194          _initNodeAttributes(c_node, parent._doc, attrib, extra_attrs)
 195          return _elementFactory(parent._doc, c_node)
 196      except:
 197          # make sure we clean up in case of an error
 198          _removeNode(parent._doc, c_node)
 199          raise
 200  
 201  
 202  cdef int _setNodeNamespaces(xmlNode* c_node, _Document doc,
 203                              object node_ns_utf, object nsmap) except -1:
 204      u"""Lookup current namespace prefixes, then set namespace structure for
 205      node (if 'node_ns_utf' was provided) and register new ns-prefix mappings.
 206  
 207      'node_ns_utf' should only be passed for a newly created node.
 208      """
 209      cdef xmlNs* c_ns
 210      cdef list nsdefs
 211  
 212      if nsmap:
 213          for prefix, href in _iter_nsmap(nsmap):
 214              href_utf = _utf8(href)
 215              _uriValidOrRaise(href_utf)
 216              c_href = _xcstr(href_utf)
 217              if prefix is not None:
 218                  prefix_utf = _utf8(prefix)
 219                  _prefixValidOrRaise(prefix_utf)
 220                  c_prefix = _xcstr(prefix_utf)
 221              else:
 222                  c_prefix = <const_xmlChar*>NULL
 223              # add namespace with prefix if it is not already known
 224              c_ns = tree.xmlSearchNs(doc._c_doc, c_node, c_prefix)
 225              if c_ns is NULL or \
 226                      c_ns.href is NULL or \
 227                      tree.xmlStrcmp(c_ns.href, c_href) != 0:
 228                  c_ns = tree.xmlNewNs(c_node, c_href, c_prefix)
 229              if href_utf == node_ns_utf:
 230                  tree.xmlSetNs(c_node, c_ns)
 231                  node_ns_utf = None
 232  
 233      if node_ns_utf is not None:
 234          _uriValidOrRaise(node_ns_utf)
 235          doc._setNodeNs(c_node, _xcstr(node_ns_utf))
 236      return 0
 237  
 238  
 239  cdef dict _build_nsmap(xmlNode* c_node):
 240      """
 241      Namespace prefix->URI mapping known in the context of this Element.
 242      This includes all namespace declarations of the parents.
 243      """
 244      cdef xmlNs* c_ns
 245      nsmap = {}
 246      while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE:
 247          c_ns = c_node.nsDef
 248          while c_ns is not NULL:
 249              if c_ns.prefix or c_ns.href:
 250                  prefix = funicodeOrNone(c_ns.prefix)
 251                  if prefix not in nsmap:
 252                      nsmap[prefix] = funicodeOrNone(c_ns.href)
 253              c_ns = c_ns.next
 254          c_node = c_node.parent
 255      return nsmap
 256  
 257  
 258  cdef _iter_nsmap(nsmap):
 259      """
 260      Create a reproducibly ordered iterable from an nsmap mapping.
 261      Tries to preserve an existing order and sorts if it assumes no order.
 262  
 263      The difference to _iter_attrib() is that None doesn't sort with strings
 264      in Py3.x.
 265      """
 266      if python.PY_VERSION_HEX >= 0x03060000:
 267          # dicts are insertion-ordered in Py3.6+ => keep the user provided order.
 268          if isinstance(nsmap, dict):
 269              return nsmap.items()
 270      if len(nsmap) <= 1:
 271          return nsmap.items()
 272      # nsmap will usually be a plain unordered dict => avoid type checking overhead
 273      if type(nsmap) is not dict and isinstance(nsmap, OrderedDict):
 274          return nsmap.items()  # keep existing order
 275      if None not in nsmap:
 276          return sorted(nsmap.items())
 277  
 278      # Move the default namespace to the end.  This makes sure libxml2
 279      # prefers a prefix if the ns is defined redundantly on the same
 280      # element.  That way, users can work around a problem themselves
 281      # where default namespace attributes on non-default namespaced
 282      # elements serialise without prefix (i.e. into the non-default
 283      # namespace).
 284      default_ns = nsmap[None]
 285      nsdefs = [(k, v) for k, v in nsmap.items() if k is not None]
 286      nsdefs.sort()
 287      nsdefs.append((None, default_ns))
 288      return nsdefs
 289  
 290  
 291  cdef _iter_attrib(attrib):
 292      """
 293      Create a reproducibly ordered iterable from an attrib mapping.
 294      Tries to preserve an existing order and sorts if it assumes no order.
 295      """
 296      # dicts are insertion-ordered in Py3.6+ => keep the user provided order.
 297      if python.PY_VERSION_HEX >= 0x03060000 and isinstance(attrib, dict) or (
 298              isinstance(attrib, (_Attrib, OrderedDict))):
 299          return attrib.items()
 300      # assume it's an unordered mapping of some kind
 301      return sorted(attrib.items())
 302  
 303  
 304  cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, dict extra):
 305      u"""Initialise the attributes of an element node.
 306      """
 307      cdef bint is_html
 308      cdef xmlNs* c_ns
 309      if attrib is not None and not hasattr(attrib, u'items'):
 310          raise TypeError, f"Invalid attribute dictionary: {python._fqtypename(attrib).decode('utf8')}"
 311      if not attrib and not extra:
 312          return  # nothing to do
 313      is_html = doc._parser._for_html
 314      seen = set()
 315      if extra:
 316          if python.PY_VERSION_HEX >= 0x03060000:
 317              for name, value in extra.items():
 318                  _addAttributeToNode(c_node, doc, is_html, name, value, seen)
 319          else:
 320              for name, value in sorted(extra.items()):
 321                  _addAttributeToNode(c_node, doc, is_html, name, value, seen)
 322      if attrib:
 323          for name, value in _iter_attrib(attrib):
 324              _addAttributeToNode(c_node, doc, is_html, name, value, seen)
 325  
 326  
 327  cdef int _addAttributeToNode(xmlNode* c_node, _Document doc, bint is_html,
 328                               name, value, set seen_tags) except -1:
 329      ns_utf, name_utf = tag = _getNsTag(name)
 330      if tag in seen_tags:
 331          return 0
 332      seen_tags.add(tag)
 333      if not is_html:
 334          _attributeValidOrRaise(name_utf)
 335      value_utf = _utf8(value)
 336      if ns_utf is None:
 337          tree.xmlNewProp(c_node, _xcstr(name_utf), _xcstr(value_utf))
 338      else:
 339          _uriValidOrRaise(ns_utf)
 340          c_ns = doc._findOrBuildNodeNs(c_node, _xcstr(ns_utf), NULL, 1)
 341          tree.xmlNewNsProp(c_node, c_ns,
 342                            _xcstr(name_utf), _xcstr(value_utf))
 343      return 0
 344  
 345  
# Pairs a namespace declaration with the node whose 'nsDef' list holds it.
# Filled in by _collectNsDefs() and consumed by
# _removeUnusedNamespaceDeclarations().
ctypedef struct _ns_node_ref:
    xmlNs* ns        # the namespace declaration
    xmlNode* node    # the node that carries the declaration
 349  
 350  
 351  cdef int _collectNsDefs(xmlNode* c_element, _ns_node_ref **_c_ns_list,
 352                          size_t *_c_ns_list_len, size_t *_c_ns_list_size) except -1:
 353      c_ns_list = _c_ns_list[0]
 354      cdef size_t c_ns_list_len = _c_ns_list_len[0]
 355      cdef size_t c_ns_list_size = _c_ns_list_size[0]
 356  
 357      c_nsdef = c_element.nsDef
 358      while c_nsdef is not NULL:
 359          if c_ns_list_len >= c_ns_list_size:
 360              if c_ns_list is NULL:
 361                  c_ns_list_size = 20
 362              else:
 363                  c_ns_list_size *= 2
 364              c_nsref_ptr = <_ns_node_ref*> python.lxml_realloc(
 365                  c_ns_list, c_ns_list_size, sizeof(_ns_node_ref))
 366              if c_nsref_ptr is NULL:
 367                  if c_ns_list is not NULL:
 368                      python.lxml_free(c_ns_list)
 369                      _c_ns_list[0] = NULL
 370                  raise MemoryError()
 371              c_ns_list = c_nsref_ptr
 372  
 373          c_ns_list[c_ns_list_len] = _ns_node_ref(c_nsdef, c_element)
 374          c_ns_list_len += 1
 375          c_nsdef = c_nsdef.next
 376  
 377      _c_ns_list_size[0] = c_ns_list_size
 378      _c_ns_list_len[0] = c_ns_list_len
 379      _c_ns_list[0] = c_ns_list
 380  
 381  
cdef int _removeUnusedNamespaceDeclarations(xmlNode* c_element, set prefixes_to_keep) except -1:
    u"""Remove any namespace declarations from a subtree that are not used by
    any of its elements (or attributes).

    If a 'prefixes_to_keep' is provided, it must be a set of prefixes.
    Any corresponding namespace mappings will not be removed as part of the cleanup.

    The function works in two phases: it first collects the declarations
    and drops from that list every declaration that an element or attribute
    refers to, then it unlinks and frees whatever remained in the list.
    """
    cdef xmlNode* c_node
    cdef _ns_node_ref* c_ns_list = NULL
    cdef size_t c_ns_list_size = 0
    cdef size_t c_ns_list_len = 0
    cdef size_t i

    if c_element.parent and c_element.parent.type == tree.XML_DOCUMENT_NODE:
        # include declarations on the document node
        _collectNsDefs(c_element.parent, &c_ns_list, &c_ns_list_len, &c_ns_list_size)

    # NOTE(review): the BEGIN/END macro pair presumably runs the enclosed
    # code for each node of the subtree, rebinding 'c_element' -- confirm
    # against the macro definition.
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_element, c_element, 1)
    # collect all new namespace declarations into the ns list
    if c_element.nsDef:
        _collectNsDefs(c_element, &c_ns_list, &c_ns_list_len, &c_ns_list_size)

    # remove all namespace declarations from the list that are referenced
    if c_ns_list_len and c_element.type == tree.XML_ELEMENT_NODE:
        # 'c_node' is first the element itself and then each of its attributes
        c_node = c_element
        while c_node and c_ns_list_len:
            if c_node.ns:
                for i in range(c_ns_list_len):
                    if c_node.ns is c_ns_list[i].ns:
                        # drop the entry by overwriting it with the last one
                        c_ns_list_len -= 1
                        c_ns_list[i] = c_ns_list[c_ns_list_len]
                        #c_ns_list[c_ns_list_len] = _ns_node_ref(NULL, NULL)
                        break
            if c_node is c_element:
                # continue with attributes
                c_node = <xmlNode*>c_element.properties
            else:
                c_node = c_node.next
    tree.END_FOR_EACH_ELEMENT_FROM(c_element)

    if c_ns_list is NULL:
        # no declarations were collected at all
        return 0

    # free all namespace declarations that remained in the list,
    # except for those we should keep explicitly
    cdef xmlNs* c_nsdef
    for i in range(c_ns_list_len):
        if prefixes_to_keep is not None:
            # NOTE(review): the C prefix string is tested against the set
            # directly, so the set presumably holds byte strings -- confirm.
            if c_ns_list[i].ns.prefix and c_ns_list[i].ns.prefix in prefixes_to_keep:
                continue
        # unlink the declaration from the 'nsDef' list of its owner node
        c_node = c_ns_list[i].node
        c_nsdef = c_node.nsDef
        if c_nsdef is c_ns_list[i].ns:
            # it is the head of the list
            c_node.nsDef = c_node.nsDef.next
        else:
            # find the predecessor and skip over the declaration
            while c_nsdef.next is not c_ns_list[i].ns:
                c_nsdef = c_nsdef.next
            c_nsdef.next = c_nsdef.next.next
        tree.xmlFreeNs(c_ns_list[i].ns)

    if c_ns_list is not NULL:
        python.lxml_free(c_ns_list)
    return 0
 445  
cdef xmlNs* _searchNsByHref(xmlNode* c_node, const_xmlChar* c_href, bint is_attribute):
    u"""Search a namespace declaration that covers a node (element or
    attribute).

    For attributes, try to find a prefixed namespace declaration
    instead of the default namespaces.  This helps in supporting
    round-trips for attributes on elements with a different namespace.

    Returns NULL if 'c_href' or 'c_node' is NULL, if 'c_node' is an entity
    reference node, or if no declaration for 'c_href' is in scope of the
    start element.
    """
    cdef xmlNs* c_ns
    cdef xmlNs* c_default_ns = NULL
    cdef xmlNode* c_element
    if c_href is NULL or c_node is NULL or c_node.type == tree.XML_ENTITY_REF_NODE:
        return NULL
    if tree.xmlStrcmp(c_href, tree.XML_XML_NAMESPACE) == 0:
        # no special cases here, let libxml2 handle this
        return tree.xmlSearchNsByHref(c_node.doc, c_node, c_href)
    if c_node.type == tree.XML_ATTRIBUTE_NODE:
        # attribute nodes get the attribute treatment regardless of the flag
        is_attribute = 1
    # start the search at the closest element (the node itself or an ancestor)
    while c_node is not NULL and c_node.type != tree.XML_ELEMENT_NODE:
        c_node = c_node.parent
    c_element = c_node
    # walk up the tree, looking at the declarations on each element
    while c_node is not NULL:
        if c_node.type == tree.XML_ELEMENT_NODE:
            c_ns = c_node.nsDef
            while c_ns is not NULL:
                if c_ns.href is not NULL and tree.xmlStrcmp(c_href, c_ns.href) == 0:
                    if c_ns.prefix is NULL and is_attribute:
                        # for attributes, continue searching a named
                        # prefix, but keep the first default namespace
                        # declaration that we found
                        if c_default_ns is NULL:
                            c_default_ns = c_ns
                    elif tree.xmlSearchNs(
                        c_element.doc, c_element, c_ns.prefix) is c_ns:
                        # start node is in namespace scope => found!
                        return c_ns
                c_ns = c_ns.next
            if c_node is not c_element and c_node.ns is not NULL:
                # optimise: the node may have the namespace itself
                c_ns = c_node.ns
                if c_ns.href is not NULL and tree.xmlStrcmp(c_href, c_ns.href) == 0:
                    if c_ns.prefix is NULL and is_attribute:
                        # for attributes, continue searching a named
                        # prefix, but keep the first default namespace
                        # declaration that we found
                        if c_default_ns is NULL:
                            c_default_ns = c_ns
                    elif tree.xmlSearchNs(
                        c_element.doc, c_element, c_ns.prefix) is c_ns:
                        # start node is in namespace scope => found!
                        return c_ns
        c_node = c_node.parent
    # nothing found => use a matching default namespace or fail
    if c_default_ns is not NULL:
        # only usable if the default namespace of the start element still
        # resolves to this very declaration
        if tree.xmlSearchNs(c_element.doc, c_element, NULL) is c_default_ns:
            return c_default_ns
    return NULL
 503  
cdef int _replaceNodeByChildren(_Document doc, xmlNode* c_node) except -1:
    u"""Unlink 'c_node' from the tree and put its children in its place.

    A node without children is simply unlinked.  After the call, 'c_node'
    has no parent, siblings or children.
    """
    # NOTE: this does not deallocate the node, just unlink it!
    cdef xmlNode* c_parent
    cdef xmlNode* c_child
    if c_node.children is NULL:
        tree.xmlUnlinkNode(c_node)
        return 0

    c_parent = c_node.parent
    # fix parent links of children
    c_child = c_node.children
    while c_child is not NULL:
        c_child.parent = c_parent
        c_child = c_child.next

    # fix namespace references of children if their parent's namespace
    # declarations get lost
    if c_node.nsDef is not NULL:
        c_child = c_node.children
        while c_child is not NULL:
            moveNodeToDocument(doc, doc._c_doc, c_child)
            c_child = c_child.next

    # fix sibling links to/from child slice
    if c_node.prev is NULL:
        # c_node was the first child => its first child takes that place
        c_parent.children = c_node.children
    else:
        c_node.prev.next = c_node.children
        c_node.children.prev = c_node.prev
    if c_node.next is NULL:
        # c_node was the last child => its last child takes that place
        c_parent.last = c_node.last
    else:
        c_node.next.prev = c_node.last
        c_node.last.next = c_node.next

    # unlink c_node
    c_node.children = c_node.last = NULL
    c_node.parent = c_node.next = c_node.prev = NULL
    return 0
 543  
 544  cdef object _attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node):
 545      c_href = _getNs(<xmlNode*>c_attrib_node)
 546      value = tree.xmlGetNsProp(c_element, c_attrib_node.name, c_href)
 547      try:
 548          result = funicode(value)
 549      finally:
 550          tree.xmlFree(value)
 551      return result
 552  
 553  cdef object _attributeValueFromNsName(xmlNode* c_element,
 554                                        const_xmlChar* c_href, const_xmlChar* c_name):
 555      c_result = tree.xmlGetNsProp(c_element, c_name, c_href)
 556      if c_result is NULL:
 557          return None
 558      try:
 559          result = funicode(c_result)
 560      finally:
 561          tree.xmlFree(c_result)
 562      return result
 563  
 564  cdef object _getNodeAttributeValue(xmlNode* c_node, key, default):
 565      ns, tag = _getNsTag(key)
 566      c_href = <const_xmlChar*>NULL if ns is None else _xcstr(ns)
 567      c_result = tree.xmlGetNsProp(c_node, _xcstr(tag), c_href)
 568      if c_result is NULL:
 569          # XXX free namespace that is not in use..?
 570          return default
 571      try:
 572          result = funicode(c_result)
 573      finally:
 574          tree.xmlFree(c_result)
 575      return result
 576  
 577  cdef inline object _getAttributeValue(_Element element, key, default):
 578      return _getNodeAttributeValue(element._c_node, key, default)
 579  
 580  cdef int _setAttributeValue(_Element element, key, value) except -1:
 581      cdef const_xmlChar* c_value
 582      cdef xmlNs* c_ns
 583      ns, tag = _getNsTag(key)
 584      is_html = element._doc._parser._for_html
 585      if not is_html:
 586          _attributeValidOrRaise(tag)
 587      c_tag = _xcstr(tag)
 588      if value is None and is_html:
 589          c_value = NULL
 590      else:
 591          if isinstance(value, QName):
 592              value = _resolveQNameText(element, value)
 593          else:
 594              value = _utf8(value)
 595          c_value = _xcstr(value)
 596      if ns is None:
 597          c_ns = NULL
 598      else:
 599          c_ns = element._doc._findOrBuildNodeNs(element._c_node, _xcstr(ns), NULL, 1)
 600      tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value)
 601      return 0
 602  
 603  cdef int _delAttribute(_Element element, key) except -1:
 604      ns, tag = _getNsTag(key)
 605      c_href = <const_xmlChar*>NULL if ns is None else _xcstr(ns)
 606      if _delAttributeFromNsName(element._c_node, c_href, _xcstr(tag)):
 607          raise KeyError, key
 608      return 0
 609  
 610  cdef int _delAttributeFromNsName(xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name):
 611      c_attr = tree.xmlHasNsProp(c_node, c_name, c_href)
 612      if c_attr is NULL:
 613          # XXX free namespace that is not in use..?
 614          return -1
 615      tree.xmlRemoveProp(c_attr)
 616      return 0
 617  
 618  cdef list _collectAttributes(xmlNode* c_node, int collecttype):
 619      u"""Collect all attributes of a node in a list.  Depending on collecttype,
 620      it collects either the name (1), the value (2) or the name-value tuples.
 621      """
 622      cdef Py_ssize_t count
 623      c_attr = c_node.properties
 624      count = 0
 625      while c_attr is not NULL:
 626          if c_attr.type == tree.XML_ATTRIBUTE_NODE:
 627              count += 1
 628          c_attr = c_attr.next
 629  
 630      if not count:
 631          return []
 632  
 633      attributes = [None] * count
 634      c_attr = c_node.properties
 635      count = 0
 636      while c_attr is not NULL:
 637          if c_attr.type == tree.XML_ATTRIBUTE_NODE:
 638              if collecttype == 1:
 639                  item = _namespacedName(<xmlNode*>c_attr)
 640              elif collecttype == 2:
 641                  item = _attributeValue(c_node, c_attr)
 642              else:
 643                  item = (_namespacedName(<xmlNode*>c_attr),
 644                          _attributeValue(c_node, c_attr))
 645              attributes[count] = item
 646              count += 1
 647          c_attr = c_attr.next
 648      return attributes
 649  
# Matches an XML declaration at the start of a string that carries an
# 'encoding' pseudo-attribute.  Group 1 captures the declaration up to
# (but excluding) the whitespace before 'encoding', group 2 captures a
# directly following '?>' terminator or the empty string.
cdef object __RE_XML_ENCODING = re.compile(
    ur'^(<\?xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\'](\s*\?>|)', re.U)

# bound methods of the compiled pattern, looked up once
cdef object __REPLACE_XML_ENCODING = __RE_XML_ENCODING.sub
cdef object __HAS_XML_ENCODING = __RE_XML_ENCODING.match
 655  
 656  cdef object _stripEncodingDeclaration(object xml_string):
 657      # this is a hack to remove the XML encoding declaration from unicode
 658      return __REPLACE_XML_ENCODING(ur'\g<1>\g<2>', xml_string)
 659  
 660  cdef bint _hasEncodingDeclaration(object xml_string) except -1:
 661      # check if a (unicode) string has an XML encoding declaration
 662      return __HAS_XML_ENCODING(xml_string) is not None
 663  
 664  cdef inline bint _hasText(xmlNode* c_node):
 665      return c_node is not NULL and _textNodeOrSkip(c_node.children) is not NULL
 666  
 667  cdef inline bint _hasTail(xmlNode* c_node):
 668      return c_node is not NULL and _textNodeOrSkip(c_node.next) is not NULL
 669  
 670  cdef inline bint _hasNonWhitespaceTail(xmlNode* c_node):
 671      return _hasNonWhitespaceText(c_node, tail=True)
 672  
 673  cdef bint _hasNonWhitespaceText(xmlNode* c_node, bint tail=False):
 674      c_text_node = c_node and _textNodeOrSkip(c_node.next if tail else c_node.children)
 675      if c_text_node is NULL:
 676          return False
 677      while c_text_node is not NULL:
 678          if c_text_node.content[0] != c'\0' and not _collectText(c_text_node).isspace():
 679              return True
 680          c_text_node = _textNodeOrSkip(c_text_node.next)
 681      return False
 682  
 683  cdef _collectText(xmlNode* c_node):
 684      u"""Collect all text nodes and return them as a unicode string.
 685  
 686      Start collecting at c_node.
 687      
 688      If there was no text to collect, return None
 689      """
 690      cdef Py_ssize_t scount
 691      cdef xmlChar* c_text
 692      cdef xmlNode* c_node_cur
 693      # check for multiple text nodes
 694      scount = 0
 695      c_text = NULL
 696      c_node_cur = c_node = _textNodeOrSkip(c_node)
 697      while c_node_cur is not NULL:
 698          if c_node_cur.content[0] != c'\0':
 699              c_text = c_node_cur.content
 700          scount += 1
 701          c_node_cur = _textNodeOrSkip(c_node_cur.next)
 702  
 703      # handle two most common cases first
 704      if c_text is NULL:
 705          return '' if scount > 0 else None
 706      if scount == 1:
 707          return funicode(c_text)
 708  
 709      # the rest is not performance critical anymore
 710      result = b''
 711      while c_node is not NULL:
 712          result += <unsigned char*>c_node.content
 713          c_node = _textNodeOrSkip(c_node.next)
 714      return funicode(<const_xmlChar*><unsigned char*>result)
 715  
 716  cdef void _removeText(xmlNode* c_node):
 717      u"""Remove all text nodes.
 718  
 719      Start removing at c_node.
 720      """
 721      cdef xmlNode* c_next
 722      c_node = _textNodeOrSkip(c_node)
 723      while c_node is not NULL:
 724          c_next = _textNodeOrSkip(c_node.next)
 725          tree.xmlUnlinkNode(c_node)
 726          tree.xmlFreeNode(c_node)
 727          c_node = c_next
 728  
 729  cdef xmlNode* _createTextNode(xmlDoc* doc, value) except NULL:
 730      cdef xmlNode* c_text_node
 731      if isinstance(value, CDATA):
 732          c_text_node = tree.xmlNewCDataBlock(
 733              doc, _xcstr((<CDATA>value)._utf8_data),
 734              python.PyBytes_GET_SIZE((<CDATA>value)._utf8_data))
 735      else:
 736          text = _utf8(value)
 737          c_text_node = tree.xmlNewDocText(doc, _xcstr(text))
 738      if not c_text_node:
 739          raise MemoryError()
 740      return c_text_node
 741  
 742  cdef int _setNodeText(xmlNode* c_node, value) except -1:
 743      # remove all text nodes at the start first
 744      _removeText(c_node.children)
 745      if value is None:
 746          return 0
 747      # now add new text node with value at start
 748      c_text_node = _createTextNode(c_node.doc, value)
 749      if c_node.children is NULL:
 750          tree.xmlAddChild(c_node, c_text_node)
 751      else:
 752          tree.xmlAddPrevSibling(c_node.children, c_text_node)
 753      return 0
 754  
 755  cdef int _setTailText(xmlNode* c_node, value) except -1:
 756      # remove all text nodes at the start first
 757      _removeText(c_node.next)
 758      if value is None:
 759          return 0
 760      # now append new text node with value
 761      c_text_node = _createTextNode(c_node.doc, value)
 762      tree.xmlAddNextSibling(c_node, c_text_node)
 763      return 0
 764  
 765  cdef bytes _resolveQNameText(_Element element, value):
 766      cdef xmlNs* c_ns
 767      ns, tag = _getNsTag(value)
 768      if ns is None:
 769          return tag
 770      else:
 771          c_ns = element._doc._findOrBuildNodeNs(
 772              element._c_node, _xcstr(ns), NULL, 0)
 773          return python.PyBytes_FromFormat('%s:%s', c_ns.prefix, _cstr(tag))
 774  
 775  cdef inline bint _hasChild(xmlNode* c_node):
 776      return c_node is not NULL and _findChildForwards(c_node, 0) is not NULL
 777  
 778  cdef inline Py_ssize_t _countElements(xmlNode* c_node):
 779      u"Counts the elements within the following siblings and the node itself."
 780      cdef Py_ssize_t count
 781      count = 0
 782      while c_node is not NULL:
 783          if _isElement(c_node):
 784              count += 1
 785          c_node = c_node.next
 786      return count
 787  
 788  cdef int _findChildSlice(
 789      slice sliceobject, xmlNode* c_parent,
 790      xmlNode** c_start_node, Py_ssize_t* c_step, Py_ssize_t* c_length) except -1:
 791      u"""Resolve a children slice.
 792  
 793      Returns the start node, step size and the slice length in the
 794      pointer arguments.
 795      """
 796      cdef Py_ssize_t start = 0, stop = 0, childcount
 797      childcount = _countElements(c_parent.children)
 798      if childcount == 0:
 799          c_start_node[0] = NULL
 800          c_length[0] = 0
 801          if sliceobject.step is None:
 802              c_step[0] = 1
 803          else:
 804              python._PyEval_SliceIndex(sliceobject.step, c_step)
 805          return 0
 806      python.PySlice_GetIndicesEx(
 807          sliceobject, childcount, &start, &stop, c_step, c_length)
 808      if start > childcount / 2:
 809          c_start_node[0] = _findChildBackwards(c_parent, childcount - start - 1)
 810      else:
 811          c_start_node[0] = _findChild(c_parent, start)
 812      return 0
 813  
 814  cdef bint _isFullSlice(slice sliceobject) except -1:
 815      u"""Conservative guess if this slice is a full slice as in ``s[:]``.
 816      """
 817      cdef Py_ssize_t step = 0
 818      if sliceobject is None:
 819          return 0
 820      if sliceobject.start is None and \
 821              sliceobject.stop is None:
 822          if sliceobject.step is None:
 823              return 1
 824          python._PyEval_SliceIndex(sliceobject.step, &step)
 825          if step == 1:
 826              return 1
 827          return 0
 828      return 0
 829  
 830  cdef _collectChildren(_Element element):
 831      cdef xmlNode* c_node
 832      cdef list result = []
 833      c_node = element._c_node.children
 834      if c_node is not NULL:
 835          if not _isElement(c_node):
 836              c_node = _nextElement(c_node)
 837          while c_node is not NULL:
 838              result.append(_elementFactory(element._doc, c_node))
 839              c_node = _nextElement(c_node)
 840      return result
 841  
 842  cdef inline xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index):
 843      if index < 0:
 844          return _findChildBackwards(c_node, -index - 1)
 845      else:
 846          return _findChildForwards(c_node, index)
 847      
 848  cdef inline xmlNode* _findChildForwards(xmlNode* c_node, Py_ssize_t index):
 849      u"""Return child element of c_node with index, or return NULL if not found.
 850      """
 851      cdef xmlNode* c_child
 852      cdef Py_ssize_t c
 853      c_child = c_node.children
 854      c = 0
 855      while c_child is not NULL:
 856          if _isElement(c_child):
 857              if c == index:
 858                  return c_child
 859              c += 1
 860          c_child = c_child.next
 861      return NULL
 862  
 863  cdef inline xmlNode* _findChildBackwards(xmlNode* c_node, Py_ssize_t index):
 864      u"""Return child element of c_node with index, or return NULL if not found.
 865      Search from the end.
 866      """
 867      cdef xmlNode* c_child
 868      cdef Py_ssize_t c
 869      c_child = c_node.last
 870      c = 0
 871      while c_child is not NULL:
 872          if _isElement(c_child):
 873              if c == index:
 874                  return c_child
 875              c += 1
 876          c_child = c_child.prev
 877      return NULL
 878      
 879  cdef inline xmlNode* _textNodeOrSkip(xmlNode* c_node) nogil:
 880      u"""Return the node if it's a text node.  Skip over ignorable nodes in a
 881      series of text nodes.  Return NULL if a non-ignorable node is found.
 882  
 883      This is used to skip over XInclude nodes when collecting adjacent text
 884      nodes.
 885      """
 886      while c_node is not NULL:
 887          if c_node.type == tree.XML_TEXT_NODE or \
 888                 c_node.type == tree.XML_CDATA_SECTION_NODE:
 889              return c_node
 890          elif c_node.type == tree.XML_XINCLUDE_START or \
 891                   c_node.type == tree.XML_XINCLUDE_END:
 892              c_node = c_node.next
 893          else:
 894              return NULL
 895      return NULL
 896  
 897  cdef inline xmlNode* _nextElement(xmlNode* c_node):
 898      u"""Given a node, find the next sibling that is an element.
 899      """
 900      if c_node is NULL:
 901          return NULL
 902      c_node = c_node.next
 903      while c_node is not NULL:
 904          if _isElement(c_node):
 905              return c_node
 906          c_node = c_node.next
 907      return NULL
 908  
 909  cdef inline xmlNode* _previousElement(xmlNode* c_node):
 910      u"""Given a node, find the next sibling that is an element.
 911      """
 912      if c_node is NULL:
 913          return NULL
 914      c_node = c_node.prev
 915      while c_node is not NULL:
 916          if _isElement(c_node):
 917              return c_node
 918          c_node = c_node.prev
 919      return NULL
 920  
 921  cdef inline xmlNode* _parentElement(xmlNode* c_node):
 922      u"Given a node, find the parent element."
 923      if c_node is NULL or not _isElement(c_node):
 924          return NULL
 925      c_node = c_node.parent
 926      if c_node is NULL or not _isElement(c_node):
 927          return NULL
 928      return c_node
 929  
 930  cdef inline bint _tagMatches(xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name):
 931      u"""Tests if the node matches namespace URI and tag name.
 932  
 933      A node matches if it matches both c_href and c_name.
 934  
 935      A node matches c_href if any of the following is true:
 936      * c_href is NULL
 937      * its namespace is NULL and c_href is the empty string
 938      * its namespace string equals the c_href string
 939  
 940      A node matches c_name if any of the following is true:
 941      * c_name is NULL
 942      * its name string equals the c_name string
 943      """
 944      if c_node is NULL:
 945          return 0
 946      if c_node.type != tree.XML_ELEMENT_NODE:
 947          # not an element, only succeed if we match everything
 948          return c_name is NULL and c_href is NULL
 949      if c_name is NULL:
 950          if c_href is NULL:
 951              # always match
 952              return 1
 953          else:
 954              c_node_href = _getNs(c_node)
 955              if c_node_href is NULL:
 956                  return c_href[0] == c'\0'
 957              else:
 958                  return tree.xmlStrcmp(c_node_href, c_href) == 0
 959      elif c_href is NULL:
 960          if _getNs(c_node) is not NULL:
 961              return 0
 962          return c_node.name == c_name or tree.xmlStrcmp(c_node.name, c_name) == 0
 963      elif c_node.name == c_name or tree.xmlStrcmp(c_node.name, c_name) == 0:
 964          c_node_href = _getNs(c_node)
 965          if c_node_href is NULL:
 966              return c_href[0] == c'\0'
 967          else:
 968              return tree.xmlStrcmp(c_node_href, c_href) == 0
 969      else:
 970          return 0
 971  
 972  cdef inline bint _tagMatchesExactly(xmlNode* c_node, qname* c_qname):
 973      u"""Tests if the node matches namespace URI and tag name.
 974  
 975      This differs from _tagMatches() in that it does not consider a
 976      NULL value in qname.href a wildcard, and that it expects the c_name
 977      to be taken from the doc dict, i.e. it only compares the names by
 978      address.
 979  
 980      A node matches if it matches both href and c_name of the qname.
 981  
 982      A node matches c_href if any of the following is true:
 983      * its namespace is NULL and c_href is the empty string
 984      * its namespace string equals the c_href string
 985  
 986      A node matches c_name if any of the following is true:
 987      * c_name is NULL
 988      * its name string points to the same address (!) as c_name
 989      """
 990      return _nsTagMatchesExactly(_getNs(c_node), c_node.name, c_qname)
 991  
 992  cdef inline bint _nsTagMatchesExactly(const_xmlChar* c_node_href,
 993                                        const_xmlChar* c_node_name,
 994                                        qname* c_qname):
 995      u"""Tests if name and namespace URI match those of c_qname.
 996  
 997      This differs from _tagMatches() in that it does not consider a
 998      NULL value in qname.href a wildcard, and that it expects the c_name
 999      to be taken from the doc dict, i.e. it only compares the names by
1000      address.
1001  
1002      A node matches if it matches both href and c_name of the qname.
1003  
1004      A node matches c_href if any of the following is true:
1005      * its namespace is NULL and c_href is the empty string
1006      * its namespace string equals the c_href string
1007  
1008      A node matches c_name if any of the following is true:
1009      * c_name is NULL
1010      * its name string points to the same address (!) as c_name
1011      """
1012      cdef char* c_href
1013      if c_qname.c_name is not NULL and c_qname.c_name is not c_node_name:
1014          return 0
1015      if c_qname.href is NULL:
1016          return 1
1017      c_href = python.__cstr(c_qname.href)
1018      if c_href[0] == '\0':
1019          return c_node_href is NULL or c_node_href[0] == '\0'
1020      elif c_node_href is NULL:
1021          return 0
1022      else:
1023          return tree.xmlStrcmp(<const_xmlChar*>c_href, c_node_href) == 0
1024  
cdef Py_ssize_t _mapTagsToQnameMatchArray(xmlDoc* c_doc, list ns_tags,
                                          qname* c_ns_tags, bint force_into_dict) except -1:
    u"""Fill the qname array 'c_ns_tags' from the (namespace, name) pairs in
    'ns_tags' and return the number of entries that were written.

    Names are resolved through the dict of c_doc.  With 'force_into_dict',
    missing names are added to the dict (MemoryError on failure).
    Without it, pairs whose name is not in the dict are skipped.

    Note that each written qname struct owns a reference to its href
    byte string object if it is not NULL.
    """
    cdef Py_ssize_t count = 0, i
    cdef bytes ns, tag
    for ns, tag in ns_tags:
        if tag is None:
            c_tag = <const_xmlChar*>NULL
        elif not force_into_dict:
            c_tag = tree.xmlDictExists(c_doc.dict, _xcstr(tag), len(tag))
            if c_tag is NULL:
                # not in the dict => not in the document
                continue
        else:
            c_tag = tree.xmlDictLookup(c_doc.dict, _xcstr(tag), len(tag))
            if c_tag is NULL:
                # release the references taken so far before raising
                for i in xrange(count):
                    cpython.ref.Py_XDECREF(c_ns_tags[i].href)
                raise MemoryError()
        c_ns_tags[count].c_name = c_tag
        if ns is not None:
            cpython.ref.Py_INCREF(ns) # keep an owned reference!
            c_ns_tags[count].href = <python.PyObject*>ns
        else:
            c_ns_tags[count].href = NULL
        count += 1
    return count
1058  
cdef int _removeNode(_Document doc, xmlNode* c_node) except -1:
    u"""Unlink a node from its tree, taking its tail text along, and try
    to free it together with its subnodes.  If it cannot be freed, make
    sure it is self-contained.
    """
    cdef xmlNode* c_following = c_node.next
    tree.xmlUnlinkNode(c_node)
    _moveTail(c_following, c_node)
    if attemptDeallocation(c_node):
        return 0
    # still in use: make namespaces absolute
    moveNodeToDocument(doc, c_node.doc, c_node)
    return 0
1071  
cdef int _removeSiblings(xmlNode* c_element, tree.xmlElementType node_type, bint with_tail) except -1:
    u"""Remove the siblings of c_element that have the given node type.

    Walks over the following siblings first and then over the preceding
    ones, stepping via _nextElement() / _previousElement().  With
    'with_tail', the text nodes behind a removed node are removed as well.
    """
    cdef xmlNode* c_current
    cdef xmlNode* c_upcoming

    # siblings behind the element
    c_current = c_element.next
    while c_current is not NULL:
        # look up the successor before the node may get freed
        c_upcoming = _nextElement(c_current)
        if c_current.type == node_type:
            if with_tail:
                _removeText(c_current.next)
            tree.xmlUnlinkNode(c_current)
            attemptDeallocation(c_current)
        c_current = c_upcoming

    # siblings in front of the element
    c_current = c_element.prev
    while c_current is not NULL:
        c_upcoming = _previousElement(c_current)
        if c_current.type == node_type:
            if with_tail:
                _removeText(c_current.next)
            tree.xmlUnlinkNode(c_current)
            attemptDeallocation(c_current)
        c_current = c_upcoming
    return 0
1094  
cdef void _moveTail(xmlNode* c_tail, xmlNode* c_target):
    u"""Move the text nodes starting at c_tail behind c_target.

    Tail support: the series of text nodes found by _textNodeOrSkip()
    is re-linked, node by node, as next siblings of the target.
    """
    cdef xmlNode* c_text = _textNodeOrSkip(c_tail)
    cdef xmlNode* c_following
    while c_text is not NULL:
        # find the successor before the node changes its position
        c_following = _textNodeOrSkip(c_text.next)
        c_target = tree.xmlAddNextSibling(c_target, c_text)
        c_text = c_following
1104  
cdef int _copyTail(xmlNode* c_tail, xmlNode* c_target) except -1:
    u"""Copy the text nodes starting at c_tail behind c_target.

    Tail copying support: each text node found by _textNodeOrSkip() is
    copied (into the target document if that differs) and appended as
    next sibling.  Raises MemoryError if a copy fails.
    """
    cdef xmlNode* c_text = _textNodeOrSkip(c_tail)
    cdef xmlNode* c_duplicate
    while c_text is not NULL:
        if c_target.doc is c_text.doc:
            c_duplicate = tree.xmlCopyNode(c_text, 0)
        else:
            c_duplicate = tree.xmlDocCopyNode(c_text, c_target.doc, 0)
        if c_duplicate is NULL:
            raise MemoryError()
        c_target = tree.xmlAddNextSibling(c_target, c_duplicate)
        c_text = _textNodeOrSkip(c_text.next)
    return 0
1120  
cdef int _copyNonElementSiblings(xmlNode* c_node, xmlNode* c_target) except -1:
    u"""Copy the non-element siblings around c_node to the same positions
    around c_target.

    Preceding siblings: the uninterrupted run of processing instructions,
    comments and DTD nodes directly in front of c_node.  A copied DTD is
    also registered as internal or external subset of the target document.

    Following siblings: the uninterrupted run of processing instructions
    and comments directly behind c_node.

    The copies keep the order of the originals.  Raises MemoryError if a
    node copy fails.  Returns 0 on success.
    """
    cdef xmlNode* c_copy
    cdef xmlNode* c_sibling = c_node
    # insertion point for the copies of the following siblings
    cdef xmlNode* c_last = c_target
    # rewind to the first sibling of the preceding run
    while c_sibling.prev != NULL and \
            (c_sibling.prev.type == tree.XML_PI_NODE or
             c_sibling.prev.type == tree.XML_COMMENT_NODE or
             c_sibling.prev.type == tree.XML_DTD_NODE):
        c_sibling = c_sibling.prev
    # copy forwards; inserting directly before the target keeps the order
    while c_sibling != c_node:
        if c_sibling.type == tree.XML_DTD_NODE:
            c_copy = <xmlNode*>_copyDtd(<tree.xmlDtd*>c_sibling)
            if c_sibling == <xmlNode*>c_node.doc.intSubset:
                c_target.doc.intSubset = <tree.xmlDtd*>c_copy
            else: # c_sibling == c_node.doc.extSubset
                c_target.doc.extSubset = <tree.xmlDtd*>c_copy
        else:
            c_copy = tree.xmlDocCopyNode(c_sibling, c_target.doc, 1)
            if c_copy is NULL:
                raise MemoryError()
        tree.xmlAddPrevSibling(c_target, c_copy)
        c_sibling = c_sibling.next
    while c_sibling.next != NULL and \
            (c_sibling.next.type == tree.XML_PI_NODE or
             c_sibling.next.type == tree.XML_COMMENT_NODE):
        c_sibling = c_sibling.next
        c_copy = tree.xmlDocCopyNode(c_sibling, c_target.doc, 1)
        if c_copy is NULL:
            raise MemoryError()
        # Append behind the previously inserted copy, not directly behind
        # the target.  Otherwise, two or more trailing siblings would end
        # up in reversed order.
        tree.xmlAddNextSibling(c_last, c_copy)
        c_last = c_copy
    return 0
1150  
cdef int _deleteSlice(_Document doc, xmlNode* c_node,
                      Py_ssize_t count, Py_ssize_t step) except -1:
    u"""Remove up to ``count`` elements, beginning with ``c_node`` and then
    advancing by ``step`` elements each time.  A step that is not positive
    walks backwards through the preceding siblings.  A NULL start node
    is a no-op.
    """
    cdef xmlNode* c_upcoming
    cdef Py_ssize_t n_removed, i
    cdef _node_to_node_function advance
    if c_node is NULL:
        return 0
    if step <= 0:
        step = -step
        advance = _previousElement
    else:
        advance = _nextElement
    n_removed = 0
    c_upcoming = c_node
    while c_node is not NULL and n_removed < count:
        # locate the next node to delete before removing the current one
        for i in range(step):
            c_upcoming = advance(c_upcoming)
            if c_upcoming is NULL:
                break
        _removeNode(doc, c_node)
        n_removed += 1
        c_node = c_upcoming
    return 0
1178  
cdef int _replaceSlice(_Element parent, xmlNode* c_node,
                       Py_ssize_t slicelength, Py_ssize_t step,
                       bint left_to_right, elements) except -1:
    u"""Replace the slice of ``slicelength`` elements starting at ``c_node``
    with positive step width ``step`` by the Elements in ``elements``.  The
    direction is given by the boolean argument ``left_to_right``.

    ``c_node`` may be NULL to indicate the end of the children list.

    ``elements`` may be any iterable; it is converted to a list unless it
    already is a list or tuple.  For a step other than 1, or when going
    right-to-left, the number of elements must equal ``slicelength``,
    otherwise a ValueError is raised.

    Returns 0 on success.
    """
    cdef xmlNode* c_orig_neighbour
    cdef xmlNode* c_next
    cdef xmlDoc*  c_source_doc
    cdef _Element element
    cdef Py_ssize_t seqlength, i, c
    cdef _node_to_node_function next_element
    assert step > 0
    # select the sibling stepping function for the requested direction
    if left_to_right:
        next_element = _nextElement
    else:
        next_element = _previousElement

    # we need a sequence: its length is checked below and it is iterated
    # only after the old slice has been removed
    if not isinstance(elements, (list, tuple)):
        elements = list(elements)

    if step != 1 or not left_to_right:
        # *replacing* children stepwise with list => check size!
        seqlength = len(elements)
        if seqlength != slicelength:
            raise ValueError, f"attempt to assign sequence of size {seqlength} " \
                f"to extended slice of size {slicelength}"

    if c_node is NULL:
        # no children yet => add all elements straight away
        if left_to_right:
            for element in elements:
                assert element is not None, u"Node must not be None"
                _appendChild(parent, element)
        else:
            for element in elements:
                assert element is not None, u"Node must not be None"
                _prependChild(parent, element)
        return 0

    # remove the elements first as some might be re-added
    if left_to_right:
        # L->R, remember left neighbour
        c_orig_neighbour = _previousElement(c_node)
    else:
        # R->L, remember right neighbour
        c_orig_neighbour = _nextElement(c_node)

    # We remove the original slice elements one by one. Since we hold
    # a Python reference to all elements that we will insert, it is
    # safe to let _removeNode() try (and fail) to free them even if
    # the element itself or one of its descendants will be reinserted.
    c = 0
    c_next = c_node
    while c_node is not NULL and c < slicelength:
        # find the next slice member before the current one gets unlinked
        for i in range(step):
            c_next = next_element(c_next)
            if c_next is NULL:
                break
        _removeNode(parent._doc, c_node)
        c += 1
        c_node = c_next

    # make sure each element is inserted only once
    # (the same iterator is shared by all insertion loops below)
    elements = iter(elements)

    # find the first node right of the new insertion point
    if left_to_right:
        if c_orig_neighbour is not NULL:
            c_node = next_element(c_orig_neighbour)
        else:
            # before the first element
            c_node = _findChildForwards(parent._c_node, 0)
    elif c_orig_neighbour is NULL:
        # at the end, but reversed stepping
        # append one element and go to the next insertion point
        for element in elements:
            assert element is not None, u"Node must not be None"
            _appendChild(parent, element)
            c_node = element._c_node
            if slicelength > 0:
                slicelength -= 1
                for i in range(1, step):
                    c_node = next_element(c_node)
                    if c_node is NULL:
                        break
            # only the first element is consumed here
            break
    else:
        c_node = c_orig_neighbour

    if left_to_right:
        # adjust step size after removing slice as we are not stepping
        # over the newly inserted elements
        step -= 1

    # now insert elements where we removed them
    if c_node is not NULL:
        for element in elements:
            assert element is not None, u"Node must not be None"
            _assertValidNode(element)
            # move element and tail over
            c_source_doc = element._c_node.doc
            # remember the old successor, the tail text starts there
            c_next = element._c_node.next
            tree.xmlAddPrevSibling(c_node, element._c_node)
            _moveTail(c_next, element._c_node)

            # integrate element into new document
            moveNodeToDocument(parent._doc, c_source_doc, element._c_node)

            # stop at the end of the slice
            if slicelength > 0:
                slicelength -= 1
                for i in range(step):
                    c_node = next_element(c_node)
                    if c_node is NULL:
                        break
                if c_node is NULL:
                    # ran off the children list, the rest is handled below
                    break
        else:
            # everything inserted
            return 0

    # append the remaining elements at the respective end
    if left_to_right:
        for element in elements:
            assert element is not None, u"Node must not be None"
            _assertValidNode(element)
            _appendChild(parent, element)
    else:
        for element in elements:
            assert element is not None, u"Node must not be None"
            _assertValidNode(element)
            _prependChild(parent, element)

    return 0
1317  
1318  
cdef int _linkChild(xmlNode* c_parent, xmlNode* c_node) except -1:
    """Adaptation of 'xmlAddChild()' that links c_node in as the last child
    of c_parent and then fixes the document links of the subtree through
    '_setTreeDoc()'.  c_node must be an element.
    """
    assert _isElement(c_node)
    c_node.parent = c_parent
    if c_parent.children is not NULL:
        # hook the node up behind the current last child
        c_parent.last.next = c_node
        c_node.prev = c_parent.last
    else:
        c_parent.children = c_node
    c_parent.last = c_node

    _setTreeDoc(c_node, c_parent.doc)
    return 0
1333  
1334  
cdef int _appendChild(_Element parent, _Element child) except -1:
    u"""Move 'child', together with its tail text, to the end of the
    children of 'parent'.

    Raises ValueError if that would create a cycle in the tree.
    """
    cdef xmlNode* c_child = child._c_node
    cdef xmlDoc* c_old_doc = c_child.doc
    cdef xmlNode* c_tail_start
    # prevent cycles
    if _isAncestorOrSame(c_child, parent._c_node):
        raise ValueError("cannot append parent to itself")
    # the tail text, if any, starts right behind the node
    c_tail_start = c_child.next
    # move the node itself
    tree.xmlUnlinkNode(c_child)
    # xmlAddChild() would deep-traverse the tree, so link it in manually
    _linkChild(parent._c_node, c_child)
    _moveTail(c_tail_start, c_child)
    # the moved elements may still refer to their old document,
    # so update them as well
    moveNodeToDocument(parent._doc, c_old_doc, c_child)
    return 0
1354  
cdef int _prependChild(_Element parent, _Element child) except -1:
    u"""Move 'child', together with its tail text, in front of the first
    child element of 'parent'.  Without any child elements, the node is
    linked in as last child instead.

    Raises ValueError if that would create a cycle in the tree.
    """
    cdef xmlNode* c_child = child._c_node
    cdef xmlDoc* c_old_doc = c_child.doc
    cdef xmlNode* c_tail_start
    cdef xmlNode* c_first_element
    # prevent cycles
    if _isAncestorOrSame(c_child, parent._c_node):
        raise ValueError("cannot append parent to itself")
    # the tail text, if any, starts right behind the node
    c_tail_start = c_child.next
    # move the node itself
    c_first_element = _findChildForwards(parent._c_node, 0)
    if c_first_element is not NULL:
        tree.xmlAddPrevSibling(c_first_element, c_child)
    else:
        tree.xmlUnlinkNode(c_child)
        # xmlAddChild() would deep-traverse the tree, so link it in manually
        _linkChild(parent._c_node, c_child)
    _moveTail(c_tail_start, c_child)
    # the moved elements may still refer to their old document,
    # so update them as well
    moveNodeToDocument(parent._doc, c_old_doc, c_child)
    return 0
1378  
cdef int _appendSibling(_Element element, _Element sibling) except -1:
    u"""Insert 'sibling' directly behind 'element'.
    """
    return _addSibling(element, sibling, 1)
1383  
cdef int _prependSibling(_Element element, _Element sibling) except -1:
    u"""Insert 'sibling' directly in front of 'element'.
    """
    return _addSibling(element, sibling, 0)
1388  
cdef int _addSibling(_Element element, _Element sibling, bint as_next) except -1:
    u"""Move 'sibling', together with its tail text, next to 'element':
    behind it if 'as_next' is true, in front of it otherwise.

    Adding an element as its own sibling is a no-op.  Raises ValueError
    if 'sibling' is an ancestor of 'element'.
    """
    cdef xmlNode* c_moved = sibling._c_node
    cdef xmlDoc* c_old_doc = c_moved.doc
    cdef xmlNode* c_tail_start
    # prevent cycles
    if _isAncestorOrSame(c_moved, element._c_node):
        if element._c_node is not c_moved:
            raise ValueError("cannot add ancestor as sibling, please break cycle first")
        return 0  # nothing to do
    # the tail text, if any, starts right behind the node
    c_tail_start = c_moved.next
    # move the node itself
    if not as_next:
        tree.xmlAddPrevSibling(element._c_node, c_moved)
    else:
        tree.xmlAddNextSibling(element._c_node, c_moved)
    _moveTail(c_tail_start, c_moved)
    # the moved elements may still refer to their old document,
    # so update them as well
    moveNodeToDocument(element._doc, c_old_doc, c_moved)
    return 0
1409  
cdef inline bint isutf8(const_xmlChar* s):
    u"Tests if the NUL terminated string contains a byte with the high bit set."
    while s[0] != c'\0':
        if s[0] & 0x80:
            return True
        s += 1
    return False
1418  
cdef bint isutf8l(const_xmlChar* s, size_t length):
    """
    Search for non-ASCII characters in the string, knowing its length in advance.

    Returns True if any of the 'length' bytes starting at 's' has its high
    bit set, False otherwise.

    Strings of at least sizeof(unsigned long) bytes are scanned one long at
    a time after an initial byte-wise alignment step.  All reads are bounded
    by the end of the buffer ('s + length').  The end of the word-wise scan
    must not be derived from the unaligned start pointer, because the
    aligned scan could then read past the end of the string.
    """
    cdef unsigned int i
    cdef unsigned long non_ascii_mask
    cdef const_xmlChar* c_end = s + length

    if length >= sizeof(non_ascii_mask):
        # Build constant 0x80808080... mask (and let the C compiler fold it).
        non_ascii_mask = 0
        for i in range(sizeof(non_ascii_mask) // 2):
            non_ascii_mask = (non_ascii_mask << 16) | 0x8080

        # Advance to long-aligned character before we start reading longs.
        while (<size_t>s) % sizeof(unsigned long) and s < c_end:
            if s[0] & 0x80:
                return True
            s += 1

        # Read one long at a time, as long as a complete one is left.
        # 's' never exceeds 'c_end' here, so the difference is not negative.
        while <size_t>(c_end - s) >= sizeof(unsigned long):
            if (<const unsigned long*>s)[0] & non_ascii_mask:
                return True
            s += sizeof(unsigned long)

    # Check the remaining bytes (or all bytes of a short string) one by one.
    while s < c_end:
        if s[0] & 0x80:
            return True
        s += 1

    return False
1454  
cdef int _is_valid_xml_ascii(bytes pystring):
    """Check if a string is XML ascii content.

    Returns 1 if 'tree.xmlIsChar_ch()' accepts every byte of the string,
    0 as soon as one byte is rejected.
    """
    cdef signed char ch
    # When ch is a *signed* char, non-ascii characters are negative integers
    # and xmlIsChar_ch does not accept them.
    for ch in pystring:
        if not tree.xmlIsChar_ch(ch):
            return 0
    return 1
1464  
cdef bint _is_valid_xml_utf8(bytes pystring):
    u"""Check if a string is like valid UTF-8 XML content.

    Returns 0 if the string contains an ASCII byte that
    'tree.xmlIsChar_ch()' rejects, or a 3-byte sequence that encodes
    U+FFFE, U+FFFF or a UTF-16 surrogate.  Returns 1 otherwise.
    Other bytes with the high bit set are not inspected any further here.
    """
    cdef const_xmlChar* s = _xcstr(pystring)
    cdef const_xmlChar* c_end = s + len(pystring)
    # rolling window over the bytes s[0], s[1] and s[2]
    cdef unsigned long next3 = 0
    if s < c_end - 2:
        # preload the first two bytes, the loop shifts in the third one
        next3 = (s[0] << 8) | (s[1])

    # main loop: runs while at least three bytes are left at 's'
    while s < c_end - 2:
        # shift in s[2] and keep the lowest three bytes only
        next3 = 0x00ffffff & ((next3 << 8) | s[2])
        if s[0] & 0x80:
            # 0xefbfbe and 0xefbfbf are utf-8 encodings of
            # forbidden characters \ufffe and \uffff
            if next3 == 0x00efbfbe or next3 == 0x00efbfbf:
                return 0
            # 0xeda080 and 0xedbfbf are utf-8 encodings of
            # \ud800 and \udfff. Anything between them (inclusive)
            # is forbidden, because they are surrogate blocks in utf-16.
            if 0x00eda080 <= next3 <= 0x00edbfbf:
                return 0
        elif not tree.xmlIsChar_ch(s[0]):
            return 0  # invalid ascii char
        s += 1

    # the bytes left at the end cannot start a 3-byte sequence,
    # so only the ASCII check applies to them
    while s < c_end:
        if not s[0] & 0x80 and not tree.xmlIsChar_ch(s[0]):
            return 0  # invalid ascii char
        s += 1

    return 1
1495  
cdef inline object funicodeOrNone(const_xmlChar* s):
    u"Converts the C string with funicode(), or returns None for NULL."
    if s is NULL:
        return None
    return funicode(s)
1498  
cdef inline object funicodeOrEmpty(const_xmlChar* s):
    u"Converts the C string with funicode(), or returns '' for NULL."
    if s is NULL:
        return ''
    return funicode(s)
1501  
cdef object funicode(const_xmlChar* s):
    u"""Convert a NUL terminated UTF-8 C string into a Python string.

    If python.LXML_UNICODE_STRINGS is set, the string is always decoded.
    Otherwise, it is only decoded if it contains a byte with the high
    bit set, and returned as a byte string if it does not.
    """
    cdef Py_ssize_t n_bytes
    cdef const_xmlChar* c_pos
    if python.LXML_UNICODE_STRINGS:
        return s.decode('UTF-8')
    # scan up to the first non-ASCII byte or the end of the string
    c_pos = s
    while c_pos[0] != c'\0' and not (c_pos[0] & 0x80):
        c_pos += 1
    n_bytes = c_pos - s
    if c_pos[0] != c'\0':
        # stopped early at a non-ASCII byte: measure the rest and decode
        n_bytes += cstring_h.strlen(<const char*> c_pos)
        return s[:n_bytes].decode('UTF-8')
    return <bytes>s[:n_bytes]
1521  
cdef bytes _utf8(object s):
    """Validate a user provided string and return it encoded as UTF-8.

    Unicode strings are encoded and checked with _is_valid_xml_utf8().
    Byte strings and bytearrays are checked with _is_valid_xml_ascii().

    Raises TypeError for any other input type and ValueError if the
    content check fails.
    """
    cdef int is_valid
    cdef bytes encoded
    if python.IS_PYTHON2 and type(s) is bytes:
        encoded = <bytes>s
        is_valid = _is_valid_xml_ascii(encoded)
    elif isinstance(s, unicode):
        encoded = (<unicode>s).encode('utf8')
        is_valid = _is_valid_xml_utf8(encoded)
    elif isinstance(s, (bytes, bytearray)):
        encoded = bytes(s)
        is_valid = _is_valid_xml_ascii(encoded)
    else:
        raise TypeError("Argument must be bytes or unicode, got '%.200s'" % type(s).__name__)
    if is_valid:
        return encoded
    raise ValueError(
        "All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters")
1544  
1545  
cdef bytes _utf8orNone(object s):
    """Like _utf8(), but pass None through unchanged."""
    if s is None:
        return None
    return _utf8(s)
1548  
1549  
cdef strrepr(s):
    """Prepare a string for use in __repr__ output, e.g. in
    _Element.__repr__().

    Under Python 2, the string is encoded with 'unicode-escape'.
    Otherwise, it is returned unchanged.
    """
    if python.IS_PYTHON2:
        return s.encode('unicode-escape')
    return s
1555  
1556  
cdef enum:
    # Classification codes returned by _isFilePath().
    # NO_FILE_PATH is the only false value, so the result can also be
    # used directly as a truth value.
    NO_FILE_PATH = 0
    ABS_UNIX_FILE_PATH = 1
    ABS_WIN_FILE_PATH = 2
    REL_FILE_PATH = 3


cdef int _isFilePath(const_xmlChar* c_path):
    """Simple heuristic to see if a path is a filename.

    Returns one of the classification codes above:

    - ABS_UNIX_FILE_PATH if the path starts with '/'
    - ABS_WIN_FILE_PATH if it is a drive letter followed by ':' and
      then either the end of the string or a backslash
    - NO_FILE_PATH if it starts with ASCII letters followed by '://'
    - REL_FILE_PATH for everything else

    The return type is 'int' rather than 'bint' because the result is
    one of four distinct codes, not just a boolean.
    """
    # test if it looks like an absolute Unix path or a Windows network path
    if c_path[0] == c'/':
        return ABS_UNIX_FILE_PATH

    # test if it looks like an absolute Windows path or URL
    if c'a' <= c_path[0] <= c'z' or c'A' <= c_path[0] <= c'Z':
        c_path += 1
        if c_path[0] == c':' and c_path[1] in b'\0\\':
            return ABS_WIN_FILE_PATH  # C: or C:\...

        # test if it looks like a URL with scheme://
        while c'a' <= c_path[0] <= c'z' or c'A' <= c_path[0] <= c'Z':
            c_path += 1
        # the short-circuiting 'and' keeps us from reading past the
        # terminating NUL byte
        if c_path[0] == c':' and c_path[1] == c'/' and c_path[2] == c'/':
            return NO_FILE_PATH

    # assume it's a relative path
    return REL_FILE_PATH
1585  
# sentinel to tell "no __fspath__ attribute" apart from any real value
cdef object _NO_FSPATH = object()

cdef object _getFSPathOrObject(object obj):
    """
    Resolve a path-like object to its file system path.

    Strings are returned unchanged.  From Python 3.6 on, PY_FSPath() is
    used and the object itself is returned if that raises a TypeError.
    In older versions, a callable '__fspath__' attribute is called if the
    object has one.  Otherwise, the original object is returned.
    """
    if _isString(obj):
        return obj
    if python.PY_VERSION_HEX < 0x03060000:
        # no fspath support in the runtime => look up the protocol method
        fspath = getattr(obj, '__fspath__', _NO_FSPATH)
        if fspath is _NO_FSPATH or not callable(fspath):
            return obj
        return fspath()
    try:
        return python.PY_FSPath(obj)
    except TypeError:
        return obj
1604  
cdef object _encodeFilename(object filename):
    """Return the filename as a byte string (or None).

    None and bytes are passed through.  Unicode strings are encoded to
    UTF-8, unless they look like a file path according to _isFilePath(),
    in which case encoding them with _C_FILENAME_ENCODING is tried first.

    Raises TypeError for any other input type.
    """
    if filename is None:
        return None
    if isinstance(filename, bytes):
        return filename
    if not isinstance(filename, unicode):
        raise TypeError("Argument must be string or unicode.")

    utf8_name = (<unicode>filename).encode('utf8')
    if not _isFilePath(<unsigned char*>utf8_name):
        return utf8_name
    try:
        return python.PyUnicode_AsEncodedString(
            filename, _C_FILENAME_ENCODING, NULL)
    except UnicodeEncodeError:
        # not encodable in the filename encoding => use UTF-8 instead
        return utf8_name
1623  
cdef object _decodeFilename(const_xmlChar* c_path):
    """Decode a NUL-terminated filename.

    Determines the length with xmlStrlen() and then delegates to
    _decodeFilenameWithLength().
    """
    cdef size_t c_len = tree.xmlStrlen(c_path)
    return _decodeFilenameWithLength(c_path, c_len)
1628  
cdef object _decodeFilenameWithLength(const_xmlChar* c_path, size_t c_len):
    """Decode the first ``c_len`` bytes of ``c_path`` into a unicode string.

    If the path looks like a file path according to _isFilePath(), decoding
    with _C_FILENAME_ENCODING is tried first.  After that, UTF-8 is tried,
    and finally latin-1 with the 'replace' error handler.
    """
    cdef unsigned char* c_data = <unsigned char*>c_path
    if _isFilePath(c_path):
        try:
            return python.PyUnicode_Decode(
                <const_char*>c_path, c_len, _C_FILENAME_ENCODING, NULL)
        except UnicodeDecodeError:
            pass  # continue with the generic decoding below
    try:
        return c_data[:c_len].decode('UTF-8')
    except UnicodeDecodeError:
        # this is a stupid fallback, but it might still work...
        return c_data[:c_len].decode('latin-1', 'replace')
1643  
cdef object _encodeFilenameUTF8(object filename):
    """Recode a filename as UTF-8 bytes (or return None for None).

    Byte strings for which isutf8l() reports nothing are returned as they
    are.  Other byte strings are decoded with _C_FILENAME_ENCODING and
    then encoded to UTF-8.  If that decoding fails but the bytes are valid
    UTF-8, they are returned unchanged; otherwise the original decoding
    error is raised.  Unicode strings are encoded to UTF-8.

    Raises TypeError for any other input type.
    """
    cdef char* c_name
    cdef Py_ssize_t c_len
    if filename is None:
        return None
    if isinstance(filename, bytes):
        c_len = len(<bytes>filename)
        if not isutf8l(<bytes>filename, c_len):
            # plain ASCII!
            return filename
        c_name = _cstr(<bytes>filename)
        try:
            # try to decode with default encoding
            filename = python.PyUnicode_Decode(
                c_name, c_len, _C_FILENAME_ENCODING, NULL)
        except UnicodeDecodeError as decode_exc:
            try:
                # try if it's proper UTF-8
                (<bytes>filename).decode('utf8')
            except UnicodeDecodeError:
                raise decode_exc # otherwise re-raise original exception
            return filename
    if not isinstance(filename, unicode):
        raise TypeError("Argument must be string or unicode.")
    return (<unicode>filename).encode('utf8')
1672  
cdef tuple _getNsTag(tag):
    """Split a tag into a (namespace URI, tag name) tuple.

    The namespace URI is None if the tag does not provide one.
    Calls __getNsTag() with the 'empty_ns' option disabled.
    """
    return __getNsTag(tag, False)
1678  
cdef tuple _getNsTagWithEmptyNs(tag):
    """Split a tag into a (namespace URI, tag name) tuple.

    The namespace URI is None if the tag does not provide one, or the
    empty string if the namespace part is '{}'.
    Calls __getNsTag() with the 'empty_ns' option enabled.
    """
    return __getNsTag(tag, True)
1685  
cdef tuple __getNsTag(tag, bint empty_ns):
    """Split a '{namespace}name' tag into a (namespace, name) tuple of bytes.

    QName objects are replaced by their text first.  The tag is validated
    and encoded by _utf8().  Without a leading '{', the namespace is None.
    For an empty namespace part ('{}'), the namespace is b'' if 'empty_ns'
    is true and None otherwise.

    Raises ValueError if the closing '}' is missing or the name is empty.
    """
    cdef char* c_pos
    cdef char* c_brace
    cdef Py_ssize_t c_ns_len
    cdef Py_ssize_t c_name_len
    cdef bytes ns = None
    # _isString() is much faster than isinstance()
    if not _isString(tag) and isinstance(tag, QName):
        tag = (<QName>tag).text
    tag = _utf8(tag)
    c_pos = _cstr(tag)

    if c_pos[0] != c'{':
        # plain tag name without namespace
        if python.PyBytes_GET_SIZE(tag) == 0:
            raise ValueError, u"Empty tag name"
        return ns, tag

    c_pos += 1  # step over the '{'
    c_brace = cstring_h.strchr(c_pos, c'}')
    if c_brace is NULL:
        raise ValueError, u"Invalid tag name"
    c_ns_len = c_brace - c_pos
    # total length minus the namespace and the two braces
    c_name_len = python.PyBytes_GET_SIZE(tag) - c_ns_len - 2
    if c_name_len == 0:
        raise ValueError, u"Empty tag name"
    if c_ns_len > 0:
        ns = <bytes>c_pos[:c_ns_len]
    elif empty_ns:
        ns = b''
    return ns, <bytes>c_brace[1:c_name_len+1]
1714  
cdef inline int _pyXmlNameIsValid(name_utf8):
    """Check that the name passes _xmlNameIsValid() and contains no colon."""
    if not _xmlNameIsValid(_xcstr(name_utf8)):
        return 0
    return b':' not in name_utf8
1717  
cdef inline int _pyHtmlNameIsValid(name_utf8):
    """Run _htmlNameIsValid() on the C string of a Python name object."""
    cdef const_xmlChar* c_name = _xcstr(name_utf8)
    return _htmlNameIsValid(c_name)
1720  
cdef inline int _xmlNameIsValid(const_xmlChar* c_name):
    """Return the result of libxml2's xmlValidateNameValue() for the name."""
    cdef int c_result = tree.xmlValidateNameValue(c_name)
    return c_result
1723  
cdef int _htmlNameIsValid(const_xmlChar* c_name):
    """Return 1 if the name is usable as an HTML name, 0 otherwise.

    NULL and empty names are rejected, as are names containing any of
    the characters & < > / " ' or ASCII whitespace (tab, newline,
    vertical tab, form feed, carriage return, space).
    """
    cdef xmlChar c
    if c_name is NULL:
        return 0
    c = c_name[0]
    if c == c'\0':
        return 0
    while c != c'\0':
        if c in b'&<>/"\'\t\n\x0B\x0C\r ':
            return 0
        c_name += 1
        c = c_name[0]
    return 1
1732  
cdef bint _characterReferenceIsValid(const_xmlChar* c_name):
    """Check the text of a numeric character reference.

    Accepts a non-empty sequence of decimal digits, or an 'x' followed
    by a non-empty sequence of hex digits (upper or lower case).
    """
    cdef bint is_hex = c_name[0] == c'x'
    if is_hex:
        c_name += 1  # skip the hex marker
    if c_name[0] == c'\0':
        # no digits at all
        return 0
    while c_name[0] != c'\0':
        if not (c'0' <= c_name[0] <= c'9'):
            # only hex references may contain non-decimal digits
            if not is_hex:
                return 0
            if not (c'a' <= c_name[0] <= c'f' or c'A' <= c_name[0] <= c'F'):
                return 0
        c_name += 1
    return 1
1751  
cdef int _tagValidOrRaise(tag_utf) except -1:
    """Return 0 if the UTF-8 encoded tag name passes _pyXmlNameIsValid(),
    raise ValueError otherwise.
    """
    if _pyXmlNameIsValid(tag_utf):
        return 0
    raise ValueError(f"Invalid tag name {(<bytes>tag_utf).decode('utf8')!r}")
1756  
cdef int _htmlTagValidOrRaise(tag_utf) except -1:
    """Return 0 if the UTF-8 encoded tag name passes _pyHtmlNameIsValid(),
    raise ValueError otherwise.
    """
    if _pyHtmlNameIsValid(tag_utf):
        return 0
    raise ValueError(f"Invalid HTML tag name {(<bytes>tag_utf).decode('utf8')!r}")
1761  
cdef int _attributeValidOrRaise(name_utf) except -1:
    """Return 0 if the UTF-8 encoded attribute name passes
    _pyXmlNameIsValid(), raise ValueError otherwise.
    """
    if _pyXmlNameIsValid(name_utf):
        return 0
    raise ValueError(f"Invalid attribute name {(<bytes>name_utf).decode('utf8')!r}")
1766  
cdef int _prefixValidOrRaise(tag_utf) except -1:
    """Return 0 if the UTF-8 encoded namespace prefix passes
    _pyXmlNameIsValid(), raise ValueError otherwise.
    """
    if _pyXmlNameIsValid(tag_utf):
        return 0
    raise ValueError(f"Invalid namespace prefix {(<bytes>tag_utf).decode('utf8')!r}")
1771  
cdef int _uriValidOrRaise(uri_utf) except -1:
    """Return 0 if xmlParseURI() can parse the UTF-8 encoded namespace URI,
    raise ValueError otherwise.
    """
    cdef uri.xmlURI* c_parsed = uri.xmlParseURI(_cstr(uri_utf))
    if c_parsed is not NULL:
        # only the parse result matters, so release the struct right away
        uri.xmlFreeURI(c_parsed)
        return 0
    raise ValueError(f"Invalid namespace URI {(<bytes>uri_utf).decode('utf8')!r}")
1778  
cdef inline object _namespacedName(xmlNode* c_node):
    """Build the qualified name of a node from the result of _getNs()
    and the node's name, using _namespacedNameFromNsName().
    """
    cdef const_xmlChar* c_href = _getNs(c_node)
    return _namespacedNameFromNsName(c_href, c_node.name)
1781  
cdef object _namespacedNameFromNsName(const_xmlChar* href, const_xmlChar* name):
    """Build a '{href}name' string from a namespace URI and a local name.

    Without a namespace (NULL href), the result is funicode(name).
    Otherwise, the result is a unicode string if LXML_UNICODE_STRINGS is
    set or the isutf8() / isutf8l() check reports something for the
    input, and a byte string if not.  Under PyPy, the name is always
    formatted as bytes first and decoded afterwards if necessary.
    """
    if href is NULL:
        return funicode(name)

    if not python.IS_PYPY:
        if python.LXML_UNICODE_STRINGS or isutf8(name) or isutf8(href):
            return python.PyUnicode_FromFormat("{%s}%s", href, name)
        return python.PyBytes_FromFormat("{%s}%s", href, name)

    # PyPy: build the byte string first, then decide whether to decode it
    s = python.PyBytes_FromFormat("{%s}%s", href, name)
    if python.LXML_UNICODE_STRINGS or isutf8l(s, len(s)):
        return (<bytes>s).decode('utf8')
    return s
1793  
cdef _getFilenameForFile(source):
    """Given a Python File or Gzip object, give filename back.

    Tries, in this order:

    - the result of calling ``source.geturl()``
    - the ``source.name`` attribute, if it is a string
    - the ``source.filename`` attribute, if it is a string

    String attribute values are passed through os_path_abspath().

    Returns None if no filename could be determined.

    The lookup is best-effort: any regular exception raised while probing
    an attribute makes us move on to the next candidate.  Only subclasses
    of Exception are caught, so that KeyboardInterrupt and SystemExit
    raised by the probed object are not silently swallowed.
    """
    # urllib2 provides a geturl() method
    try:
        return source.geturl()
    except Exception:
        pass
    # file instances have a name attribute
    try:
        filename = source.name
        if _isString(filename):
            return os_path_abspath(filename)
    except Exception:
        pass
    # gzip file instances have a filename attribute (before Py3k)
    try:
        filename = source.filename
        if _isString(filename):
            return os_path_abspath(filename)
    except Exception:
        pass
    # can't determine filename
    return None