/ lib / lxml / proxy.pxi
proxy.pxi
  1  # Proxy functions and low level node allocation stuff
  2  
  3  # Proxies represent elements, their reference is stored in the C
  4  # structure of the respective node to avoid multiple instantiation of
  5  # the Python class.
  6  
  7  @cython.linetrace(False)
  8  @cython.profile(False)
  9  cdef inline _Element getProxy(xmlNode* c_node):
 10      u"""Get a proxy for a given node.
 11      """
 12      #print "getProxy for:", <int>c_node
 13      if c_node is not NULL and c_node._private is not NULL:
 14          return <_Element>c_node._private
 15      else:
 16          return None
 17  
 18  
 19  @cython.linetrace(False)
 20  @cython.profile(False)
 21  cdef inline bint hasProxy(xmlNode* c_node):
 22      if c_node._private is NULL:
 23          return False
 24      return True
 25  
 26  
 27  @cython.linetrace(False)
 28  @cython.profile(False)
 29  cdef inline int _registerProxy(_Element proxy, _Document doc,
 30                                 xmlNode* c_node) except -1:
 31      u"""Register a proxy and type for the node it's proxying for.
 32      """
 33      #print "registering for:", <int>proxy._c_node
 34      assert not hasProxy(c_node), u"double registering proxy!"
 35      proxy._doc = doc
 36      proxy._c_node = c_node
 37      c_node._private = <void*>proxy
 38      return 0
 39  
 40  
 41  @cython.linetrace(False)
 42  @cython.profile(False)
 43  cdef inline int _unregisterProxy(_Element proxy) except -1:
 44      u"""Unregister a proxy for the node it's proxying for.
 45      """
 46      cdef xmlNode* c_node = proxy._c_node
 47      assert c_node._private is <void*>proxy, u"Tried to unregister unknown proxy"
 48      c_node._private = NULL
 49      return 0
 50  
 51  
 52  ################################################################################
 53  # temporarily make a node the root node of its document
 54  
 55  cdef xmlDoc* _fakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node) except NULL:
 56      return _plainFakeRootDoc(c_base_doc, c_node, 1)
 57  
 58  cdef xmlDoc* _plainFakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node,
 59                                 bint with_siblings) except NULL:
 60      # build a temporary document that has the given node as root node
 61      # note that copy and original must not be modified during its lifetime!!
 62      # always call _destroyFakeDoc() after use!
 63      cdef xmlNode* c_child
 64      cdef xmlNode* c_root
 65      cdef xmlNode* c_new_root
 66      cdef xmlDoc*  c_doc
 67      if with_siblings or (c_node.prev is NULL and c_node.next is NULL):
 68          c_root = tree.xmlDocGetRootElement(c_base_doc)
 69          if c_root is c_node:
 70              # already the root node, no siblings
 71              return c_base_doc
 72  
 73      c_doc  = _copyDoc(c_base_doc, 0)                   # non recursive!
 74      c_new_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive!
 75      tree.xmlDocSetRootElement(c_doc, c_new_root)
 76      _copyParentNamespaces(c_node, c_new_root)
 77  
 78      c_new_root.children = c_node.children
 79      c_new_root.last = c_node.last
 80      c_new_root.next = c_new_root.prev = NULL
 81  
 82      # store original node
 83      c_doc._private = c_node
 84  
 85      # divert parent pointers of children
 86      c_child = c_new_root.children
 87      while c_child is not NULL:
 88          c_child.parent = c_new_root
 89          c_child = c_child.next
 90  
 91      c_doc.children = c_new_root
 92      return c_doc
 93  
 94  cdef void _destroyFakeDoc(xmlDoc* c_base_doc, xmlDoc* c_doc):
 95      # delete a temporary document
 96      cdef xmlNode* c_child
 97      cdef xmlNode* c_parent
 98      cdef xmlNode* c_root
 99      if c_doc is c_base_doc:
100          return
101      c_root = tree.xmlDocGetRootElement(c_doc)
102  
103      # restore parent pointers of children
104      c_parent = <xmlNode*>c_doc._private
105      c_child = c_root.children
106      while c_child is not NULL:
107          c_child.parent = c_parent
108          c_child = c_child.next
109  
110      # prevent recursive removal of children
111      c_root.children = c_root.last = NULL
112      tree.xmlFreeDoc(c_doc)
113  
cdef _Element _fakeDocElementFactory(_Document doc, xmlNode* c_element):
    u"""Element factory for nodes that may live in a fake root document.

    Instantiating a proxy for the fake root node itself would turn bad
    once the fake document gets destroyed.  Therefore, when asked for
    the first child of a foreign document that carries a node in its
    '_private' slot, that stored node is instantiated instead.
    Everything else goes straight to _elementFactory().
    """
    if (c_element.doc is not doc._c_doc
            and c_element.doc._private is not NULL
            and c_element is c_element.doc.children):
        # fake root node => substitute the node stored on the document
        c_element = <xmlNode*>c_element.doc._private
    return _elementFactory(doc, c_element)
129  
130  ################################################################################
131  # support for freeing tree elements when proxy objects are destroyed
132  
cdef int attemptDeallocation(xmlNode* c_node):
    u"""Try to free c_node, or the topmost freeable node above it.

    Returns 1 if a node was freed, 0 if nothing was done.
    """
    cdef xmlNode* c_top
    if c_node is NULL:
        # we may not be referring to a tree at all
        return 0
    c_top = getDeallocationTop(c_node)
    if c_top is NULL:
        return 0
    _removeText(c_top.next) # tail
    tree.xmlFreeNode(c_top)
    return 1
148  
cdef xmlNode* getDeallocationTop(xmlNode* c_node):
    u"""Find the topmost node above c_node that may be freed, or NULL.

    NULL is returned if c_node, any of its ancestors, any element
    sibling of the topmost ancestor or anything checked by
    canDeallocateChildNodes() still has a proxy, or if the node is
    still linked into a document.
    """
    cdef xmlNode* c_sibling
    if hasProxy(c_node):
        return NULL
    # climb up to the topmost ancestor
    while c_node.parent is not NULL:
        c_node = c_node.parent
        if c_node.type in (tree.XML_DOCUMENT_NODE,
                           tree.XML_HTML_DOCUMENT_NODE):
            # still attached to a document => don't deallocate
            return NULL
        if hasProxy(c_node):
            # an ancestor is still referenced from Python
            return NULL
    # the subtree below the top node must be proxy-free
    if not canDeallocateChildNodes(c_node):
        return NULL
    # the same goes for all element siblings, backwards ...
    c_sibling = c_node.prev
    while c_sibling is not NULL:
        if _isElement(c_sibling) and (
                hasProxy(c_sibling) or not canDeallocateChildNodes(c_sibling)):
            return NULL
        c_sibling = c_sibling.prev
    # ... and forwards
    c_sibling = c_node.next
    while c_sibling is not NULL:
        if _isElement(c_sibling) and (
                hasProxy(c_sibling) or not canDeallocateChildNodes(c_sibling)):
            return NULL
        c_sibling = c_sibling.next
    return c_node
185  
cdef int canDeallocateChildNodes(xmlNode* c_parent):
    u"""Check that none of the nodes visited below c_parent has a proxy.

    Returns 0 as soon as a node with a proxy is found, 1 otherwise.
    """
    cdef xmlNode* c_node
    c_node = c_parent.children
    # The BEGIN/END macro pair wraps the statements in between into a loop
    # that advances c_node.  NOTE(review): presumably a traversal of all
    # elements in the subtree below c_parent - confirm against the macro
    # definition.
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_parent, c_node, 1)
    if hasProxy(c_node):
        return 0
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
    return 1
194  
195  ################################################################################
196  # fix _Document references and namespaces when a node changes documents
197  
cdef void _copyParentNamespaces(xmlNode* c_from_node, xmlNode* c_to_node) nogil:
    u"""Copy the namespaces of all ancestors of c_from_node to c_to_node.

    Walks up the parent chain of c_from_node for as long as the parent
    is an element, an XInclude node or an XML document node, and
    declares each namespace found in the parents' nsDef lists on
    c_to_node.
    """
    cdef xmlNode* c_parent
    cdef xmlNs* c_new_ns
    c_parent = c_from_node.parent
    while c_parent and (tree._isElementOrXInclude(c_parent) or
                        c_parent.type == tree.XML_DOCUMENT_NODE):
        c_new_ns = c_parent.nsDef
        while c_new_ns:
            # libxml2 will check if the prefix is already defined
            tree.xmlNewNs(c_to_node, c_new_ns.href, c_new_ns.prefix)
            c_new_ns = c_new_ns.next
        c_parent = c_parent.parent
214  
215  
# One entry of the namespace replacement cache: nodes whose 'ns' pointer
# is 'old' get it replaced by 'new' (see _fixCNs()).
ctypedef struct _ns_update_map:
    xmlNs* old  # namespace pointer as found on the moved nodes
    xmlNs* new  # namespace pointer to use instead (may be the same as 'old')
219  
220  
# Growable array of _ns_update_map entries.
ctypedef struct _nscache:
    _ns_update_map* ns_map  # the entry array, NULL while nothing is allocated
    size_t size             # number of entries that 'ns_map' has room for
    size_t last             # number of entries in use == index of next free slot
225  
226  
cdef int _growNsCache(_nscache* c_ns_cache) except -1:
    u"""Enlarge the entry array of the namespace cache.

    The first allocation provides room for 20 entries, every later
    call doubles the capacity.  If the reallocation fails, the old
    array is freed, 'ns_map' is reset to NULL and MemoryError is
    raised.  Returns 0 on success.
    """
    cdef _ns_update_map* c_grown_map
    if c_ns_cache.size:
        c_ns_cache.size *= 2
    else:
        c_ns_cache.size = 20
    c_grown_map = <_ns_update_map*> python.lxml_realloc(
        c_ns_cache.ns_map, c_ns_cache.size, sizeof(_ns_update_map))
    if c_grown_map is NULL:
        python.lxml_free(c_ns_cache.ns_map)
        c_ns_cache.ns_map = NULL
        raise MemoryError()
    c_ns_cache.ns_map = c_grown_map
    return 0
241  
242  
cdef inline int _appendToNsCache(_nscache* c_ns_cache,
                                 xmlNs* c_old_ns, xmlNs* c_new_ns) except -1:
    u"""Append the mapping c_old_ns => c_new_ns to the namespace cache.

    Grows the cache first if all slots are in use; an exception raised
    while growing is propagated.  Returns 0 on success.
    """
    cdef _ns_update_map* c_slot
    if c_ns_cache.last >= c_ns_cache.size:
        _growNsCache(c_ns_cache)
    c_slot = &c_ns_cache.ns_map[c_ns_cache.last]
    c_slot.old = c_old_ns
    c_slot.new = c_new_ns
    c_ns_cache.last += 1
    return 0
249  
250  
cdef int _stripRedundantNamespaceDeclarations(xmlNode* c_element, _nscache* c_ns_cache,
                                              xmlNs** c_del_ns_list) except -1:
    u"""Removes namespace declarations from an element that are already
    defined in its parents.  Does not free the xmlNs's, just prepends
    them to the c_del_ns_list.

    Every declaration found on the element is also recorded in
    c_ns_cache: kept declarations map to themselves, removed ones map
    to the equivalent declaration found via the parent.

    Returns 0.  An exception raised while appending to the cache is
    propagated.
    """
    cdef xmlNs* c_ns
    cdef xmlNs* c_ns_next
    cdef xmlNs** c_nsdef
    # use a xmlNs** to handle assignments to "c_element.nsDef" correctly
    # (c_nsdef always points at the link that refers to the current
    # declaration: first "c_element.nsDef", later the ".next" field of
    # the previously kept declaration)
    c_nsdef = &c_element.nsDef
    while c_nsdef[0] is not NULL:
        # look for a declaration of the same href that is visible from the parent
        c_ns = tree.xmlSearchNsByHref(
            c_element.doc, c_element.parent, c_nsdef[0].href)
        if c_ns is NULL:
            # new namespace href => keep and cache the ns declaration
            _appendToNsCache(c_ns_cache, c_nsdef[0], c_nsdef[0])
            # advance to the next link
            c_nsdef = &c_nsdef[0].next
        else:
            # known namespace href => cache mapping and strip old ns
            _appendToNsCache(c_ns_cache, c_nsdef[0], c_ns)
            # cut the current declaration out of the nsDef list and prepend
            # it to the garbage chain; c_nsdef itself stays where it is and
            # now refers to the successor
            c_ns_next = c_nsdef[0].next
            c_nsdef[0].next = c_del_ns_list[0]
            c_del_ns_list[0] = c_nsdef[0]
            c_nsdef[0] = c_ns_next
    return 0
278  
279  
cdef void _cleanUpFromNamespaceAdaptation(xmlNode* c_start_node,
                                          _nscache* c_ns_cache, xmlNs* c_del_ns_list):
    # Try to recover from exceptions with really bad timing.  We were in the middle
    # of ripping out xmlNS-es and likely ran out of memory.  Try to fix up the tree
    # by re-adding the original xmlNs declarations (which might still be used in some
    # places).
    #
    # Frees the entry array of the namespace cache and appends the chain of
    # removed declarations 'c_del_ns_list' to the nsDef list of c_start_node.
    cdef xmlNs* c_ns
    if c_ns_cache.ns_map:
        python.lxml_free(c_ns_cache.ns_map)
        # reset the cache so that no dangling pointer is left behind in case
        # it gets inspected or freed again after this cleanup
        c_ns_cache.ns_map = NULL
        c_ns_cache.size = c_ns_cache.last = 0
    if c_del_ns_list:
        if not c_start_node.nsDef:
            c_start_node.nsDef = c_del_ns_list
        else:
            # find the tail of the existing declarations and append there
            c_ns = c_start_node.nsDef
            while c_ns.next:
                c_ns = c_ns.next
            c_ns.next = c_del_ns_list
296  
297  
cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
                            xmlNode* c_element) except -1:
    u"""Fix the xmlNs pointers of a node and its subtree that were moved.

    Originally copied from libxml2's xmlReconciliateNs().  Expects
    libxml2 doc pointers of node to be correct already, but fixes
    _Document references.

    For each node in the subtree, we do this:

    1) Remove redundant declarations of namespace that are already
       defined in its parents.

    2) Replace namespaces that are *not* defined on the node or its
       parents by the equivalent namespace declarations that *are*
       defined on the node or its parents (possibly using a different
       prefix).  If a namespace is unknown, declare a new one on the
       node.

    3) Reassign the names of tags and attribute from the dict of the
       target document *iff* it is different from the dict used in the
       source subtree.

    4) Set the Document reference to the new Document (if different).
       This is done on backtracking to keep the original Document
       alive as long as possible, until all its elements are updated.

    Note that the namespace declarations are removed from the tree in
    step 1), but freed only after the complete subtree was traversed
    and all occurrences were replaced by tree-internal pointers.

    Returns 0.  Does nothing if c_element is neither an element nor an
    XInclude node.  Exceptions raised during the namespace adaptation
    are propagated after an attempt to restore the removed declarations.
    """
    cdef xmlNode* c_start_node
    cdef xmlNode* c_node
    cdef _Element proxy
    cdef _nscache c_ns_cache = [NULL, 0, 0]
    cdef xmlNs* c_del_ns_list = NULL
    # plain C counter, matches the 'size_t' parameter of fixElementDocument()
    cdef size_t proxy_count = 0

    if not tree._isElementOrXInclude(c_element):
        return 0

    c_start_node = c_element

    # the macro pair below loops over the statements in between,
    # advancing c_element
    tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1)
    if tree._isElementOrXInclude(c_element):
        if hasProxy(c_element):
            proxy_count += 1

        # 1) cut out namespaces defined here that are already known by
        #    the ancestors
        if c_element.nsDef is not NULL:
            try:
                _stripRedundantNamespaceDeclarations(c_element, &c_ns_cache, &c_del_ns_list)
            except:
                _cleanUpFromNamespaceAdaptation(c_start_node, &c_ns_cache, c_del_ns_list)
                raise

        # 2) make sure the namespaces of an element and its attributes
        #    are declared in this document (i.e. on the node or its parents)
        #    (_fixCNs() runs the cleanup itself before it raises)
        if c_element.ns is not NULL:
            _fixCNs(doc, c_start_node, c_element, &c_ns_cache, c_del_ns_list)

        c_node = <xmlNode*>c_element.properties
        while c_node is not NULL:
            if c_node.ns is not NULL:
                _fixCNs(doc, c_start_node, c_node, &c_ns_cache, c_del_ns_list)
            c_node = c_node.next

    tree.END_FOR_EACH_FROM(c_element)

    # free now unused namespace declarations
    if c_del_ns_list is not NULL:
        tree.xmlFreeNsList(c_del_ns_list)

    # cleanup
    if c_ns_cache.ns_map is not NULL:
        python.lxml_free(c_ns_cache.ns_map)

    # 3) fix the names in the tree if we moved it from a different thread
    if doc._c_doc.dict is not c_source_doc.dict:
        fixThreadDictNames(c_start_node, c_source_doc.dict, doc._c_doc.dict)

    # 4) fix _Document references
    #    (and potentially deallocate the source document)
    if proxy_count > 0:
        if proxy_count == 1 and c_start_node._private is not NULL:
            # fast path: the only proxy in the subtree sits on the start node
            proxy = getProxy(c_start_node)
            if proxy is not None:
                if proxy._doc is not doc:
                    proxy._doc = doc
            else:
                fixElementDocument(c_start_node, doc, proxy_count)
        else:
            fixElementDocument(c_start_node, doc, proxy_count)

    return 0
396  
397  
cdef void _setTreeDoc(xmlNode* c_node, xmlDoc* c_doc):
    """Adaptation of 'xmlSetTreeDoc()' that deep-fixes the document links iteratively.
    It avoids https://gitlab.gnome.org/GNOME/libxml2/issues/42

    Sets the 'doc' pointer of every visited node, of the attributes of
    element nodes and of the children of those attributes to c_doc.
    """
    # the macro pair below loops over the statements in between, advancing c_node
    tree.BEGIN_FOR_EACH_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        c_attr = <tree.xmlAttr*>c_node.properties
        while c_attr:
            if c_attr.atype == tree.XML_ATTRIBUTE_ID:
                # unregister the ID from the document that the node still
                # points to (c_node.doc is only updated further down)
                tree.xmlRemoveID(c_node.doc, c_attr)
            c_attr.doc = c_doc
            _fixDocChildren(c_attr.children, c_doc)
            c_attr = c_attr.next
    # Set doc link for all nodes, not only elements.
    c_node.doc = c_doc
    tree.END_FOR_EACH_FROM(c_node)
414  
415  
cdef inline void _fixDocChildren(xmlNode* c_child, xmlDoc* c_doc):
    # Point c_child, all siblings following it and (recursively) all of
    # their descendants at c_doc.
    while c_child is not NULL:
        c_child.doc = c_doc
        if c_child.children is not NULL:
            _fixDocChildren(c_child.children, c_doc)
        c_child = c_child.next
422  
423  
cdef int _fixCNs(_Document doc, xmlNode* c_start_node, xmlNode* c_node,
                 _nscache* c_ns_cache, xmlNs* c_del_ns_list) except -1:
    u"""Replace the namespace pointer of c_node by one that is valid in 'doc'.

    c_node may be an element or an attribute (cast to xmlNode*) and
    must have a non-NULL 'ns' pointer.  The replacement is taken from
    the namespace cache if possible.  Otherwise, it is looked up or
    created via doc._findOrBuildNodeNs() relative to c_start_node and
    the mapping is added to the cache.

    Returns 0.  On errors, the namespace adaptation state is cleaned up
    and the exception is re-raised.
    """
    cdef xmlNs* c_ns = NULL
    # remember the original namespace, it is the lookup key of the cache
    cdef xmlNs* c_old_ns = c_node.ns
    cdef bint is_prefixed_attr = (c_node.type == tree.XML_ATTRIBUTE_NODE and c_node.ns.prefix)

    for ns_map in c_ns_cache.ns_map[:c_ns_cache.last]:
        if c_old_ns is ns_map.old:
            if is_prefixed_attr and not ns_map.new.prefix:
                # avoid dropping prefix from attributes
                continue
            c_ns = ns_map.new
            break

    if c_ns:
        c_node.ns = c_ns
    else:
        # not in cache or not acceptable
        # => find a replacement from this document
        try:
            c_ns = doc._findOrBuildNodeNs(
                c_start_node, c_old_ns.href, c_old_ns.prefix,
                c_node.type == tree.XML_ATTRIBUTE_NODE)
            c_node.ns = c_ns
            # cache the mapping from the *original* namespace, so that other
            # nodes that still refer to it can be served from the cache
            _appendToNsCache(c_ns_cache, c_old_ns, c_ns)
        except:
            _cleanUpFromNamespaceAdaptation(c_start_node, c_ns_cache, c_del_ns_list)
            raise
    return 0
452  
453  
cdef void fixElementDocument(xmlNode* c_element, _Document doc,
                             size_t proxy_count):
    # Set the '_doc' reference of the proxies found while traversing from
    # c_element to 'doc'.  'proxy_count' is the number of proxies that the
    # caller expects to find; the traversal stops early once that many
    # proxies were visited.
    cdef xmlNode* c_node = c_element
    cdef _Element proxy = None # init-to-None required due to fake-loop below
    # the macro pair below loops over the statements in between, advancing c_node
    tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1)
    if c_node._private is not NULL:
        proxy = getProxy(c_node)
        if proxy is not None:
            if proxy._doc is not doc:
                proxy._doc = doc
            proxy_count -= 1
            if proxy_count == 0:
                # all expected proxies were updated, no need to look further
                return
    tree.END_FOR_EACH_FROM(c_node)
468  
469  
cdef void fixThreadDictNames(xmlNode* c_element,
                             tree.xmlDict* c_src_dict,
                             tree.xmlDict* c_dict) nogil:
    # Re-assign the names of tags and attributes from c_src_dict to c_dict.
    #
    # This should only be called when the element is based on a
    # different libxml2 tag name dictionary.
    #
    # For (HTML) document nodes, the namespace declarations of the document
    # node, both DTD subsets and all top-level children are processed.
    # Element and XInclude nodes are processed directly.  All other node
    # types are ignored.
    cdef xmlNode* c_child
    if c_element.type in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE):
        # may define "xml" namespace
        fixThreadDictNsForNode(c_element, c_src_dict, c_dict)
        if c_element.doc.extSubset:
            fixThreadDictNamesForDtd(c_element.doc.extSubset, c_src_dict, c_dict)
        if c_element.doc.intSubset:
            fixThreadDictNamesForDtd(c_element.doc.intSubset, c_src_dict, c_dict)
        c_child = c_element.children
        while c_child is not NULL:
            fixThreadDictNamesForNode(c_child, c_src_dict, c_dict)
            c_child = c_child.next
    elif tree._isElementOrXInclude(c_element):
        fixThreadDictNamesForNode(c_element, c_src_dict, c_dict)
491  
492  
cdef inline void _fixThreadDictPtr(const_xmlChar** c_ptr,
                                   tree.xmlDict* c_src_dict,
                                   tree.xmlDict* c_dict) nogil:
    # Replace the string at c_ptr[0] by its counterpart from c_dict, but
    # only if it is currently owned by c_src_dict.
    c_str = c_ptr[0]
    if not c_str or not c_src_dict:
        return
    if not tree.xmlDictOwns(c_src_dict, c_str):
        return
    # return value can be NULL on memory error, but we don't handle that
    # here: the original pointer is simply left in place
    c_str = tree.xmlDictLookup(c_dict, c_str, -1)
    if c_str:
        c_ptr[0] = c_str
502  
503  
cdef void fixThreadDictNamesForNode(xmlNode* c_element,
                                    tree.xmlDict* c_src_dict,
                                    tree.xmlDict* c_dict) nogil:
    # Move the dict-owned strings of the nodes visited from c_element on
    # from c_src_dict to c_dict.  What gets fixed depends on the node type.
    cdef xmlNode* c_node = c_element
    # the macro pair below loops over the statements in between, advancing c_node
    tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1)
    if c_node.type in (tree.XML_ELEMENT_NODE, tree.XML_XINCLUDE_START):
        # attributes, namespace declarations and the tag name
        fixThreadDictNamesForAttributes(
            c_node.properties, c_src_dict, c_dict)
        fixThreadDictNsForNode(c_node, c_src_dict, c_dict)
        _fixThreadDictPtr(&c_node.name, c_src_dict, c_dict)
    elif c_node.type == tree.XML_TEXT_NODE:
        # libxml2's SAX2 parser interns some indentation space
        fixThreadDictContentForNode(c_node, c_src_dict, c_dict)
    elif c_node.type == tree.XML_COMMENT_NODE:
        pass  # don't touch c_node.name
    else:
        # any other node type: only the name
        _fixThreadDictPtr(&c_node.name, c_src_dict, c_dict)
    tree.END_FOR_EACH_FROM(c_node)
522  
523  
cdef inline void fixThreadDictNamesForAttributes(tree.xmlAttr* c_attr,
                                                 tree.xmlDict* c_src_dict,
                                                 tree.xmlDict* c_dict) nogil:
    # Fix the names and the value nodes of c_attr and of all attributes
    # that follow it in the list.
    cdef xmlNode* c_value
    cdef xmlNode* c_node = <xmlNode*>c_attr
    while c_node is not NULL:
        if c_node.type != tree.XML_TEXT_NODE and \
                c_node.type != tree.XML_COMMENT_NODE:
            _fixThreadDictPtr(&c_node.name, c_src_dict, c_dict)
        # libxml2 keeps some (!) attribute values in the dict
        c_value = c_node.children
        while c_value is not NULL:
            fixThreadDictContentForNode(c_value, c_src_dict, c_dict)
            c_value = c_value.next
        c_node = c_node.next
538  
539  
cdef inline void fixThreadDictContentForNode(xmlNode* c_node,
                                             tree.xmlDict* c_src_dict,
                                             tree.xmlDict* c_dict) nogil:
    # Replace the content string of c_node by its counterpart from c_dict,
    # but only if it is currently owned by c_src_dict.
    if c_node.content is NULL or c_src_dict is NULL:
        # xmlDictOwns() reports an error (-1, i.e. a true value) for a NULL
        # dict, which must not be mistaken for "owned by the source dict"
        return
    if c_node.content is <xmlChar*>&c_node.properties:
        # content points into the node struct itself, not into a dict
        # NOTE(review): presumably libxml2's in-node storage of short
        # text content - confirm
        return
    if tree.xmlDictOwns(c_src_dict, c_node.content):
        # result can be NULL on memory error, but we don't handle that here
        c_node.content = <xmlChar*>tree.xmlDictLookup(c_dict, c_node.content, -1)
548  
549  
cdef inline void fixThreadDictNsForNode(xmlNode* c_node,
                                        tree.xmlDict* c_src_dict,
                                        tree.xmlDict* c_dict) nogil:
    # Fix href and prefix of every namespace declared on c_node.
    cdef xmlNs* c_decl = c_node.nsDef
    while c_decl:
        _fixThreadDictPtr(&c_decl.href, c_src_dict, c_dict)
        _fixThreadDictPtr(&c_decl.prefix, c_src_dict, c_dict)
        c_decl = c_decl.next
558  
559  
cdef void fixThreadDictNamesForDtd(tree.xmlDtd* c_dtd,
                                   tree.xmlDict* c_src_dict,
                                   tree.xmlDict* c_dict) nogil:
    # Fix the dict-owned strings of the element and entity declarations
    # that are direct children of c_dtd.  Other declarations are skipped.
    cdef xmlNode* c_node
    cdef tree.xmlElement* c_element
    cdef tree.xmlAttribute* c_attribute
    cdef tree.xmlEntity* c_entity

    c_node = c_dtd.children
    while c_node is not NULL:
        if c_node.type == tree.XML_ENTITY_DECL:
            c_entity = <tree.xmlEntity*>c_node
            _fixThreadDictPtr(&c_entity.name, c_src_dict, c_dict)
            _fixThreadDictPtr(&c_entity.ExternalID, c_src_dict, c_dict)
            _fixThreadDictPtr(&c_entity.SystemID, c_src_dict, c_dict)
            _fixThreadDictPtr(<const_xmlChar**>&c_entity.content, c_src_dict, c_dict)
        elif c_node.type == tree.XML_ELEMENT_DECL:
            c_element = <tree.xmlElement*>c_node
            # only the content node referenced directly by the declaration
            if c_element.content is not NULL:
                _fixThreadDictPtr(&c_element.content.name, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_element.content.prefix, c_src_dict, c_dict)
            # attribute declarations are chained through 'nexth'
            c_attribute = c_element.attributes
            while c_attribute is not NULL:
                _fixThreadDictPtr(&c_attribute.defaultValue, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_attribute.name, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_attribute.prefix, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_attribute.elem, c_src_dict, c_dict)
                c_attribute = c_attribute.nexth
        c_node = c_node.next
589  
590  
591  ################################################################################
592  # adopt an xmlDoc from an external libxml2 document source
593  
cdef _Document _adoptForeignDoc(xmlDoc* c_doc, _BaseParser parser=None, bint is_owned=True):
    """Wrap an xmlDoc that was created outside of lxml in a _Document.

    With 'is_owned' set, the document is taken over as is, after
    resetting the '_private' pointers of the visited nodes to NULL so
    that they are never mistaken for lxml proxy references.  Without
    it, a deep copy is made and the copy is wrapped instead.

    Raises ValueError for a NULL pointer or for something that is not
    an XML/HTML document (an owned document is freed in that case), and
    MemoryError if the copy fails.
    """
    cdef xmlNode* c_node
    if c_doc is NULL:
        raise ValueError("Illegal document provided: NULL")
    if c_doc.type not in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE):
        # read the type before the document is potentially freed
        doc_type = c_doc.type
        if is_owned:
            tree.xmlFreeDoc(c_doc)
        raise ValueError(f"Illegal document provided: expected XML or HTML, found {doc_type}")

    c_node = <xmlNode*>c_doc

    if not is_owned:
        # create a fresh copy that lxml owns
        c_doc = tree.xmlCopyDoc(c_doc, 1)
        if c_doc is NULL:
            raise MemoryError()
    else:
        # the macro pair below loops over the statement in between, advancing c_node
        tree.BEGIN_FOR_EACH_FROM(<xmlNode*>c_doc, c_node, 1)
        c_node._private = NULL
        tree.END_FOR_EACH_FROM(c_node)

    return _documentFactory(c_doc, parser)
618  
619      return _documentFactory(c_doc, parser)