xmlid.pxi
# Lazily created XPath evaluator shared by all XMLID() calls.
cdef object _find_id_attributes

def XMLID(text, parser=None, *, base_url=None):
    u"""XMLID(text, parser=None, base_url=None)

    Parse the text and return a tuple (root node, ID dictionary).  The root
    node is the same as returned by the XML() function.  The dictionary
    contains string-element pairs.  The dictionary keys are the values of 'id'
    attributes.  The elements referenced by the ID are stored as dictionary
    values.

    If several elements carry the same 'id' value, the element that comes
    last in the XPath result is the one stored in the dictionary.
    """
    cdef dict dic
    global _find_id_attributes
    if _find_id_attributes is None:
        # build the shared evaluator on first use only
        _find_id_attributes = XPath(u'//*[string(@id)]')

    # ElementTree compatible implementation: parse and look for 'id' attributes
    root = XML(text, parser, base_url=base_url)
    dic = {}
    for elem in _find_id_attributes(root):
        dic[elem.get(u'id')] = elem
    return root, dic

def XMLDTDID(text, parser=None, *, base_url=None):
    u"""XMLDTDID(text, parser=None, base_url=None)

    Parse the text and return a tuple (root node, ID dictionary).  The root
    node is the same as returned by the XML() function.  The dictionary
    contains string-element pairs.  The dictionary keys are the values of ID
    attributes as defined by the DTD.  The elements referenced by the ID are
    stored as dictionary values.

    If the parsed document has no ID table, a plain empty dict is returned
    in place of the ID dictionary.

    Note that you must not modify the XML tree if you use the ID dictionary.
    The results are undefined.
    """
    cdef _Element root
    root = XML(text, parser, base_url=base_url)
    # xml:id spec compatible implementation: use DTD ID attributes from libxml2
    if root._doc._c_doc.ids is NULL:
        return root, {}
    else:
        return root, _IDDict(root)

def parseid(source, parser=None, *, base_url=None):
    u"""parseid(source, parser=None, base_url=None)

    Parses the source into a tuple containing an ElementTree object and an
    ID dictionary.  If no parser is provided as second argument, the default
    parser is used.

    Unlike XMLDTDID(), this function does not fall back to an empty dict:
    constructing the ID dictionary raises ValueError if the parsed document
    has no ID table.

    Note that you must not modify the XML tree if you use the ID dictionary.
    The results are undefined.
    """
    cdef _Document doc
    doc = _parseDocument(source, parser, base_url)
    return _elementTreeFactory(doc, None), _IDDict(doc)

cdef class _IDDict:
    u"""_IDDict(self, etree)
    A dictionary-like proxy class that maps ID attributes to elements.

    The dictionary must be instantiated with the root element of a parsed XML
    document, otherwise the behaviour is undefined.  Elements and XML trees
    that were created or modified 'by hand' are not supported.
    """
    cdef _Document _doc
    # Cached results of _build_keys() / _build_items(); None until first use.
    # The caches are never invalidated once built.
    cdef object _keys
    cdef object _items
    def __cinit__(self, etree):
        # Resolve the owning document and refuse documents that have no
        # ID hash table (raises ValueError).
        cdef _Document doc
        doc = _documentOrRaise(etree)
        if doc._c_doc.ids is NULL:
            raise ValueError(u"No ID dictionary available.")
        self._doc = doc
        self._keys = None
        self._items = None

    def copy(self):
        # A new proxy over the same document; the caches are not shared.
        return _IDDict(self._doc)

    def __getitem__(self, id_name):
        # Look up the ID in the document's hash table and return the element
        # that owns the ID attribute.  Raises KeyError if the ID is unknown
        # or if the entry no longer points to an attribute with a parent.
        cdef tree.xmlHashTable* c_ids
        cdef tree.xmlID* c_id
        cdef xmlAttr* c_attr
        c_ids = self._doc._c_doc.ids
        id_utf = _utf8(id_name)
        c_id = <tree.xmlID*>tree.xmlHashLookup(c_ids, _xcstr(id_utf))
        if c_id is NULL:
            raise KeyError(u"key not found.")
        c_attr = c_id.attr
        if c_attr is NULL or c_attr.parent is NULL:
            raise KeyError(u"ID attribute not found.")
        return _elementFactory(self._doc, c_attr.parent)

    def get(self, id_name):
        # NOTE: unlike dict.get(), this takes no default value and raises
        # KeyError for a missing ID, because it delegates to __getitem__().
        return self[id_name]

    def __contains__(self, id_name):
        # True only if __getitem__() would succeed for this ID.  The same
        # validity conditions are used by __getitem__() and by the hash scan
        # callbacks, so membership, lookup and keys() agree with each other.
        cdef tree.xmlID* c_id
        id_utf = _utf8(id_name)
        c_id = <tree.xmlID*>tree.xmlHashLookup(
            self._doc._c_doc.ids, _xcstr(id_utf))
        if c_id is NULL or c_id.attr is NULL or c_id.attr.parent is NULL:
            return False
        return True

    def has_key(self, id_name):
        return id_name in self

    def __repr__(self):
        return repr(dict(self))

    def keys(self):
        # Returns a copy so that callers cannot modify the cached list.
        if self._keys is None:
            self._keys = self._build_keys()
        return self._keys[:]

    def __iter__(self):
        if self._keys is None:
            self._keys = self._build_keys()
        return iter(self._keys)

    def iterkeys(self):
        # Return a real iterator (supports next()), not the mapping itself.
        return iter(self)

    def __len__(self):
        if self._keys is None:
            self._keys = self._build_keys()
        return len(self._keys)

    def items(self):
        # Returns a copy so that callers cannot modify the cached list.
        if self._items is None:
            self._items = self._build_items()
        return self._items[:]

    def iteritems(self):
        if self._items is None:
            self._items = self._build_items()
        return iter(self._items)

    def values(self):
        # Builds a fresh list of the elements stored in the cached
        # (name, element) item tuples.
        cdef list values = []
        if self._items is None:
            self._items = self._build_items()
        for item in self._items:
            # NOTE(review): the explicit Py_INCREF suggests that the
            # cimported PyTuple_GET_ITEM hands back a borrowed reference that
            # Cython then treats as owned - confirm against the declaration
            # in the 'python' cimport before simplifying this.
            value = python.PyTuple_GET_ITEM(item, 1)
            python.Py_INCREF(value)
            values.append(value)
        return values

    def itervalues(self):
        return iter(self.values())

    cdef object _build_keys(self):
        # Scan the ID hash table and collect the names of all usable entries.
        keys = []
        tree.xmlHashScan(<tree.xmlHashTable*>self._doc._c_doc.ids,
                         <tree.xmlHashScanner>_collectIdHashKeys, <python.PyObject*>keys)
        return keys

    cdef object _build_items(self):
        # Scan the ID hash table and collect (name, element) tuples.  The
        # callback needs both the target list and the document, so they are
        # passed together as a tuple.
        items = []
        context = (items, self._doc)
        tree.xmlHashScan(<tree.xmlHashTable*>self._doc._c_doc.ids,
                         <tree.xmlHashScanner>_collectIdHashItemList, <python.PyObject*>context)
        return items

cdef void _collectIdHashItemList(void* payload, void* context, xmlChar* name):
    # collect elements from ID attribute hash table
    # (xmlHashScan callback; 'context' is the (list, document) tuple built in
    # _IDDict._build_items(), entries without a usable attribute are skipped)
    cdef list lst
    c_id = <tree.xmlID*>payload
    if c_id is NULL or c_id.attr is NULL or c_id.attr.parent is NULL:
        return
    lst, doc = <tuple>context
    element = _elementFactory(doc, c_id.attr.parent)
    lst.append( (funicode(name), element) )

cdef void _collectIdHashKeys(void* payload, void* collect_list, xmlChar* name):
    # collect ID names from ID attribute hash table
    # (xmlHashScan callback; 'collect_list' is the list built in
    # _IDDict._build_keys(), entries without a usable attribute are skipped)
    c_id = <tree.xmlID*>payload
    if c_id is NULL or c_id.attr is NULL or c_id.attr.parent is NULL:
        return
    (<list>collect_list).append(funicode(name))