/ lib / lxml / xmlid.pxi
xmlid.pxi
# Compiled XPath expression shared by all XMLID() calls.  It is created on
# the first call to XMLID() and is None until then.
cdef object _find_id_attributes
  2  
  3  def XMLID(text, parser=None, *, base_url=None):
  4      u"""XMLID(text, parser=None, base_url=None)
  5  
  6      Parse the text and return a tuple (root node, ID dictionary).  The root
  7      node is the same as returned by the XML() function.  The dictionary
  8      contains string-element pairs.  The dictionary keys are the values of 'id'
  9      attributes.  The elements referenced by the ID are stored as dictionary
 10      values.
 11      """
 12      cdef dict dic
 13      global _find_id_attributes
 14      if _find_id_attributes is None:
 15          _find_id_attributes = XPath(u'//*[string(@id)]')
 16  
 17      # ElementTree compatible implementation: parse and look for 'id' attributes
 18      root = XML(text, parser, base_url=base_url)
 19      dic = {}
 20      for elem in _find_id_attributes(root):
 21          dic[elem.get(u'id')] = elem
 22      return root, dic
 23  
 24  def XMLDTDID(text, parser=None, *, base_url=None):
 25      u"""XMLDTDID(text, parser=None, base_url=None)
 26  
 27      Parse the text and return a tuple (root node, ID dictionary).  The root
 28      node is the same as returned by the XML() function.  The dictionary
 29      contains string-element pairs.  The dictionary keys are the values of ID
 30      attributes as defined by the DTD.  The elements referenced by the ID are
 31      stored as dictionary values.
 32  
 33      Note that you must not modify the XML tree if you use the ID dictionary.
 34      The results are undefined.
 35      """
 36      cdef _Element root
 37      root = XML(text, parser, base_url=base_url)
 38      # xml:id spec compatible implementation: use DTD ID attributes from libxml2
 39      if root._doc._c_doc.ids is NULL:
 40          return root, {}
 41      else:
 42          return root, _IDDict(root)
 43  
 44  def parseid(source, parser=None, *, base_url=None):
 45      u"""parseid(source, parser=None)
 46  
 47      Parses the source into a tuple containing an ElementTree object and an
 48      ID dictionary.  If no parser is provided as second argument, the default
 49      parser is used.
 50  
 51      Note that you must not modify the XML tree if you use the ID dictionary.
 52      The results are undefined.
 53      """
 54      cdef _Document doc
 55      doc = _parseDocument(source, parser, base_url)
 56      return _elementTreeFactory(doc, None), _IDDict(doc)
 57  
 58  cdef class _IDDict:
 59      u"""IDDict(self, etree)
 60      A dictionary-like proxy class that mapps ID attributes to elements.
 61  
 62      The dictionary must be instantiated with the root element of a parsed XML
 63      document, otherwise the behaviour is undefined.  Elements and XML trees
 64      that were created or modified 'by hand' are not supported.
 65      """
 66      cdef _Document _doc
 67      cdef object _keys
 68      cdef object _items
 69      def __cinit__(self, etree):
 70          cdef _Document doc
 71          doc = _documentOrRaise(etree)
 72          if doc._c_doc.ids is NULL:
 73              raise ValueError, u"No ID dictionary available."
 74          self._doc = doc
 75          self._keys  = None
 76          self._items = None
 77  
 78      def copy(self):
 79          return _IDDict(self._doc)
 80  
 81      def __getitem__(self, id_name):
 82          cdef tree.xmlHashTable* c_ids
 83          cdef tree.xmlID* c_id
 84          cdef xmlAttr* c_attr
 85          c_ids = self._doc._c_doc.ids
 86          id_utf = _utf8(id_name)
 87          c_id = <tree.xmlID*>tree.xmlHashLookup(c_ids, _xcstr(id_utf))
 88          if c_id is NULL:
 89              raise KeyError, u"key not found."
 90          c_attr = c_id.attr
 91          if c_attr is NULL or c_attr.parent is NULL:
 92              raise KeyError, u"ID attribute not found."
 93          return _elementFactory(self._doc, c_attr.parent)
 94  
 95      def get(self, id_name):
 96          return self[id_name]
 97  
 98      def __contains__(self, id_name):
 99          cdef tree.xmlID* c_id
100          id_utf = _utf8(id_name)
101          c_id = <tree.xmlID*>tree.xmlHashLookup(
102              self._doc._c_doc.ids, _xcstr(id_utf))
103          return c_id is not NULL
104  
105      def has_key(self, id_name):
106          return id_name in self
107  
108      def __repr__(self):
109          return repr(dict(self))
110  
111      def keys(self):
112          if self._keys is None:
113              self._keys = self._build_keys()
114          return self._keys[:]
115  
116      def __iter__(self):
117          if self._keys is None:
118              self._keys = self._build_keys()
119          return iter(self._keys)
120  
121      def iterkeys(self):
122          return self
123  
124      def __len__(self):
125          if self._keys is None:
126              self._keys = self._build_keys()
127          return len(self._keys)
128  
129      def items(self):
130          if self._items is None:
131              self._items = self._build_items()
132          return self._items[:]
133  
134      def iteritems(self):
135          if self._items is None:
136              self._items = self._build_items()
137          return iter(self._items)
138  
139      def values(self):
140          cdef list values = []
141          if self._items is None:
142              self._items = self._build_items()
143          for item in self._items:
144              value = python.PyTuple_GET_ITEM(item, 1)
145              python.Py_INCREF(value)
146              values.append(value)
147          return values
148  
149      def itervalues(self):
150          return iter(self.values())
151  
152      cdef object _build_keys(self):
153          keys = []
154          tree.xmlHashScan(<tree.xmlHashTable*>self._doc._c_doc.ids,
155                           <tree.xmlHashScanner>_collectIdHashKeys, <python.PyObject*>keys)
156          return keys
157  
158      cdef object _build_items(self):
159          items = []
160          context = (items, self._doc)
161          tree.xmlHashScan(<tree.xmlHashTable*>self._doc._c_doc.ids,
162                           <tree.xmlHashScanner>_collectIdHashItemList, <python.PyObject*>context)
163          return items
164  
cdef void _collectIdHashItemList(void* payload, void* context, xmlChar* name):
    # xmlHashScan() callback: 'context' is a (list, document) tuple.  For
    # each ID entry that still points to an attribute with a parent element,
    # append the pair (ID name, element) to the list.
    cdef list collected
    cdef tree.xmlID* c_id = <tree.xmlID*>payload
    if c_id is NULL:
        return
    if c_id.attr is NULL or c_id.attr.parent is NULL:
        return
    collected, doc = <tuple>context
    owner = _elementFactory(doc, c_id.attr.parent)
    collected.append((funicode(name), owner))
174  
cdef void _collectIdHashKeys(void* payload, void* collect_list, xmlChar* name):
    # xmlHashScan() callback: 'collect_list' is a Python list.  Append the
    # ID name of every entry that still points to an attribute with a parent
    # element; skip all other entries.
    cdef tree.xmlID* c_id = <tree.xmlID*>payload
    if c_id is NULL:
        return
    if c_id.attr is NULL or c_id.attr.parent is NULL:
        return
    key = funicode(name)
    (<list>collect_list).append(key)