/ lib / lxml / docloader.pxi
docloader.pxi
  1  # Custom resolver API
  2  
# Tag stored in ``_InputDocument._type`` that identifies which kind of
# input a resolver handed back.
ctypedef enum _InputDocumentDataType:
    # initial value assigned by _InputDocument.__cinit__()
    PARSER_DATA_INVALID
    # set by Resolver.resolve_empty()
    PARSER_DATA_EMPTY
    # set by Resolver.resolve_string()
    PARSER_DATA_STRING
    # set by Resolver.resolve_filename()
    PARSER_DATA_FILENAME
    # set by Resolver.resolve_file()
    PARSER_DATA_FILE
  9  
@cython.final
@cython.internal
cdef class _InputDocument:
    # Plain record describing one resolved input source.  Instances are
    # created and filled in by the Resolver.resolve_*() methods.

    # kind of input this document represents
    cdef _InputDocumentDataType _type
    # byte string payload stored by Resolver.resolve_string()
    cdef bytes _data_bytes
    # result of _encodeFilename() / _getFilenameForFile(); assigned by
    # resolve_filename(), resolve_file(), and by resolve_string() when a
    # base_url is given
    cdef object _filename
    # file-like object stored by Resolver.resolve_file()
    cdef object _file
    # the ``close`` flag passed to Resolver.resolve_file()
    cdef bint _close_file

    def __cinit__(self):
        # Start out as "invalid" until a resolve_*() method sets a real type.
        self._type = PARSER_DATA_INVALID
 21  
 22  
 23  cdef class Resolver:
 24      u"This is the base class of all resolvers."
 25      def resolve(self, system_url, public_id, context):
 26          u"""resolve(self, system_url, public_id, context)
 27  
 28          Override this method to resolve an external source by
 29          ``system_url`` and ``public_id``.  The third argument is an
 30          opaque context object.
 31  
 32          Return the result of one of the ``resolve_*()`` methods.
 33          """
 34          return None
 35  
 36      def resolve_empty(self, context):
 37          u"""resolve_empty(self, context)
 38  
 39          Return an empty input document.
 40  
 41          Pass context as parameter.
 42          """
 43          cdef _InputDocument doc_ref
 44          doc_ref = _InputDocument()
 45          doc_ref._type = PARSER_DATA_EMPTY
 46          return doc_ref
 47  
 48      def resolve_string(self, string, context, *, base_url=None):
 49          u"""resolve_string(self, string, context, base_url=None)
 50  
 51          Return a parsable string as input document.
 52  
 53          Pass data string and context as parameters.  You can pass the
 54          source URL or filename through the ``base_url`` keyword
 55          argument.
 56          """
 57          cdef _InputDocument doc_ref
 58          if isinstance(string, unicode):
 59              string = (<unicode>string).encode('utf8')
 60          elif not isinstance(string, bytes):
 61              raise TypeError, "argument must be a byte string or unicode string"
 62          doc_ref = _InputDocument()
 63          doc_ref._type = PARSER_DATA_STRING
 64          doc_ref._data_bytes = string
 65          if base_url is not None:
 66              doc_ref._filename = _encodeFilename(base_url)
 67          return doc_ref
 68  
 69      def resolve_filename(self, filename, context):
 70          u"""resolve_filename(self, filename, context)
 71  
 72          Return the name of a parsable file as input document.
 73  
 74          Pass filename and context as parameters.  You can also pass a
 75          URL with an HTTP, FTP or file target.
 76          """
 77          cdef _InputDocument doc_ref
 78          doc_ref = _InputDocument()
 79          doc_ref._type = PARSER_DATA_FILENAME
 80          doc_ref._filename = _encodeFilename(filename)
 81          return doc_ref
 82  
 83      def resolve_file(self, f, context, *, base_url=None, bint close=True):
 84          u"""resolve_file(self, f, context, base_url=None, close=True)
 85  
 86          Return an open file-like object as input document.
 87  
 88          Pass open file and context as parameters.  You can pass the
 89          base URL or filename of the file through the ``base_url``
 90          keyword argument.  If the ``close`` flag is True (the
 91          default), the file will be closed after reading.
 92  
 93          Note that using ``.resolve_filename()`` is more efficient,
 94          especially in threaded environments.
 95          """
 96          cdef _InputDocument doc_ref
 97          try:
 98              f.read
 99          except AttributeError:
100              raise TypeError, u"Argument is not a file-like object"
101          doc_ref = _InputDocument()
102          doc_ref._type = PARSER_DATA_FILE
103          if base_url is not None:
104              doc_ref._filename = _encodeFilename(base_url)
105          else:
106              doc_ref._filename = _getFilenameForFile(f)
107          doc_ref._close_file = close
108          doc_ref._file = f
109          return doc_ref
110  
@cython.final
@cython.internal
cdef class _ResolverRegistry:
    # Unordered collection of registered resolvers plus an optional
    # default resolver that is consulted last.
    cdef object _resolvers
    cdef Resolver _default_resolver

    def __cinit__(self, Resolver default_resolver=None):
        self._default_resolver = default_resolver
        self._resolvers = set()

    def add(self, Resolver resolver not None):
        u"""add(self, resolver)

        Register a resolver.

        Whenever an entity is requested, the 'resolve' method of the
        registered resolvers is called and a non-None result is passed to
        the parser.  A None result delegates the request to the other
        resolvers or, finally, to the default resolver.  Resolvers are
        tried in an arbitrary order until the first match is found.
        """
        self._resolvers.add(resolver)

    def remove(self, resolver):
        u"""remove(self, resolver)

        Unregister a resolver.  Unknown resolvers are silently ignored.
        """
        self._resolvers.discard(resolver)

    cdef _ResolverRegistry _copy(self):
        # New registry with the same default resolver and its own copy of
        # the resolver set.
        cdef _ResolverRegistry clone = _ResolverRegistry(self._default_resolver)
        clone._resolvers = self._resolvers.copy()
        return clone

    def copy(self):
        u"""copy(self)

        Return a new registry holding the same resolvers.
        """
        return self._copy()

    def resolve(self, system_url, public_id, context):
        u"""resolve(self, system_url, public_id, context)

        Ask each registered resolver in turn and return the first result
        that is not None.  If none matches, fall back to the default
        resolver, or return None when there is no default resolver.
        """
        for candidate in self._resolvers:
            document = candidate.resolve(system_url, public_id, context)
            if document is not None:
                return document
        if self._default_resolver is not None:
            return self._default_resolver.resolve(
                system_url, public_id, context)
        return None

    def __repr__(self):
        return repr(self._resolvers)
159  
160  
@cython.internal
cdef class _ResolverContext(_ExceptionContext):
    # _ExceptionContext subclass that additionally carries the resolver
    # registry and a _TempStore.  Both attributes are assigned by
    # _initResolverContext().
    cdef _ResolverRegistry _resolvers
    cdef _TempStore _storage

    cdef int clear(self) except -1:
        # Run the base class clear() first, then clear the _TempStore.
        # Returns 0; -1 is the declared error return value.
        _ExceptionContext.clear(self)
        self._storage.clear()
        return 0
170  
171  
cdef _initResolverContext(_ResolverContext context,
                          _ResolverRegistry resolvers):
    # Attach a resolver registry and a fresh _TempStore to ``context``.
    # A missing registry is replaced by a new, empty one.
    if resolvers is None:
        resolvers = _ResolverRegistry()
    context._resolvers = resolvers
    context._storage = _TempStore()