# docloader.pxi
# Custom resolver API

ctypedef enum _InputDocumentDataType:
    PARSER_DATA_INVALID
    PARSER_DATA_EMPTY
    PARSER_DATA_STRING
    PARSER_DATA_FILENAME
    PARSER_DATA_FILE


@cython.final
@cython.internal
cdef class _InputDocument:
    # Value object returned by the Resolver.resolve_*() methods.
    # '_type' records which of those methods filled the instance in.
    cdef _InputDocumentDataType _type
    cdef bytes _data_bytes
    cdef object _filename
    cdef object _file
    cdef bint _close_file

    def __cinit__(self):
        # stays "invalid" until one of the resolve_*() methods sets a real type
        self._type = PARSER_DATA_INVALID


cdef class Resolver:
    u"Common base class for all resolver implementations."

    def resolve(self, system_url, public_id, context):
        u"""resolve(self, system_url, public_id, context)

        Hook for subclasses: locate the external source identified by
        ``system_url`` and ``public_id``.  ``context`` is an opaque
        object.

        An override should return the result of one of the
        ``resolve_*()`` methods.  This base implementation always
        returns None.
        """
        return None

    def resolve_empty(self, context):
        u"""resolve_empty(self, context)

        Build an input document that has no content.

        ``context`` is the context object received by ``resolve()``.
        """
        cdef _InputDocument input_doc = _InputDocument()
        input_doc._type = PARSER_DATA_EMPTY
        return input_doc

    def resolve_string(self, string, context, *, base_url=None):
        u"""resolve_string(self, string, context, base_url=None)

        Build an input document from a parsable string.

        ``string`` holds the data (byte string or unicode string; the
        latter is encoded as UTF-8) and ``context`` is the context
        object.  The source URL or filename can be supplied through
        the ``base_url`` keyword argument.

        Raises TypeError for any other kind of ``string`` argument.
        """
        cdef _InputDocument input_doc
        if isinstance(string, bytes):
            data = string
        elif isinstance(string, unicode):
            data = (<unicode>string).encode('utf8')
        else:
            raise TypeError("argument must be a byte string or unicode string")

        input_doc = _InputDocument()
        input_doc._type = PARSER_DATA_STRING
        input_doc._data_bytes = data
        if base_url is not None:
            input_doc._filename = _encodeFilename(base_url)
        return input_doc

    def resolve_filename(self, filename, context):
        u"""resolve_filename(self, filename, context)

        Build an input document that refers to a parsable file by name.

        ``filename`` is the file name and ``context`` the context
        object.  A URL with an HTTP, FTP or file target may be passed
        instead of a plain file name.
        """
        cdef _InputDocument input_doc = _InputDocument()
        input_doc._type = PARSER_DATA_FILENAME
        input_doc._filename = _encodeFilename(filename)
        return input_doc

    def resolve_file(self, f, context, *, base_url=None, bint close=True):
        u"""resolve_file(self, f, context, base_url=None, close=True)

        Build an input document from an open file-like object.

        ``f`` is the open file and ``context`` the context object.  The
        base URL or filename of the file can be supplied through the
        ``base_url`` keyword argument.  With ``close`` set to True
        (the default), the file will be closed after reading.

        Raises TypeError if ``f`` has no ``read`` attribute.

        Note that ``.resolve_filename()`` is the more efficient choice,
        especially in threaded environments.
        """
        cdef _InputDocument input_doc
        # probe for the attribute only; nothing is read from the file here
        try:
            f.read
        except AttributeError:
            raise TypeError(u"Argument is not a file-like object")

        input_doc = _InputDocument()
        input_doc._type = PARSER_DATA_FILE
        if base_url is None:
            # no explicit base URL given: derive a name from the file object
            input_doc._filename = _getFilenameForFile(f)
        else:
            input_doc._filename = _encodeFilename(base_url)
        input_doc._close_file = close
        input_doc._file = f
        return input_doc


@cython.final
@cython.internal
cdef class _ResolverRegistry:
    # '_resolvers' is a set of registered Resolver objects;
    # '_default_resolver' (may be None) is consulted when none of them matches.
    cdef object _resolvers
    cdef Resolver _default_resolver

    def __cinit__(self, Resolver default_resolver=None):
        self._resolvers = set()
        self._default_resolver = default_resolver

    def add(self, Resolver resolver not None):
        u"""add(self, resolver)

        Register a resolver.

        The 'resolve' method of the resolver gets called for each
        requested entity, and its result will be passed to the parser.
        A return value of None delegates the request to the other
        resolvers or to the default resolver.  Resolvers are tried in
        an arbitrary order until the first match is found.
        """
        self._resolvers.add(resolver)

    def remove(self, resolver):
        u"remove(self, resolver)"
        # discard(): a resolver that was never registered is silently ignored
        self._resolvers.discard(resolver)

    cdef _ResolverRegistry _copy(self):
        # New registry with the same default resolver and its own copy of the set.
        cdef _ResolverRegistry clone = _ResolverRegistry(self._default_resolver)
        clone._resolvers = self._resolvers.copy()
        return clone

    def copy(self):
        u"copy(self)"
        return self._copy()

    def resolve(self, system_url, public_id, context):
        u"resolve(self, system_url, public_id, context)"
        # the first registered resolver that returns something other than None wins
        for candidate in self._resolvers:
            found = candidate.resolve(system_url, public_id, context)
            if found is None:
                continue
            return found
        # nobody matched: fall back to the default resolver, if there is one
        if self._default_resolver is not None:
            return self._default_resolver.resolve(system_url, public_id, context)
        return None

    def __repr__(self):
        return repr(self._resolvers)


@cython.internal
cdef class _ResolverContext(_ExceptionContext):
    cdef _ResolverRegistry _resolvers
    cdef _TempStore _storage

    cdef int clear(self) except -1:
        # reset the inherited exception context first, then empty the temp store
        _ExceptionContext.clear(self)
        self._storage.clear()
        return 0


cdef _initResolverContext(_ResolverContext context,
                          _ResolverRegistry resolvers):
    # Attach the given registry to the context (or a new, empty one if None
    # was passed) and give the context a fresh temporary store.
    if resolvers is not None:
        context._resolvers = resolvers
    else:
        context._resolvers = _ResolverRegistry()
    context._storage = _TempStore()