# test/python/testoptional.py
  1  """
  2  Optional module tests
  3  """
  4  
  5  import sys
  6  import unittest
  7  
  8  # pylint: disable=C0415,W0611,W0621
  9  import timm
 10  import txtai
 11  
 12  
class TestOptional(unittest.TestCase):
    """
    Optional tests. Simulates optional dependencies not being installed by stubbing
    them out of sys.modules for the lifetime of this test case, then asserting that
    each optional txtai feature raises ImportError when its dependency is missing.
    """

    @classmethod
    def setUpClass(cls):
        """
        Simulate optional packages not being installed
        """

        # Names of optional third-party dependencies to stub out
        modules = [
            "annoy",
            "bitsandbytes",
            "bs4",
            "chonkie",
            "croniter",
            "docling.document_converter",
            "duckdb",
            "fastapi",
            "ggml",
            "gliner",
            "grandcypher",
            "grand",
            "hnswlib",
            "httpx",
            "imagehash",
            "libcloud.storage.providers",
            "litellm",
            "llama_cpp",
            "model2vec",
            "networkx",
            "nltk",
            "onnxmltools",
            "onnxruntime",
            "onnxruntime.quantization",
            "pandas",
            "peft",
            "pgvector",
            "PIL",
            "rich",
            "scipy",
            "scipy.sparse",
            "sentence_transformers",
            "sklearn.decomposition",
            "smolagents",
            "sounddevice",
            "soundfile",
            "sqlalchemy",
            "sqlite_vec",
            "staticvectors",
            "tika",
            "ttstokenizer",
            "xmltodict",
        ]

        # Get handle to all currently loaded txtai modules
        modules = modules + [key for key in sys.modules if key.startswith("txtai")]

        # Map of module name -> original module object; None marks "not currently loaded"
        cls.modules = {module: None for module in modules}

        # Replace loaded modules with stubs. Save modules for later reloading
        for module in cls.modules:
            if module in sys.modules:
                cls.modules[module] = sys.modules[module]

            # Remove txtai modules. Set optional dependencies to None to prevent reloading.
            # Deleting txtai entries forces the test imports below to re-execute txtai's
            # import logic; setting a dependency's sys.modules entry to None makes a
            # subsequent "import name" raise ModuleNotFoundError (an ImportError subclass),
            # which is exactly what these tests assert.
            if "txtai" in module:
                if module in sys.modules:
                    del sys.modules[module]
            else:
                sys.modules[module] = None

    @classmethod
    def tearDownClass(cls):
        """
        Resets modules environment back to initial state.
        """

        # Reset replaced modules in setup
        for key, value in cls.modules.items():
            if value:
                # Module was loaded before setUpClass ran - restore the original object
                sys.modules[key] = value
            else:
                # Module was not previously loaded - remove the None stub added in setUpClass
                del sys.modules[key]

    def testAgent(self):
        """
        Test missing agent dependencies
        """

        from txtai.agent import Agent

        with self.assertRaises(ImportError):
            Agent(llm="hf-internal-testing/tiny-random-LlamaForCausalLM", max_steps=1)

    def testANN(self):
        """
        Test missing ANN dependencies
        """

        from txtai.ann import ANNFactory, SparseANNFactory

        # Test dense methods
        with self.assertRaises(ImportError):
            ANNFactory.create({"backend": "annoy"})

        with self.assertRaises(ImportError):
            ANNFactory.create({"backend": "ggml"})

        with self.assertRaises(ImportError):
            ANNFactory.create({"backend": "hnsw"})

        with self.assertRaises(ImportError):
            ANNFactory.create({"backend": "pgvector"})

        with self.assertRaises(ImportError):
            ANNFactory.create({"backend": "sqlite"})

        # Quantization path requires an optional dependency even for the torch backend
        with self.assertRaises(ImportError):
            ANNFactory.create({"backend": "torch", "torch": {"quantize": True}})

        # Test sparse methods
        with self.assertRaises(ImportError):
            SparseANNFactory.create({"backend": "ivfsparse"})

        with self.assertRaises(ImportError):
            SparseANNFactory.create({"backend": "pgsparse"})

    def testApi(self):
        """
        Test missing api dependencies
        """

        # The api package itself requires optional dependencies, so the import fails
        with self.assertRaises(ImportError):
            import txtai.api

    def testConsole(self):
        """
        Test missing console dependencies
        """

        from txtai.console import Console

        with self.assertRaises(ImportError):
            Console()

    def testCloud(self):
        """
        Test missing cloud dependencies
        """

        from txtai.cloud import ObjectStorage

        with self.assertRaises(ImportError):
            ObjectStorage(None)

    def testDatabase(self):
        """
        Test missing database dependencies
        """

        from txtai.database import Client, DuckDB, ImageEncoder

        with self.assertRaises(ImportError):
            Client({})

        with self.assertRaises(ImportError):
            DuckDB({})

        with self.assertRaises(ImportError):
            ImageEncoder()

    def testGraph(self):
        """
        Test missing graph dependencies
        """

        from txtai.graph import GraphFactory, Query

        with self.assertRaises(ImportError):
            GraphFactory.create({"backend": "networkx"})

        with self.assertRaises(ImportError):
            GraphFactory.create({"backend": "rdbms"})

        with self.assertRaises(ImportError):
            Query()

    def testModel(self):
        """
        Test missing model dependencies
        """

        from txtai.embeddings import Reducer
        from txtai.models import OnnxModel

        with self.assertRaises(ImportError):
            Reducer()

        with self.assertRaises(ImportError):
            OnnxModel(None)

    def testPipeline(self):
        """
        Test missing pipeline dependencies
        """

        from txtai.pipeline import (
            AudioMixer,
            AudioStream,
            Caption,
            Entity,
            FileToHTML,
            HFOnnx,
            HFTrainer,
            HTMLToMarkdown,
            ImageHash,
            LiteLLM,
            LlamaCpp,
            Microphone,
            MLOnnx,
            Objects,
            OpenCode,
            Segmentation,
            Tabular,
            TextToAudio,
            TextToSpeech,
            Transcription,
            Translation,
        )

        with self.assertRaises(ImportError):
            AudioMixer()

        with self.assertRaises(ImportError):
            AudioStream()

        with self.assertRaises(ImportError):
            Caption()

        with self.assertRaises(ImportError):
            Entity("neuml/gliner-bert-tiny")

        # Each FileToHTML backend has its own optional dependency
        with self.assertRaises(ImportError):
            FileToHTML(backend="docling")

        with self.assertRaises(ImportError):
            FileToHTML(backend="tika")

        with self.assertRaises(ImportError):
            HFOnnx()("google/bert_uncased_L-2_H-128_A-2", quantize=True)

        with self.assertRaises(ImportError):
            HFTrainer()(None, None, lora=True)

        with self.assertRaises(ImportError):
            HTMLToMarkdown()

        with self.assertRaises(ImportError):
            ImageHash()

        with self.assertRaises(ImportError):
            LiteLLM("huggingface/t5-small")

        with self.assertRaises(ImportError):
            LlamaCpp("TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q2_K.gguf")

        with self.assertRaises(ImportError):
            Microphone()

        with self.assertRaises(ImportError):
            MLOnnx()

        with self.assertRaises(ImportError):
            Objects()

        with self.assertRaises(ImportError):
            OpenCode("opencode")

        # Both segmentation modes depend on different optional packages
        with self.assertRaises(ImportError):
            Segmentation(sentences=True)

        with self.assertRaises(ImportError):
            Segmentation(chunker="token")

        with self.assertRaises(ImportError):
            Tabular()

        with self.assertRaises(ImportError):
            TextToAudio()

        with self.assertRaises(ImportError):
            TextToSpeech()

        with self.assertRaises(ImportError):
            Transcription()

        with self.assertRaises(ImportError):
            Translation().detect(["test"])

    def testScoring(self):
        """
        Test missing scoring dependencies
        """

        from txtai.scoring import ScoringFactory

        with self.assertRaises(ImportError):
            ScoringFactory.create({"method": "pgtext"})

    def testVectors(self):
        """
        Test missing vector dependencies
        """

        from txtai.vectors import SparseVectors, VectorsFactory, SparseVectorsFactory
        from txtai.util import SparseArray

        # Test dense vectors
        with self.assertRaises(ImportError):
            VectorsFactory.create({"method": "litellm", "path": "huggingface/sentence-transformers/all-MiniLM-L6-v2"}, None)

        with self.assertRaises(ImportError):
            VectorsFactory.create({"method": "llama.cpp", "path": "nomic-ai/nomic-embed-text-v1.5-GGUF/nomic-embed-text-v1.5.Q2_K.gguf"}, None)

        with self.assertRaises(ImportError):
            VectorsFactory.create({"method": "model2vec", "path": "minishlab/M2V_base_output"}, None)

        with self.assertRaises(ImportError):
            VectorsFactory.create({"method": "sentence-transformers", "path": "sentence-transformers/nli-mpnet-base-v2"}, None)

        with self.assertRaises(ImportError):
            VectorsFactory.create({"method": "words"}, None)

        # Test default model still loads without any optional dependencies installed
        model = VectorsFactory.create({"path": "sentence-transformers/all-MiniLM-L6-v2"}, None)
        self.assertIsNotNone(model)

        # Test sparse vectors
        with self.assertRaises(ImportError):
            SparseVectors(None, None, None)

        with self.assertRaises(ImportError):
            SparseVectorsFactory.create({"method": "sentence-transformers", "path": "sparse-encoder-testing/splade-bert-tiny-nq"}, None)

        with self.assertRaises(ImportError):
            SparseArray()

    def testWorkflow(self):
        """
        Test missing workflow dependencies
        """

        from txtai.workflow import ExportTask, ImageTask, ServiceTask, StorageTask, Workflow

        with self.assertRaises(ImportError):
            ExportTask()

        with self.assertRaises(ImportError):
            ImageTask()

        with self.assertRaises(ImportError):
            ServiceTask()

        with self.assertRaises(ImportError):
            StorageTask()

        # Scheduling requires the optional croniter dependency
        with self.assertRaises(ImportError):
            Workflow([], workers=1).schedule(None, [])