factory.py
"""Factory for creating document loader implementations."""

from __future__ import annotations

from collections.abc import Callable
from typing import Any, ClassVar

from .docling_loader import create_docling_loader
from .protocol import DocumentLoader
from .pymupdf_loader import create_pymupdf_loader
from .types import LoaderType
from .whisper_loader import create_whisper_loader


class LoaderFactory:
    """Registry-backed factory for document loader implementations.

    Loader factories are stored in a class-level mapping keyed by
    ``LoaderType``. New loaders are added with :meth:`register` and
    instantiated with :meth:`create`.
    """

    # Shared by the class (and any subclass that does not override it):
    # maps a loader type to the callable that builds the loader from a
    # dict of options.
    _registry: ClassVar[
        dict[LoaderType, Callable[[dict[str, Any]], DocumentLoader]]
    ] = {}

    @classmethod
    def register(
        cls, loader_type: LoaderType
    ) -> Callable[
        [Callable[[dict[str, Any]], DocumentLoader]],
        Callable[[dict[str, Any]], DocumentLoader],
    ]:
        """Register a new loader factory.

        Can be used as a decorator or called directly, e.g.
        ``LoaderFactory.register(loader_type)(factory_func)``. Registering
        under a type that already has an entry replaces that entry.

        Parameters
        ----------
        loader_type
            Type to register the loader under.

        Returns
        -------
        A decorator that stores the factory function in the registry and
        returns it unchanged.
        """

        def decorator(
            factory_func: Callable[[dict[str, Any]], DocumentLoader],
        ) -> Callable[[dict[str, Any]], DocumentLoader]:
            cls._registry[loader_type] = factory_func
            return factory_func

        return decorator

    @classmethod
    def create(cls, loader_type: LoaderType, **kwargs: Any) -> DocumentLoader:
        """Create a loader by type.

        Parameters
        ----------
        loader_type
            Type of the loader to create.
        **kwargs
            Additional arguments; they are collected into a single dict
            and passed to the registered factory as its only argument.

        Returns
        -------
        DocumentLoader instance.

        Raises
        ------
        ValueError
            If no factory is registered under ``loader_type``.
        """
        # Only the registry lookup is guarded, so a KeyError raised inside
        # the factory itself is not mistaken for an unknown loader type.
        try:
            factory_func = cls._registry[loader_type]
        except KeyError:
            available = ", ".join(t.value for t in cls._registry)
            raise ValueError(
                f"Unknown loader: {loader_type}. "
                f"Available loaders: {available}"
            ) from None
        return factory_func(kwargs)


# Register the built-in loaders at import time.
LoaderFactory.register(LoaderType.PYMUPDF)(create_pymupdf_loader)
LoaderFactory.register(LoaderType.DOCLING)(create_docling_loader)
LoaderFactory.register(LoaderType.WHISPER)(create_whisper_loader)