/ src / input_sources / protocol.py
protocol.py
 1  from __future__ import annotations
 2  
 3  """Protocol for input source implementations."""
 4  
 5  from pathlib import Path
 6  from typing import Protocol
 7  
 8  
 9  class InputSource(Protocol):
10      """Protocol for input source implementations.
11  
12      Defines the interface for accessing files from different sources
13      (local filesystem, S3, etc.). All implementations must provide
14      these methods to be compatible.
15      """
16  
17      def list_files(self, path: str = "", extensions: list[str] | None = None) -> list[str]:
18          """List files in the given path.
19  
20          Parameters
21          ----------
22          path
23              Path to list files from (can be directory path or S3 prefix).
24              If empty string or not provided, uses the base path configured for the source:
25              - Local sources: uses base_path from config
26              - S3 sources: uses prefix from config
27          extensions
28              Optional list of file extensions to filter by (e.g., ['.pdf', '.docx']).
29              If None, return all files.
30  
31          Returns
32          -------
33          List of file identifiers (paths or URIs) that can be used with get_file().
34          """
35          ...
36  
37      def get_file(self, file_id: str) -> Path:
38          """Get a file and return a local path to it.
39  
40          For local sources, this returns the path directly.
41          For remote sources (S3), this downloads the file to a temp location.
42  
43          Parameters
44          ----------
45          file_id
46              File identifier (local path or S3 URI).
47  
48          Returns
49          -------
50          Path to the local file (original or downloaded temp file).
51          """
52          ...
53  
54      def cleanup(self) -> None:
55          """Clean up any temporary files or resources.
56  
57          Called after processing is complete to remove downloaded files,
58          close connections, etc.
59          """
60          ...