/ src / input_sources / local_source.py
local_source.py
  1  from __future__ import annotations
  2  
  3  """Local filesystem input source implementation."""
  4  
  5  import logging
  6  from pathlib import Path
  7  from typing import Any
  8  
  9  from .protocol import InputSource
 10  
 11  
 12  class LocalInputSource:
 13      """Input source for local filesystem.
 14  
 15      Provides access to files on the local filesystem without any
 16      downloading or temporary file handling.
 17      """
 18  
 19      def __init__(self, config: dict[str, Any] | None = None):
 20          """Initialize the local input source.
 21  
 22          Parameters
 23          ----------
 24          config
 25              Configuration dictionary with optional keys:
 26              - base_path: str or Path - Base directory for listing files.
 27                If not provided, must pass explicit path to list_files().
 28          """
 29          self.config = config or {}
 30          self.logger = logging.getLogger(__name__)
 31  
 32          # Get base_path from config if provided
 33          base_path_config = self.config.get("base_path")
 34          self.base_path = Path(base_path_config).expanduser().resolve() if base_path_config else None
 35  
 36      def list_files(self, path: str = "", extensions: list[str] | None = None) -> list[str]:
 37          """List files in the given local path.
 38  
 39          Parameters
 40          ----------
 41          path
 42              Local directory path to list files from.
 43              If empty string or not provided, uses base_path from config.
 44              Can be absolute path or relative to base_path.
 45          extensions
 46              Optional list of file extensions to filter by (e.g., ['.pdf', '.docx']).
 47  
 48          Returns
 49          -------
 50          List of file paths as strings.
 51          """
 52          # Determine which path to use
 53          if not path or path == "":
 54              if self.base_path is None:
 55                  raise ValueError(
 56                      "No path provided and no base_path configured. "
 57                      "Either provide a path argument or configure base_path in source_config."
 58                  )
 59              local_path = self.base_path
 60          else:
 61              local_path = Path(path).expanduser().resolve()
 62  
 63          if not local_path.exists():
 64              raise FileNotFoundError(f"Path does not exist: {local_path}")
 65  
 66          if local_path.is_file():
 67              # Single file
 68              if extensions is None or local_path.suffix.lower() in extensions:
 69                  return [str(local_path)]
 70              return []
 71  
 72          # Directory - recursively find all files
 73          files = []
 74          for file_path in local_path.rglob("*"):
 75              if file_path.is_file() and (extensions is None or file_path.suffix.lower() in extensions):
 76                      files.append(str(file_path))
 77  
 78          self.logger.info(f"Found {len(files)} file(s) in {local_path}")
 79          return sorted(files)
 80  
 81      def get_file(self, file_id: str) -> Path:
 82          """Get a local file path.
 83  
 84          For local sources, this simply validates and returns the path.
 85  
 86          Parameters
 87          ----------
 88          file_id
 89              Local file path.
 90  
 91          Returns
 92          -------
 93          Path object pointing to the file.
 94          """
 95          local_path = Path(file_id).expanduser().resolve()
 96  
 97          if not local_path.exists():
 98              raise FileNotFoundError(f"File does not exist: {local_path}")
 99  
100          if not local_path.is_file():
101              raise ValueError(f"Path is not a file: {local_path}")
102  
103          return local_path
104  
105      def cleanup(self) -> None:
106          """Clean up resources.
107  
108          For local source, nothing to clean up.
109          """
110          pass
111  
112  
def create_local_source(config: dict[str, Any] | None = None) -> InputSource:
    """Create a local filesystem input source.

    Parameters
    ----------
    config
        Configuration dictionary passed unchanged to ``LocalInputSource``.
        May be ``None`` or omitted, in which case an empty configuration
        is used. The optional ``base_path`` key sets the default directory
        for ``list_files()``.

    Returns
    -------
    A ``LocalInputSource`` instance for the local filesystem.
    """
    return LocalInputSource(config)