local_source.py
"""Local filesystem input source implementation."""

from __future__ import annotations

import logging
from pathlib import Path
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    # Only referenced in a (lazily evaluated) return annotation, so the
    # protocol module does not need to be imported at runtime.
    from .protocol import InputSource


def _normalize_extensions(extensions: list[str] | None) -> frozenset[str] | None:
    """Return the extension filter in canonical form.

    Each extension is lower-cased and given a leading dot so that
    ``"PDF"``, ``".PDF"`` and ``".pdf"`` all match a ``.pdf`` suffix.
    An empty-string entry is kept as-is and matches files without a suffix.

    Parameters
    ----------
    extensions
        Requested extensions, or None for "no filtering".

    Returns
    -------
    Frozen set of normalized extensions, or None when no filter was requested.
    An empty input yields an empty set, which matches no file.
    """
    if extensions is None:
        return None
    if isinstance(extensions, str):
        # A bare string would otherwise be iterated character by character.
        extensions = [extensions]

    normalized = set()
    for ext in extensions:
        ext = ext.lower()
        if ext and not ext.startswith("."):
            ext = f".{ext}"
        normalized.add(ext)
    return frozenset(normalized)


class LocalInputSource:
    """Input source for local filesystem.

    Provides access to files on the local filesystem without any
    downloading or temporary file handling.
    """

    def __init__(self, config: dict[str, Any] | None = None):
        """Initialize the local input source.

        Parameters
        ----------
        config
            Configuration dictionary with optional keys:
            - base_path: str or Path - Base directory for listing files.
              If not provided, must pass explicit path to list_files().
        """
        self.config = config or {}
        self.logger = logging.getLogger(__name__)

        # An absent or empty base_path means "no default directory".
        base_path_config = self.config.get("base_path")
        self.base_path = (
            Path(base_path_config).expanduser().resolve() if base_path_config else None
        )

    def _resolve_listing_path(self, path: str) -> Path:
        """Turn the ``path`` argument of list_files() into an absolute Path.

        Raises
        ------
        ValueError
            If ``path`` is empty and no base_path is configured.
        """
        if not path:
            if self.base_path is None:
                raise ValueError(
                    "No path provided and no base_path configured. "
                    "Either provide a path argument or configure base_path in source_config."
                )
            return self.base_path

        candidate = Path(path).expanduser()
        if not candidate.is_absolute() and self.base_path is not None:
            anchored = (self.base_path / candidate).resolve()
            if anchored.exists():
                return anchored
            # Fall through: keep accepting paths relative to the current
            # working directory, which is how relative paths used to resolve.
        return candidate.resolve()

    def list_files(self, path: str = "", extensions: list[str] | None = None) -> list[str]:
        """List files in the given local path.

        Parameters
        ----------
        path
            Local file or directory path to list files from.
            If empty string or not provided, uses base_path from config.
            Can be an absolute path or a path relative to base_path; a
            relative path that does not exist under base_path (or when no
            base_path is configured) is resolved against the current
            working directory.
        extensions
            Optional list of file extensions to filter by (e.g., ['.pdf', '.docx']).
            Matching is case-insensitive and the leading dot is optional.

        Returns
        -------
        Sorted list of absolute file paths as strings. Directories are
        searched recursively; a path to a single file yields at most
        that one file.

        Raises
        ------
        ValueError
            If no path is given and no base_path is configured.
        FileNotFoundError
            If the resolved path does not exist.
        """
        local_path = self._resolve_listing_path(path)

        if not local_path.exists():
            raise FileNotFoundError(f"Path does not exist: {local_path}")

        # Normalize once so the per-file check is a cheap set lookup.
        wanted = _normalize_extensions(extensions)

        def matches(candidate: Path) -> bool:
            return wanted is None or candidate.suffix.lower() in wanted

        if local_path.is_file():
            # Single file
            return [str(local_path)] if matches(local_path) else []

        # Directory - recursively find all files
        files = sorted(
            str(file_path)
            for file_path in local_path.rglob("*")
            if file_path.is_file() and matches(file_path)
        )

        self.logger.info("Found %d file(s) in %s", len(files), local_path)
        return files

    def get_file(self, file_id: str) -> Path:
        """Get a local file path.

        For local sources, this simply validates and returns the path.

        Parameters
        ----------
        file_id
            Local file path.

        Returns
        -------
        Resolved, absolute Path object pointing to the file.

        Raises
        ------
        FileNotFoundError
            If the path does not exist.
        ValueError
            If the path exists but is not a regular file.
        """
        local_path = Path(file_id).expanduser().resolve()

        if not local_path.exists():
            raise FileNotFoundError(f"File does not exist: {local_path}")

        if not local_path.is_file():
            raise ValueError(f"Path is not a file: {local_path}")

        return local_path

    def cleanup(self) -> None:
        """Clean up resources.

        For local source, nothing to clean up.
        """


def create_local_source(config: dict[str, Any] | None = None) -> InputSource:
    """Create a local filesystem input source.

    Parameters
    ----------
    config
        Configuration dictionary forwarded to LocalInputSource
        (optional key: base_path). None is treated as an empty config.

    Returns
    -------
    InputSource instance for local filesystem.
    """
    return LocalInputSource(config)