#!/usr/bin/env python3
"""Concatenate source files from specified paths into a single file.

Run:
    python scripts/concat_files.py                # default dirs (packages/, crates/)
    python scripts/concat_files.py src/           # single directory
    python scripts/concat_files.py src/ lib/      # multiple directories
    python scripts/concat_files.py -o out.txt .   # custom output file
"""

import argparse
import os
import sys
from pathlib import Path
from typing import Optional

# ─────────────────────────────────────────────────────────────
# CONFIGURATION
# ─────────────────────────────────────────────────────────────

# File suffixes that are picked up when scanning a directory.
INCLUDE_EXTS = {
    ".rs",
    ".md",
    ".toml",
}

# Default directories to search when no paths provided
SEARCH_PREFIXES = [
    "packages",
    "crates",
    # "docs"
]

# Directory/file names that are skipped when they match a path component exactly.
EXCLUDE = {
    ".git",
    "__pycache__",
    ".ruff_cache",
    ".pytest_cache",
    ".mypy_cache",
    ".venv",
    "dist",
    "build",
    "out",
    "htmlcov",
    "coverage",
    "node_modules",
    ".next",
    ".cache",
    "target",
}

# Names skipped when they END with one of these. Packaging metadata directories
# are called "<project>.egg-info", so an exact-name match would never hit them.
EXCLUDE_SUFFIXES = (".egg-info",)

ENCODING = "utf-8"

BANNER_CHAR = "─"
BANNER_WIDTH = 160
# Separator placed between every element of the output list, i.e. between a
# banner and its file body as well as between consecutive files.
JOIN_WITH = "\n\n" + BANNER_CHAR * BANNER_WIDTH + "\n"

# Tree-drawing fragments; all four are the same width so nested levels line up.
_TREE_BRANCH = "├── "
_TREE_LAST = "└── "
_TREE_PIPE = "│   "
_TREE_BLANK = "    "

# ─────────────────────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────────────────────


def _is_excluded_name(name: str) -> bool:
    """Return True if a single path component matches the exclusion rules."""
    return name in EXCLUDE or name.endswith(EXCLUDE_SUFFIXES)


def is_excluded_path(path: Path, root: Optional[Path] = None) -> bool:
    """Return True if any relevant component of *path* is excluded.

    Args:
        path: Path to test.
        root: Optional scan root. When given and *path* lies under it, only the
            components below *root* are tested, so a project that happens to
            live under a directory called e.g. ``build`` is not excluded
            wholesale. When omitted (or *path* is not under *root*), every
            component of *path* is tested.
    """
    parts = path.parts
    if root is not None:
        try:
            parts = path.relative_to(root).parts
        except ValueError:
            # Not located under root: fall back to checking the whole path.
            pass
    return any(_is_excluded_name(part) for part in parts)


def should_include_file(path: Path) -> bool:
    """Return True if *path* is an existing file with a suffix in INCLUDE_EXTS."""
    # Suffix test first: it is a pure string check and avoids a stat() call
    # for the (usually many) files with other extensions.
    return path.suffix in INCLUDE_EXTS and path.is_file()


def banner(title: str) -> str:
    """Return *title* centred in a BANNER_WIDTH-wide rule, newline-terminated.

    Titles too long to fit are emitted without any rule characters.
    """
    pad = max(BANNER_WIDTH - len(title) - 2, 0)
    left = pad // 2
    right = pad - left
    return f"{BANNER_CHAR * left} {title} {BANNER_CHAR * right}\n"


def read_file(path: Path) -> str:
    """Return the text of *path*, or a bracketed error marker if unreadable.

    Reading is best-effort: I/O and decoding failures are reported inline in
    the returned string rather than raised, so one bad file does not abort the
    whole run.
    """
    try:
        return path.read_text(encoding=ENCODING)
    except (OSError, UnicodeError) as e:
        return f"[ERROR READING FILE: {e}]"


def _display_path(path: Path, root: Path) -> Path:
    """Return *path* relative to *root*, or *path* itself if not under *root*."""
    try:
        return path.relative_to(root)
    except ValueError:
        return path


# ─────────────────────────────────────────────────────────────
# TREE + FILE COLLECTION
# ─────────────────────────────────────────────────────────────


def print_tree(root: Path, prefix: str = "") -> None:
    """Print the directories and included files under *root* as a tree.

    Directories are listed before files, each group sorted case-insensitively.
    Excluded names are skipped, and unreadable directories are silently
    omitted.

    Args:
        root: Directory whose contents are printed (the directory itself is not).
        prefix: Text prepended to every printed line; used for nesting.
    """
    try:
        entries = [
            p
            for p in root.iterdir()
            # Only the entry's own name is checked: its ancestors were either
            # requested explicitly or already vetted one level up.
            if not _is_excluded_name(p.name)
            and (p.is_dir() or should_include_file(p))
        ]
    except OSError:
        return

    entries.sort(key=lambda p: (p.is_file(), p.name.lower()))

    last_index = len(entries) - 1
    for i, entry in enumerate(entries):
        is_last = i == last_index
        print(f"{prefix}{_TREE_LAST if is_last else _TREE_BRANCH}{entry.name}")

        # Do not recurse through symlinks: collect_files() does not follow
        # them either, and a symlink cycle would otherwise never terminate.
        if entry.is_dir() and not entry.is_symlink():
            print_tree(entry, prefix + (_TREE_BLANK if is_last else _TREE_PIPE))


def collect_files(root: Path) -> list[Path]:
    """Return the sorted list of included files found anywhere under *root*.

    Excluded directories are pruned during the walk, so their contents are
    never visited. Exclusion is decided by names below *root* only.
    """
    files: list[Path] = []
    for dirpath, dirnames, filenames in os.walk(root):
        # In-place assignment is what tells os.walk not to descend.
        dirnames[:] = [d for d in dirnames if not _is_excluded_name(d)]
        base = Path(dirpath)
        for name in filenames:
            candidate = base / name
            if not _is_excluded_name(name) and should_include_file(candidate):
                files.append(candidate)
    return sorted(files)


def _resolve_inputs(input_paths: list[Path]) -> list[Path]:
    """Resolve *input_paths*, warning about missing ones and dropping repeats."""
    resolved: list[Path] = []
    for p in input_paths:
        rp = p.resolve()
        if not rp.exists():
            print(f"Warning: {p} does not exist, skipping", file=sys.stderr)
            continue
        if rp not in resolved:
            resolved.append(rp)
    return resolved


def _common_root(paths: list[Path]) -> Path:
    """Return the deepest directory that is an ancestor of (or equal to) all *paths*."""
    common: list[str] = []
    for components in zip(*(p.parts for p in paths)):
        if len(set(components)) != 1:
            break
        common.append(components[0])
    root = Path(*common) if common else Path("/")
    # A lone file argument yields the file itself; display relative to its parent.
    return root if root.is_dir() else root.parent


def _print_input_tree(root: Path, paths: list[Path]) -> None:
    """Print *root* followed by one tree per input path."""
    print(root)
    last_index = len(paths) - 1
    for i, p in enumerate(paths):
        is_last = i == last_index
        print(f"{_TREE_LAST if is_last else _TREE_BRANCH}{_display_path(p, root)}")
        if p.is_dir():
            print_tree(p, prefix=_TREE_BLANK if is_last else _TREE_PIPE)


def _gather_files(paths: list[Path], skip: Path) -> list[Path]:
    """Return every file to concatenate, in input order and without repeats.

    Explicit file arguments are taken as-is (regardless of suffix). Directory
    arguments are scanned with collect_files(); *skip* (the output file) is
    left out of scans so a previous run's output is not fed back in.
    """
    seen: set[Path] = set()
    ordered: list[Path] = []
    for p in paths:
        if p.is_file():
            candidates = [p]
        else:
            candidates = [f for f in collect_files(p) if f != skip]
        for f in candidates:
            if f not in seen:
                seen.add(f)
                ordered.append(f)
    return ordered


# ─────────────────────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────────────────────


def parse_args(argv: Optional[list[str]] = None) -> argparse.Namespace:
    """Parse command-line arguments.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Namespace with ``paths`` (list of Path, possibly empty), ``output``
        (Path) and ``no_tree`` (bool).
    """
    parser = argparse.ArgumentParser(
        description="Concatenate source files from specified paths into a single file.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python scripts/concat_files.py                # default dirs (packages/, crates/)
  python scripts/concat_files.py src/           # single directory
  python scripts/concat_files.py src/ lib/      # multiple directories
  python scripts/concat_files.py -o out.txt .   # custom output file
""",
    )
    parser.add_argument(
        "paths",
        nargs="*",
        type=Path,
        default=None,
        help="Files or directories to include (default: packages/ and crates/)",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        default=Path("concat_output.txt"),
        help="Output file (default: concat_output.txt)",
    )
    parser.add_argument(
        "--no-tree",
        action="store_true",
        help="Skip printing the file tree",
    )
    return parser.parse_args(argv)


def main(argv: Optional[list[str]] = None) -> None:
    """Run the tool: resolve inputs, optionally print a tree, write the output.

    Exits with status 1 when no input path exists or no file matches.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = parse_args(argv)

    # Use default prefixes if no paths provided
    input_paths = args.paths if args.paths else [Path(p) for p in SEARCH_PREFIXES]

    paths = _resolve_inputs(input_paths)
    if not paths:
        print("Error: No valid paths provided", file=sys.stderr)
        sys.exit(1)

    # Common root is only used to shorten the paths shown in tree and banners.
    root = _common_root(paths)

    if not args.no_tree:
        _print_input_tree(root, paths)

    out_path = args.output.resolve()
    files = _gather_files(paths, skip=out_path)
    if not files:
        print("No files found matching criteria", file=sys.stderr)
        sys.exit(1)

    # Flat [banner, body, banner, body, ...] list; JOIN_WITH goes between all.
    output: list[str] = []
    for file in files:
        output.append(banner(str(_display_path(file, root))))
        output.append(read_file(file))

    out_path.write_text(JOIN_WITH.join(output), encoding=ENCODING)

    file_count = len(files)
    print(f"\n✓ Wrote {file_count} file{'s' if file_count != 1 else ''} to {out_path}")


if __name__ == "__main__":
    main()