/ scripts / concat_files.py
concat_files.py
  1  #!/usr/bin/env python3
  2  """Concatenate source files from specified paths into a single file.
  3  
  4  Run:
  5      python scripts/concat_files.py                # default dirs: packages/, crates/
  6      python scripts/concat_files.py src/           # single directory
  7      python scripts/concat_files.py src/ lib/      # multiple directories
  8      python scripts/concat_files.py -o out.txt .   # custom output file
  9  """
 10  
 11  import argparse
 12  import sys
 13  from pathlib import Path
 14  
 15  # ─────────────────────────────────────────────────────────────
 16  # CONFIGURATION
 17  # ─────────────────────────────────────────────────────────────
 18  
 19  INCLUDE_EXTS = {
 20      ".rs",
 21      ".md",
 22      ".toml",
 23  }
 24  
 25  # Default directories to search when no paths provided
 26  SEARCH_PREFIXES = [
 27      "packages",
 28      "crates",
 29      # "docs"
 30  ]
 31  
 32  EXCLUDE = {
 33      ".git",
 34      "__pycache__",
 35      ".ruff_cache",
 36      ".pytest_cache",
 37      ".mypy_cache",
 38      ".egg-info",
 39      ".venv",
 40      "dist",
 41      "build",
 42      "out",
 43      "htmlcov",
 44      "coverage",
 45      "node_modules",
 46      ".next",
 47      ".cache",
 48      "target",
 49  }
 50  
 51  ENCODING = "utf-8"
 52  
 53  BANNER_CHAR = "─"
 54  BANNER_WIDTH = 160
 55  JOIN_WITH = "\n\n" + BANNER_CHAR * BANNER_WIDTH + "\n"
 56  
 57  # ─────────────────────────────────────────────────────────────
 58  # HELPERS
 59  # ─────────────────────────────────────────────────────────────
 60  
 61  
 62  def is_excluded_path(path: Path) -> bool:
 63      return any(part in EXCLUDE for part in path.parts)
 64  
 65  
 66  def should_include_file(path: Path) -> bool:
 67      return path.is_file() and path.suffix in INCLUDE_EXTS
 68  
 69  
 70  def banner(title: str) -> str:
 71      pad = max(BANNER_WIDTH - len(title) - 2, 0)
 72      left = pad // 2
 73      right = pad - left
 74      return f"{BANNER_CHAR * left} {title} {BANNER_CHAR * right}\n"
 75  
 76  
 77  def read_file(path: Path) -> str:
 78      try:
 79          return path.read_text(encoding=ENCODING)
 80      except Exception as e:
 81          return f"[ERROR READING FILE: {e}]"
 82  
 83  
 84  # ─────────────────────────────────────────────────────────────
 85  # TREE + FILE COLLECTION
 86  # ─────────────────────────────────────────────────────────────
 87  
 88  
 89  def print_tree(root: Path, prefix: str = "") -> None:
 90      try:
 91          entries = [
 92              p
 93              for p in root.iterdir()
 94              if not is_excluded_path(p) and (p.is_dir() or should_include_file(p))
 95          ]
 96      except PermissionError:
 97          return
 98  
 99      entries.sort(key=lambda p: (p.is_file(), p.name.lower()))
100  
101      for i, entry in enumerate(entries):
102          is_last = i == len(entries) - 1
103          connector = "└── " if is_last else "├── "
104          print(f"{prefix}{connector}{entry.name}")
105  
106          if entry.is_dir():
107              extension = "    " if is_last else "│   "
108              print_tree(entry, prefix + extension)
109  
110  
111  def collect_files(root: Path) -> list[Path]:
112      files: list[Path] = []
113      for path in root.rglob("*"):
114          if is_excluded_path(path):
115              continue
116          if should_include_file(path):
117              files.append(path)
118      return sorted(files)
119  
120  
121  # ─────────────────────────────────────────────────────────────
122  # MAIN
123  # ─────────────────────────────────────────────────────────────
124  
125  
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse command-line arguments for the concatenation script.

    Args:
        argv: Argument strings to parse (without the program name). When
            None, argparse reads them from ``sys.argv``.

    Returns:
        Namespace with ``paths`` (list of Path, empty when none were given),
        ``output`` (Path) and ``no_tree`` (bool).
    """
    parser = argparse.ArgumentParser(
        description="Concatenate source files from specified paths into a single file.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    python scripts/concat_files.py                # default dirs: packages/, crates/
    python scripts/concat_files.py src/           # single directory
    python scripts/concat_files.py src/ lib/      # multiple directories
    python scripts/concat_files.py -o out.txt .   # custom output file
""",
    )
    parser.add_argument(
        "paths",
        nargs="*",
        type=Path,
        default=None,
        help="Files or directories to include (default: packages/ and crates/)",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        default=Path("concat_output.txt"),
        help="Output file (default: concat_output.txt)",
    )
    parser.add_argument(
        "--no-tree",
        action="store_true",
        help="Skip printing the file tree",
    )
    return parser.parse_args(argv)
158  
159  
def _common_root(paths: list[Path]) -> Path:
    """Return the longest leading path shared by every entry of *paths*."""
    if len(paths) == 1:
        only = paths[0]
        return only if only.is_dir() else only.parent

    shared: list[str] = []
    # Walk the components level by level until the paths diverge.
    for level in zip(*(p.parts for p in paths)):
        if len(set(level)) != 1:
            break
        shared.append(level[0])
    return Path(*shared) if shared else Path("/")


def _display_path(path: Path, root: Path) -> Path:
    """Return *path* relative to *root*, or *path* unchanged if not below it."""
    try:
        return path.relative_to(root)
    except ValueError:
        return path


def main() -> None:
    """Run the script: resolve inputs, print the tree, write the output file.

    Input paths come from the command line, falling back to
    ``SEARCH_PREFIXES``. Missing paths are skipped with a warning. Each file
    is written once even when the inputs overlap, and the output file itself
    is never picked up while scanning a directory.

    Exits with status 1 when no input path exists or no file matches.
    """
    args = parse_args()

    # Use default prefixes if no paths provided
    input_paths = args.paths if args.paths else [Path(p) for p in SEARCH_PREFIXES]

    # Resolve paths, dropping missing ones and exact duplicates
    paths: list[Path] = []
    for p in input_paths:
        rp = p.resolve()
        if not rp.exists():
            print(f"Warning: {p} does not exist, skipping", file=sys.stderr)
            continue
        if rp not in paths:
            paths.append(rp)

    if not paths:
        print("Error: No valid paths provided", file=sys.stderr)
        sys.exit(1)

    root = _common_root(paths)
    out_path = args.output.resolve()

    # Print tree (optional)
    if not args.no_tree:
        print(root)
        for i, p in enumerate(paths):
            is_last = i == len(paths) - 1
            connector = "└── " if is_last else "├── "
            print(f"{connector}{_display_path(p, root)}")
            if p.is_dir():
                print_tree(p, prefix="    " if is_last else "│   ")

    # Collect files in input order, emitting each one only once
    files: list[Path] = []
    seen: set[Path] = set()
    for p in paths:
        if p.is_file():
            candidates = [p]
        else:
            # A previous output inside a scanned directory must not be
            # folded back into the new output.
            candidates = [f for f in collect_files(p) if f != out_path]
        for f in candidates:
            if f not in seen:
                seen.add(f)
                files.append(f)

    if not files:
        print("No files found matching criteria", file=sys.stderr)
        sys.exit(1)

    # Each file contributes two chunks: its banner, then its contents.
    output: list[str] = []
    for f in files:
        output.append(banner(str(_display_path(f, root))))
        output.append(read_file(f))

    out_path.write_text(JOIN_WITH.join(output), encoding=ENCODING)

    file_count = len(files)
    print(f"\n✓ Wrote {file_count} file{'s' if file_count != 1 else ''} to {out_path}")
238  
239  
240  if __name__ == "__main__":
241      main()