/ tools / binary_extensions.py
binary_extensions.py
 1  """Binary file extensions to skip for text-based operations.
 2  
 3  These files can't be meaningfully compared as text and are often large.
 4  Ported from free-code src/constants/files.ts.
 5  """
 6  
 7  BINARY_EXTENSIONS = frozenset({
 8      # Images
 9      ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".webp", ".tiff", ".tif",
10      # Videos
11      ".mp4", ".mov", ".avi", ".mkv", ".webm", ".wmv", ".flv", ".m4v", ".mpeg", ".mpg",
12      # Audio
13      ".mp3", ".wav", ".ogg", ".flac", ".aac", ".m4a", ".wma", ".aiff", ".opus",
14      # Archives
15      ".zip", ".tar", ".gz", ".bz2", ".7z", ".rar", ".xz", ".z", ".tgz", ".iso",
16      # Executables/binaries
17      ".exe", ".dll", ".so", ".dylib", ".bin", ".o", ".a", ".obj", ".lib",
18      ".app", ".msi", ".deb", ".rpm",
19      # Documents (exclude .pdf — text-based, agents may want to inspect)
20      ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
21      ".odt", ".ods", ".odp",
22      # Fonts
23      ".ttf", ".otf", ".woff", ".woff2", ".eot",
24      # Bytecode / VM artifacts
25      ".pyc", ".pyo", ".class", ".jar", ".war", ".ear", ".node", ".wasm", ".rlib",
26      # Database files
27      ".sqlite", ".sqlite3", ".db", ".mdb", ".idx",
28      # Design / 3D
29      ".psd", ".ai", ".eps", ".sketch", ".fig", ".xd", ".blend", ".3ds", ".max",
30      # Flash
31      ".swf", ".fla",
32      # Lock/profiling data
33      ".lockb", ".dat", ".data",
34  })
35  
36  
def has_binary_extension(path: str) -> bool:
    """Return True when *path* ends in a known binary file extension.

    The candidate extension is everything from the last "." in the whole
    path string to its end, compared case-insensitively against
    ``BINARY_EXTENSIONS``. Only the string is examined; the file system is
    never touched.

    Note that a bare dotfile name such as ".db" counts as its own extension
    and therefore matches. A path containing no "." never matches.
    """
    # rpartition yields an empty separator when the path contains no dot.
    _, dot, suffix = path.rpartition(".")
    return bool(dot) and (dot + suffix).lower() in BINARY_EXTENSIONS