binary_extensions.py
"""Extensions of binary files that text-oriented operations should skip.

Files with these extensions cannot be compared meaningfully as text and
tend to be large. Ported from free-code src/constants/files.ts.
"""

# Every entry is lowercase and carries its leading dot, which is the form
# has_binary_extension() produces before testing membership.
BINARY_EXTENSIONS = frozenset(
    (
        # Images
        ".png", ".jpg", ".jpeg", ".gif", ".bmp",
        ".ico", ".webp", ".tiff", ".tif",
        # Videos
        ".mp4", ".mov", ".avi", ".mkv", ".webm",
        ".wmv", ".flv", ".m4v", ".mpeg", ".mpg",
        # Audio
        ".mp3", ".wav", ".ogg", ".flac", ".aac",
        ".m4a", ".wma", ".aiff", ".opus",
        # Archives
        ".zip", ".tar", ".gz", ".bz2", ".7z",
        ".rar", ".xz", ".z", ".tgz", ".iso",
        # Executables/binaries
        ".exe", ".dll", ".so", ".dylib", ".bin",
        ".o", ".a", ".obj", ".lib",
        ".app", ".msi", ".deb", ".rpm",
        # Documents (.pdf is deliberately left out: it is treated as
        # text-based, and agents may want to inspect it)
        ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
        ".odt", ".ods", ".odp",
        # Fonts
        ".ttf", ".otf", ".woff", ".woff2", ".eot",
        # Bytecode / VM artifacts
        ".pyc", ".pyo", ".class", ".jar", ".war",
        ".ear", ".node", ".wasm", ".rlib",
        # Database files
        ".sqlite", ".sqlite3", ".db", ".mdb", ".idx",
        # Design / 3D
        ".psd", ".ai", ".eps", ".sketch", ".fig",
        ".xd", ".blend", ".3ds", ".max",
        # Flash
        ".swf", ".fla",
        # Lock/profiling data
        ".lockb", ".dat", ".data",
    )
)


def has_binary_extension(path: str) -> bool:
    """Return True when *path* ends in one of BINARY_EXTENSIONS.

    The candidate extension is everything from the last "." in the whole
    path string to its end, compared case-insensitively. A path with no
    "." at all never matches. This is a pure string check; the filesystem
    is never touched.
    """
    _, dot, tail = path.rpartition(".")
    if not dot:
        # rpartition yields an empty separator when "." does not occur.
        return False
    return f".{tail}".lower() in BINARY_EXTENSIONS