lint-files.py
#!/usr/bin/env python3
# Copyright (c) 2021-present The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.

"""
This checks that all files in the repository have correct filenames and permissions
"""

import os
import re
import sys
from subprocess import CalledProcessError, check_output
from typing import Optional, NoReturn

CMD_TOP_LEVEL = ["git", "rev-parse", "--show-toplevel"]
CMD_ALL_FILES = ["git", "ls-files", "-z", "--full-name", "--stage"]
CMD_SHEBANG_FILES = ["git", "grep", "--full-name", "--line-number", "-I", "^#!"]

ALL_SOURCE_FILENAMES_REGEXP = r"^.*\.(cpp|h|py|sh)$"
ALLOWED_FILENAME_REGEXP = "^[a-zA-Z0-9/_.@][a-zA-Z0-9/_.@-]*$"
ALLOWED_SOURCE_FILENAME_REGEXP = "^[a-z0-9_./-]+$"
ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP = (
    "^src/(secp256k1/|minisketch/|test/fuzz/FuzzedDataProvider.h)"
)
ALLOWED_PERMISSION_NON_EXECUTABLES = 0o644
ALLOWED_PERMISSION_EXECUTABLES = 0o755
ALLOWED_EXECUTABLE_SHEBANG = {
    # https://github.com/dylanaraps/pure-bash-bible#shebang:
    # `#!/bin/bash` assumes it is always installed to /bin/ which can cause issues;
    # `#!/usr/bin/env bash` searches the user's PATH to find the bash binary.
    "py": [b"#!/usr/bin/env python3"],
    "sh": [b"#!/usr/bin/env bash", b"#!/bin/sh"],
}


class FileMeta:
    """Path and permission bits of one file, parsed from `git ls-files --stage`."""

    def __init__(self, file_spec: str):
        """
        Parse a `git ls-files --stage` output record.

        A record looks like (the path is separated from the rest by a TAB):
            100755 5a150d5f8031fcd75e80a4dd9843afa33655f579 0 ci/test/00_setup_env.sh

        Raises ValueError if the record contains no TAB separator.
        """
        # Split on the first TAB only: everything after it is the path, which
        # must be kept intact even if it contains further TAB characters so
        # that the filename checks can report it.
        meta, self.file_path = file_spec.split("\t", 1)
        # The octal file permission of the file. Internally, git only
        # keeps an 'executable' bit, so this will always be 0o644 or 0o755.
        self.permissions = int(meta.split()[0], 8) & 0o7777
        # We don't currently care about the other fields

    @property
    def extension(self) -> Optional[str]:
        """
        Returns the file extension for a given filename string.
        eg:
            'ci/lint_run_all.sh' -> 'sh'
            'ci/retry/retry' -> None
            'contrib/devtools/split-debug.sh.in' -> 'in'
        """
        # Return a real None (not the string "None") when there is no extension.
        return os.path.splitext(self.file_path)[1].strip(".") or None

    @property
    def full_extension(self) -> Optional[str]:
        """
        Returns the full file extension for a given filename string.
        eg:
            'ci/lint_run_all.sh' -> 'sh'
            'ci/retry/retry' -> None
            'contrib/devtools/split-debug.sh.in' -> 'sh.in'
        """
        # Only look at the final path component, so that a dot in a directory
        # name is not mistaken for the start of the extension.
        basename = self.file_path.rsplit("/", 1)[-1]
        filename_parts = basename.split(os.extsep, 1)
        if len(filename_parts) < 2:
            return None
        return filename_parts[1]


def get_git_file_metadata() -> dict[str, FileMeta]:
    """
    Return a dictionary mapping the name of all files in the repository to git tree metadata.
    """
    files_raw = check_output(CMD_ALL_FILES, text=True).rstrip("\0").split("\0")
    files = {}
    for file_spec in files_raw:
        # Empty output splits into a single empty record; there is nothing to parse.
        if not file_spec:
            continue
        meta = FileMeta(file_spec)
        files[meta.file_path] = meta
    return files


def check_all_filenames(files: dict[str, FileMeta]) -> int:
    """
    Checks every file in the repository against an allowed regexp to make sure only lowercase or uppercase
    alphanumerics (a-zA-Z0-9), underscores (_), hyphens (-), at (@) and dots (.) are used in repository filenames.

    Prints one message per offending file and returns the number of failures.
    """
    filename_regex = re.compile(ALLOWED_FILENAME_REGEXP)
    failed_tests = 0
    for filename in files:
        # fullmatch, because `$` alone would also accept a name ending in a newline.
        if not filename_regex.fullmatch(filename):
            print(
                f"""File {repr(filename)} does not match the allowed filename regexp ('{ALLOWED_FILENAME_REGEXP}')."""
            )
            failed_tests += 1
    return failed_tests


def check_source_filenames(files: dict[str, FileMeta]) -> int:
    """
    Checks only source files (*.cpp, *.h, *.py, *.sh) against a stricter allowed regexp to make sure only lowercase
    alphanumerics (a-z0-9), underscores (_), hyphens (-) and dots (.) are used in source code filenames.

    Additionally there is an exception regexp for directories or files which are excepted from matching this regexp.

    Prints one message per offending file and returns the number of failures.
    """
    source_regex = re.compile(ALL_SOURCE_FILENAMES_REGEXP, re.IGNORECASE)
    filename_regex = re.compile(ALLOWED_SOURCE_FILENAME_REGEXP)
    filename_exception_regex = re.compile(ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP)
    failed_tests = 0
    for filename in files:
        if not source_regex.match(filename):
            continue
        # The exception regexp is a path prefix, so it is matched (not
        # fullmatched); the strict regexp must cover the whole name.
        if filename_regex.fullmatch(filename) or filename_exception_regex.match(filename):
            continue
        print(
            f"""File {repr(filename)} does not match the allowed source filename regexp ('{ALLOWED_SOURCE_FILENAME_REGEXP}'), or the exception regexp ({ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP})."""
        )
        failed_tests += 1
    return failed_tests


def check_all_file_permissions(files: dict[str, FileMeta]) -> int:
    """
    Checks all files in the repository match an allowed executable or non-executable file permission octal.

    Additionally checks that for executable files, the file contains a shebang line

    Files are opened relative to the current working directory. Prints one
    message per problem and returns the number of failures.
    """
    failed_tests = 0
    for filename, file_meta in files.items():
        if file_meta.permissions == ALLOWED_PERMISSION_EXECUTABLES:
            with open(filename, "rb") as f:
                shebang = f.readline().rstrip(b"\n")

            # For any file with executable permissions the first line must contain a shebang
            if not shebang.startswith(b"#!"):
                print(
                    f"""File "{filename}" has permission {ALLOWED_PERMISSION_EXECUTABLES:03o} (executable) and is thus expected to contain a shebang '#!'. Add shebang or do "chmod {ALLOWED_PERMISSION_NON_EXECUTABLES:03o} {filename}" to make it non-executable."""
                )
                failed_tests += 1

            # For certain file extensions that have been defined, we also check that the shebang conforms to a specific
            # allowable set of shebangs
            allowed_shebangs = ALLOWED_EXECUTABLE_SHEBANG.get(file_meta.extension)
            if allowed_shebangs is not None and shebang not in allowed_shebangs:
                print(
                    f"""File "{filename}" is missing expected shebang """
                    + " or ".join(x.decode("utf-8") for x in allowed_shebangs)
                )
                failed_tests += 1

        elif file_meta.permissions == ALLOWED_PERMISSION_NON_EXECUTABLES:
            continue
        else:
            print(
                f"""File "{filename}" has unexpected permission {file_meta.permissions:03o}. Do "chmod {ALLOWED_PERMISSION_NON_EXECUTABLES:03o} {filename}" (if non-executable) or "chmod {ALLOWED_PERMISSION_EXECUTABLES:03o} {filename}" (if executable)."""
            )
            failed_tests += 1

    return failed_tests


def check_shebang_file_permissions(files_meta: dict[str, FileMeta]) -> int:
    """
    Checks every file that contains a shebang line to ensure it has an executable permission

    Prints one message per offending file and returns the number of failures.
    """
    try:
        grep_output = check_output(CMD_SHEBANG_FILES, text=True)
    except CalledProcessError as e:
        # `git grep` exits with status 1 when nothing matched; that simply
        # means there are no shebang files. Anything else is a real error.
        if e.returncode != 1:
            raise
        grep_output = ""

    # The git grep command we use returns files which contain a shebang on any line within the file
    # so we need to filter the list to only files with the shebang on the first line.
    # Each output line has the form "<path>:<line number>:<matched text>"; parse the
    # leading prefix so that a ":1:" inside the matched text is not misread.
    grep_line_regex = re.compile(r"^(.*?):(\d+):")
    filenames = []
    for line in grep_output.strip().split("\n"):
        parsed = grep_line_regex.match(line)
        if parsed and parsed.group(2) == "1":
            filenames.append(parsed.group(1))

    failed_tests = 0
    for filename in filenames:
        file_meta = files_meta[filename]
        if file_meta.permissions != ALLOWED_PERMISSION_EXECUTABLES:
            # These file types are typically expected to be sourced and not executed directly
            if file_meta.full_extension in ["bash", "init", "openrc", "sh.in"]:
                continue

            # *.py files which don't contain an `if __name__ == '__main__'` are not expected to be executed directly
            if file_meta.extension == "py":
                # Decode explicitly as UTF-8 rather than depending on the locale.
                with open(filename, "r", encoding="utf-8") as f:
                    file_data = f.read()
                if not re.search("""if __name__ == ['"]__main__['"]:""", file_data):
                    continue

            print(
                f"""File "{filename}" contains a shebang line, but has the file permission {file_meta.permissions:03o} instead of the expected executable permission {ALLOWED_PERMISSION_EXECUTABLES:03o}. Do "chmod {ALLOWED_PERMISSION_EXECUTABLES:03o} {filename}" (or remove the shebang line)."""
            )
            failed_tests += 1
    return failed_tests


def main() -> NoReturn:
    """Run every check from the repository root and exit 1 if any failed, else 0."""
    root_dir = check_output(CMD_TOP_LEVEL, text=True).strip()
    # The checks open files by their repository-relative names.
    os.chdir(root_dir)

    files = get_git_file_metadata()

    failed_tests = 0
    failed_tests += check_all_filenames(files)
    failed_tests += check_source_filenames(files)
    failed_tests += check_all_file_permissions(files)
    failed_tests += check_shebang_file_permissions(files)

    if failed_tests:
        print(
            f"ERROR: There were {failed_tests} failed tests in the lint-files.py lint test. Please resolve the above errors."
        )
        sys.exit(1)
    else:
        sys.exit(0)


if __name__ == "__main__":
    main()