lint-files.py
#!/usr/bin/env python3
# Copyright (c) 2021-2022 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.

"""
This checks that all files in the repository have correct filenames and permissions
"""

import os
import re
import sys
from subprocess import CalledProcessError, check_output
from typing import Optional, NoReturn

CMD_TOP_LEVEL = ["git", "rev-parse", "--show-toplevel"]
CMD_ALL_FILES = ["git", "ls-files", "-z", "--full-name", "--stage"]
CMD_SHEBANG_FILES = ["git", "grep", "--full-name", "--line-number", "-I", "^#!"]

ALL_SOURCE_FILENAMES_REGEXP = r"^.*\.(cpp|h|py|sh)$"
ALLOWED_FILENAME_REGEXP = "^[a-zA-Z0-9/_.@][a-zA-Z0-9/_.@-]*$"
ALLOWED_SOURCE_FILENAME_REGEXP = "^[a-z0-9_./-]+$"
ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP = (
    "^src/(secp256k1/|minisketch/|test/fuzz/FuzzedDataProvider.h)"
)
ALLOWED_PERMISSION_NON_EXECUTABLES = 0o644
ALLOWED_PERMISSION_EXECUTABLES = 0o755
ALLOWED_EXECUTABLE_SHEBANG = {
    "py": [b"#!/usr/bin/env python3"],
    "sh": [b"#!/usr/bin/env bash", b"#!/bin/sh"],
}

# One hit of CMD_SHEBANG_FILES looks like "<path>:<line number>:#!...". The
# grep pattern is anchored with "^", so the matched content always starts with
# "#!"; anchoring on that keeps a ":1:" inside the content from being mistaken
# for the line-number field.
_GREP_HIT_REGEX = re.compile(r"^(?P<path>.*?):(?P<lineno>\d+):#!")

# Marker of a python file that is meant to be executed directly.
_PY_MAIN_GUARD_REGEX = re.compile("""if __name__ == ['"]__main__['"]:""")


class FileMeta(object):
    """Path and permission bits of one entry of `git ls-files --stage`."""

    def __init__(self, file_spec: str):
        """Parse a `git ls-files --stage` output line.

        The line has the form "<mode> <object> <stage><TAB><path>", e.g.
        "100755 5a150d5f8031fcd75e80a4dd9843afa33655f579 0<TAB>ci/test/00_setup_env.sh".

        Raises ValueError if the line contains no tab separator.
        """
        # Split on the first tab only: with `-z` the path is emitted verbatim,
        # so any further tab belongs to the path itself.
        meta, self.file_path = file_spec.split('\t', 1)
        mode = meta.split()[0]
        # The octal file permission of the file. Internally, git only
        # keeps an 'executable' bit, so for regular files this will always be
        # 0o644 or 0o755. Other entry modes (e.g. 120000) mask down to 0.
        self.permissions = int(mode, 8) & 0o7777
        # We don't currently care about the other fields

    @property
    def extension(self) -> Optional[str]:
        """
        Returns the file extension for a given filename string.
        eg:
        'ci/lint_run_all.sh' -> 'sh'
        'ci/retry/retry' -> None
        'contrib/devtools/split-debug.sh.in' -> 'in'
        """
        # Return a real None (not the string "None") when there is no extension.
        return os.path.splitext(self.file_path)[1].strip(".") or None

    @property
    def full_extension(self) -> Optional[str]:
        """
        Returns the full file extension for a given filename string.
        eg:
        'ci/lint_run_all.sh' -> 'sh'
        'ci/retry/retry' -> None
        'contrib/devtools/split-debug.sh.in' -> 'sh.in'
        """
        # Only look at the last path component so that a dot in a directory
        # name (e.g. '.github/foo.bash') is not taken as the extension start.
        _, separator, extension = os.path.basename(self.file_path).partition(os.extsep)
        return extension if separator else None


def get_git_file_metadata() -> dict[str, FileMeta]:
    """
    Return a dictionary mapping the name of all files in the repository to git tree metadata.
    """
    files_raw = check_output(CMD_ALL_FILES).decode("utf8").rstrip("\0").split("\0")
    files = {}
    for file_spec in files_raw:
        # An index without entries produces a single empty record; skip it.
        if not file_spec:
            continue
        meta = FileMeta(file_spec)
        files[meta.file_path] = meta
    return files


def check_all_filenames(files) -> int:
    """
    Checks every file in the repository against an allowed regexp to make sure only lowercase or uppercase
    alphanumerics (a-zA-Z0-9), underscores (_), hyphens (-), at (@) and dots (.) are used in repository filenames.

    Only the keys (file paths) of `files` are inspected. Returns the number of offending filenames.
    """
    filename_regex = re.compile(ALLOWED_FILENAME_REGEXP)
    failed_tests = 0
    for filename in files:
        if not filename_regex.match(filename):
            print(
                f"""File {repr(filename)} does not match the allowed filename regexp ('{ALLOWED_FILENAME_REGEXP}')."""
            )
            failed_tests += 1
    return failed_tests


def check_source_filenames(files) -> int:
    """
    Checks only source files (*.cpp, *.h, *.py, *.sh) against a stricter allowed regexp to make sure only lowercase
    alphanumerics (a-z0-9), underscores (_), hyphens (-) and dots (.) are used in source code filenames.

    Additionally there is an exception regexp for directories or files which are excepted from matching this regexp.

    Only the keys (file paths) of `files` are inspected. Returns the number of offending filenames.
    """
    # Source files are selected case-insensitively so that e.g. 'Foo.CPP' is
    # still checked (and rejected) by the lowercase-only regexp below.
    source_regex = re.compile(ALL_SOURCE_FILENAMES_REGEXP, re.IGNORECASE)
    filename_regex = re.compile(ALLOWED_SOURCE_FILENAME_REGEXP)
    filename_exception_regex = re.compile(ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP)
    failed_tests = 0
    for filename in files:
        if not source_regex.match(filename):
            continue
        if filename_regex.match(filename) or filename_exception_regex.match(filename):
            continue
        print(
            f"""File {repr(filename)} does not match the allowed source filename regexp ('{ALLOWED_SOURCE_FILENAME_REGEXP}'), or the exception regexp ({ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP})."""
        )
        failed_tests += 1
    return failed_tests


def check_all_file_permissions(files) -> int:
    """
    Checks all files in the repository match an allowed executable or non-executable file permission octal.

    Additionally checks that for executable files, the file contains a shebang line

    `files` maps file paths (opened relative to the current directory) to FileMeta objects.
    Returns the number of failed checks; an executable file can fail both shebang checks.
    """
    failed_tests = 0
    for filename, file_meta in files.items():
        if file_meta.permissions == ALLOWED_PERMISSION_NON_EXECUTABLES:
            continue

        if file_meta.permissions != ALLOWED_PERMISSION_EXECUTABLES:
            print(
                f"""File "{filename}" has unexpected permission {file_meta.permissions:03o}. Do "chmod {ALLOWED_PERMISSION_NON_EXECUTABLES:03o} {filename}" (if non-executable) or "chmod {ALLOWED_PERMISSION_EXECUTABLES:03o} {filename}" (if executable)."""
            )
            failed_tests += 1
            continue

        with open(filename, "rb") as f:
            shebang = f.readline().rstrip(b"\n")

        # For any file with executable permissions the first line must contain a shebang
        if not shebang.startswith(b"#!"):
            print(
                f"""File "{filename}" has permission {ALLOWED_PERMISSION_EXECUTABLES:03o} (executable) and is thus expected to contain a shebang '#!'. Add shebang or do "chmod {ALLOWED_PERMISSION_NON_EXECUTABLES:03o} {filename}" to make it non-executable."""
            )
            failed_tests += 1

        # For certain file extensions that have been defined, we also check that the shebang conforms to a specific
        # allowable set of shebangs
        allowed_shebangs = ALLOWED_EXECUTABLE_SHEBANG.get(file_meta.extension)
        if allowed_shebangs is not None and shebang not in allowed_shebangs:
            print(
                f"""File "{filename}" is missing expected shebang """
                + " or ".join(x.decode("utf-8") for x in allowed_shebangs)
            )
            failed_tests += 1

    return failed_tests


def _first_line_shebang_paths(grep_output: str) -> list[str]:
    """Return the paths of all CMD_SHEBANG_FILES hits that are on line 1 of their file."""
    paths = []
    for line in grep_output.split("\n"):
        hit = _GREP_HIT_REGEX.match(line)
        if hit and hit.group("lineno") == "1":
            paths.append(hit.group("path"))
    return paths


def check_shebang_file_permissions(files_meta) -> int:
    """
    Checks every file that contains a shebang line to ensure it has an executable permission

    `files_meta` maps file paths to FileMeta objects. Returns the number of offending files.
    """
    try:
        grep_output = check_output(CMD_SHEBANG_FILES).decode("utf8")
    except CalledProcessError as e:
        # Like grep, `git grep` exits with status 1 when nothing matched;
        # that just means there is nothing to check. Anything else is an error.
        if e.returncode != 1:
            raise
        grep_output = ""

    # The git grep command we use returns files which contain a shebang on any line within the file
    # so we need to filter the list to only files with the shebang on the first line
    filenames = _first_line_shebang_paths(grep_output)

    failed_tests = 0
    for filename in filenames:
        file_meta = files_meta[filename]
        if file_meta.permissions == ALLOWED_PERMISSION_EXECUTABLES:
            continue

        # These file types are typically expected to be sourced and not executed directly
        if file_meta.full_extension in ["bash", "init", "openrc", "sh.in"]:
            continue

        # *.py files which don't contain an `if __name__ == '__main__'` are not expected to be executed directly
        if file_meta.extension == "py":
            with open(filename, "r", encoding="utf8") as f:
                file_data = f.read()
            if not _PY_MAIN_GUARD_REGEX.search(file_data):
                continue

        print(
            f"""File "{filename}" contains a shebang line, but has the file permission {file_meta.permissions:03o} instead of the expected executable permission {ALLOWED_PERMISSION_EXECUTABLES:03o}. Do "chmod {ALLOWED_PERMISSION_EXECUTABLES:03o} {filename}" (or remove the shebang line)."""
        )
        failed_tests += 1
    return failed_tests


def main() -> NoReturn:
    """Run every check from the repository root and exit with 1 if any of them failed, 0 otherwise."""
    # All paths reported by git are relative to the top-level directory.
    root_dir = check_output(CMD_TOP_LEVEL).decode("utf8").strip()
    os.chdir(root_dir)

    files = get_git_file_metadata()

    failed_tests = 0
    failed_tests += check_all_filenames(files)
    failed_tests += check_source_filenames(files)
    failed_tests += check_all_file_permissions(files)
    failed_tests += check_shebang_file_permissions(files)

    if failed_tests:
        print(
            f"ERROR: There were {failed_tests} failed tests in the lint-files.py lint test. Please resolve the above errors."
        )
        sys.exit(1)
    else:
        sys.exit(0)


if __name__ == "__main__":
    main()