/ test / lint / lint-files.py
lint-files.py
  1  #!/usr/bin/env python3
  2  # Copyright (c) 2021-present The Bitcoin Core developers
  3  # Distributed under the MIT software license, see the accompanying
  4  # file COPYING or http://www.opensource.org/licenses/mit-license.php.
  5  
  6  """
  7  This checks that all files in the repository have correct filenames and permissions
  8  """
  9  
 10  import os
 11  import re
 12  import sys
 13  from subprocess import check_output
 14  from typing import Optional, NoReturn
 15  
# Prints the absolute path of the repository's top-level directory; main() changes the
# working directory to it before running any check.
CMD_TOP_LEVEL = ["git", "rev-parse", "--show-toplevel"]
# Lists every tracked file as NUL-terminated (-z) records that carry the file mode in
# front of the path (--stage); see FileMeta for the parsing of a single record.
CMD_ALL_FILES = ["git", "ls-files", "-z", "--full-name", "--stage"]
# Searches non-binary (-I) files for lines that start with "#!" and prefixes every hit
# with "<path>:<line number>:".
CMD_SHEBANG_FILES = ["git", "grep", "--full-name", "--line-number", "-I", "^#!"]

# Selects the paths treated as source files; check_source_filenames applies it case-insensitively.
ALL_SOURCE_FILENAMES_REGEXP = r"^.*\.(cpp|h|py|sh)$"
# Characters permitted in any repository path. A hyphen is allowed everywhere except
# as the very first character.
ALLOWED_FILENAME_REGEXP = "^[a-zA-Z0-9/_.@][a-zA-Z0-9/_.@-]*$"
# Stricter set for source files: lowercase alphanumerics, "_", ".", "/" and "-" only.
ALLOWED_SOURCE_FILENAME_REGEXP = "^[a-z0-9_./-]+$"
# Path prefixes that are exempt from the stricter source filename rule.
ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP = (
    "^src/(secp256k1/|minisketch/|test/fuzz/FuzzedDataProvider.h)"
)
# The only two permission values check_all_file_permissions accepts.
ALLOWED_PERMISSION_NON_EXECUTABLES = 0o644
ALLOWED_PERMISSION_EXECUTABLES = 0o755
# Maps a file extension to the exact first lines accepted for executable files
# with that extension.
ALLOWED_EXECUTABLE_SHEBANG = {
    # https://github.com/dylanaraps/pure-bash-bible#shebang:
    # `#!/bin/bash` assumes it is always installed to /bin/ which can cause issues;
    # `#!/usr/bin/env bash` searches the user's PATH to find the bash binary.
    "py": [b"#!/usr/bin/env python3"],
    "sh": [b"#!/usr/bin/env bash", b"#!/bin/sh"],
}
 35  
 36  
 37  class FileMeta(object):
 38      def __init__(self, file_spec: str):
 39          '''Parse a `git ls files --stage` output line.'''
 40          # 100755 5a150d5f8031fcd75e80a4dd9843afa33655f579 0       ci/test/00_setup_env.sh
 41          meta, self.file_path = file_spec.split('\t', 2)
 42          meta = meta.split()
 43          # The octal file permission of the file. Internally, git only
 44          # keeps an 'executable' bit, so this will always be 0o644 or 0o755.
 45          self.permissions = int(meta[0], 8) & 0o7777
 46          # We don't currently care about the other fields
 47  
 48      @property
 49      def extension(self) -> Optional[str]:
 50          """
 51          Returns the file extension for a given filename string.
 52          eg:
 53          'ci/lint_run_all.sh' -> 'sh'
 54          'ci/retry/retry' -> None
 55          'contrib/devtools/split-debug.sh.in' -> 'in'
 56          """
 57          return str(os.path.splitext(self.file_path)[1].strip(".") or None)
 58  
 59      @property
 60      def full_extension(self) -> Optional[str]:
 61          """
 62          Returns the full file extension for a given filename string.
 63          eg:
 64          'ci/lint_run_all.sh' -> 'sh'
 65          'ci/retry/retry' -> None
 66          'contrib/devtools/split-debug.sh.in' -> 'sh.in'
 67          """
 68          filename_parts = self.file_path.split(os.extsep, 1)
 69          try:
 70              return filename_parts[1]
 71          except IndexError:
 72              return None
 73  
 74  
 75  def get_git_file_metadata() -> dict[str, FileMeta]:
 76      '''
 77      Return a dictionary mapping the name of all files in the repository to git tree metadata.
 78      '''
 79      files_raw = check_output(CMD_ALL_FILES, text=True).rstrip("\0").split("\0")
 80      files = {}
 81      for file_spec in files_raw:
 82          meta = FileMeta(file_spec)
 83          files[meta.file_path] = meta
 84      return files
 85  
 86  def check_all_filenames(files) -> int:
 87      """
 88      Checks every file in the repository against an allowed regexp to make sure only lowercase or uppercase
 89      alphanumerics (a-zA-Z0-9), underscores (_), hyphens (-), at (@) and dots (.) are used in repository filenames.
 90      """
 91      filenames = files.keys()
 92      filename_regex = re.compile(ALLOWED_FILENAME_REGEXP)
 93      failed_tests = 0
 94      for filename in filenames:
 95          if not filename_regex.match(filename):
 96              print(
 97                  f"""File {repr(filename)} does not match the allowed filename regexp ('{ALLOWED_FILENAME_REGEXP}')."""
 98              )
 99              failed_tests += 1
100      return failed_tests
101  
102  
def check_source_filenames(files) -> int:
    """
    Checks only source files (*.cpp, *.h, *.py, *.sh) against a stricter allowed regexp to make sure only lowercase
    alphanumerics (a-z0-9), underscores (_), hyphens (-) and dots (.) are used in source code filenames.

    Additionally there is an exception regexp for directories or files which are excepted from matching this regexp.

    `files` is a mapping whose keys are the file paths to check. One message is printed per offending
    path and the number of offending paths is returned.
    """
    source_regex = re.compile(ALL_SOURCE_FILENAMES_REGEXP, re.IGNORECASE)
    filename_regex = re.compile(ALLOWED_SOURCE_FILENAME_REGEXP)
    filename_exception_regex = re.compile(ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP)
    failed_tests = 0
    for filename in files:
        if not source_regex.match(filename):
            continue
        # fullmatch() instead of match(): the `$` anchor also matches right before a trailing
        # newline, so match() would accept a source filename that ends in "\n".
        if filename_regex.fullmatch(filename):
            continue
        # The exception regexp is a path prefix, so a plain match() is what is wanted here.
        if filename_exception_regex.match(filename):
            continue
        print(
            f"""File {filename!r} does not match the allowed source filename regexp ('{ALLOWED_SOURCE_FILENAME_REGEXP}'), or the exception regexp ({ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP})."""
        )
        failed_tests += 1
    return failed_tests
121  
122  
def check_all_file_permissions(files) -> int:
    """
    Verify that each file carries one of the two allowed permission values.

    Files with the executable permission are opened and their first line is inspected: it has to
    start with '#!', and for extensions listed in ALLOWED_EXECUTABLE_SHEBANG it has to be one of the
    shebangs listed there. Every violation prints a message; the number of violations is returned.
    """
    violations = 0
    for path, meta in files.items():
        mode = meta.permissions

        if mode != ALLOWED_PERMISSION_EXECUTABLES:
            # Non-executable files are fine as they are; any other mode is reported.
            if mode != ALLOWED_PERMISSION_NON_EXECUTABLES:
                print(
                    f"""File "{path}" has unexpected permission {mode:03o}. Do "chmod {ALLOWED_PERMISSION_NON_EXECUTABLES:03o} {path}" (if non-executable) or "chmod {ALLOWED_PERMISSION_EXECUTABLES:03o} {path}" (if executable)."""
                )
                violations += 1
            continue

        with open(path, "rb") as handle:
            first_line = handle.readline().rstrip(b"\n")

        # An executable file has to begin with a shebang.
        if not first_line.startswith(b"#!"):
            print(
                f"""File "{path}" has permission {ALLOWED_PERMISSION_EXECUTABLES:03o} (executable) and is thus expected to contain a shebang '#!'. Add shebang or do "chmod {ALLOWED_PERMISSION_NON_EXECUTABLES:03o} {path}" to make it non-executable."""
            )
            violations += 1

        # Extensions with a configured shebang list must use exactly one of those shebangs.
        accepted = ALLOWED_EXECUTABLE_SHEBANG.get(meta.extension)
        if accepted is not None and first_line not in accepted:
            expected = " or ".join(candidate.decode("utf-8") for candidate in accepted)
            print(f"""File "{path}" is missing expected shebang """ + expected)
            violations += 1

    return violations
166  
167  
def check_shebang_file_permissions(files_meta) -> int:
    """
    Checks every file that contains a shebang line to ensure it has an executable permission

    `files_meta` maps file paths to objects providing `permissions`, `extension` and
    `full_extension`. Files found by CMD_SHEBANG_FILES are looked up in it (KeyError if missing).
    One message is printed per offending file and the number of offending files is returned.
    """
    grep_lines = check_output(CMD_SHEBANG_FILES, text=True).strip().split("\n")

    # The git grep command we use returns files which contain a shebang on any line within the file
    # so we need to filter the list to only files with the shebang on the first line.
    # Each hit looks like "<path>:<line number>:#!...". Parsing path and line number explicitly
    # (instead of searching for ":1:" anywhere) keeps a ":1:" inside the matched text or the
    # path from being mistaken for the line-number field.
    grep_line_regex = re.compile(r"^(.+?):(\d+):#!")
    filenames = []
    for grep_line in grep_lines:
        parsed = grep_line_regex.match(grep_line)
        if parsed and parsed.group(2) == "1":
            filenames.append(parsed.group(1))

    failed_tests = 0
    for filename in filenames:
        file_meta = files_meta[filename]
        if file_meta.permissions == ALLOWED_PERMISSION_EXECUTABLES:
            continue

        # These file types are typically expected to be sourced and not executed directly
        if file_meta.full_extension in ["bash", "init", "openrc", "sh.in"]:
            continue

        # *.py files which don't contain an `if __name__ == '__main__'` are not expected to be executed directly
        if file_meta.extension == "py":
            # Decode explicitly as UTF-8 so the result does not depend on the locale.
            with open(filename, "r", encoding="utf8") as f:
                file_data = f.read()
            if not re.search("""if __name__ == ['"]__main__['"]:""", file_data):
                continue

        print(
            f"""File "{filename}" contains a shebang line, but has the file permission {file_meta.permissions:03o} instead of the expected executable permission {ALLOWED_PERMISSION_EXECUTABLES:03o}. Do "chmod {ALLOWED_PERMISSION_EXECUTABLES:03o} {filename}" (or remove the shebang line)."""
        )
        failed_tests += 1
    return failed_tests
198  
199  
def main() -> NoReturn:
    """
    Run all file lint checks from the repository root and exit.

    Exits with status 1 (after printing a summary) if any check reported a failure,
    otherwise with status 0.
    """
    # Work from the top-level directory of the repository.
    repo_root = check_output(CMD_TOP_LEVEL, text=True).strip()
    os.chdir(repo_root)

    tracked_files = get_git_file_metadata()

    # The checks run in this order; each one returns its own failure count.
    checks = (
        check_all_filenames,
        check_source_filenames,
        check_all_file_permissions,
        check_shebang_file_permissions,
    )
    failed_tests = sum(check(tracked_files) for check in checks)

    if not failed_tests:
        sys.exit(0)

    print(
        f"ERROR: There were {failed_tests} failed tests in the lint-files.py lint test. Please resolve the above errors."
    )
    sys.exit(1)
219  
220  
# Run the lint only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()