/ test / lint / lint-files.py
lint-files.py
  1  #!/usr/bin/env python3
  2  # Copyright (c) 2021-2022 The Bitcoin Core developers
  3  # Distributed under the MIT software license, see the accompanying
  4  # file COPYING or http://www.opensource.org/licenses/mit-license.php.
  5  
  6  """
  7  This checks that all files in the repository have correct filenames and permissions
  8  """
  9  
 10  import os
 11  import re
 12  import sys
 13  from subprocess import check_output
 14  from typing import Optional, NoReturn
 15  
# Git command printing the path of the repository's top-level directory.
CMD_TOP_LEVEL = ["git", "rev-parse", "--show-toplevel"]
# Git command listing every tracked file together with its mode ("--stage").
# "-z" makes the entries NUL-terminated, which is what the parser splits on.
CMD_ALL_FILES = ["git", "ls-files", "-z", "--full-name", "--stage"]
# Git command listing every line that starts with "#!" in tracked, non-binary ("-I")
# files, each prefixed with its path and line number.
CMD_SHEBANG_FILES = ["git", "grep", "--full-name", "--line-number", "-I", "^#!"]

# Selects the files treated as source files (applied case-insensitively by its user).
ALL_SOURCE_FILENAMES_REGEXP = r"^.*\.(cpp|h|py|sh)$"
# Characters allowed in any tracked path; the first character may not be a hyphen.
ALLOWED_FILENAME_REGEXP = "^[a-zA-Z0-9/_.@][a-zA-Z0-9/_.@-]*$"
# Stricter character set (lowercase only, no '@') required of source file paths.
ALLOWED_SOURCE_FILENAME_REGEXP = "^[a-z0-9_./-]+$"
# Paths matching this prefix pattern are exempt from the stricter source filename rule.
ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP = (
    "^src/(secp256k1/|minisketch/|test/fuzz/FuzzedDataProvider.h)"
)
# The only two permission values a tracked file is allowed to have.
ALLOWED_PERMISSION_NON_EXECUTABLES = 0o644
ALLOWED_PERMISSION_EXECUTABLES = 0o755
# Maps a file extension to the exact first lines accepted for executable files
# with that extension.
ALLOWED_EXECUTABLE_SHEBANG = {
    "py": [b"#!/usr/bin/env python3"],
    "sh": [b"#!/usr/bin/env bash", b"#!/bin/sh"],
}
 32  
 33  
 34  class FileMeta(object):
 35      def __init__(self, file_spec: str):
 36          '''Parse a `git ls files --stage` output line.'''
 37          # 100755 5a150d5f8031fcd75e80a4dd9843afa33655f579 0       ci/test/00_setup_env.sh
 38          meta, self.file_path = file_spec.split('\t', 2)
 39          meta = meta.split()
 40          # The octal file permission of the file. Internally, git only
 41          # keeps an 'executable' bit, so this will always be 0o644 or 0o755.
 42          self.permissions = int(meta[0], 8) & 0o7777
 43          # We don't currently care about the other fields
 44  
 45      @property
 46      def extension(self) -> Optional[str]:
 47          """
 48          Returns the file extension for a given filename string.
 49          eg:
 50          'ci/lint_run_all.sh' -> 'sh'
 51          'ci/retry/retry' -> None
 52          'contrib/devtools/split-debug.sh.in' -> 'in'
 53          """
 54          return str(os.path.splitext(self.file_path)[1].strip(".") or None)
 55  
 56      @property
 57      def full_extension(self) -> Optional[str]:
 58          """
 59          Returns the full file extension for a given filename string.
 60          eg:
 61          'ci/lint_run_all.sh' -> 'sh'
 62          'ci/retry/retry' -> None
 63          'contrib/devtools/split-debug.sh.in' -> 'sh.in'
 64          """
 65          filename_parts = self.file_path.split(os.extsep, 1)
 66          try:
 67              return filename_parts[1]
 68          except IndexError:
 69              return None
 70  
 71  
 72  def get_git_file_metadata() -> dict[str, FileMeta]:
 73      '''
 74      Return a dictionary mapping the name of all files in the repository to git tree metadata.
 75      '''
 76      files_raw = check_output(CMD_ALL_FILES).decode("utf8").rstrip("\0").split("\0")
 77      files = {}
 78      for file_spec in files_raw:
 79          meta = FileMeta(file_spec)
 80          files[meta.file_path] = meta
 81      return files
 82  
 83  def check_all_filenames(files) -> int:
 84      """
 85      Checks every file in the repository against an allowed regexp to make sure only lowercase or uppercase
 86      alphanumerics (a-zA-Z0-9), underscores (_), hyphens (-), at (@) and dots (.) are used in repository filenames.
 87      """
 88      filenames = files.keys()
 89      filename_regex = re.compile(ALLOWED_FILENAME_REGEXP)
 90      failed_tests = 0
 91      for filename in filenames:
 92          if not filename_regex.match(filename):
 93              print(
 94                  f"""File {repr(filename)} does not not match the allowed filename regexp ('{ALLOWED_FILENAME_REGEXP}')."""
 95              )
 96              failed_tests += 1
 97      return failed_tests
 98  
 99  
def check_source_filenames(files) -> int:
    """
    Checks only source files (*.cpp, *.h, *.py, *.sh) against a stricter allowed regexp to make sure only lowercase
    alphanumerics (a-z0-9), underscores (_), hyphens (-) and dots (.) are used in source code filenames.

    Additionally there is an exception regexp for directories or files which are excepted from matching this regexp.

    `files` is a mapping keyed by file path; only the keys are inspected.
    Prints one message per offending file and returns the number of offending files.
    """
    # The source-file selector is case-insensitive so that e.g. 'Foo.CPP' is
    # still recognised as a source file and then rejected by the strict regexp.
    source_regex = re.compile(ALL_SOURCE_FILENAMES_REGEXP, re.IGNORECASE)
    filename_regex = re.compile(ALLOWED_SOURCE_FILENAME_REGEXP)
    filename_exception_regex = re.compile(ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP)
    failed_tests = 0
    for filename in files:
        if not source_regex.match(filename):
            continue
        if filename_regex.match(filename) or filename_exception_regex.match(filename):
            continue
        print(
            f"""File {repr(filename)} does not match the allowed source filename regexp ('{ALLOWED_SOURCE_FILENAME_REGEXP}'), or the exception regexp ({ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP})."""
        )
        failed_tests += 1
    return failed_tests
118  
119  
def check_all_file_permissions(files) -> int:
    """
    Verifies that every file carries one of the two allowed permission values.

    Executable files are additionally required to start with a shebang, and for
    the extensions listed in ALLOWED_EXECUTABLE_SHEBANG the first line must be
    exactly one of the listed shebangs.

    `files` maps each file path to its FileMeta. Prints one message per
    violation and returns the number of violations.
    """
    failed_tests = 0
    for filename, file_meta in files.items():
        # Non-executable files need no further inspection.
        if file_meta.permissions == ALLOWED_PERMISSION_NON_EXECUTABLES:
            continue

        # Anything that is neither of the two allowed values is reported as is.
        if file_meta.permissions != ALLOWED_PERMISSION_EXECUTABLES:
            print(
                f"""File "{filename}" has unexpected permission {file_meta.permissions:03o}. Do "chmod {ALLOWED_PERMISSION_NON_EXECUTABLES:03o} {filename}" (if non-executable) or "chmod {ALLOWED_PERMISSION_EXECUTABLES:03o} {filename}" (if executable)."""
            )
            failed_tests += 1
            continue

        # Executable file: read its first line, without the trailing newline.
        with open(filename, "rb") as handle:
            first_line = handle.readline().rstrip(b"\n")

        # Every executable file has to begin with a shebang.
        if not first_line.startswith(b"#!"):
            print(
                f"""File "{filename}" has permission {ALLOWED_PERMISSION_EXECUTABLES:03o} (executable) and is thus expected to contain a shebang '#!'. Add shebang or do "chmod {ALLOWED_PERMISSION_NON_EXECUTABLES:03o} {filename}" to make it non-executable."""
            )
            failed_tests += 1

        # Extensions with a defined shebang list must use one of those exactly.
        accepted = ALLOWED_EXECUTABLE_SHEBANG.get(file_meta.extension)
        if accepted is not None and first_line not in accepted:
            choices = " or ".join([candidate.decode("utf-8") for candidate in accepted])
            print(f"""File "{filename}" is missing expected shebang """ + choices)
            failed_tests += 1

    return failed_tests
163  
164  
def check_shebang_file_permissions(files_meta) -> int:
    """
    Checks every file that contains a shebang line to ensure it has an executable permission

    `files_meta` maps each file path to its FileMeta. Files that are usually
    sourced rather than executed, and *.py files without a
    `if __name__ == '__main__':` guard, are exempt.

    Prints one message per violation and returns the number of violations.
    """
    grep_lines = check_output(CMD_SHEBANG_FILES).decode("utf8").strip().split("\n")

    # The git grep command we use returns files which contain a shebang on any line within the file
    # so we need to filter the list to only files with the shebang on the first line.
    # Each output line looks like "<path>:<line number>:<matched text>". Parse the
    # leading "<path>:<line number>:" explicitly instead of searching for ":1:"
    # anywhere, so that ":1:" occurring inside the matched text of a later line
    # cannot be mistaken for a first-line match.
    location_regex = re.compile(r"(.+?):(\d+):")
    filenames = []
    for line in grep_lines:
        location = location_regex.match(line)
        if location and location.group(2) == "1":
            filenames.append(location.group(1))

    failed_tests = 0
    for filename in filenames:
        file_meta = files_meta[filename]
        if file_meta.permissions == ALLOWED_PERMISSION_EXECUTABLES:
            continue

        # These file types are typically expected to be sourced and not executed directly
        if file_meta.full_extension in ["bash", "init", "openrc", "sh.in"]:
            continue

        # *.py files which don't contain an `if __name__ == '__main__'` are not expected to be executed directly
        if file_meta.extension == "py":
            with open(filename, "r", encoding="utf8") as f:
                file_data = f.read()
            if not re.search("""if __name__ == ['"]__main__['"]:""", file_data):
                continue

        print(
            f"""File "{filename}" contains a shebang line, but has the file permission {file_meta.permissions:03o} instead of the expected executable permission {ALLOWED_PERMISSION_EXECUTABLES:03o}. Do "chmod {ALLOWED_PERMISSION_EXECUTABLES:03o} {filename}" (or remove the shebang line)."""
        )
        failed_tests += 1
    return failed_tests
195  
196  
def main() -> NoReturn:
    """
    Runs every file lint check from the repository's top-level directory.

    Exits with status 1 (after printing a summary) if any check reported a
    failure, and with status 0 otherwise.
    """
    # The checks open files by their repository-relative path, so work from the top level.
    os.chdir(check_output(CMD_TOP_LEVEL).decode("utf8").strip())

    files = get_git_file_metadata()

    checks = (
        check_all_filenames,
        check_source_filenames,
        check_all_file_permissions,
        check_shebang_file_permissions,
    )
    failed_tests = sum(check(files) for check in checks)

    if not failed_tests:
        sys.exit(0)

    print(
        f"ERROR: There were {failed_tests} failed tests in the lint-files.py lint test. Please resolve the above errors."
    )
    sys.exit(1)
216  
217  
# Run the lint checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()