/ test / lint / lint-python-utf8-encoding.py
lint-python-utf8-encoding.py
 1  #!/usr/bin/env python3
 2  #
 3  # Copyright (c) 2018-2022 The Bitcoin Core developers
 4  # Distributed under the MIT software license, see the accompanying
 5  # file COPYING or http://www.opensource.org/licenses/mit-license.php.
 6  #
 7  # Make sure we explicitly open all text files using UTF-8 (or ASCII) encoding to
 8  # avoid potential issues on the BSDs where the locale is not always set.
 9  
10  import sys
11  import re
12  
13  from subprocess import check_output, CalledProcessError
14  
# Path prefixes that are left out of the git grep searches in this script.
EXCLUDED_DIRS = ["src/crc32c/", "src/secp256k1/"]


def get_exclude_args():
    """Return one ``:(exclude)<path>`` argument per entry of EXCLUDED_DIRS.

    The strings are appended after the ``*.py`` pattern of the git grep
    commands in this script so that the listed directories are not searched.
    """
    return [f":(exclude){excluded_path}" for excluded_path in EXCLUDED_DIRS]
20  
21  
def check_fileopens():
    """Return the git grep hits for file opens that lack an accepted encoding.

    Runs git grep over ``*.py`` (minus the excluded directories) for the
    builtin file-opening call. A hit is accepted, and dropped from the
    result, when the line either

    * names an ``ascii``, ``utf8`` or ``utf-8`` encoding, or
    * passes ``**kwargs`` or a quoted string containing ``b`` as the
      second argument.

    Returns:
        list[str]: the remaining git grep output lines; empty when there is
        nothing to report.

    Raises:
        CalledProcessError: if git grep ends with any status other than
            0 (matches found) or 1 (handled here as "no matches").
    """
    try:
        # Keep the pattern and the encoding argument on the same physical line:
        # this script is a *.py file too, so it is scanned by its own search.
        grep_output = check_output(["git", "grep", r" open(", "--", "*.py"] + get_exclude_args(), text=True, encoding="utf8")
    except CalledProcessError as e:
        # Only status 1 is taken to mean "nothing matched". Everything else is
        # a failure, including the negative codes subprocess reports when the
        # child was terminated by a signal; those must not pass as a clean run.
        if e.returncode != 1:
            raise
        return []

    accepted = re.compile(r"encoding=.(ascii|utf8|utf-8).|open\([^,]*, (\*\*kwargs|['\"][^'\"]*b[^'\"]*['\"])")
    return [hit for hit in grep_output.splitlines() if not accepted.search(hit)]
34  
35  
def check_checked_outputs():
    """Return the git grep hits for text-mode subprocess captures without an accepted encoding.

    Runs git grep over ``*.py`` (minus the excluded directories) for calls to
    the subprocess output helper. A hit is reported only when the same line
    requests text mode and does not name an ``ascii``, ``utf8`` or ``utf-8``
    encoding.

    Returns:
        list[str]: the offending git grep output lines; empty when there is
        nothing to report.

    Raises:
        CalledProcessError: if git grep ends with any status other than
            0 (matches found) or 1 (handled here as "no matches").
    """
    try:
        grep_output = check_output(["git", "grep", "check_output(", "--", "*.py"] + get_exclude_args(), text=True, encoding="utf8")
    except CalledProcessError as e:
        # Only status 1 is taken to mean "nothing matched". Everything else is
        # a failure, including the negative codes subprocess reports when the
        # child was terminated by a signal; those must not pass as a clean run.
        if e.returncode != 1:
            raise
        return []

    text_mode = re.compile(r"text=True")
    accepted_encoding = re.compile(r"encoding=.(ascii|utf8|utf-8).")
    return [
        hit
        for hit in grep_output.splitlines()
        if text_mode.search(hit) and not accepted_encoding.search(hit)
    ]
48  
49  
def main():
    """Run both checks, print every offending line and exit.

    The process exits with status 1 when either check reported at least one
    line, and with status 0 otherwise.
    """
    found_problem = False

    bad_opens = check_fileopens()
    if bad_opens:
        found_problem = True
        print("Python's open(...) seems to be used to open text files without explicitly specifying encoding='utf8':\n")
        print(*bad_opens, sep="\n")

    bad_outputs = check_checked_outputs()
    if bad_outputs:
        found_problem = True
        if bad_opens:
            # Blank lines separating the two reports.
            print("\n")
        print("Python's check_output(...) seems to be used to get program outputs without explicitly specifying encoding='utf8':\n")
        print(*bad_outputs, sep="\n")

    sys.exit(1 if found_problem else 0)


if __name__ == "__main__":
    main()