lint-python-utf8-encoding.py
#!/usr/bin/env python3
#
# Copyright (c) 2018-2022 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
# Make sure we explicitly open all text files using UTF-8 (or ASCII) encoding to
# avoid potential issues on the BSDs where the locale is not always set.

import sys
import re

from subprocess import check_output, CalledProcessError

EXCLUDED_DIRS = ["src/crc32c/", "src/secp256k1/"]

# Matches an explicit ASCII/UTF-8 "encoding=" keyword argument. The "." on
# either side stands for the quote character around the codec name.
EXPLICIT_ENCODING_RE = re.compile(r"encoding=.(ascii|utf8|utf-8).")

# Matches a file-open line that needs no complaint: it either names an
# ASCII/UTF-8 encoding, or its second argument is **kwargs or a quoted mode
# string containing "b" (binary mode, where no text encoding applies).
FILEOPEN_ALLOWED_RE = re.compile(r"encoding=.(ascii|utf8|utf-8).|open\([^,]*, (\*\*kwargs|['\"][^'\"]*b[^'\"]*['\"])")


def get_exclude_args():
    """Return one git ``:(exclude)`` pathspec argument per EXCLUDED_DIRS entry."""
    return [":(exclude)" + excluded_dir for excluded_dir in EXCLUDED_DIRS]


def check_fileopens():
    """Return the ``git grep`` hits for file opens without an explicit encoding.

    Greps every ``*.py`` file outside EXCLUDED_DIRS and drops the hits that
    FILEOPEN_ALLOWED_RE accepts.

    Returns:
        A list of raw ``git grep`` output lines, empty when nothing is wrong.

    Raises:
        CalledProcessError: if ``git grep`` exits with a status above 1.
    """
    fileopens = []

    try:
        # NOTE: keep this call on one line. The line contains the grep pattern
        # itself, and only the encoding argument on the same line stops this
        # linter from reporting it (assuming this script is in the grepped tree).
        fileopens = check_output(["git", "grep", r" open(", "--", "*.py"] + get_exclude_args(), text=True, encoding="utf8").splitlines()
    except CalledProcessError as e:
        # Status 1 is how git grep reports "no matches"; that is tolerated and
        # leaves the hit list empty. Anything higher is a real failure.
        if e.returncode > 1:
            raise

    return [fileopen for fileopen in fileopens if not FILEOPEN_ALLOWED_RE.search(fileopen)]


def check_checked_outputs():
    """Return the ``git grep`` hits for text-mode check_output calls without an explicit encoding.

    Only hits that contain ``text=True`` are considered; of those, the ones
    without an ASCII/UTF-8 ``encoding=`` argument are returned.

    Returns:
        A list of raw ``git grep`` output lines, empty when nothing is wrong.

    Raises:
        CalledProcessError: if ``git grep`` exits with a status above 1.
    """
    checked_outputs = []

    try:
        checked_outputs = check_output(["git", "grep", "check_output(", "--", "*.py"] + get_exclude_args(), text=True, encoding="utf8").splitlines()
    except CalledProcessError as e:
        # Status 1 is how git grep reports "no matches"; that is tolerated and
        # leaves the hit list empty. Anything higher is a real failure.
        if e.returncode > 1:
            raise

    return [
        checked_output
        for checked_output in checked_outputs
        if "text=True" in checked_output and not EXPLICIT_ENCODING_RE.search(checked_output)
    ]


def main():
    """Run both checks, print every offending line and exit with 1 if any was found, else 0."""
    exit_code = 0

    nonexplicit_utf8_fileopens = check_fileopens()
    if nonexplicit_utf8_fileopens:
        print("Python's open(...) seems to be used to open text files without explicitly specifying encoding='utf8':\n")
        for fileopen in nonexplicit_utf8_fileopens:
            print(fileopen)
        exit_code = 1

    nonexplicit_utf8_checked_outputs = check_checked_outputs()
    if nonexplicit_utf8_checked_outputs:
        # Visually separate the two reports when both are printed.
        if nonexplicit_utf8_fileopens:
            print("\n")
        print("Python's check_output(...) seems to be used to get program outputs without explicitly specifying encoding='utf8':\n")
        for checked_output in nonexplicit_utf8_checked_outputs:
            print(checked_output)
        exit_code = 1

    sys.exit(exit_code)


if __name__ == "__main__":
    main()