lint-locale-dependence.py
#!/usr/bin/env python3
# Copyright (c) 2018-2022 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
# whereas no such call is made in bitcoind.
#
# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
# specified by the user's LC_ALL (or LC_*) environment variable as the new
# C locale.
#
# In contrast, bitcoind does not opt in to localization -- no call to
# setlocale(LC_ALL, "") is made and the environment variables LC_* are
# thus ignored.
#
# This results in situations where bitcoind is guaranteed to be running
# with the classic locale ("C") whereas the locale of bitcoin-qt will vary
# depending on the user's environment variables.
#
# An example: Assuming the environment variable LC_ALL=de_DE then the
# call std::to_string(1.23) will return "1.230000" in bitcoind but
# "1,230000" in bitcoin-qt.
#
# From the Qt documentation:
# "On Unix/Linux Qt is configured to use the system locale settings by default.
#  This can cause a conflict when using POSIX functions, for instance, when
#  converting between data types such as floats and strings, since the notation
#  may differ between locales. To get around this problem, call the POSIX function
#  setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
#  or QCoreApplication to reset the locale that is used for number formatting to
#  "C"-locale."
#
# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
# https://stackoverflow.com/a/34878283 for more details.
import re
import sys

from subprocess import check_output, CalledProcessError


# Regular expressions searched for in each "path:content" line of the
# `git grep` output. A line matching any of them is never reported.
KNOWN_VIOLATIONS = [
    "src/dbwrapper.cpp:.*vsnprintf",
    "src/test/fuzz/locale.cpp:.*setlocale",
    "src/test/util_tests.cpp:.*strtoll",
    "src/wallet/bdb.cpp:.*DbEnv::strerror",  # False positive
    "src/util/syserror.cpp:.*strerror",  # Outside this function use `SysErrorString`
]

# Paths handed to `git grep` as ":(exclude)" pathspecs, so files below them
# are not searched at all.
REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
    "src/crypto/ctaes/",
    "src/leveldb/",
    "src/secp256k1/",
    "src/minisketch/",
    "src/tinyformat.h",
]

# Function names to look for. An optional "_r" or "_s" suffix on any of them
# is matched as well (see the patterns built below).
LOCALE_DEPENDENT_FUNCTIONS = [
    "alphasort",    # LC_COLLATE (via strcoll)
    "asctime",      # LC_TIME (directly)
    "asprintf",     # (via vasprintf)
    "atof",         # LC_NUMERIC (via strtod)
    "atoi",         # LC_NUMERIC (via strtol)
    "atol",         # LC_NUMERIC (via strtol)
    "atoll",        # (via strtoll)
    "atoq",
    "btowc",        # LC_CTYPE (directly)
    "ctime",        # (via asctime or localtime)
    "dprintf",      # (via vdprintf)
    "fgetwc",
    "fgetws",
    "fold_case",    # boost::locale::fold_case
    "fprintf",      # (via vfprintf)
    "fputwc",
    "fputws",
    "fscanf",       # (via __vfscanf)
    "fwprintf",     # (via __vfwprintf)
    "getdate",      # via __getdate_r => isspace // __localtime_r
    "getwc",
    "getwchar",
    "is_digit",     # boost::algorithm::is_digit
    "is_space",     # boost::algorithm::is_space
    "isalnum",      # LC_CTYPE
    "isalpha",      # LC_CTYPE
    "isblank",      # LC_CTYPE
    "iscntrl",      # LC_CTYPE
    "isctype",      # LC_CTYPE
    "isdigit",      # LC_CTYPE
    "isgraph",      # LC_CTYPE
    "islower",      # LC_CTYPE
    "isprint",      # LC_CTYPE
    "ispunct",      # LC_CTYPE
    "isspace",      # LC_CTYPE
    "isupper",      # LC_CTYPE
    "iswalnum",     # LC_CTYPE
    "iswalpha",     # LC_CTYPE
    "iswblank",     # LC_CTYPE
    "iswcntrl",     # LC_CTYPE
    "iswctype",     # LC_CTYPE
    "iswdigit",     # LC_CTYPE
    "iswgraph",     # LC_CTYPE
    "iswlower",     # LC_CTYPE
    "iswprint",     # LC_CTYPE
    "iswpunct",     # LC_CTYPE
    "iswspace",     # LC_CTYPE
    "iswupper",     # LC_CTYPE
    "iswxdigit",    # LC_CTYPE
    "isxdigit",     # LC_CTYPE
    "localeconv",   # LC_NUMERIC + LC_MONETARY
    "mblen",        # LC_CTYPE
    "mbrlen",
    "mbrtowc",
    "mbsinit",
    "mbsnrtowcs",
    "mbsrtowcs",
    "mbstowcs",     # LC_CTYPE
    "mbtowc",       # LC_CTYPE
    "mktime",
    "normalize",    # boost::locale::normalize
    "printf",       # LC_NUMERIC
    "putwc",
    "putwchar",
    "scanf",        # LC_NUMERIC
    "setlocale",
    "snprintf",
    "sprintf",
    "sscanf",
    "std::locale::global",
    "std::to_string",
    "stod",
    "stof",
    "stoi",
    "stol",
    "stold",
    "stoll",
    "stoul",
    "stoull",
    "strcasecmp",
    "strcasestr",
    "strcoll",      # LC_COLLATE
    "strerror",
    "strfmon",
    "strftime",     # LC_TIME
    "strncasecmp",
    "strptime",
    "strtod",       # LC_NUMERIC
    "strtof",
    "strtoimax",
    "strtol",       # LC_NUMERIC
    "strtold",
    "strtoll",
    "strtoq",
    "strtoul",      # LC_NUMERIC
    "strtoull",
    "strtoumax",
    "strtouq",
    "strxfrm",      # LC_COLLATE
    "swprintf",
    "to_lower",     # boost::locale::to_lower
    "to_title",     # boost::locale::to_title
    "to_upper",     # boost::locale::to_upper
    "tolower",      # LC_CTYPE
    "toupper",      # LC_CTYPE
    "towctrans",
    "towlower",     # LC_CTYPE
    "towupper",     # LC_CTYPE
    "trim",         # boost::algorithm::trim
    "trim_left",    # boost::algorithm::trim_left
    "trim_right",   # boost::algorithm::trim_right
    "ungetwc",
    "vasprintf",
    "vdprintf",
    "versionsort",
    "vfprintf",
    "vfscanf",
    "vfwprintf",
    "vprintf",
    "vscanf",
    "vsnprintf",
    "vsprintf",
    "vsscanf",
    "vswprintf",
    "vwprintf",
    "wcrtomb",
    "wcscasecmp",
    "wcscoll",      # LC_COLLATE
    "wcsftime",     # LC_TIME
    "wcsncasecmp",
    "wcsnrtombs",
    "wcsrtombs",
    "wcstod",       # LC_NUMERIC
    "wcstof",
    "wcstoimax",
    "wcstol",       # LC_NUMERIC
    "wcstold",
    "wcstoll",
    "wcstombs",     # LC_CTYPE
    "wcstoul",      # LC_NUMERIC
    "wcstoull",
    "wcstoumax",
    "wcswidth",
    "wcsxfrm",      # LC_COLLATE
    "wctob",
    "wctomb",       # LC_CTYPE
    "wctrans",
    "wctype",
    "wcwidth",
    "wprintf",
]

# Exactly one character that is not a letter, digit, underscore, backtick,
# quote or angle bracket. It is required on BOTH sides of a function name, so
# a name sitting at the very start or very end of a grep line is not matched.
_BOUNDARY = "[^a-zA-Z0-9_\\`'\"<>]"

# Optional suffix accepted directly after a listed function name.
_SUFFIX = "(_r|_s)?"


def find_locale_dependent_function_uses():
    """Return the `git grep` output lines that mention a listed function.

    Runs `git grep -E` over the "*.cpp" and "*.h" pathspecs, excluding every
    entry of REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS, with one pattern that
    alternates over all of LOCALE_DEPENDENT_FUNCTIONS.

    Returns:
        The output split into lines. An empty list when `git grep` exits
        with status 1, which this script treats as "nothing found".

    Raises:
        CalledProcessError: when `git grep` exits with a status above 1.
    """
    alternation = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
    pattern = _BOUNDARY + "(" + alternation + ")" + _SUFFIX + _BOUNDARY
    exclude_args = [":(exclude)" + path for path in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS]
    git_grep_command = ["git", "grep", "-E", pattern, "--", "*.cpp", "*.h"] + exclude_args

    try:
        return check_output(git_grep_command, text=True, encoding="utf8").splitlines()
    except CalledProcessError as e:
        if e.returncode > 1:
            # Bare raise keeps the original traceback intact.
            raise
        return []


def find_violations(git_grep_output, known_violations=None):
    """Group grep lines by the locale dependent function they appear to use.

    A line is reported for a function when the function name (optionally
    followed by "_r" or "_s") occurs between two boundary characters, unless
    either of the following holds:

    * the line matches one of the known-violation regular expressions, or
    * the text after the ".c:", ".cpp:" or ".h:" path prefix starts (after
      optional whitespace) with `//`, `*`, `/*` or `"` and the function name
      occurs somewhere after that marker.

    Args:
        git_grep_output: iterable of "path:content" lines.
        known_violations: regular expressions for tolerated lines; defaults
            to KNOWN_VIOLATIONS. An empty list disables this filter.

    Returns:
        A list of (function_name, matching_lines) tuples, ordered like
        LOCALE_DEPENDENT_FUNCTIONS and containing only functions with at
        least one reported line.
    """
    if known_violations is None:
        known_violations = KNOWN_VIOLATIONS

    # Joining an EMPTY list would give the empty regex, which matches every
    # line and would silently hide all findings, so filter only when there
    # is something to filter with. This filter does not depend on the
    # function being checked, so it is applied once up front.
    if known_violations:
        ignored = re.compile("|".join(known_violations))
        candidates = [line for line in git_grep_output if not ignored.search(line)]
    else:
        candidates = list(git_grep_output)

    violations = []
    for function in LOCALE_DEPENDENT_FUNCTIONS:
        # Escape so that a name is always matched literally.
        name = re.escape(function)
        use = re.compile(_BOUNDARY + name + _SUFFIX + _BOUNDARY)
        in_comment_or_string = re.compile("\\.(c|cpp|h):\\s*(//|\\*|/\\*|\").*" + name)
        matches = [line for line in candidates
                   if use.search(line) and not in_comment_or_string.search(line)]
        if matches:
            violations.append((function, matches))
    return violations


def main():
    """Print every apparent locale dependent call and exit with the result.

    Exits with status 1 when at least one use is reported, otherwise 0.
    """
    violations = find_violations(find_locale_dependent_function_uses())

    for function, matches in violations:
        print(f"The locale dependent function {function}(...) appears to be used:")
        for match in matches:
            print(match)
        print("")

    if violations:
        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale-dependent functions if possible.\n")
        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")

    sys.exit(1 if violations else 0)


if __name__ == "__main__":
    main()