lint-locale-dependence.py
#!/usr/bin/env python3
# Copyright (c) 2018-present The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
# whereas no such call is made in bitcoind.
#
# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
# specified by the user's LC_ALL (or LC_*) environment variable as the new
# C locale.
#
# In contrast, bitcoind does not opt in to localization -- no call to
# setlocale(LC_ALL, "") is made and the environment variables LC_* are
# thus ignored.
#
# This results in situations where bitcoind is guaranteed to be running
# with the classic locale ("C") whereas the locale of bitcoin-qt will vary
# depending on the user's environment variables.
#
# An example: Assuming the environment variable LC_ALL=de_DE then the
# call std::to_string(1.23) will return "1.230000" in bitcoind but
# "1,230000" in bitcoin-qt.
#
# From the Qt documentation:
# "On Unix/Linux Qt is configured to use the system locale settings by default.
# This can cause a conflict when using POSIX functions, for instance, when
# converting between data types such as floats and strings, since the notation
# may differ between locales. To get around this problem, call the POSIX function
# setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
# or QCoreApplication to reset the locale that is used for number formatting to
# "C"-locale."
#
# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
# https://stackoverflow.com/a/34878283 for more details.
import re
import sys

from subprocess import check_output, CalledProcessError


# Regular expressions matched against `git grep` output lines
# ("<path>:<source line>"). A line matching any of them is never reported.
KNOWN_VIOLATIONS = [
    "src/dbwrapper.cpp:.*vsnprintf",
    "src/span.h:.*printf",
    "src/test/fuzz/locale.cpp:.*setlocale",
    "src/test/util_tests.cpp:.*strtoll",
    "src/util/syserror.cpp:.*strerror",  # Outside this function use `SysErrorString`
]

# Paths handed to `git grep` as ":(exclude)" pathspecs so they are not searched.
REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
    "src/crypto/ctaes/",
    "src/ipc/libmultiprocess/",
    "src/leveldb/",
    "src/secp256k1/",
    "src/minisketch/",
    "src/tinyformat.h",
]

LOCALE_DEPENDENT_FUNCTIONS = [
    "alphasort",    # LC_COLLATE (via strcoll)
    "asctime",      # LC_TIME (directly)
    "asprintf",     # (via vasprintf)
    "atof",         # LC_NUMERIC (via strtod)
    "atoi",         # LC_NUMERIC (via strtol)
    "atol",         # LC_NUMERIC (via strtol)
    "atoll",        # (via strtoll)
    "atoq",
    "btowc",        # LC_CTYPE (directly)
    "ctime",        # (via asctime or localtime)
    "dprintf",      # (via vdprintf)
    "fgetwc",
    "fgetws",
    "fold_case",    # boost::locale::fold_case
    "fprintf",      # (via vfprintf)
    "fputwc",
    "fputws",
    "fscanf",       # (via __vfscanf)
    "fwprintf",     # (via __vfwprintf)
    "getdate",      # via __getdate_r => isspace // __localtime_r
    "getwc",
    "getwchar",
    "is_digit",     # boost::algorithm::is_digit
    "is_space",     # boost::algorithm::is_space
    "isalnum",      # LC_CTYPE
    "isalpha",      # LC_CTYPE
    "isblank",      # LC_CTYPE
    "iscntrl",      # LC_CTYPE
    "isctype",      # LC_CTYPE
    "isdigit",      # LC_CTYPE
    "isgraph",      # LC_CTYPE
    "islower",      # LC_CTYPE
    "isprint",      # LC_CTYPE
    "ispunct",      # LC_CTYPE
    "isspace",      # LC_CTYPE
    "isupper",      # LC_CTYPE
    "iswalnum",     # LC_CTYPE
    "iswalpha",     # LC_CTYPE
    "iswblank",     # LC_CTYPE
    "iswcntrl",     # LC_CTYPE
    "iswctype",     # LC_CTYPE
    "iswdigit",     # LC_CTYPE
    "iswgraph",     # LC_CTYPE
    "iswlower",     # LC_CTYPE
    "iswprint",     # LC_CTYPE
    "iswpunct",     # LC_CTYPE
    "iswspace",     # LC_CTYPE
    "iswupper",     # LC_CTYPE
    "iswxdigit",    # LC_CTYPE
    "isxdigit",     # LC_CTYPE
    "localeconv",   # LC_NUMERIC + LC_MONETARY
    "mblen",        # LC_CTYPE
    "mbrlen",
    "mbrtowc",
    "mbsinit",
    "mbsnrtowcs",
    "mbsrtowcs",
    "mbstowcs",     # LC_CTYPE
    "mbtowc",       # LC_CTYPE
    "mktime",
    "normalize",    # boost::locale::normalize
    "printf",       # LC_NUMERIC
    "putwc",
    "putwchar",
    "scanf",        # LC_NUMERIC
    "setlocale",
    "snprintf",
    "sprintf",
    "sscanf",
    "std::locale::global",
    "std::to_string",
    "stod",
    "stof",
    "stoi",
    "stol",
    "stold",
    "stoll",
    "stoul",
    "stoull",
    "strcasecmp",
    "strcasestr",
    "strcoll",      # LC_COLLATE
    "strerror",
    "strfmon",
    "strftime",     # LC_TIME
    "strncasecmp",
    "strptime",
    "strtod",       # LC_NUMERIC
    "strtof",
    "strtoimax",
    "strtol",       # LC_NUMERIC
    "strtold",
    "strtoll",
    "strtoq",
    "strtoul",      # LC_NUMERIC
    "strtoull",
    "strtoumax",
    "strtouq",
    "strxfrm",      # LC_COLLATE
    "swprintf",
    "to_lower",     # boost::locale::to_lower
    "to_title",     # boost::locale::to_title
    "to_upper",     # boost::locale::to_upper
    "tolower",      # LC_CTYPE
    "toupper",      # LC_CTYPE
    "towctrans",
    "towlower",     # LC_CTYPE
    "towupper",     # LC_CTYPE
    "trim",         # boost::algorithm::trim
    "trim_left",    # boost::algorithm::trim_left
    "trim_right",   # boost::algorithm::trim_right
    "ungetwc",
    "vasprintf",
    "vdprintf",
    "versionsort",
    "vfprintf",
    "vfscanf",
    "vfwprintf",
    "vprintf",
    "vscanf",
    "vsnprintf",
    "vsprintf",
    "vsscanf",
    "vswprintf",
    "vwprintf",
    "wcrtomb",
    "wcscasecmp",
    "wcscoll",      # LC_COLLATE
    "wcsftime",     # LC_TIME
    "wcsncasecmp",
    "wcsnrtombs",
    "wcsrtombs",
    "wcstod",       # LC_NUMERIC
    "wcstof",
    "wcstoimax",
    "wcstol",       # LC_NUMERIC
    "wcstold",
    "wcstoll",
    "wcstombs",     # LC_CTYPE
    "wcstoul",      # LC_NUMERIC
    "wcstoull",
    "wcstoumax",
    "wcswidth",
    "wcsxfrm",      # LC_COLLATE
    "wctob",
    "wctomb",       # LC_CTYPE
    "wctrans",
    "wctype",
    "wcwidth",
    "wprintf",
]

# A function name only counts as a use when the character before it is none of
# these: identifier characters, backslash, backtick, quotes or angle brackets.
# This keeps e.g. `my_atoi(` from being reported as `atoi(`. The same fragment
# is used both in the `git grep` extended regexp and in Python's `re`, so it is
# deliberately not a raw string (the backslash must stay a literal class member).
NOT_PRECEDED_BY = "[^a-zA-Z0-9_\\`'\"<>]"

# Optional "_r" / "_s" suffix (e.g. `strerror_r`) followed by the opening
# parenthesis of the call.
CALL_SUFFIX = "(_r|_s)?\\("


def find_locale_dependent_function_uses():
    """Return the `git grep` output lines that look like locale dependent calls.

    Searches the tracked `*.cpp` and `*.h` files, minus the paths listed in
    REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS, for any name in
    LOCALE_DEPENDENT_FUNCTIONS followed by an opening parenthesis.

    Returns:
        A list of "<path>:<source line>" strings; empty when `git grep` exits
        with status 1.

    Raises:
        CalledProcessError: if `git grep` exits with a status greater than 1.
    """
    alternation = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
    exclude_args = [":(exclude)" + excl for excl in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS]
    git_grep_command = [
        "git", "grep", "--extended-regexp",
        NOT_PRECEDED_BY + "(" + alternation + ")" + CALL_SUFFIX,
        "--", "*.cpp", "*.h",
    ] + exclude_args

    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale of whoever runs the linter; undecodable bytes are replaced
        # instead of aborting the whole run.
        return check_output(
            git_grep_command, text=True, encoding="utf-8", errors="replace"
        ).splitlines()
    except CalledProcessError as e:
        # Exit status 1 is treated as "nothing found"; anything higher is
        # propagated to the caller.
        if e.returncode > 1:
            raise
        return []


def group_violations_by_function(git_grep_output, known_violations=None, functions=None):
    """Group reportable `git grep` lines by the locale dependent function used.

    A line is reported for a function when it contains a call to that function
    (optionally with a `_r`/`_s` suffix), is not a `//` comment line of a
    `.c`/`.cpp`/`.h` file mentioning the function, and does not match any
    known-violation pattern.

    Args:
        git_grep_output: iterable of "<path>:<source line>" strings.
        known_violations: regular expressions of lines to ignore; defaults to
            KNOWN_VIOLATIONS. An empty list ignores nothing.
        functions: function names to look for; defaults to
            LOCALE_DEPENDENT_FUNCTIONS.

    Returns:
        A list of `(function, matching_lines)` tuples in the order of
        `functions`, containing only functions with at least one match.
    """
    if known_violations is None:
        known_violations = KNOWN_VIOLATIONS
    if functions is None:
        functions = LOCALE_DEPENDENT_FUNCTIONS

    candidates = list(git_grep_output)
    # Guard against an empty ignore list: "|".join([]) is the empty pattern,
    # which matches every line and would silently suppress all findings.
    if known_violations:
        ignored = re.compile("|".join(known_violations))
        # The ignore list does not depend on the function, so apply it once.
        candidates = [line for line in candidates if not ignored.search(line)]

    grouped = []
    for function in functions:
        call = re.compile(NOT_PRECEDED_BY + function + CALL_SUFFIX)
        commented_out = re.compile("\\.(c|cpp|h):\\s*//.*" + function)
        matches = [line for line in candidates
                   if call.search(line) and not commented_out.search(line)]
        if matches:
            grouped.append((function, matches))
    return grouped


def main():
    """Report locale dependent function uses and exit with 1 if any were found."""
    violations = group_violations_by_function(find_locale_dependent_function_uses())

    for function, matches in violations:
        print(f"The locale dependent function {function}(...) appears to be used:")
        for match in matches:
            print(match)
        print("")

    if violations:
        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale-dependent functions if possible.\n")
        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")

    sys.exit(1 if violations else 0)


if __name__ == "__main__":
    main()