lint-locale-dependence.py
#!/usr/bin/env python3
# Copyright (c) 2018-present The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
# whereas no such call is made in bitcoind.
#
# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
# specified by the user's LC_ALL (or LC_*) environment variable as the new
# C locale.
#
# In contrast, bitcoind does not opt in to localization -- no call to
# setlocale(LC_ALL, "") is made and the environment variables LC_* are
# thus ignored.
#
# This results in situations where bitcoind is guaranteed to be running
# with the classic locale ("C") whereas the locale of bitcoin-qt will vary
# depending on the user's environment variables.
#
# An example: Assuming the environment variable LC_ALL=de_DE then the
# call std::to_string(1.23) will return "1.230000" in bitcoind but
# "1,230000" in bitcoin-qt.
#
# From the Qt documentation:
# "On Unix/Linux Qt is configured to use the system locale settings by default.
# This can cause a conflict when using POSIX functions, for instance, when
# converting between data types such as floats and strings, since the notation
# may differ between locales. To get around this problem, call the POSIX function
# setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
# or QCoreApplication to reset the locale that is used for number formatting to
# "C"-locale."
#
# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
# https://stackoverflow.com/a/34878283 for more details.
import re
import sys

from subprocess import check_output, CalledProcessError

from lint_ignore_dirs import SHARED_EXCLUDED_SUBTREES


# Regular expressions matched against `git grep` output lines. Any output line
# matching one of these is treated as an accepted exception and not reported.
KNOWN_VIOLATIONS = [
    "src/dbwrapper.cpp:.*vsnprintf",
    "src/span.h:.*printf",
    "src/test/fuzz/locale.cpp:.*setlocale",
    "src/test/util_tests.cpp:.*strtoll",
    "src/util/syserror.cpp:.*strerror",  # Outside this function use `SysErrorString`
]

# Paths handed to `git grep` as ":(exclude)" pathspecs, i.e. never searched.
REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
    "src/tinyformat.h",
] + SHARED_EXCLUDED_SUBTREES

# Function names to search for. Each name is spliced verbatim into a regular
# expression, so entries must not contain regex metacharacters.
LOCALE_DEPENDENT_FUNCTIONS = [
    "alphasort",    # LC_COLLATE (via strcoll)
    "asctime",      # LC_TIME (directly)
    "asprintf",     # (via vasprintf)
    "atof",         # LC_NUMERIC (via strtod)
    "atoi",         # LC_NUMERIC (via strtol)
    "atol",         # LC_NUMERIC (via strtol)
    "atoll",        # (via strtoll)
    "atoq",
    "btowc",        # LC_CTYPE (directly)
    "ctime",        # (via asctime or localtime)
    "dprintf",      # (via vdprintf)
    "fgetwc",
    "fgetws",
    "fold_case",    # boost::locale::fold_case
    "fprintf",      # (via vfprintf)
    "fputwc",
    "fputws",
    "fscanf",       # (via __vfscanf)
    "fwprintf",     # (via __vfwprintf)
    "getdate",      # via __getdate_r => isspace // __localtime_r
    "getwc",
    "getwchar",
    "is_digit",     # boost::algorithm::is_digit
    "is_space",     # boost::algorithm::is_space
    "isalnum",      # LC_CTYPE
    "isalpha",      # LC_CTYPE
    "isblank",      # LC_CTYPE
    "iscntrl",      # LC_CTYPE
    "isctype",      # LC_CTYPE
    "isdigit",      # LC_CTYPE
    "isgraph",      # LC_CTYPE
    "islower",      # LC_CTYPE
    "isprint",      # LC_CTYPE
    "ispunct",      # LC_CTYPE
    "isspace",      # LC_CTYPE
    "isupper",      # LC_CTYPE
    "iswalnum",     # LC_CTYPE
    "iswalpha",     # LC_CTYPE
    "iswblank",     # LC_CTYPE
    "iswcntrl",     # LC_CTYPE
    "iswctype",     # LC_CTYPE
    "iswdigit",     # LC_CTYPE
    "iswgraph",     # LC_CTYPE
    "iswlower",     # LC_CTYPE
    "iswprint",     # LC_CTYPE
    "iswpunct",     # LC_CTYPE
    "iswspace",     # LC_CTYPE
    "iswupper",     # LC_CTYPE
    "iswxdigit",    # LC_CTYPE
    "isxdigit",     # LC_CTYPE
    "localeconv",   # LC_NUMERIC + LC_MONETARY
    "mblen",        # LC_CTYPE
    "mbrlen",
    "mbrtowc",
    "mbsinit",
    "mbsnrtowcs",
    "mbsrtowcs",
    "mbstowcs",     # LC_CTYPE
    "mbtowc",       # LC_CTYPE
    "mktime",
    "normalize",    # boost::locale::normalize
    "printf",       # LC_NUMERIC
    "putwc",
    "putwchar",
    "scanf",        # LC_NUMERIC
    "setlocale",
    "snprintf",
    "sprintf",
    "sscanf",
    "std::locale::global",
    "std::to_string",
    "stod",
    "stof",
    "stoi",
    "stol",
    "stold",
    "stoll",
    "stoul",
    "stoull",
    "strcasecmp",
    "strcasestr",
    "strcoll",      # LC_COLLATE
    "strerror",
    "strfmon",
    "strftime",     # LC_TIME
    "strncasecmp",
    "strptime",
    "strtod",       # LC_NUMERIC
    "strtof",
    "strtoimax",
    "strtol",       # LC_NUMERIC
    "strtold",
    "strtoll",
    "strtoq",
    "strtoul",      # LC_NUMERIC
    "strtoull",
    "strtoumax",
    "strtouq",
    "strxfrm",      # LC_COLLATE
    "swprintf",
    "to_lower",     # boost::locale::to_lower
    "to_title",     # boost::locale::to_title
    "to_upper",     # boost::locale::to_upper
    "tolower",      # LC_CTYPE
    "toupper",      # LC_CTYPE
    "towctrans",
    "towlower",     # LC_CTYPE
    "towupper",     # LC_CTYPE
    "trim",         # boost::algorithm::trim
    "trim_left",    # boost::algorithm::trim_left
    "trim_right",   # boost::algorithm::trim_right
    "ungetwc",
    "vasprintf",
    "vdprintf",
    "versionsort",
    "vfprintf",
    "vfscanf",
    "vfwprintf",
    "vprintf",
    "vscanf",
    "vsnprintf",
    "vsprintf",
    "vsscanf",
    "vswprintf",
    "vwprintf",
    "wcrtomb",
    "wcscasecmp",
    "wcscoll",      # LC_COLLATE
    "wcsftime",     # LC_TIME
    "wcsncasecmp",
    "wcsnrtombs",
    "wcsrtombs",
    "wcstod",       # LC_NUMERIC
    "wcstof",
    "wcstoimax",
    "wcstol",       # LC_NUMERIC
    "wcstold",
    "wcstoll",
    "wcstombs",     # LC_CTYPE
    "wcstoul",      # LC_NUMERIC
    "wcstoull",
    "wcstoumax",
    "wcswidth",
    "wcsxfrm",      # LC_COLLATE
    "wctob",
    "wctomb",       # LC_CTYPE
    "wctrans",
    "wctype",
    "wcwidth",
    "wprintf",
]

# Regex fragments placed around a function name to recognise a call site.
# They are shared by the `git grep` pre-filter and the per-function re-check
# in main() so that the two patterns cannot drift apart.
# Prefix: one character that cannot be part of an identifier and is not a
# quote, backtick or angle bracket. Suffix: optional "_r"/"_s" variant
# followed by the opening parenthesis of the call.
_CALL_PREFIX = "[^a-zA-Z0-9_\\`'\"<>]"
_CALL_SUFFIX = "(_r|_s)?\\("


def find_locale_dependent_function_uses():
    """Return the `git grep` output lines that look like calls to a listed function.

    Searches tracked ``*.cpp`` and ``*.h`` files, skipping every path in
    REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS, for any name from
    LOCALE_DEPENDENT_FUNCTIONS used as a call.

    Returns:
        A list of raw output lines; empty when `git grep` exits with status 1.

    Raises:
        CalledProcessError: if `git grep` exits with a status greater than 1.
    """
    any_function = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
    exclude_args = [":(exclude)" + excl for excl in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS]
    git_grep_command = [
        "git", "grep", "--extended-regexp",
        _CALL_PREFIX + "(" + any_function + ")" + _CALL_SUFFIX,
        "--", "*.cpp", "*.h",
    ] + exclude_args

    try:
        return check_output(git_grep_command, text=True).splitlines()
    except CalledProcessError as e:
        # Exit status 1 is how grep reports "nothing matched"; anything above
        # that is a real failure and is propagated with its traceback intact.
        if e.returncode > 1:
            raise
        return []


def main():
    """Report uses of locale dependent functions and exit non-zero if any are found.

    Lines matching an entry of KNOWN_VIOLATIONS, and lines whose content starts
    with a ``//`` comment mentioning the function, are not reported.
    Exits with status 1 when at least one use is reported, otherwise 0.
    """
    exit_code = 0

    # The known-violation filter does not depend on the function being
    # checked, so apply it once instead of once per function name.
    known_violation_re = re.compile("|".join(KNOWN_VIOLATIONS))
    candidate_lines = [line for line in find_locale_dependent_function_uses()
                       if not known_violation_re.search(line)]

    for locale_dependent_function in LOCALE_DEPENDENT_FUNCTIONS:
        call_re = re.compile(_CALL_PREFIX + locale_dependent_function + _CALL_SUFFIX)
        # Matches "<file>.c|.cpp|.h:" directly followed by a // comment that
        # mentions the function, i.e. the use is only inside a comment line.
        comment_re = re.compile("\\.(c|cpp|h):\\s*//.*" + locale_dependent_function)
        matches = [line for line in candidate_lines
                   if call_re.search(line) and not comment_re.search(line)]
        if matches:
            print(f"The locale dependent function {locale_dependent_function}(...) appears to be used:")
            for match in matches:
                print(match)
            print("")
            exit_code = 1

    if exit_code == 1:
        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale-dependent functions if possible.\n")
        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")

    sys.exit(exit_code)


if __name__ == "__main__":
    main()