/ test / lint / lint-locale-dependence.py
lint-locale-dependence.py
  1  #!/usr/bin/env python3
  2  # Copyright (c) 2018-2022 The Bitcoin Core developers
  3  # Distributed under the MIT software license, see the accompanying
  4  # file COPYING or http://www.opensource.org/licenses/mit-license.php.
  5  #
  6  # Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
  7  # opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
  8  # whereas no such call is made in bitcoind.
  9  #
 10  # Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
 11  # specified by the user's LC_ALL (or LC_*) environment variable as the new
 12  # C locale.
 13  #
 14  # In contrast, bitcoind does not opt in to localization -- no call to
 15  # setlocale(LC_ALL, "") is made and the environment variables LC_* are
 16  # thus ignored.
 17  #
 18  # This results in situations where bitcoind is guaranteed to be running
 19  # with the classic locale ("C") whereas the locale of bitcoin-qt will vary
 20  # depending on the user's environment variables.
 21  #
 22  # An example: Assuming the environment variable LC_ALL=de_DE then the
 23  # call std::to_string(1.23) will return "1.230000" in bitcoind but
 24  # "1,230000" in bitcoin-qt.
 25  #
 26  # From the Qt documentation:
 27  # "On Unix/Linux Qt is configured to use the system locale settings by default.
 28  #  This can cause a conflict when using POSIX functions, for instance, when
 29  #  converting between data types such as floats and strings, since the notation
 30  #  may differ between locales. To get around this problem, call the POSIX function
 31  #  setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
 32  #  or QCoreApplication to reset the locale that is used for number formatting to
 33  #  "C"-locale."
 34  #
 35  # See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
 36  # https://stackoverflow.com/a/34878283 for more details.
 37  
 38  import re
 39  import sys
 40  
 41  from subprocess import check_output, CalledProcessError
 42  
 43  
# Ignore list: regular expressions that main() joins with "|" and applies with
# re.search() to each `git grep` output line. A line matching any of them is
# not reported. Every entry below has the form "<file path>:.*<function name>".
KNOWN_VIOLATIONS = [
    "src/dbwrapper.cpp:.*vsnprintf",
    "src/test/fuzz/locale.cpp:.*setlocale",
    "src/test/util_tests.cpp:.*strtoll",
    "src/wallet/bdb.cpp:.*DbEnv::strerror",  # False positive
    "src/util/syserror.cpp:.*strerror",      # Outside this function use `SysErrorString`
]
 51  
# Paths left out of the search: find_locale_dependent_function_uses() prefixes
# each entry with ":(exclude)" and passes it to `git grep` as a pathspec.
REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
    "src/crypto/ctaes/",
    "src/leveldb/",
    "src/secp256k1/",
    "src/minisketch/",
    "src/tinyformat.h",
]
 59  
# Names of the functions this lint looks for. The entries are joined with "|"
# into the `git grep -E` pattern and are also inserted one at a time into the
# per-function regular expressions in main(); they are used as regex text
# without escaping. Both patterns additionally accept an "_r" or "_s" suffix
# after the name. main() reports matches in the order of this list.
# Trailing comments, where present, note the locale category involved or the
# function through which the dependence arises.
LOCALE_DEPENDENT_FUNCTIONS = [
    "alphasort",    # LC_COLLATE (via strcoll)
    "asctime",      # LC_TIME (directly)
    "asprintf",     # (via vasprintf)
    "atof",         # LC_NUMERIC (via strtod)
    "atoi",         # LC_NUMERIC (via strtol)
    "atol",         # LC_NUMERIC (via strtol)
    "atoll",        # (via strtoll)
    "atoq",
    "btowc",        # LC_CTYPE (directly)
    "ctime",        # (via asctime or localtime)
    "dprintf",      # (via vdprintf)
    "fgetwc",
    "fgetws",
    "fold_case",    # boost::locale::fold_case
    "fprintf",      # (via vfprintf)
    "fputwc",
    "fputws",
    "fscanf",       # (via __vfscanf)
    "fwprintf",     # (via __vfwprintf)
    "getdate",      # via __getdate_r => isspace // __localtime_r
    "getwc",
    "getwchar",
    "is_digit",     # boost::algorithm::is_digit
    "is_space",     # boost::algorithm::is_space
    "isalnum",      # LC_CTYPE
    "isalpha",      # LC_CTYPE
    "isblank",      # LC_CTYPE
    "iscntrl",      # LC_CTYPE
    "isctype",      # LC_CTYPE
    "isdigit",      # LC_CTYPE
    "isgraph",      # LC_CTYPE
    "islower",      # LC_CTYPE
    "isprint",      # LC_CTYPE
    "ispunct",      # LC_CTYPE
    "isspace",      # LC_CTYPE
    "isupper",      # LC_CTYPE
    "iswalnum",     # LC_CTYPE
    "iswalpha",     # LC_CTYPE
    "iswblank",     # LC_CTYPE
    "iswcntrl",     # LC_CTYPE
    "iswctype",     # LC_CTYPE
    "iswdigit",     # LC_CTYPE
    "iswgraph",     # LC_CTYPE
    "iswlower",     # LC_CTYPE
    "iswprint",     # LC_CTYPE
    "iswpunct",     # LC_CTYPE
    "iswspace",     # LC_CTYPE
    "iswupper",     # LC_CTYPE
    "iswxdigit",    # LC_CTYPE
    "isxdigit",     # LC_CTYPE
    "localeconv",   # LC_NUMERIC + LC_MONETARY
    "mblen",        # LC_CTYPE
    "mbrlen",
    "mbrtowc",
    "mbsinit",
    "mbsnrtowcs",
    "mbsrtowcs",
    "mbstowcs",     # LC_CTYPE
    "mbtowc",       # LC_CTYPE
    "mktime",
    "normalize",    # boost::locale::normalize
    "printf",       # LC_NUMERIC
    "putwc",
    "putwchar",
    "scanf",        # LC_NUMERIC
    "setlocale",
    "snprintf",
    "sprintf",
    "sscanf",
    "std::locale::global",
    "std::to_string",
    "stod",
    "stof",
    "stoi",
    "stol",
    "stold",
    "stoll",
    "stoul",
    "stoull",
    "strcasecmp",
    "strcasestr",
    "strcoll",      # LC_COLLATE
    "strerror",
    "strfmon",
    "strftime",     # LC_TIME
    "strncasecmp",
    "strptime",
    "strtod",       # LC_NUMERIC
    "strtof",
    "strtoimax",
    "strtol",       # LC_NUMERIC
    "strtold",
    "strtoll",
    "strtoq",
    "strtoul",      # LC_NUMERIC
    "strtoull",
    "strtoumax",
    "strtouq",
    "strxfrm",      # LC_COLLATE
    "swprintf",
    "to_lower",     # boost::locale::to_lower
    "to_title",     # boost::locale::to_title
    "to_upper",     # boost::locale::to_upper
    "tolower",      # LC_CTYPE
    "toupper",      # LC_CTYPE
    "towctrans",
    "towlower",     # LC_CTYPE
    "towupper",     # LC_CTYPE
    "trim",         # boost::algorithm::trim
    "trim_left",    # boost::algorithm::trim_left
    "trim_right",   # boost::algorithm::trim_right
    "ungetwc",
    "vasprintf",
    "vdprintf",
    "versionsort",
    "vfprintf",
    "vfscanf",
    "vfwprintf",
    "vprintf",
    "vscanf",
    "vsnprintf",
    "vsprintf",
    "vsscanf",
    "vswprintf",
    "vwprintf",
    "wcrtomb",
    "wcscasecmp",
    "wcscoll",      # LC_COLLATE
    "wcsftime",     # LC_TIME
    "wcsncasecmp",
    "wcsnrtombs",
    "wcsrtombs",
    "wcstod",       # LC_NUMERIC
    "wcstof",
    "wcstoimax",
    "wcstol",       # LC_NUMERIC
    "wcstold",
    "wcstoll",
    "wcstombs",     # LC_CTYPE
    "wcstoul",      # LC_NUMERIC
    "wcstoull",
    "wcstoumax",
    "wcswidth",
    "wcsxfrm",      # LC_COLLATE
    "wctob",
    "wctomb",       # LC_CTYPE
    "wctrans",
    "wctype",
    "wcwidth",
    "wprintf"
]
212  
213  
def find_locale_dependent_function_uses():
    """Return the `git grep` output lines that mention a locale-dependent function.

    Runs `git grep -E` over the "*.cpp" and "*.h" pathspecs, excluding the
    paths in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS. A line matches when any
    name from LOCALE_DEPENDENT_FUNCTIONS (optionally followed by "_r" or
    "_s") appears with a character on each side that is not an identifier
    character, backslash, backtick, quote or angle bracket.

    Returns:
        The output of `git grep` split into lines; an empty list when the
        command reported no match.

    Raises:
        CalledProcessError: if `git grep` exits with any status other than
            0 or 1, including termination by a signal.
    """
    regexp_locale_dependent_functions = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
    # Character class required directly before and after a function name.
    boundary = "[^a-zA-Z0-9_\\`'\"<>]"
    pattern = boundary + "(" + regexp_locale_dependent_functions + ")(_r|_s)?" + boundary
    exclude_args = [":(exclude)" + excl for excl in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS]
    git_grep_command = ["git", "grep", "-E", pattern, "--", "*.cpp", "*.h"] + exclude_args

    try:
        return check_output(git_grep_command, text=True, encoding="utf8").splitlines()
    except CalledProcessError as e:
        # Exit status 1 is treated as "nothing matched". Everything else is a
        # real failure. Comparing with `!= 1` rather than `> 1` also catches
        # the negative return codes Python reports when the child is killed
        # by a signal, which would otherwise look like a clean, empty result.
        if e.returncode != 1:
            raise
        return []
227  
228  
def main():
    """Print every detected use of a locale-dependent function and exit.

    For each name in LOCALE_DEPENDENT_FUNCTIONS, the `git grep` output lines
    returned by find_locale_dependent_function_uses() are filtered down to
    those that:

    * contain the name (optionally followed by "_r" or "_s") with a
      character on each side that is not an identifier character, backtick,
      quote or angle bracket,
    * do not have "//", "*", "/*" or a double quote right after the
      "<file>.c|.cpp|.h:" prefix (and optional whitespace) ahead of the name,
    * do not match any pattern in KNOWN_VIOLATIONS.

    The function always terminates through sys.exit(): with status 1 if at
    least one line was reported, otherwise with status 0.
    """
    # An empty ignore list would join to "", and the empty pattern matches
    # every line, which would silently suppress all findings. Use no filter
    # at all in that case.
    known_violations_re = re.compile("|".join(KNOWN_VIOLATIONS)) if KNOWN_VIOLATIONS else None
    git_grep_output = find_locale_dependent_function_uses()

    # Character class required directly before and after a function name.
    boundary = "[^a-zA-Z0-9_\\`'\"<>]"

    exit_code = 0
    for locale_dependent_function in LOCALE_DEPENDENT_FUNCTIONS:
        # Compile once per function instead of once per (function, line) pair.
        use_re = re.compile(boundary + locale_dependent_function + "(_r|_s)?" + boundary)
        comment_re = re.compile("\\.(c|cpp|h):\\s*(//|\\*|/\\*|\").*" + locale_dependent_function)

        matches = [
            line for line in git_grep_output
            if use_re.search(line)
            and not comment_re.search(line)
            and not (known_violations_re is not None and known_violations_re.search(line))
        ]
        if matches:
            print(f"The locale dependent function {locale_dependent_function}(...) appears to be used:")
            for match in matches:
                print(match)
            print("")
            exit_code = 1

    if exit_code == 1:
        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale-dependent functions if possible.\n")
        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")

    sys.exit(exit_code)
252  
253  
# Run the lint only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()