/ test / lint / lint-locale-dependence.py
lint-locale-dependence.py
  1  #!/usr/bin/env python3
  2  # Copyright (c) 2018-present The Bitcoin Core developers
  3  # Distributed under the MIT software license, see the accompanying
  4  # file COPYING or http://www.opensource.org/licenses/mit-license.php.
  5  #
  6  # Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
  7  # opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
  8  # whereas no such call is made in bitcoind.
  9  #
 10  # Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
 11  # specified by the user's LC_ALL (or LC_*) environment variable as the new
 12  # C locale.
 13  #
 14  # In contrast, bitcoind does not opt in to localization -- no call to
 15  # setlocale(LC_ALL, "") is made and the environment variables LC_* are
 16  # thus ignored.
 17  #
 18  # This results in situations where bitcoind is guaranteed to be running
 19  # with the classic locale ("C") whereas the locale of bitcoin-qt will vary
 20  # depending on the user's environment variables.
 21  #
 22  # An example: Assuming the environment variable LC_ALL=de_DE then the
 23  # call std::to_string(1.23) will return "1.230000" in bitcoind but
 24  # "1,230000" in bitcoin-qt.
 25  #
 26  # From the Qt documentation:
 27  # "On Unix/Linux Qt is configured to use the system locale settings by default.
 28  #  This can cause a conflict when using POSIX functions, for instance, when
 29  #  converting between data types such as floats and strings, since the notation
 30  #  may differ between locales. To get around this problem, call the POSIX function
 31  #  setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
 32  #  or QCoreApplication to reset the locale that is used for number formatting to
 33  #  "C"-locale."
 34  #
 35  # See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
 36  # https://stackoverflow.com/a/34878283 for more details.
 37  
 38  import re
 39  import sys
 40  
 41  from subprocess import check_output, CalledProcessError
 42  
 43  from lint_ignore_dirs import SHARED_EXCLUDED_SUBTREES
 44  
 45  
# Accepted uses of locale-dependent functions. Each entry is a regular
# expression; main() joins all entries with "|" and drops every `git grep`
# output line that the combined pattern matches (via re.search), so such
# lines are not reported as violations.
KNOWN_VIOLATIONS = [
    "src/dbwrapper.cpp:.*vsnprintf",
    "src/span.h:.*printf",
    "src/test/fuzz/locale.cpp:.*setlocale",
    "src/test/util_tests.cpp:.*strtoll",
    "src/util/syserror.cpp:.*strerror",      # Outside this function use `SysErrorString`
]
 53  
# Paths that are not scanned at all: find_locale_dependent_function_uses()
# passes each entry to `git grep` as a ":(exclude)" pathspec.
# SHARED_EXCLUDED_SUBTREES comes from lint_ignore_dirs (presumably the
# externally maintained subtrees -- confirm in that module).
REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
    "src/tinyformat.h",
] + SHARED_EXCLUDED_SUBTREES
 57  
# Names of functions that this linter reports when they appear to be called.
# The names are inserted verbatim into regular expressions: a use is detected
# when a name (optionally followed by "_r" or "_s") is directly followed by
# "(" and preceded by a character other than an identifier character,
# backslash, backtick, quote or angle bracket. The trailing comments note the
# locale category involved or the library the function belongs to.
LOCALE_DEPENDENT_FUNCTIONS = [
    "alphasort",    # LC_COLLATE (via strcoll)
    "asctime",      # LC_TIME (directly)
    "asprintf",     # (via vasprintf)
    "atof",         # LC_NUMERIC (via strtod)
    "atoi",         # LC_NUMERIC (via strtol)
    "atol",         # LC_NUMERIC (via strtol)
    "atoll",        # (via strtoll)
    "atoq",
    "btowc",        # LC_CTYPE (directly)
    "ctime",        # (via asctime or localtime)
    "dprintf",      # (via vdprintf)
    "fgetwc",
    "fgetws",
    "fold_case",    # boost::locale::fold_case
    "fprintf",      # (via vfprintf)
    "fputwc",
    "fputws",
    "fscanf",       # (via __vfscanf)
    "fwprintf",     # (via __vfwprintf)
    "getdate",      # via __getdate_r => isspace // __localtime_r
    "getwc",
    "getwchar",
    "is_digit",     # boost::algorithm::is_digit
    "is_space",     # boost::algorithm::is_space
    "isalnum",      # LC_CTYPE
    "isalpha",      # LC_CTYPE
    "isblank",      # LC_CTYPE
    "iscntrl",      # LC_CTYPE
    "isctype",      # LC_CTYPE
    "isdigit",      # LC_CTYPE
    "isgraph",      # LC_CTYPE
    "islower",      # LC_CTYPE
    "isprint",      # LC_CTYPE
    "ispunct",      # LC_CTYPE
    "isspace",      # LC_CTYPE
    "isupper",      # LC_CTYPE
    "iswalnum",     # LC_CTYPE
    "iswalpha",     # LC_CTYPE
    "iswblank",     # LC_CTYPE
    "iswcntrl",     # LC_CTYPE
    "iswctype",     # LC_CTYPE
    "iswdigit",     # LC_CTYPE
    "iswgraph",     # LC_CTYPE
    "iswlower",     # LC_CTYPE
    "iswprint",     # LC_CTYPE
    "iswpunct",     # LC_CTYPE
    "iswspace",     # LC_CTYPE
    "iswupper",     # LC_CTYPE
    "iswxdigit",    # LC_CTYPE
    "isxdigit",     # LC_CTYPE
    "localeconv",   # LC_NUMERIC + LC_MONETARY
    "mblen",        # LC_CTYPE
    "mbrlen",
    "mbrtowc",
    "mbsinit",
    "mbsnrtowcs",
    "mbsrtowcs",
    "mbstowcs",     # LC_CTYPE
    "mbtowc",       # LC_CTYPE
    "mktime",
    "normalize",    # boost::locale::normalize
    "printf",       # LC_NUMERIC
    "putwc",
    "putwchar",
    "scanf",        # LC_NUMERIC
    "setlocale",
    "snprintf",
    "sprintf",
    "sscanf",
    "std::locale::global",
    "std::to_string",
    "stod",
    "stof",
    "stoi",
    "stol",
    "stold",
    "stoll",
    "stoul",
    "stoull",
    "strcasecmp",
    "strcasestr",
    "strcoll",      # LC_COLLATE
    "strerror",
    "strfmon",
    "strftime",     # LC_TIME
    "strncasecmp",
    "strptime",
    "strtod",       # LC_NUMERIC
    "strtof",
    "strtoimax",
    "strtol",       # LC_NUMERIC
    "strtold",
    "strtoll",
    "strtoq",
    "strtoul",      # LC_NUMERIC
    "strtoull",
    "strtoumax",
    "strtouq",
    "strxfrm",      # LC_COLLATE
    "swprintf",
    "to_lower",     # boost::locale::to_lower
    "to_title",     # boost::locale::to_title
    "to_upper",     # boost::locale::to_upper
    "tolower",      # LC_CTYPE
    "toupper",      # LC_CTYPE
    "towctrans",
    "towlower",     # LC_CTYPE
    "towupper",     # LC_CTYPE
    "trim",         # boost::algorithm::trim
    "trim_left",    # boost::algorithm::trim_left
    "trim_right",   # boost::algorithm::trim_right
    "ungetwc",
    "vasprintf",
    "vdprintf",
    "versionsort",
    "vfprintf",
    "vfscanf",
    "vfwprintf",
    "vprintf",
    "vscanf",
    "vsnprintf",
    "vsprintf",
    "vsscanf",
    "vswprintf",
    "vwprintf",
    "wcrtomb",
    "wcscasecmp",
    "wcscoll",      # LC_COLLATE
    "wcsftime",     # LC_TIME
    "wcsncasecmp",
    "wcsnrtombs",
    "wcsrtombs",
    "wcstod",       # LC_NUMERIC
    "wcstof",
    "wcstoimax",
    "wcstol",       # LC_NUMERIC
    "wcstold",
    "wcstoll",
    "wcstombs",     # LC_CTYPE
    "wcstoul",      # LC_NUMERIC
    "wcstoull",
    "wcstoumax",
    "wcswidth",
    "wcsxfrm",      # LC_COLLATE
    "wctob",
    "wctomb",       # LC_CTYPE
    "wctrans",
    "wctype",
    "wcwidth",
    "wprintf"
]
210  
211  
def find_locale_dependent_function_uses():
    """Return the `git grep` output lines that may call a locale-dependent function.

    A single `git grep --extended-regexp` is run over `*.cpp` and `*.h` files,
    with every entry of REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS passed as a
    ":(exclude)" pathspec. A line is a candidate when any name from
    LOCALE_DEPENDENT_FUNCTIONS (optionally suffixed with `_r` or `_s`) is
    directly followed by "(" and preceded by a character that is not an
    identifier character, backslash, backtick, quote or angle bracket.

    Returns:
        The output of `git grep` split into lines; an empty list when the
        command exits with status 1 (treated here as "no matches").

    Raises:
        CalledProcessError: if `git grep` exits with a status greater than 1.
    """
    regexp_locale_dependent_functions = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
    exclude_args = [":(exclude)" + excl for excl in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS]
    git_grep_command = ["git", "grep", "--extended-regexp", "[^a-zA-Z0-9_\\`'\"<>](" +  regexp_locale_dependent_functions + ")(_r|_s)?\\(", "--", "*.cpp", "*.h"] + exclude_args

    try:
        # Decode explicitly as UTF-8: with text=True alone the output would be
        # decoded using the locale's preferred encoding, making this linter's
        # own result depend on the environment it runs in.
        return check_output(git_grep_command, text=True, encoding="utf8").splitlines()
    except CalledProcessError as e:
        # Only statuses above 1 are real failures; status 1 yields no lines.
        if e.returncode > 1:
            raise
        return []
225  
226  
def main():
    """Report uses of locale-dependent functions and exit with a status code.

    For every name in LOCALE_DEPENDENT_FUNCTIONS, the candidate lines from
    find_locale_dependent_function_uses() are kept when they contain a call
    to that name, do not start (after the file-name prefix) with a `//`
    comment mentioning the name, and are not covered by KNOWN_VIOLATIONS.
    Remaining lines are printed grouped by function. The process exits with
    status 1 if anything was reported and 0 otherwise.
    """
    known_violations_pattern = "|".join(KNOWN_VIOLATIONS)
    candidate_lines = find_locale_dependent_function_uses()
    violation_found = False

    for function_name in LOCALE_DEPENDENT_FUNCTIONS:
        call_pattern = "[^a-zA-Z0-9_\\`'\"<>]" + function_name + "(_r|_s)?\\("
        comment_pattern = "\\.(c|cpp|h):\\s*//.*" + function_name

        offending_lines = []
        for candidate in candidate_lines:
            # Must look like a call to this particular function.
            if not re.search(call_pattern, candidate):
                continue
            # Skip lines whose content is a `//` comment naming the function.
            if re.search(comment_pattern, candidate):
                continue
            # Skip explicitly accepted uses.
            if re.search(known_violations_pattern, candidate):
                continue
            offending_lines.append(candidate)

        if not offending_lines:
            continue

        print(f"The locale dependent function {function_name}(...) appears to be used:")
        for offending in offending_lines:
            print(offending)
        print("")
        violation_found = True

    if violation_found:
        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale-dependent functions if possible.\n")
        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")

    sys.exit(1 if violation_found else 0)
250  
251  
# Run the linter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()