/ test / lint / lint-locale-dependence.py
lint-locale-dependence.py
  1  #!/usr/bin/env python3
  2  # Copyright (c) 2018-present The Bitcoin Core developers
  3  # Distributed under the MIT software license, see the accompanying
  4  # file COPYING or http://www.opensource.org/licenses/mit-license.php.
  5  #
  6  # Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
  7  # opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
  8  # whereas no such call is made in bitcoind.
  9  #
 10  # Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
 11  # specified by the user's LC_ALL (or LC_*) environment variable as the new
 12  # C locale.
 13  #
 14  # In contrast, bitcoind does not opt in to localization -- no call to
 15  # setlocale(LC_ALL, "") is made and the environment variables LC_* are
 16  # thus ignored.
 17  #
 18  # This results in situations where bitcoind is guaranteed to be running
 19  # with the classic locale ("C") whereas the locale of bitcoin-qt will vary
 20  # depending on the user's environment variables.
 21  #
 22  # An example: Assuming the environment variable LC_ALL=de_DE then the
 23  # call std::to_string(1.23) will return "1.230000" in bitcoind but
 24  # "1,230000" in bitcoin-qt.
 25  #
 26  # From the Qt documentation:
 27  # "On Unix/Linux Qt is configured to use the system locale settings by default.
 28  #  This can cause a conflict when using POSIX functions, for instance, when
 29  #  converting between data types such as floats and strings, since the notation
 30  #  may differ between locales. To get around this problem, call the POSIX function
 31  #  setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
 32  #  or QCoreApplication to reset the locale that is used for number formatting to
 33  #  "C"-locale."
 34  #
 35  # See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
 36  # https://stackoverflow.com/a/34878283 for more details.
 37  
 38  import re
 39  import sys
 40  
 41  from subprocess import check_output, CalledProcessError
 42  
 43  
# Regular expressions for accepted uses of locale-dependent functions.
# main() joins them with "|" and applies the result with re.search() to each
# "git grep" output line; a line matching any entry is not reported.
KNOWN_VIOLATIONS = [
    "src/dbwrapper.cpp:.*vsnprintf",
    "src/span.h:.*printf",
    "src/test/fuzz/locale.cpp:.*setlocale",
    "src/test/util_tests.cpp:.*strtoll",
    "src/util/syserror.cpp:.*strerror",      # Outside this function use `SysErrorString`
]
 51  
# Paths left out of the search. Despite the "REGEXP" in the name, each entry
# is handed to "git grep" as a ":(exclude)<path>" pathspec by
# find_locale_dependent_function_uses(), not used as a regular expression.
REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
    "src/crypto/ctaes/",
    "src/ipc/libmultiprocess/",
    "src/leveldb/",
    "src/secp256k1/",
    "src/minisketch/",
    "src/tinyformat.h",
]
 60  
# Names of the functions this lint treats as locale dependent.
# find_locale_dependent_function_uses() joins them with "|" into a single
# "git grep" pattern, and main() re-checks the grep output once per name so it
# can report findings grouped by function. Both patterns also accept an
# optional "_r" or "_s" suffix directly after the name.
# The names are inserted into regular expressions unescaped, so entries must
# not contain regex metacharacters.
LOCALE_DEPENDENT_FUNCTIONS = [
    "alphasort",    # LC_COLLATE (via strcoll)
    "asctime",      # LC_TIME (directly)
    "asprintf",     # (via vasprintf)
    "atof",         # LC_NUMERIC (via strtod)
    "atoi",         # LC_NUMERIC (via strtol)
    "atol",         # LC_NUMERIC (via strtol)
    "atoll",        # (via strtoll)
    "atoq",
    "btowc",        # LC_CTYPE (directly)
    "ctime",        # (via asctime or localtime)
    "dprintf",      # (via vdprintf)
    "fgetwc",
    "fgetws",
    "fold_case",    # boost::locale::fold_case
    "fprintf",      # (via vfprintf)
    "fputwc",
    "fputws",
    "fscanf",       # (via __vfscanf)
    "fwprintf",     # (via __vfwprintf)
    "getdate",      # via __getdate_r => isspace // __localtime_r
    "getwc",
    "getwchar",
    "is_digit",     # boost::algorithm::is_digit
    "is_space",     # boost::algorithm::is_space
    "isalnum",      # LC_CTYPE
    "isalpha",      # LC_CTYPE
    "isblank",      # LC_CTYPE
    "iscntrl",      # LC_CTYPE
    "isctype",      # LC_CTYPE
    "isdigit",      # LC_CTYPE
    "isgraph",      # LC_CTYPE
    "islower",      # LC_CTYPE
    "isprint",      # LC_CTYPE
    "ispunct",      # LC_CTYPE
    "isspace",      # LC_CTYPE
    "isupper",      # LC_CTYPE
    "iswalnum",     # LC_CTYPE
    "iswalpha",     # LC_CTYPE
    "iswblank",     # LC_CTYPE
    "iswcntrl",     # LC_CTYPE
    "iswctype",     # LC_CTYPE
    "iswdigit",     # LC_CTYPE
    "iswgraph",     # LC_CTYPE
    "iswlower",     # LC_CTYPE
    "iswprint",     # LC_CTYPE
    "iswpunct",     # LC_CTYPE
    "iswspace",     # LC_CTYPE
    "iswupper",     # LC_CTYPE
    "iswxdigit",    # LC_CTYPE
    "isxdigit",     # LC_CTYPE
    "localeconv",   # LC_NUMERIC + LC_MONETARY
    "mblen",        # LC_CTYPE
    "mbrlen",
    "mbrtowc",
    "mbsinit",
    "mbsnrtowcs",
    "mbsrtowcs",
    "mbstowcs",     # LC_CTYPE
    "mbtowc",       # LC_CTYPE
    "mktime",
    "normalize",    # boost::locale::normalize
    "printf",       # LC_NUMERIC
    "putwc",
    "putwchar",
    "scanf",        # LC_NUMERIC
    "setlocale",
    "snprintf",
    "sprintf",
    "sscanf",
    "std::locale::global",
    "std::to_string",
    "stod",
    "stof",
    "stoi",
    "stol",
    "stold",
    "stoll",
    "stoul",
    "stoull",
    "strcasecmp",
    "strcasestr",
    "strcoll",      # LC_COLLATE
    "strerror",
    "strfmon",
    "strftime",     # LC_TIME
    "strncasecmp",
    "strptime",
    "strtod",       # LC_NUMERIC
    "strtof",
    "strtoimax",
    "strtol",       # LC_NUMERIC
    "strtold",
    "strtoll",
    "strtoq",
    "strtoul",      # LC_NUMERIC
    "strtoull",
    "strtoumax",
    "strtouq",
    "strxfrm",      # LC_COLLATE
    "swprintf",
    "to_lower",     # boost::locale::to_lower
    "to_title",     # boost::locale::to_title
    "to_upper",     # boost::locale::to_upper
    "tolower",      # LC_CTYPE
    "toupper",      # LC_CTYPE
    "towctrans",
    "towlower",     # LC_CTYPE
    "towupper",     # LC_CTYPE
    "trim",         # boost::algorithm::trim
    "trim_left",    # boost::algorithm::trim_left
    "trim_right",   # boost::algorithm::trim_right
    "ungetwc",
    "vasprintf",
    "vdprintf",
    "versionsort",
    "vfprintf",
    "vfscanf",
    "vfwprintf",
    "vprintf",
    "vscanf",
    "vsnprintf",
    "vsprintf",
    "vsscanf",
    "vswprintf",
    "vwprintf",
    "wcrtomb",
    "wcscasecmp",
    "wcscoll",      # LC_COLLATE
    "wcsftime",     # LC_TIME
    "wcsncasecmp",
    "wcsnrtombs",
    "wcsrtombs",
    "wcstod",       # LC_NUMERIC
    "wcstof",
    "wcstoimax",
    "wcstol",       # LC_NUMERIC
    "wcstold",
    "wcstoll",
    "wcstombs",     # LC_CTYPE
    "wcstoul",      # LC_NUMERIC
    "wcstoull",
    "wcstoumax",
    "wcswidth",
    "wcsxfrm",      # LC_COLLATE
    "wctob",
    "wctomb",       # LC_CTYPE
    "wctrans",
    "wctype",
    "wcwidth",
    "wprintf"
]
213  
214  
def find_locale_dependent_function_uses():
    """Return the "git grep" output lines that look like locale-dependent calls.

    Runs a single "git grep --extended-regexp" over "*.cpp" and "*.h" files,
    skipping every path in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS. A line is
    selected when it contains any name from LOCALE_DEPENDENT_FUNCTIONS,
    optionally followed by "_r" or "_s", then "(", and preceded by a character
    other than an identifier character, backslash, backtick, quote, "<" or ">".

    Returns:
        A list of output lines exactly as printed by "git grep" (without line
        terminators); an empty list when nothing matched.

    Raises:
        CalledProcessError: if "git grep" ends with any status other than 0
            (matches found) or 1 (no matches). This includes negative return
            codes, which subprocess uses for a child killed by a signal.
    """
    regexp_locale_dependent_functions = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
    exclude_args = [":(exclude)" + excl for excl in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS]
    git_grep_command = ["git", "grep", "--extended-regexp", "[^a-zA-Z0-9_\\`'\"<>](" +  regexp_locale_dependent_functions + ")(_r|_s)?\\(", "--", "*.cpp", "*.h"] + exclude_args

    try:
        return check_output(git_grep_command, text=True).splitlines()
    except CalledProcessError as e:
        # Status 1 is the only "no matches" outcome. Anything else is a real
        # failure and must not be mistaken for a clean result; in particular a
        # signal-terminated child has a negative return code, which a "> 1"
        # comparison would silently swallow.
        if e.returncode != 1:
            raise
        return []
228  
229  
def main():
    """Report uses of locale-dependent functions and exit with the lint status.

    Fetches candidate lines from find_locale_dependent_function_uses() and,
    for each name in LOCALE_DEPENDENT_FUNCTIONS, keeps the lines that:
      * contain a call-like use of that name (optionally with a "_r" or "_s"
        suffix),
      * do not start, right after the file name, with a "//" comment that
        mentions the name, and
      * do not match any entry of KNOWN_VIOLATIONS.
    Remaining lines are printed grouped by function name.

    Exits the process via sys.exit() with status 1 if anything was reported,
    and 0 otherwise.
    """
    exit_code = 0

    # Joining an empty list yields "", and an empty pattern matches every line,
    # which would suppress all findings. Only build the ignore pattern when
    # there is at least one known violation.
    ignore_known_violations = re.compile("|".join(KNOWN_VIOLATIONS)) if KNOWN_VIOLATIONS else None
    git_grep_output = find_locale_dependent_function_uses()

    for locale_dependent_function in LOCALE_DEPENDENT_FUNCTIONS:
        # Compile the per-function patterns once instead of once per line.
        call_pattern = re.compile("[^a-zA-Z0-9_\\`'\"<>]" + locale_dependent_function + "(_r|_s)?\\(")
        comment_pattern = re.compile("\\.(c|cpp|h):\\s*//.*" + locale_dependent_function)
        matches = [line for line in git_grep_output
                   if call_pattern.search(line)
                   and not comment_pattern.search(line)
                   and not (ignore_known_violations is not None
                            and ignore_known_violations.search(line))]
        if matches:
            print(f"The locale dependent function {locale_dependent_function}(...) appears to be used:")
            for match in matches:
                print(match)
            print("")
            exit_code = 1

    if exit_code == 1:
        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale-dependent functions if possible.\n")
        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")

    sys.exit(exit_code)
253  
254  
# Run the lint only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()