// src/test/util_string_tests.cpp
  1  // Copyright (c) 2024-present The Bitcoin Core developers
  2  // Distributed under the MIT software license, see the accompanying
  3  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  4  
  5  #include <util/strencodings.h>
  6  #include <util/string.h>
  7  #include <vector>
  8  
  9  #include <boost/test/unit_test.hpp>
 10  #include <test/util/common.h>
 11  #include <tinyformat.h>
 12  
 13  using namespace util;
 14  using util::detail::CheckNumFormatSpecifiers;
 15  
 16  BOOST_AUTO_TEST_SUITE(util_string_tests)
 17  
 18  template <unsigned NumArgs>
 19  void TfmFormatZeroes(const std::string& fmt)
 20  {
 21      std::apply([&](auto... args) {
 22          (void)tfm::format(tfm::RuntimeFormat{fmt}, args...);
 23      }, std::array<int, NumArgs>{});
 24  }
 25  
 26  // Helper to allow compile-time sanity checks while providing the number of
 27  // args directly. Normally PassFmt<sizeof...(Args)> would be used.
 28  template <unsigned NumArgs>
 29  void PassFmt(ConstevalFormatString<NumArgs> fmt)
 30  {
 31      // Execute compile-time check again at run-time to get code coverage stats
 32      BOOST_CHECK_NO_THROW(CheckNumFormatSpecifiers<NumArgs>(fmt.fmt));
 33  
 34      // If ConstevalFormatString didn't throw above, make sure tinyformat doesn't
 35      // throw either for the same format string and parameter count combination.
 36      // Proves that we have some extent of protection from runtime errors
 37      // (tinyformat may still throw for some type mismatches).
 38      BOOST_CHECK_NO_THROW(TfmFormatZeroes<NumArgs>(fmt.fmt));
 39  }
 40  template <unsigned WrongNumArgs>
 41  void FailFmtWithError(const char* wrong_fmt, std::string_view error)
 42  {
 43      BOOST_CHECK_EXCEPTION(CheckNumFormatSpecifiers<WrongNumArgs>(wrong_fmt), const char*, HasReason{error});
 44  }
 45  
 46  std::vector<std::byte> StringToBuffer(const std::string& str)
 47  {
 48      auto span = std::as_bytes(std::span(str));
 49      return {span.begin(), span.end()};
 50  }
 51  
 52  BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec)
 53  {
 54      PassFmt<0>("");
 55      PassFmt<0>("%%");
 56      PassFmt<1>("%s");
 57      PassFmt<1>("%c");
 58      PassFmt<0>("%%s");
 59      PassFmt<0>("s%%");
 60      PassFmt<1>("%%%s");
 61      PassFmt<1>("%s%%");
 62      PassFmt<0>(" 1$s");
 63      PassFmt<1>("%1$s");
 64      PassFmt<1>("%1$s%1$s");
 65      PassFmt<2>("%2$s");
 66      PassFmt<2>("%2$s 4$s %2$s");
 67      PassFmt<129>("%129$s 999$s %2$s");
 68      PassFmt<1>("%02d");
 69      PassFmt<1>("%+2s");
 70      PassFmt<1>("%.6i");
 71      PassFmt<1>("%5.2f");
 72      PassFmt<1>("%5.f");
 73      PassFmt<1>("%.f");
 74      PassFmt<1>("%#x");
 75      PassFmt<1>("%1$5i");
 76      PassFmt<1>("%1$-5i");
 77      PassFmt<1>("%1$.5i");
 78      // tinyformat accepts almost any "type" spec, even '%', or '_', or '\n'.
 79      PassFmt<1>("%123%");
 80      PassFmt<1>("%123%s");
 81      PassFmt<1>("%_");
 82      PassFmt<1>("%\n");
 83  
 84      PassFmt<2>("%*c");
 85      PassFmt<2>("%+*c");
 86      PassFmt<2>("%.*f");
 87      PassFmt<3>("%*.*f");
 88      PassFmt<3>("%2$*3$d");
 89      PassFmt<3>("%2$*3$.9d");
 90      PassFmt<3>("%2$.*3$d");
 91      PassFmt<3>("%2$9.*3$d");
 92      PassFmt<3>("%2$+9.*3$d");
 93      PassFmt<4>("%3$*2$.*4$f");
 94  
 95      // Make sure multiple flag characters "- 0+" are accepted
 96      PassFmt<3>("'%- 0+*.*f'");
 97      PassFmt<3>("'%1$- 0+*3$.*2$f'");
 98  
 99      auto err_mix{"Format specifiers must be all positional or all non-positional!"};
100      FailFmtWithError<1>("%s%1$s", err_mix);
101      FailFmtWithError<2>("%2$*d", err_mix);
102      FailFmtWithError<2>("%*2$d", err_mix);
103      FailFmtWithError<2>("%.*3$d", err_mix);
104      FailFmtWithError<2>("%2$.*d", err_mix);
105  
106      auto err_num{"Format specifier count must match the argument count!"};
107      FailFmtWithError<1>("", err_num);
108      FailFmtWithError<0>("%s", err_num);
109      FailFmtWithError<2>("%s", err_num);
110      FailFmtWithError<0>("%1$s", err_num);
111      FailFmtWithError<2>("%1$s", err_num);
112      FailFmtWithError<1>("%*c", err_num);
113  
114      auto err_0_pos{"Positional format specifier must have position of at least 1"};
115      FailFmtWithError<1>("%$s", err_0_pos);
116      FailFmtWithError<1>("%$", err_0_pos);
117      FailFmtWithError<0>("%0$", err_0_pos);
118      FailFmtWithError<0>("%0$s", err_0_pos);
119      FailFmtWithError<2>("%2$*$d", err_0_pos);
120      FailFmtWithError<2>("%2$*0$d", err_0_pos);
121      FailFmtWithError<3>("%3$*2$.*$f", err_0_pos);
122      FailFmtWithError<3>("%3$*2$.*0$f", err_0_pos);
123  
124      auto err_term{"Format specifier incorrectly terminated by end of string"};
125      FailFmtWithError<1>("%", err_term);
126      FailFmtWithError<1>("%9", err_term);
127      FailFmtWithError<1>("%9.", err_term);
128      FailFmtWithError<1>("%9.9", err_term);
129      FailFmtWithError<1>("%*", err_term);
130      FailFmtWithError<1>("%+*", err_term);
131      FailFmtWithError<1>("%.*", err_term);
132      FailFmtWithError<1>("%9.*", err_term);
133      FailFmtWithError<1>("%1$", err_term);
134      FailFmtWithError<1>("%1$9", err_term);
135      FailFmtWithError<2>("%1$*2$", err_term);
136      FailFmtWithError<2>("%1$.*2$", err_term);
137      FailFmtWithError<2>("%1$9.*2$", err_term);
138  
139      // Non-parity between tinyformat and ConstevalFormatString.
140      // tinyformat throws but ConstevalFormatString does not.
141      BOOST_CHECK_EXCEPTION(tfm::format(ConstevalFormatString<1>{"%n"}, 0), tfm::format_error,
142          HasReason{"tinyformat: %n conversion spec not supported"});
143      BOOST_CHECK_EXCEPTION(tfm::format(ConstevalFormatString<2>{"%*s"}, "hi", "hi"), tfm::format_error,
144          HasReason{"tinyformat: Cannot convert from argument type to integer for use as variable width or precision"});
145      BOOST_CHECK_EXCEPTION(tfm::format(ConstevalFormatString<2>{"%.*s"}, "hi", "hi"), tfm::format_error,
146          HasReason{"tinyformat: Cannot convert from argument type to integer for use as variable width or precision"});
147  
148      // Ensure that tinyformat throws if format string contains wrong number
149      // of specifiers. PassFmt relies on this to verify tinyformat successfully
150      // formats the strings, and will need to be updated if tinyformat is changed
151      // not to throw on failure.
152      BOOST_CHECK_EXCEPTION(TfmFormatZeroes<2>("%s"), tfm::format_error,
153          HasReason{"tinyformat: Not enough conversion specifiers in format string"});
154      BOOST_CHECK_EXCEPTION(TfmFormatZeroes<1>("%s %s"), tfm::format_error,
155          HasReason{"tinyformat: Too many conversion specifiers in format string"});
156  }
157  
158  BOOST_AUTO_TEST_CASE(ascii_case_insensitive_key_equal_test)
159  {
160      AsciiCaseInsensitiveKeyEqual cmp;
161      BOOST_CHECK(!cmp("A", "B"));
162      BOOST_CHECK(!cmp("A", "b"));
163      BOOST_CHECK(!cmp("a", "B"));
164      BOOST_CHECK(!cmp("B", "A"));
165      BOOST_CHECK(!cmp("B", "a"));
166      BOOST_CHECK(!cmp("b", "A"));
167      BOOST_CHECK(!cmp("A", "AA"));
168      BOOST_CHECK(cmp("A-A", "a-a"));
169      BOOST_CHECK(cmp("A", "A"));
170      BOOST_CHECK(cmp("A", "a"));
171      BOOST_CHECK(cmp("a", "a"));
172      BOOST_CHECK(cmp("B", "b"));
173      BOOST_CHECK(cmp("ab", "aB"));
174      BOOST_CHECK(cmp("Ab", "aB"));
175      BOOST_CHECK(cmp("AB", "ab"));
176  
177      // Use a character with value > 127
178      // to ensure we don't trigger implicit-integer-sign-change
179      BOOST_CHECK(!cmp("a", "\xe4"));
180  }
181  
182  BOOST_AUTO_TEST_CASE(ascii_case_insensitive_hash_test)
183  {
184      AsciiCaseInsensitiveHash hsh;
185      BOOST_CHECK_NE(hsh("A"), hsh("B"));
186      BOOST_CHECK_NE(hsh("AA"), hsh("A"));
187      BOOST_CHECK_EQUAL(hsh("A"), hsh("a"));
188      BOOST_CHECK_EQUAL(hsh("Ab"), hsh("aB"));
189      BOOST_CHECK_EQUAL(hsh("A\xfe"), hsh("a\xfe"));
190  }
191  
192  BOOST_AUTO_TEST_CASE(line_reader_test)
193  {
194      {
195          // Check three lines terminated by \n and \r\n, trimming whitespace
196          const std::vector<std::byte> input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food\n")};
197          LineReader reader(input, /*max_line_length=*/128);
198          std::optional<std::string> line1{reader.ReadLine()};
199          BOOST_CHECK_EQUAL(reader.Remaining(), 34);
200          std::optional<std::string> line2{reader.ReadLine()};
201          BOOST_CHECK_EQUAL(reader.Remaining(), 15);
202          std::optional<std::string> line3{reader.ReadLine()};
203          std::optional<std::string> line4{reader.ReadLine()};
204          BOOST_CHECK(line1);
205          BOOST_CHECK(line2);
206          BOOST_CHECK(line3);
207          BOOST_CHECK(!line4);
208          BOOST_CHECK_EQUAL(line1.value(), "once upon a time");
209          BOOST_CHECK_EQUAL(line2.value(), "there was a dog");
210          BOOST_CHECK_EQUAL(line3.value(), "who liked food");
211      }
212      {
213          // Do not exceed max_line_length + 1 while searching for \n
214          // Test with 22-character line + \n + 23-character line + \n
215          const std::vector<std::byte> input{StringToBuffer("once upon a time there\nwas a dog who liked tea\n")};
216  
217          LineReader reader1(input, /*max_line_length=*/22);
218          // First line is exactly the length of max_line_length
219          BOOST_CHECK_EQUAL(reader1.ReadLine(), "once upon a time there");
220          // Second line is +1 character too long
221          BOOST_CHECK_EXCEPTION(reader1.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"});
222  
223          // Increase max_line_length by 1
224          LineReader reader2(input, /*max_line_length=*/23);
225          // Both lines fit within limit
226          BOOST_CHECK_EQUAL(reader2.ReadLine(), "once upon a time there");
227          BOOST_CHECK_EQUAL(reader2.ReadLine(), "was a dog who liked tea");
228          // End of buffer reached
229          BOOST_CHECK(!reader2.ReadLine());
230      }
231      {
232          // Empty lines are empty
233          const std::vector<std::byte> input{StringToBuffer("\n")};
234          LineReader reader(input, /*max_line_length=*/1024);
235          BOOST_CHECK_EQUAL(reader.ReadLine(), "");
236          BOOST_CHECK(!reader.ReadLine());
237      }
238      {
239          // Empty buffers are null
240          const std::vector<std::byte> input{StringToBuffer("")};
241          LineReader reader(input, /*max_line_length=*/1024);
242          BOOST_CHECK(!reader.ReadLine());
243      }
244      {
245          // Even one character is too long, if it's not \n
246          const std::vector<std::byte> input{StringToBuffer("ab\n")};
247          LineReader reader(input, /*max_line_length=*/1);
248          // First line is +1 character too long
249          BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"});
250      }
251      {
252          const std::vector<std::byte> input{StringToBuffer("a\nb\n")};
253          LineReader reader(input, /*max_line_length=*/1);
254          BOOST_CHECK_EQUAL(reader.ReadLine(), "a");
255          BOOST_CHECK_EQUAL(reader.ReadLine(), "b");
256          BOOST_CHECK(!reader.ReadLine());
257      }
258      {
259          // If ReadLine fails, the iterator is reset and we can ReadLength instead
260          const std::vector<std::byte> input{StringToBuffer("a\nbaboon\n")};
261          LineReader reader(input, /*max_line_length=*/1);
262          BOOST_CHECK_EQUAL(reader.ReadLine(), "a");
263          // "baboon" is too long
264          BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"});
265          BOOST_CHECK_EQUAL(reader.ReadLength(1), "b");
266          BOOST_CHECK_EQUAL(reader.ReadLength(1), "a");
267          BOOST_CHECK_EQUAL(reader.ReadLength(2), "bo");
268          // "on" is too long
269          BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"});
270          BOOST_CHECK_EQUAL(reader.ReadLength(1), "o");
271          BOOST_CHECK_EQUAL(reader.ReadLine(), "n"); // now the remainder of the buffer fits in one line
272          BOOST_CHECK(!reader.ReadLine());
273      }
274      {
275          // The end of the buffer (EOB) does not count as end of line \n
276          const std::vector<std::byte> input{StringToBuffer("once upon a time there")};
277  
278          LineReader reader(input, /*max_line_length=*/22);
279          // First line is exactly the length of max_line_length, but that doesn't matter because \n is missing
280          BOOST_CHECK(!reader.ReadLine());
281          // Data can still be read using ReadLength
282          BOOST_CHECK_EQUAL(reader.ReadLength(22), "once upon a time there");
283          // End of buffer reached
284          BOOST_CHECK_EQUAL(reader.Remaining(), 0);
285      }
286      {
287          // Read specific number of bytes regardless of max_line_length or \n unless buffer is too short
288          const std::vector<std::byte> input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food")};
289          LineReader reader(input, /*max_line_length=*/1);
290          BOOST_CHECK_EQUAL(reader.ReadLength(0), "");
291          BOOST_CHECK_EQUAL(reader.ReadLength(3), "onc");
292          BOOST_CHECK_EQUAL(reader.ReadLength(8), "e upon a");
293          BOOST_CHECK_EQUAL(reader.ReadLength(8), " time\n t");
294          BOOST_CHECK_EXCEPTION(reader.ReadLength(128), std::runtime_error, HasReason{"Not enough data in buffer"});
295          // After the error the iterator is reset so we can try again
296          BOOST_CHECK_EQUAL(reader.ReadLength(31), "here was a dog \r\nwho liked food");
297          // End of buffer reached
298          BOOST_CHECK_EQUAL(reader.Remaining(), 0);
299      }
300  }
301  
302  BOOST_AUTO_TEST_SUITE_END()