util_string_tests.cpp
1 // Copyright (c) 2024-present The Bitcoin Core developers 2 // Distributed under the MIT software license, see the accompanying 3 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 4 5 #include <util/strencodings.h> 6 #include <util/string.h> 7 #include <vector> 8 9 #include <boost/test/unit_test.hpp> 10 #include <test/util/common.h> 11 #include <tinyformat.h> 12 13 using namespace util; 14 using util::detail::CheckNumFormatSpecifiers; 15 16 BOOST_AUTO_TEST_SUITE(util_string_tests) 17 18 template <unsigned NumArgs> 19 void TfmFormatZeroes(const std::string& fmt) 20 { 21 std::apply([&](auto... args) { 22 (void)tfm::format(tfm::RuntimeFormat{fmt}, args...); 23 }, std::array<int, NumArgs>{}); 24 } 25 26 // Helper to allow compile-time sanity checks while providing the number of 27 // args directly. Normally PassFmt<sizeof...(Args)> would be used. 28 template <unsigned NumArgs> 29 void PassFmt(ConstevalFormatString<NumArgs> fmt) 30 { 31 // Execute compile-time check again at run-time to get code coverage stats 32 BOOST_CHECK_NO_THROW(CheckNumFormatSpecifiers<NumArgs>(fmt.fmt)); 33 34 // If ConstevalFormatString didn't throw above, make sure tinyformat doesn't 35 // throw either for the same format string and parameter count combination. 36 // Proves that we have some extent of protection from runtime errors 37 // (tinyformat may still throw for some type mismatches). 38 BOOST_CHECK_NO_THROW(TfmFormatZeroes<NumArgs>(fmt.fmt)); 39 } 40 template <unsigned WrongNumArgs> 41 void FailFmtWithError(const char* wrong_fmt, std::string_view error) 42 { 43 BOOST_CHECK_EXCEPTION(CheckNumFormatSpecifiers<WrongNumArgs>(wrong_fmt), const char*, HasReason{error}); 44 } 45 46 std::vector<std::byte> StringToBuffer(const std::string& str) 47 { 48 auto span = std::as_bytes(std::span(str)); 49 return {span.begin(), span.end()}; 50 } 51 52 BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec) 53 { 54 PassFmt<0>(""); 55 PassFmt<0>("%%"); 56 PassFmt<1>("%s"); 57 PassFmt<1>("%c"); 58 PassFmt<0>("%%s"); 59 PassFmt<0>("s%%"); 60 PassFmt<1>("%%%s"); 61 PassFmt<1>("%s%%"); 62 PassFmt<0>(" 1$s"); 63 PassFmt<1>("%1$s"); 64 PassFmt<1>("%1$s%1$s"); 65 PassFmt<2>("%2$s"); 66 PassFmt<2>("%2$s 4$s %2$s"); 67 PassFmt<129>("%129$s 999$s %2$s"); 68 PassFmt<1>("%02d"); 69 PassFmt<1>("%+2s"); 70 PassFmt<1>("%.6i"); 71 PassFmt<1>("%5.2f"); 72 PassFmt<1>("%5.f"); 73 PassFmt<1>("%.f"); 74 PassFmt<1>("%#x"); 75 PassFmt<1>("%1$5i"); 76 PassFmt<1>("%1$-5i"); 77 PassFmt<1>("%1$.5i"); 78 // tinyformat accepts almost any "type" spec, even '%', or '_', or '\n'. 79 PassFmt<1>("%123%"); 80 PassFmt<1>("%123%s"); 81 PassFmt<1>("%_"); 82 PassFmt<1>("%\n"); 83 84 PassFmt<2>("%*c"); 85 PassFmt<2>("%+*c"); 86 PassFmt<2>("%.*f"); 87 PassFmt<3>("%*.*f"); 88 PassFmt<3>("%2$*3$d"); 89 PassFmt<3>("%2$*3$.9d"); 90 PassFmt<3>("%2$.*3$d"); 91 PassFmt<3>("%2$9.*3$d"); 92 PassFmt<3>("%2$+9.*3$d"); 93 PassFmt<4>("%3$*2$.*4$f"); 94 95 // Make sure multiple flag characters "- 0+" are accepted 96 PassFmt<3>("'%- 0+*.*f'"); 97 PassFmt<3>("'%1$- 0+*3$.*2$f'"); 98 99 auto err_mix{"Format specifiers must be all positional or all non-positional!"}; 100 FailFmtWithError<1>("%s%1$s", err_mix); 101 FailFmtWithError<2>("%2$*d", err_mix); 102 FailFmtWithError<2>("%*2$d", err_mix); 103 FailFmtWithError<2>("%.*3$d", err_mix); 104 FailFmtWithError<2>("%2$.*d", err_mix); 105 106 auto err_num{"Format specifier count must match the argument count!"}; 107 FailFmtWithError<1>("", err_num); 108 FailFmtWithError<0>("%s", err_num); 109 FailFmtWithError<2>("%s", err_num); 110 FailFmtWithError<0>("%1$s", err_num); 111 FailFmtWithError<2>("%1$s", err_num); 112 FailFmtWithError<1>("%*c", err_num); 113 114 auto err_0_pos{"Positional format specifier must have position of at least 1"}; 115 FailFmtWithError<1>("%$s", err_0_pos); 116 FailFmtWithError<1>("%$", err_0_pos); 117 FailFmtWithError<0>("%0$", err_0_pos); 118 FailFmtWithError<0>("%0$s", err_0_pos); 119 FailFmtWithError<2>("%2$*$d", err_0_pos); 120 FailFmtWithError<2>("%2$*0$d", err_0_pos); 121 FailFmtWithError<3>("%3$*2$.*$f", err_0_pos); 122 FailFmtWithError<3>("%3$*2$.*0$f", err_0_pos); 123 124 auto err_term{"Format specifier incorrectly terminated by end of string"}; 125 FailFmtWithError<1>("%", err_term); 126 FailFmtWithError<1>("%9", err_term); 127 FailFmtWithError<1>("%9.", err_term); 128 FailFmtWithError<1>("%9.9", err_term); 129 FailFmtWithError<1>("%*", err_term); 130 FailFmtWithError<1>("%+*", err_term); 131 FailFmtWithError<1>("%.*", err_term); 132 FailFmtWithError<1>("%9.*", err_term); 133 FailFmtWithError<1>("%1$", err_term); 134 FailFmtWithError<1>("%1$9", err_term); 135 FailFmtWithError<2>("%1$*2$", err_term); 136 FailFmtWithError<2>("%1$.*2$", err_term); 137 FailFmtWithError<2>("%1$9.*2$", err_term); 138 139 // Non-parity between tinyformat and ConstevalFormatString. 140 // tinyformat throws but ConstevalFormatString does not. 141 BOOST_CHECK_EXCEPTION(tfm::format(ConstevalFormatString<1>{"%n"}, 0), tfm::format_error, 142 HasReason{"tinyformat: %n conversion spec not supported"}); 143 BOOST_CHECK_EXCEPTION(tfm::format(ConstevalFormatString<2>{"%*s"}, "hi", "hi"), tfm::format_error, 144 HasReason{"tinyformat: Cannot convert from argument type to integer for use as variable width or precision"}); 145 BOOST_CHECK_EXCEPTION(tfm::format(ConstevalFormatString<2>{"%.*s"}, "hi", "hi"), tfm::format_error, 146 HasReason{"tinyformat: Cannot convert from argument type to integer for use as variable width or precision"}); 147 148 // Ensure that tinyformat throws if format string contains wrong number 149 // of specifiers. PassFmt relies on this to verify tinyformat successfully 150 // formats the strings, and will need to be updated if tinyformat is changed 151 // not to throw on failure. 152 BOOST_CHECK_EXCEPTION(TfmFormatZeroes<2>("%s"), tfm::format_error, 153 HasReason{"tinyformat: Not enough conversion specifiers in format string"}); 154 BOOST_CHECK_EXCEPTION(TfmFormatZeroes<1>("%s %s"), tfm::format_error, 155 HasReason{"tinyformat: Too many conversion specifiers in format string"}); 156 } 157 158 BOOST_AUTO_TEST_CASE(ascii_case_insensitive_key_equal_test) 159 { 160 AsciiCaseInsensitiveKeyEqual cmp; 161 BOOST_CHECK(!cmp("A", "B")); 162 BOOST_CHECK(!cmp("A", "b")); 163 BOOST_CHECK(!cmp("a", "B")); 164 BOOST_CHECK(!cmp("B", "A")); 165 BOOST_CHECK(!cmp("B", "a")); 166 BOOST_CHECK(!cmp("b", "A")); 167 BOOST_CHECK(!cmp("A", "AA")); 168 BOOST_CHECK(cmp("A-A", "a-a")); 169 BOOST_CHECK(cmp("A", "A")); 170 BOOST_CHECK(cmp("A", "a")); 171 BOOST_CHECK(cmp("a", "a")); 172 BOOST_CHECK(cmp("B", "b")); 173 BOOST_CHECK(cmp("ab", "aB")); 174 BOOST_CHECK(cmp("Ab", "aB")); 175 BOOST_CHECK(cmp("AB", "ab")); 176 177 // Use a character with value > 127 178 // to ensure we don't trigger implicit-integer-sign-change 179 BOOST_CHECK(!cmp("a", "\xe4")); 180 } 181 182 BOOST_AUTO_TEST_CASE(ascii_case_insensitive_hash_test) 183 { 184 AsciiCaseInsensitiveHash hsh; 185 BOOST_CHECK_NE(hsh("A"), hsh("B")); 186 BOOST_CHECK_NE(hsh("AA"), hsh("A")); 187 BOOST_CHECK_EQUAL(hsh("A"), hsh("a")); 188 BOOST_CHECK_EQUAL(hsh("Ab"), hsh("aB")); 189 BOOST_CHECK_EQUAL(hsh("A\xfe"), hsh("a\xfe")); 190 } 191 192 BOOST_AUTO_TEST_CASE(line_reader_test) 193 { 194 { 195 // Check three lines terminated by \n and \r\n, trimming whitespace 196 const std::vector<std::byte> input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food\n")}; 197 LineReader reader(input, /*max_line_length=*/128); 198 std::optional<std::string> line1{reader.ReadLine()}; 199 BOOST_CHECK_EQUAL(reader.Remaining(), 34); 200 std::optional<std::string> line2{reader.ReadLine()}; 201 BOOST_CHECK_EQUAL(reader.Remaining(), 15); 202 std::optional<std::string> line3{reader.ReadLine()}; 203 std::optional<std::string> line4{reader.ReadLine()}; 204 BOOST_CHECK(line1); 205 BOOST_CHECK(line2); 206 BOOST_CHECK(line3); 207 BOOST_CHECK(!line4); 208 BOOST_CHECK_EQUAL(line1.value(), "once upon a time"); 209 BOOST_CHECK_EQUAL(line2.value(), "there was a dog"); 210 BOOST_CHECK_EQUAL(line3.value(), "who liked food"); 211 } 212 { 213 // Do not exceed max_line_length + 1 while searching for \n 214 // Test with 22-character line + \n + 23-character line + \n 215 const std::vector<std::byte> input{StringToBuffer("once upon a time there\nwas a dog who liked tea\n")}; 216 217 LineReader reader1(input, /*max_line_length=*/22); 218 // First line is exactly the length of max_line_length 219 BOOST_CHECK_EQUAL(reader1.ReadLine(), "once upon a time there"); 220 // Second line is +1 character too long 221 BOOST_CHECK_EXCEPTION(reader1.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"}); 222 223 // Increase max_line_length by 1 224 LineReader reader2(input, /*max_line_length=*/23); 225 // Both lines fit within limit 226 BOOST_CHECK_EQUAL(reader2.ReadLine(), "once upon a time there"); 227 BOOST_CHECK_EQUAL(reader2.ReadLine(), "was a dog who liked tea"); 228 // End of buffer reached 229 BOOST_CHECK(!reader2.ReadLine()); 230 } 231 { 232 // Empty lines are empty 233 const std::vector<std::byte> input{StringToBuffer("\n")}; 234 LineReader reader(input, /*max_line_length=*/1024); 235 BOOST_CHECK_EQUAL(reader.ReadLine(), ""); 236 BOOST_CHECK(!reader.ReadLine()); 237 } 238 { 239 // Empty buffers are null 240 const std::vector<std::byte> input{StringToBuffer("")}; 241 LineReader reader(input, /*max_line_length=*/1024); 242 BOOST_CHECK(!reader.ReadLine()); 243 } 244 { 245 // Even one character is too long, if it's not \n 246 const std::vector<std::byte> input{StringToBuffer("ab\n")}; 247 LineReader reader(input, /*max_line_length=*/1); 248 // First line is +1 character too long 249 BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"}); 250 } 251 { 252 const std::vector<std::byte> input{StringToBuffer("a\nb\n")}; 253 LineReader reader(input, /*max_line_length=*/1); 254 BOOST_CHECK_EQUAL(reader.ReadLine(), "a"); 255 BOOST_CHECK_EQUAL(reader.ReadLine(), "b"); 256 BOOST_CHECK(!reader.ReadLine()); 257 } 258 { 259 // If ReadLine fails, the iterator is reset and we can ReadLength instead 260 const std::vector<std::byte> input{StringToBuffer("a\nbaboon\n")}; 261 LineReader reader(input, /*max_line_length=*/1); 262 BOOST_CHECK_EQUAL(reader.ReadLine(), "a"); 263 // "baboon" is too long 264 BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"}); 265 BOOST_CHECK_EQUAL(reader.ReadLength(1), "b"); 266 BOOST_CHECK_EQUAL(reader.ReadLength(1), "a"); 267 BOOST_CHECK_EQUAL(reader.ReadLength(2), "bo"); 268 // "on" is too long 269 BOOST_CHECK_EXCEPTION(reader.ReadLine(), std::runtime_error, HasReason{"max_line_length exceeded by LineReader"}); 270 BOOST_CHECK_EQUAL(reader.ReadLength(1), "o"); 271 BOOST_CHECK_EQUAL(reader.ReadLine(), "n"); // now the remainder of the buffer fits in one line 272 BOOST_CHECK(!reader.ReadLine()); 273 } 274 { 275 // The end of the buffer (EOB) does not count as end of line \n 276 const std::vector<std::byte> input{StringToBuffer("once upon a time there")}; 277 278 LineReader reader(input, /*max_line_length=*/22); 279 // First line is exactly the length of max_line_length, but that doesn't matter because \n is missing 280 BOOST_CHECK(!reader.ReadLine()); 281 // Data can still be read using ReadLength 282 BOOST_CHECK_EQUAL(reader.ReadLength(22), "once upon a time there"); 283 // End of buffer reached 284 BOOST_CHECK_EQUAL(reader.Remaining(), 0); 285 } 286 { 287 // Read specific number of bytes regardless of max_line_length or \n unless buffer is too short 288 const std::vector<std::byte> input{StringToBuffer("once upon a time\n there was a dog \r\nwho liked food")}; 289 LineReader reader(input, /*max_line_length=*/1); 290 BOOST_CHECK_EQUAL(reader.ReadLength(0), ""); 291 BOOST_CHECK_EQUAL(reader.ReadLength(3), "onc"); 292 BOOST_CHECK_EQUAL(reader.ReadLength(8), "e upon a"); 293 BOOST_CHECK_EQUAL(reader.ReadLength(8), " time\n t"); 294 BOOST_CHECK_EXCEPTION(reader.ReadLength(128), std::runtime_error, HasReason{"Not enough data in buffer"}); 295 // After the error the iterator is reset so we can try again 296 BOOST_CHECK_EQUAL(reader.ReadLength(31), "here was a dog \r\nwho liked food"); 297 // End of buffer reached 298 BOOST_CHECK_EQUAL(reader.Remaining(), 0); 299 } 300 } 301 302 BOOST_AUTO_TEST_SUITE_END()