strencodings.cpp
1 // Copyright (c) 2009-2010 Satoshi Nakamoto 2 // Copyright (c) 2009-2022 The Bitcoin Core developers 3 // Distributed under the MIT software license, see the accompanying 4 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 5 6 #include <span.h> 7 #include <util/strencodings.h> 8 9 #include <array> 10 #include <cassert> 11 #include <cstring> 12 #include <limits> 13 #include <optional> 14 #include <ostream> 15 #include <string> 16 #include <vector> 17 18 static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; 19 20 static const std::string SAFE_CHARS[] = 21 { 22 CHARS_ALPHA_NUM + " .,;-_/:?@()", // SAFE_CHARS_DEFAULT 23 CHARS_ALPHA_NUM + " .,;-_?@", // SAFE_CHARS_UA_COMMENT 24 CHARS_ALPHA_NUM + ".-_", // SAFE_CHARS_FILENAME 25 CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI 26 }; 27 28 std::string SanitizeString(std::string_view str, int rule) 29 { 30 std::string result; 31 for (char c : str) { 32 if (SAFE_CHARS[rule].find(c) != std::string::npos) { 33 result.push_back(c); 34 } 35 } 36 return result; 37 } 38 39 const signed char p_util_hexdigit[256] = 40 { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 43 0,1,2,3,4,5,6,7,8,9,-1,-1,-1,-1,-1,-1, 44 -1,0xa,0xb,0xc,0xd,0xe,0xf,-1,-1,-1,-1,-1,-1,-1,-1,-1, 45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 46 -1,0xa,0xb,0xc,0xd,0xe,0xf,-1,-1,-1,-1,-1,-1,-1,-1,-1, 47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 50 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 51 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 52 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 53 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 54 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 55 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, }; 56 57 signed char HexDigit(char c) 58 { 59 return p_util_hexdigit[(unsigned char)c]; 60 } 61 62 bool IsHex(std::string_view str) 63 { 64 for (char c : str) { 65 if (HexDigit(c) < 0) return false; 66 } 67 return (str.size() > 0) && (str.size()%2 == 0); 68 } 69 70 bool IsHexNumber(std::string_view str) 71 { 72 if (str.substr(0, 2) == "0x") str.remove_prefix(2); 73 for (char c : str) { 74 if (HexDigit(c) < 0) return false; 75 } 76 // Return false for empty string or "0x". 77 return str.size() > 0; 78 } 79 80 template <typename Byte> 81 std::optional<std::vector<Byte>> TryParseHex(std::string_view str) 82 { 83 std::vector<Byte> vch; 84 vch.reserve(str.size() / 2); // two hex characters form a single byte 85 86 auto it = str.begin(); 87 while (it != str.end()) { 88 if (IsSpace(*it)) { 89 ++it; 90 continue; 91 } 92 auto c1 = HexDigit(*(it++)); 93 if (it == str.end()) return std::nullopt; 94 auto c2 = HexDigit(*(it++)); 95 if (c1 < 0 || c2 < 0) return std::nullopt; 96 vch.push_back(Byte(c1 << 4) | Byte(c2)); 97 } 98 return vch; 99 } 100 template std::optional<std::vector<std::byte>> TryParseHex(std::string_view); 101 template std::optional<std::vector<uint8_t>> TryParseHex(std::string_view); 102 103 bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut) 104 { 105 bool valid = false; 106 size_t colon = in.find_last_of(':'); 107 // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator 108 bool fHaveColon = colon != in.npos; 109 bool fBracketed = fHaveColon && (in[0] == '[' && in[colon - 1] == ']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe 110 bool fMultiColon{fHaveColon && colon != 0 && (in.find_last_of(':', colon - 1) != in.npos)}; 111 if (fHaveColon && (colon == 0 || fBracketed || !fMultiColon)) { 112 uint16_t n; 113 if (ParseUInt16(in.substr(colon + 1), &n)) { 114 in = in.substr(0, colon); 115 portOut = n; 116 valid = (portOut != 0); 117 } 118 } else { 119 valid = true; 120 } 121 if (in.size() > 0 && in[0] == '[' && in[in.size() - 1] == ']') { 122 hostOut = in.substr(1, in.size() - 2); 123 } else { 124 hostOut = in; 125 } 126 127 return valid; 128 } 129 130 std::string EncodeBase64(Span<const unsigned char> input) 131 { 132 static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 133 134 std::string str; 135 str.reserve(((input.size() + 2) / 3) * 4); 136 ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(), input.end()); 137 while (str.size() % 4) str += '='; 138 return str; 139 } 140 141 std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str) 142 { 143 static const int8_t decode64_table[256]{ 144 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 145 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 146 -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, 147 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 148 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 149 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 150 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 151 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 152 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 153 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 154 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 155 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 156 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 157 }; 158 159 if (str.size() % 4 != 0) return {}; 160 /* One or two = characters at the end are permitted. */ 161 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); 162 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); 163 164 std::vector<unsigned char> ret; 165 ret.reserve((str.size() * 3) / 4); 166 bool valid = ConvertBits<6, 8, false>( 167 [&](unsigned char c) { ret.push_back(c); }, 168 str.begin(), str.end(), 169 [](char c) { return decode64_table[uint8_t(c)]; } 170 ); 171 if (!valid) return {}; 172 173 return ret; 174 } 175 176 std::string EncodeBase32(Span<const unsigned char> input, bool pad) 177 { 178 static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567"; 179 180 std::string str; 181 str.reserve(((input.size() + 4) / 5) * 8); 182 ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(), input.end()); 183 if (pad) { 184 while (str.size() % 8) { 185 str += '='; 186 } 187 } 188 return str; 189 } 190 191 std::string EncodeBase32(std::string_view str, bool pad) 192 { 193 return EncodeBase32(MakeUCharSpan(str), pad); 194 } 195 196 std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str) 197 { 198 static const int8_t decode32_table[256]{ 199 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 200 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 201 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, 202 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 203 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2, 204 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 205 23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 206 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 207 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 208 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 209 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 210 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 211 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 212 }; 213 214 if (str.size() % 8 != 0) return {}; 215 /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */ 216 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); 217 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2); 218 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); 219 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2); 220 221 std::vector<unsigned char> ret; 222 ret.reserve((str.size() * 5) / 8); 223 bool valid = ConvertBits<5, 8, false>( 224 [&](unsigned char c) { ret.push_back(c); }, 225 str.begin(), str.end(), 226 [](char c) { return decode32_table[uint8_t(c)]; } 227 ); 228 229 if (!valid) return {}; 230 231 return ret; 232 } 233 234 namespace { 235 template <typename T> 236 bool ParseIntegral(std::string_view str, T* out) 237 { 238 static_assert(std::is_integral<T>::value); 239 // Replicate the exact behavior of strtol/strtoll/strtoul/strtoull when 240 // handling leading +/- for backwards compatibility. 241 if (str.length() >= 2 && str[0] == '+' && str[1] == '-') { 242 return false; 243 } 244 const std::optional<T> opt_int = ToIntegral<T>((!str.empty() && str[0] == '+') ? str.substr(1) : str); 245 if (!opt_int) { 246 return false; 247 } 248 if (out != nullptr) { 249 *out = *opt_int; 250 } 251 return true; 252 } 253 }; // namespace 254 255 bool ParseInt32(std::string_view str, int32_t* out) 256 { 257 return ParseIntegral<int32_t>(str, out); 258 } 259 260 bool ParseInt64(std::string_view str, int64_t* out) 261 { 262 return ParseIntegral<int64_t>(str, out); 263 } 264 265 bool ParseUInt8(std::string_view str, uint8_t* out) 266 { 267 return ParseIntegral<uint8_t>(str, out); 268 } 269 270 bool ParseUInt16(std::string_view str, uint16_t* out) 271 { 272 return ParseIntegral<uint16_t>(str, out); 273 } 274 275 bool ParseUInt32(std::string_view str, uint32_t* out) 276 { 277 return ParseIntegral<uint32_t>(str, out); 278 } 279 280 bool ParseUInt64(std::string_view str, uint64_t* out) 281 { 282 return ParseIntegral<uint64_t>(str, out); 283 } 284 285 std::string FormatParagraph(std::string_view in, size_t width, size_t indent) 286 { 287 assert(width >= indent); 288 std::stringstream out; 289 size_t ptr = 0; 290 size_t indented = 0; 291 while (ptr < in.size()) 292 { 293 size_t lineend = in.find_first_of('\n', ptr); 294 if (lineend == std::string::npos) { 295 lineend = in.size(); 296 } 297 const size_t linelen = lineend - ptr; 298 const size_t rem_width = width - indented; 299 if (linelen <= rem_width) { 300 out << in.substr(ptr, linelen + 1); 301 ptr = lineend + 1; 302 indented = 0; 303 } else { 304 size_t finalspace = in.find_last_of(" \n", ptr + rem_width); 305 if (finalspace == std::string::npos || finalspace < ptr) { 306 // No place to break; just include the entire word and move on 307 finalspace = in.find_first_of("\n ", ptr); 308 if (finalspace == std::string::npos) { 309 // End of the string, just add it and break 310 out << in.substr(ptr); 311 break; 312 } 313 } 314 out << in.substr(ptr, finalspace - ptr) << "\n"; 315 if (in[finalspace] == '\n') { 316 indented = 0; 317 } else if (indent) { 318 out << std::string(indent, ' '); 319 indented = indent; 320 } 321 ptr = finalspace + 1; 322 } 323 } 324 return out.str(); 325 } 326 327 /** Upper bound for mantissa. 328 * 10^18-1 is the largest arbitrary decimal that will fit in a signed 64-bit integer. 329 * Larger integers cannot consist of arbitrary combinations of 0-9: 330 * 331 * 999999999999999999 1^18-1 332 * 9223372036854775807 (1<<63)-1 (max int64_t) 333 * 9999999999999999999 1^19-1 (would overflow) 334 */ 335 static const int64_t UPPER_BOUND = 1000000000000000000LL - 1LL; 336 337 /** Helper function for ParseFixedPoint */ 338 static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros) 339 { 340 if(ch == '0') 341 ++mantissa_tzeros; 342 else { 343 for (int i=0; i<=mantissa_tzeros; ++i) { 344 if (mantissa > (UPPER_BOUND / 10LL)) 345 return false; /* overflow */ 346 mantissa *= 10; 347 } 348 mantissa += ch - '0'; 349 mantissa_tzeros = 0; 350 } 351 return true; 352 } 353 354 bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out) 355 { 356 int64_t mantissa = 0; 357 int64_t exponent = 0; 358 int mantissa_tzeros = 0; 359 bool mantissa_sign = false; 360 bool exponent_sign = false; 361 int ptr = 0; 362 int end = val.size(); 363 int point_ofs = 0; 364 365 if (ptr < end && val[ptr] == '-') { 366 mantissa_sign = true; 367 ++ptr; 368 } 369 if (ptr < end) 370 { 371 if (val[ptr] == '0') { 372 /* pass single 0 */ 373 ++ptr; 374 } else if (val[ptr] >= '1' && val[ptr] <= '9') { 375 while (ptr < end && IsDigit(val[ptr])) { 376 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros)) 377 return false; /* overflow */ 378 ++ptr; 379 } 380 } else return false; /* missing expected digit */ 381 } else return false; /* empty string or loose '-' */ 382 if (ptr < end && val[ptr] == '.') 383 { 384 ++ptr; 385 if (ptr < end && IsDigit(val[ptr])) 386 { 387 while (ptr < end && IsDigit(val[ptr])) { 388 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros)) 389 return false; /* overflow */ 390 ++ptr; 391 ++point_ofs; 392 } 393 } else return false; /* missing expected digit */ 394 } 395 if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E')) 396 { 397 ++ptr; 398 if (ptr < end && val[ptr] == '+') 399 ++ptr; 400 else if (ptr < end && val[ptr] == '-') { 401 exponent_sign = true; 402 ++ptr; 403 } 404 if (ptr < end && IsDigit(val[ptr])) { 405 while (ptr < end && IsDigit(val[ptr])) { 406 if (exponent > (UPPER_BOUND / 10LL)) 407 return false; /* overflow */ 408 exponent = exponent * 10 + val[ptr] - '0'; 409 ++ptr; 410 } 411 } else return false; /* missing expected digit */ 412 } 413 if (ptr != end) 414 return false; /* trailing garbage */ 415 416 /* finalize exponent */ 417 if (exponent_sign) 418 exponent = -exponent; 419 exponent = exponent - point_ofs + mantissa_tzeros; 420 421 /* finalize mantissa */ 422 if (mantissa_sign) 423 mantissa = -mantissa; 424 425 /* convert to one 64-bit fixed-point value */ 426 exponent += decimals; 427 if (exponent < 0) 428 return false; /* cannot represent values smaller than 10^-decimals */ 429 if (exponent >= 18) 430 return false; /* cannot represent values larger than or equal to 10^(18-decimals) */ 431 432 for (int i=0; i < exponent; ++i) { 433 if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL)) 434 return false; /* overflow */ 435 mantissa *= 10; 436 } 437 if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND) 438 return false; /* overflow */ 439 440 if (amount_out) 441 *amount_out = mantissa; 442 443 return true; 444 } 445 446 std::string ToLower(std::string_view str) 447 { 448 std::string r; 449 r.reserve(str.size()); 450 for (auto ch : str) r += ToLower(ch); 451 return r; 452 } 453 454 std::string ToUpper(std::string_view str) 455 { 456 std::string r; 457 r.reserve(str.size()); 458 for (auto ch : str) r += ToUpper(ch); 459 return r; 460 } 461 462 std::string Capitalize(std::string str) 463 { 464 if (str.empty()) return str; 465 str[0] = ToUpper(str.front()); 466 return str; 467 } 468 469 namespace { 470 471 using ByteAsHex = std::array<char, 2>; 472 473 constexpr std::array<ByteAsHex, 256> CreateByteToHexMap() 474 { 475 constexpr char hexmap[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; 476 477 std::array<ByteAsHex, 256> byte_to_hex{}; 478 for (size_t i = 0; i < byte_to_hex.size(); ++i) { 479 byte_to_hex[i][0] = hexmap[i >> 4]; 480 byte_to_hex[i][1] = hexmap[i & 15]; 481 } 482 return byte_to_hex; 483 } 484 485 } // namespace 486 487 std::string HexStr(const Span<const uint8_t> s) 488 { 489 std::string rv(s.size() * 2, '\0'); 490 static constexpr auto byte_to_hex = CreateByteToHexMap(); 491 static_assert(sizeof(byte_to_hex) == 512); 492 493 char* it = rv.data(); 494 for (uint8_t v : s) { 495 std::memcpy(it, byte_to_hex[v].data(), 2); 496 it += 2; 497 } 498 499 assert(it == rv.data() + rv.size()); 500 return rv; 501 } 502 503 std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier) 504 { 505 if (str.empty()) { 506 return std::nullopt; 507 } 508 auto multiplier = default_multiplier; 509 char unit = str.back(); 510 switch (unit) { 511 case 'k': 512 multiplier = ByteUnit::k; 513 break; 514 case 'K': 515 multiplier = ByteUnit::K; 516 break; 517 case 'm': 518 multiplier = ByteUnit::m; 519 break; 520 case 'M': 521 multiplier = ByteUnit::M; 522 break; 523 case 'g': 524 multiplier = ByteUnit::g; 525 break; 526 case 'G': 527 multiplier = ByteUnit::G; 528 break; 529 case 't': 530 multiplier = ByteUnit::t; 531 break; 532 case 'T': 533 multiplier = ByteUnit::T; 534 break; 535 default: 536 unit = 0; 537 break; 538 } 539 540 uint64_t unit_amount = static_cast<uint64_t>(multiplier); 541 auto parsed_num = ToIntegral<uint64_t>(unit ? str.substr(0, str.size() - 1) : str); 542 if (!parsed_num || parsed_num > std::numeric_limits<uint64_t>::max() / unit_amount) { // check overflow 543 return std::nullopt; 544 } 545 return *parsed_num * unit_amount; 546 }