strencodings.cpp
1 // Copyright (c) 2009-2010 Satoshi Nakamoto 2 // Copyright (c) 2009-present The Bitcoin Core developers 3 // Distributed under the MIT software license, see the accompanying 4 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 5 6 #include <util/strencodings.h> 7 8 #include <crypto/hex_base.h> 9 #include <span.h> 10 #include <util/check.h> 11 #include <util/overflow.h> 12 13 #include <compare> 14 #include <limits> 15 #include <optional> 16 #include <sstream> 17 #include <string> 18 #include <vector> 19 20 static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; 21 22 static const std::string SAFE_CHARS[] = 23 { 24 CHARS_ALPHA_NUM + " .,;-_/:?@()", // SAFE_CHARS_DEFAULT 25 CHARS_ALPHA_NUM + " .,;-_?@", // SAFE_CHARS_UA_COMMENT 26 CHARS_ALPHA_NUM + ".-_", // SAFE_CHARS_FILENAME 27 CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI 28 }; 29 30 std::string SanitizeString(std::string_view str, int rule) 31 { 32 std::string result; 33 for (char c : str) { 34 if (SAFE_CHARS[rule].find(c) != std::string::npos) { 35 result.push_back(c); 36 } 37 } 38 return result; 39 } 40 41 bool IsHex(std::string_view str) 42 { 43 for (char c : str) { 44 if (HexDigit(c) < 0) return false; 45 } 46 return (str.size() > 0) && (str.size()%2 == 0); 47 } 48 49 template <typename Byte> 50 std::optional<std::vector<Byte>> TryParseHex(std::string_view str) 51 { 52 std::vector<Byte> vch; 53 vch.reserve(str.size() / 2); // two hex characters form a single byte 54 55 auto it = str.begin(); 56 while (it != str.end()) { 57 if (IsSpace(*it)) { 58 ++it; 59 continue; 60 } 61 auto c1 = HexDigit(*(it++)); 62 if (it == str.end()) return std::nullopt; 63 auto c2 = HexDigit(*(it++)); 64 if (c1 < 0 || c2 < 0) return std::nullopt; 65 vch.push_back(Byte(c1 << 4) | Byte(c2)); 66 } 67 return vch; 68 } 69 template std::optional<std::vector<std::byte>> TryParseHex(std::string_view); 70 template std::optional<std::vector<uint8_t>> TryParseHex(std::string_view); 71 72 bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut) 73 { 74 bool valid = false; 75 size_t colon = in.find_last_of(':'); 76 // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator 77 bool fHaveColon = colon != in.npos; 78 bool fBracketed = fHaveColon && (in[0] == '[' && in[colon - 1] == ']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe 79 bool fMultiColon{fHaveColon && colon != 0 && (in.find_last_of(':', colon - 1) != in.npos)}; 80 if (fHaveColon && (colon == 0 || fBracketed || !fMultiColon)) { 81 if (const auto n{ToIntegral<uint16_t>(in.substr(colon + 1))}) { 82 in = in.substr(0, colon); 83 portOut = *n; 84 valid = (portOut != 0); 85 } 86 } else { 87 valid = true; 88 } 89 if (in.size() > 0 && in[0] == '[' && in[in.size() - 1] == ']') { 90 hostOut = in.substr(1, in.size() - 2); 91 } else { 92 hostOut = in; 93 } 94 95 return valid; 96 } 97 98 std::string EncodeBase64(std::span<const unsigned char> input) 99 { 100 static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 101 102 std::string str; 103 str.reserve(CeilDiv(input.size(), 3u) * 4); 104 ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(), input.end()); 105 while (str.size() % 4) str += '='; 106 return str; 107 } 108 109 std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str) 110 { 111 static const int8_t decode64_table[256]{ 112 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 113 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 114 -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, 115 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 116 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 117 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 118 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 119 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 120 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 121 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 122 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 123 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 124 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 125 }; 126 127 if (str.size() % 4 != 0) return {}; 128 /* One or two = characters at the end are permitted. */ 129 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); 130 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); 131 132 std::vector<unsigned char> ret; 133 ret.reserve((str.size() * 3) / 4); 134 bool valid = ConvertBits<6, 8, false>( 135 [&](unsigned char c) { ret.push_back(c); }, 136 str.begin(), str.end(), 137 [](char c) { return decode64_table[uint8_t(c)]; } 138 ); 139 if (!valid) return {}; 140 141 return ret; 142 } 143 144 std::string EncodeBase32(std::span<const unsigned char> input, bool pad) 145 { 146 static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567"; 147 148 std::string str; 149 str.reserve(CeilDiv(input.size(), 5u) * 8); 150 ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(), input.end()); 151 if (pad) { 152 while (str.size() % 8) { 153 str += '='; 154 } 155 } 156 return str; 157 } 158 159 std::string EncodeBase32(std::string_view str, bool pad) 160 { 161 return EncodeBase32(MakeUCharSpan(str), pad); 162 } 163 164 std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str) 165 { 166 static const int8_t decode32_table[256]{ 167 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 168 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 169 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, 170 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 171 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2, 172 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 173 23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 174 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 175 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 176 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 177 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 178 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 179 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 180 }; 181 182 if (str.size() % 8 != 0) return {}; 183 /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */ 184 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); 185 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2); 186 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); 187 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2); 188 189 std::vector<unsigned char> ret; 190 ret.reserve((str.size() * 5) / 8); 191 bool valid = ConvertBits<5, 8, false>( 192 [&](unsigned char c) { ret.push_back(c); }, 193 str.begin(), str.end(), 194 [](char c) { return decode32_table[uint8_t(c)]; } 195 ); 196 197 if (!valid) return {}; 198 199 return ret; 200 } 201 202 std::string FormatParagraph(std::string_view in, size_t width, size_t indent) 203 { 204 assert(width >= indent); 205 std::stringstream out; 206 size_t ptr = 0; 207 size_t indented = 0; 208 while (ptr < in.size()) 209 { 210 size_t lineend = in.find_first_of('\n', ptr); 211 if (lineend == std::string::npos) { 212 lineend = in.size(); 213 } 214 const size_t linelen = lineend - ptr; 215 const size_t rem_width = width - indented; 216 if (linelen <= rem_width) { 217 out << in.substr(ptr, linelen + 1); 218 ptr = lineend + 1; 219 indented = 0; 220 } else { 221 size_t finalspace = in.find_last_of(" \n", ptr + rem_width); 222 if (finalspace == std::string::npos || finalspace < ptr) { 223 // No place to break; just include the entire word and move on 224 finalspace = in.find_first_of("\n ", ptr); 225 if (finalspace == std::string::npos) { 226 // End of the string, just add it and break 227 out << in.substr(ptr); 228 break; 229 } 230 } 231 out << in.substr(ptr, finalspace - ptr) << "\n"; 232 if (in[finalspace] == '\n') { 233 indented = 0; 234 } else if (indent) { 235 out << std::string(indent, ' '); 236 indented = indent; 237 } 238 ptr = finalspace + 1; 239 } 240 } 241 return out.str(); 242 } 243 244 /** Upper bound for mantissa. 245 * 10^18-1 is the largest arbitrary decimal that will fit in a signed 64-bit integer. 246 * Larger integers cannot consist of arbitrary combinations of 0-9: 247 * 248 * 999999999999999999 1^18-1 249 * 9223372036854775807 (1<<63)-1 (max int64_t) 250 * 9999999999999999999 1^19-1 (would overflow) 251 */ 252 static const int64_t UPPER_BOUND = 1000000000000000000LL - 1LL; 253 254 /** Helper function for ParseFixedPoint */ 255 static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros) 256 { 257 if(ch == '0') 258 ++mantissa_tzeros; 259 else { 260 for (int i=0; i<=mantissa_tzeros; ++i) { 261 if (mantissa > (UPPER_BOUND / 10LL)) 262 return false; /* overflow */ 263 mantissa *= 10; 264 } 265 mantissa += ch - '0'; 266 mantissa_tzeros = 0; 267 } 268 return true; 269 } 270 271 bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out) 272 { 273 int64_t mantissa = 0; 274 int64_t exponent = 0; 275 int mantissa_tzeros = 0; 276 bool mantissa_sign = false; 277 bool exponent_sign = false; 278 int ptr = 0; 279 int end = val.size(); 280 int point_ofs = 0; 281 282 if (ptr < end && val[ptr] == '-') { 283 mantissa_sign = true; 284 ++ptr; 285 } 286 if (ptr < end) 287 { 288 if (val[ptr] == '0') { 289 /* pass single 0 */ 290 ++ptr; 291 } else if (val[ptr] >= '1' && val[ptr] <= '9') { 292 while (ptr < end && IsDigit(val[ptr])) { 293 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros)) 294 return false; /* overflow */ 295 ++ptr; 296 } 297 } else return false; /* missing expected digit */ 298 } else return false; /* empty string or loose '-' */ 299 if (ptr < end && val[ptr] == '.') 300 { 301 ++ptr; 302 if (ptr < end && IsDigit(val[ptr])) 303 { 304 while (ptr < end && IsDigit(val[ptr])) { 305 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros)) 306 return false; /* overflow */ 307 ++ptr; 308 ++point_ofs; 309 } 310 } else return false; /* missing expected digit */ 311 } 312 if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E')) 313 { 314 ++ptr; 315 if (ptr < end && val[ptr] == '+') 316 ++ptr; 317 else if (ptr < end && val[ptr] == '-') { 318 exponent_sign = true; 319 ++ptr; 320 } 321 if (ptr < end && IsDigit(val[ptr])) { 322 while (ptr < end && IsDigit(val[ptr])) { 323 if (exponent > (UPPER_BOUND / 10LL)) 324 return false; /* overflow */ 325 exponent = exponent * 10 + val[ptr] - '0'; 326 ++ptr; 327 } 328 } else return false; /* missing expected digit */ 329 } 330 if (ptr != end) 331 return false; /* trailing garbage */ 332 333 /* finalize exponent */ 334 if (exponent_sign) 335 exponent = -exponent; 336 exponent = exponent - point_ofs + mantissa_tzeros; 337 338 /* finalize mantissa */ 339 if (mantissa_sign) 340 mantissa = -mantissa; 341 342 /* convert to one 64-bit fixed-point value */ 343 exponent += decimals; 344 if (exponent < 0) 345 return false; /* cannot represent values smaller than 10^-decimals */ 346 if (exponent >= 18) 347 return false; /* cannot represent values larger than or equal to 10^(18-decimals) */ 348 349 for (int i=0; i < exponent; ++i) { 350 if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL)) 351 return false; /* overflow */ 352 mantissa *= 10; 353 } 354 if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND) 355 return false; /* overflow */ 356 357 if (amount_out) 358 *amount_out = mantissa; 359 360 return true; 361 } 362 363 std::string ToLower(std::string_view str) 364 { 365 std::string r; 366 r.reserve(str.size()); 367 for (auto ch : str) r += ToLower(ch); 368 return r; 369 } 370 371 std::string ToUpper(std::string_view str) 372 { 373 std::string r; 374 r.reserve(str.size()); 375 for (auto ch : str) r += ToUpper(ch); 376 return r; 377 } 378 379 std::string Capitalize(std::string str) 380 { 381 if (str.empty()) return str; 382 str[0] = ToUpper(str.front()); 383 return str; 384 } 385 386 std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier) 387 { 388 if (str.empty()) { 389 return std::nullopt; 390 } 391 auto multiplier = default_multiplier; 392 char unit = str.back(); 393 switch (unit) { 394 case 'k': 395 multiplier = ByteUnit::k; 396 break; 397 case 'K': 398 multiplier = ByteUnit::K; 399 break; 400 case 'm': 401 multiplier = ByteUnit::m; 402 break; 403 case 'M': 404 multiplier = ByteUnit::M; 405 break; 406 case 'g': 407 multiplier = ByteUnit::g; 408 break; 409 case 'G': 410 multiplier = ByteUnit::G; 411 break; 412 case 't': 413 multiplier = ByteUnit::t; 414 break; 415 case 'T': 416 multiplier = ByteUnit::T; 417 break; 418 default: 419 unit = 0; 420 break; 421 } 422 423 uint64_t unit_amount = static_cast<uint64_t>(multiplier); 424 auto parsed_num = ToIntegral<uint64_t>(unit ? str.substr(0, str.size() - 1) : str); 425 if (!parsed_num || parsed_num > std::numeric_limits<uint64_t>::max() / unit_amount) { // check overflow 426 return std::nullopt; 427 } 428 return *parsed_num * unit_amount; 429 } 430 431 bool CaseInsensitiveEqual(std::string_view s1, std::string_view s2) 432 { 433 if (s1.size() != s2.size()) return false; 434 for (size_t i = 0; i < s1.size(); ++i) { 435 char c1 = s1[i]; 436 if (c1 >= 'A' && c1 <= 'Z') c1 -= ('A' - 'a'); 437 char c2 = s2[i]; 438 if (c2 >= 'A' && c2 <= 'Z') c2 -= ('A' - 'a'); 439 if (c1 != c2) return false; 440 } 441 return true; 442 }