strencodings.cpp
1 // Copyright (c) 2009-2010 Satoshi Nakamoto 2 // Copyright (c) 2009-present The Bitcoin Core developers 3 // Distributed under the MIT software license, see the accompanying 4 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 5 6 #include <util/strencodings.h> 7 8 #include <crypto/hex_base.h> 9 #include <span.h> 10 #include <util/overflow.h> 11 12 #include <array> 13 #include <cassert> 14 #include <cstring> 15 #include <limits> 16 #include <optional> 17 #include <ostream> 18 #include <string> 19 #include <vector> 20 21 static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; 22 23 static const std::string SAFE_CHARS[] = 24 { 25 CHARS_ALPHA_NUM + " .,;-_/:?@()", // SAFE_CHARS_DEFAULT 26 CHARS_ALPHA_NUM + " .,;-_?@", // SAFE_CHARS_UA_COMMENT 27 CHARS_ALPHA_NUM + ".-_", // SAFE_CHARS_FILENAME 28 CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI 29 }; 30 31 std::string SanitizeString(std::string_view str, int rule) 32 { 33 std::string result; 34 for (char c : str) { 35 if (SAFE_CHARS[rule].find(c) != std::string::npos) { 36 result.push_back(c); 37 } 38 } 39 return result; 40 } 41 42 bool IsHex(std::string_view str) 43 { 44 for (char c : str) { 45 if (HexDigit(c) < 0) return false; 46 } 47 return (str.size() > 0) && (str.size()%2 == 0); 48 } 49 50 template <typename Byte> 51 std::optional<std::vector<Byte>> TryParseHex(std::string_view str) 52 { 53 std::vector<Byte> vch; 54 vch.reserve(str.size() / 2); // two hex characters form a single byte 55 56 auto it = str.begin(); 57 while (it != str.end()) { 58 if (IsSpace(*it)) { 59 ++it; 60 continue; 61 } 62 auto c1 = HexDigit(*(it++)); 63 if (it == str.end()) return std::nullopt; 64 auto c2 = HexDigit(*(it++)); 65 if (c1 < 0 || c2 < 0) return std::nullopt; 66 vch.push_back(Byte(c1 << 4) | Byte(c2)); 67 } 68 return vch; 69 } 70 template std::optional<std::vector<std::byte>> TryParseHex(std::string_view); 71 template std::optional<std::vector<uint8_t>> TryParseHex(std::string_view); 72 73 bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut) 74 { 75 bool valid = false; 76 size_t colon = in.find_last_of(':'); 77 // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator 78 bool fHaveColon = colon != in.npos; 79 bool fBracketed = fHaveColon && (in[0] == '[' && in[colon - 1] == ']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe 80 bool fMultiColon{fHaveColon && colon != 0 && (in.find_last_of(':', colon - 1) != in.npos)}; 81 if (fHaveColon && (colon == 0 || fBracketed || !fMultiColon)) { 82 if (const auto n{ToIntegral<uint16_t>(in.substr(colon + 1))}) { 83 in = in.substr(0, colon); 84 portOut = *n; 85 valid = (portOut != 0); 86 } 87 } else { 88 valid = true; 89 } 90 if (in.size() > 0 && in[0] == '[' && in[in.size() - 1] == ']') { 91 hostOut = in.substr(1, in.size() - 2); 92 } else { 93 hostOut = in; 94 } 95 96 return valid; 97 } 98 99 std::string EncodeBase64(std::span<const unsigned char> input) 100 { 101 static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 102 103 std::string str; 104 str.reserve(CeilDiv(input.size(), 3u) * 4); 105 ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(), input.end()); 106 while (str.size() % 4) str += '='; 107 return str; 108 } 109 110 std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str) 111 { 112 static const int8_t decode64_table[256]{ 113 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 114 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 115 -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, 116 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 117 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 118 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 119 49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 120 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 121 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 122 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 123 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 124 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 125 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 126 }; 127 128 if (str.size() % 4 != 0) return {}; 129 /* One or two = characters at the end are permitted. */ 130 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); 131 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); 132 133 std::vector<unsigned char> ret; 134 ret.reserve((str.size() * 3) / 4); 135 bool valid = ConvertBits<6, 8, false>( 136 [&](unsigned char c) { ret.push_back(c); }, 137 str.begin(), str.end(), 138 [](char c) { return decode64_table[uint8_t(c)]; } 139 ); 140 if (!valid) return {}; 141 142 return ret; 143 } 144 145 std::string EncodeBase32(std::span<const unsigned char> input, bool pad) 146 { 147 static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567"; 148 149 std::string str; 150 str.reserve(CeilDiv(input.size(), 5u) * 8); 151 ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(), input.end()); 152 if (pad) { 153 while (str.size() % 8) { 154 str += '='; 155 } 156 } 157 return str; 158 } 159 160 std::string EncodeBase32(std::string_view str, bool pad) 161 { 162 return EncodeBase32(MakeUCharSpan(str), pad); 163 } 164 165 std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str) 166 { 167 static const int8_t decode32_table[256]{ 168 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 169 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 170 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, 171 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 172 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 0, 1, 2, 173 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 174 23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 175 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 176 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 177 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 178 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 179 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 180 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 181 }; 182 183 if (str.size() % 8 != 0) return {}; 184 /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */ 185 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); 186 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2); 187 if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); 188 if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2); 189 190 std::vector<unsigned char> ret; 191 ret.reserve((str.size() * 5) / 8); 192 bool valid = ConvertBits<5, 8, false>( 193 [&](unsigned char c) { ret.push_back(c); }, 194 str.begin(), str.end(), 195 [](char c) { return decode32_table[uint8_t(c)]; } 196 ); 197 198 if (!valid) return {}; 199 200 return ret; 201 } 202 203 std::string FormatParagraph(std::string_view in, size_t width, size_t indent) 204 { 205 assert(width >= indent); 206 std::stringstream out; 207 size_t ptr = 0; 208 size_t indented = 0; 209 while (ptr < in.size()) 210 { 211 size_t lineend = in.find_first_of('\n', ptr); 212 if (lineend == std::string::npos) { 213 lineend = in.size(); 214 } 215 const size_t linelen = lineend - ptr; 216 const size_t rem_width = width - indented; 217 if (linelen <= rem_width) { 218 out << in.substr(ptr, linelen + 1); 219 ptr = lineend + 1; 220 indented = 0; 221 } else { 222 size_t finalspace = in.find_last_of(" \n", ptr + rem_width); 223 if (finalspace == std::string::npos || finalspace < ptr) { 224 // No place to break; just include the entire word and move on 225 finalspace = in.find_first_of("\n ", ptr); 226 if (finalspace == std::string::npos) { 227 // End of the string, just add it and break 228 out << in.substr(ptr); 229 break; 230 } 231 } 232 out << in.substr(ptr, finalspace - ptr) << "\n"; 233 if (in[finalspace] == '\n') { 234 indented = 0; 235 } else if (indent) { 236 out << std::string(indent, ' '); 237 indented = indent; 238 } 239 ptr = finalspace + 1; 240 } 241 } 242 return out.str(); 243 } 244 245 /** Upper bound for mantissa. 246 * 10^18-1 is the largest arbitrary decimal that will fit in a signed 64-bit integer. 247 * Larger integers cannot consist of arbitrary combinations of 0-9: 248 * 249 * 999999999999999999 1^18-1 250 * 9223372036854775807 (1<<63)-1 (max int64_t) 251 * 9999999999999999999 1^19-1 (would overflow) 252 */ 253 static const int64_t UPPER_BOUND = 1000000000000000000LL - 1LL; 254 255 /** Helper function for ParseFixedPoint */ 256 static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros) 257 { 258 if(ch == '0') 259 ++mantissa_tzeros; 260 else { 261 for (int i=0; i<=mantissa_tzeros; ++i) { 262 if (mantissa > (UPPER_BOUND / 10LL)) 263 return false; /* overflow */ 264 mantissa *= 10; 265 } 266 mantissa += ch - '0'; 267 mantissa_tzeros = 0; 268 } 269 return true; 270 } 271 272 bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out) 273 { 274 int64_t mantissa = 0; 275 int64_t exponent = 0; 276 int mantissa_tzeros = 0; 277 bool mantissa_sign = false; 278 bool exponent_sign = false; 279 int ptr = 0; 280 int end = val.size(); 281 int point_ofs = 0; 282 283 if (ptr < end && val[ptr] == '-') { 284 mantissa_sign = true; 285 ++ptr; 286 } 287 if (ptr < end) 288 { 289 if (val[ptr] == '0') { 290 /* pass single 0 */ 291 ++ptr; 292 } else if (val[ptr] >= '1' && val[ptr] <= '9') { 293 while (ptr < end && IsDigit(val[ptr])) { 294 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros)) 295 return false; /* overflow */ 296 ++ptr; 297 } 298 } else return false; /* missing expected digit */ 299 } else return false; /* empty string or loose '-' */ 300 if (ptr < end && val[ptr] == '.') 301 { 302 ++ptr; 303 if (ptr < end && IsDigit(val[ptr])) 304 { 305 while (ptr < end && IsDigit(val[ptr])) { 306 if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros)) 307 return false; /* overflow */ 308 ++ptr; 309 ++point_ofs; 310 } 311 } else return false; /* missing expected digit */ 312 } 313 if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E')) 314 { 315 ++ptr; 316 if (ptr < end && val[ptr] == '+') 317 ++ptr; 318 else if (ptr < end && val[ptr] == '-') { 319 exponent_sign = true; 320 ++ptr; 321 } 322 if (ptr < end && IsDigit(val[ptr])) { 323 while (ptr < end && IsDigit(val[ptr])) { 324 if (exponent > (UPPER_BOUND / 10LL)) 325 return false; /* overflow */ 326 exponent = exponent * 10 + val[ptr] - '0'; 327 ++ptr; 328 } 329 } else return false; /* missing expected digit */ 330 } 331 if (ptr != end) 332 return false; /* trailing garbage */ 333 334 /* finalize exponent */ 335 if (exponent_sign) 336 exponent = -exponent; 337 exponent = exponent - point_ofs + mantissa_tzeros; 338 339 /* finalize mantissa */ 340 if (mantissa_sign) 341 mantissa = -mantissa; 342 343 /* convert to one 64-bit fixed-point value */ 344 exponent += decimals; 345 if (exponent < 0) 346 return false; /* cannot represent values smaller than 10^-decimals */ 347 if (exponent >= 18) 348 return false; /* cannot represent values larger than or equal to 10^(18-decimals) */ 349 350 for (int i=0; i < exponent; ++i) { 351 if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL)) 352 return false; /* overflow */ 353 mantissa *= 10; 354 } 355 if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND) 356 return false; /* overflow */ 357 358 if (amount_out) 359 *amount_out = mantissa; 360 361 return true; 362 } 363 364 std::string ToLower(std::string_view str) 365 { 366 std::string r; 367 r.reserve(str.size()); 368 for (auto ch : str) r += ToLower(ch); 369 return r; 370 } 371 372 std::string ToUpper(std::string_view str) 373 { 374 std::string r; 375 r.reserve(str.size()); 376 for (auto ch : str) r += ToUpper(ch); 377 return r; 378 } 379 380 std::string Capitalize(std::string str) 381 { 382 if (str.empty()) return str; 383 str[0] = ToUpper(str.front()); 384 return str; 385 } 386 387 std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier) 388 { 389 if (str.empty()) { 390 return std::nullopt; 391 } 392 auto multiplier = default_multiplier; 393 char unit = str.back(); 394 switch (unit) { 395 case 'k': 396 multiplier = ByteUnit::k; 397 break; 398 case 'K': 399 multiplier = ByteUnit::K; 400 break; 401 case 'm': 402 multiplier = ByteUnit::m; 403 break; 404 case 'M': 405 multiplier = ByteUnit::M; 406 break; 407 case 'g': 408 multiplier = ByteUnit::g; 409 break; 410 case 'G': 411 multiplier = ByteUnit::G; 412 break; 413 case 't': 414 multiplier = ByteUnit::t; 415 break; 416 case 'T': 417 multiplier = ByteUnit::T; 418 break; 419 default: 420 unit = 0; 421 break; 422 } 423 424 uint64_t unit_amount = static_cast<uint64_t>(multiplier); 425 auto parsed_num = ToIntegral<uint64_t>(unit ? str.substr(0, str.size() - 1) : str); 426 if (!parsed_num || parsed_num > std::numeric_limits<uint64_t>::max() / unit_amount) { // check overflow 427 return std::nullopt; 428 } 429 return *parsed_num * unit_amount; 430 }