/ src / util / strencodings.cpp
strencodings.cpp
  1  // Copyright (c) 2009-2010 Satoshi Nakamoto
  2  // Copyright (c) 2009-present The Bitcoin Core developers
  3  // Distributed under the MIT software license, see the accompanying
  4  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  5  
  6  #include <util/strencodings.h>
  7  
  8  #include <crypto/hex_base.h>
  9  #include <span.h>
 10  #include <util/check.h>
 11  #include <util/overflow.h>
 12  
 13  #include <compare>
 14  #include <limits>
 15  #include <optional>
 16  #include <sstream>
 17  #include <string>
 18  #include <vector>
 19  
 20  static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
 21  
 22  static const std::string SAFE_CHARS[] =
 23  {
 24      CHARS_ALPHA_NUM + " .,;-_/:?@()", // SAFE_CHARS_DEFAULT
 25      CHARS_ALPHA_NUM + " .,;-_?@", // SAFE_CHARS_UA_COMMENT
 26      CHARS_ALPHA_NUM + ".-_", // SAFE_CHARS_FILENAME
 27      CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI
 28  };
 29  
 30  std::string SanitizeString(std::string_view str, int rule)
 31  {
 32      std::string result;
 33      for (char c : str) {
 34          if (SAFE_CHARS[rule].find(c) != std::string::npos) {
 35              result.push_back(c);
 36          }
 37      }
 38      return result;
 39  }
 40  
 41  bool IsHex(std::string_view str)
 42  {
 43      for (char c : str) {
 44          if (HexDigit(c) < 0) return false;
 45      }
 46      return (str.size() > 0) && (str.size()%2 == 0);
 47  }
 48  
 49  template <typename Byte>
 50  std::optional<std::vector<Byte>> TryParseHex(std::string_view str)
 51  {
 52      std::vector<Byte> vch;
 53      vch.reserve(str.size() / 2); // two hex characters form a single byte
 54  
 55      auto it = str.begin();
 56      while (it != str.end()) {
 57          if (IsSpace(*it)) {
 58              ++it;
 59              continue;
 60          }
 61          auto c1 = HexDigit(*(it++));
 62          if (it == str.end()) return std::nullopt;
 63          auto c2 = HexDigit(*(it++));
 64          if (c1 < 0 || c2 < 0) return std::nullopt;
 65          vch.push_back(Byte(c1 << 4) | Byte(c2));
 66      }
 67      return vch;
 68  }
 69  template std::optional<std::vector<std::byte>> TryParseHex(std::string_view);
 70  template std::optional<std::vector<uint8_t>> TryParseHex(std::string_view);
 71  
 72  bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut)
 73  {
 74      bool valid = false;
 75      size_t colon = in.find_last_of(':');
 76      // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator
 77      bool fHaveColon = colon != in.npos;
 78      bool fBracketed = fHaveColon && (in[0] == '[' && in[colon - 1] == ']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe
 79      bool fMultiColon{fHaveColon && colon != 0 && (in.find_last_of(':', colon - 1) != in.npos)};
 80      if (fHaveColon && (colon == 0 || fBracketed || !fMultiColon)) {
 81          if (const auto n{ToIntegral<uint16_t>(in.substr(colon + 1))}) {
 82              in = in.substr(0, colon);
 83              portOut = *n;
 84              valid = (portOut != 0);
 85          }
 86      } else {
 87          valid = true;
 88      }
 89      if (in.size() > 0 && in[0] == '[' && in[in.size() - 1] == ']') {
 90          hostOut = in.substr(1, in.size() - 2);
 91      } else {
 92          hostOut = in;
 93      }
 94  
 95      return valid;
 96  }
 97  
 98  std::string EncodeBase64(std::span<const unsigned char> input)
 99  {
100      static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
101  
102      std::string str;
103      str.reserve(CeilDiv(input.size(), 3u) * 4);
104      ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(), input.end());
105      while (str.size() % 4) str += '=';
106      return str;
107  }
108  
109  std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str)
110  {
111      static const int8_t decode64_table[256]{
112          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
113          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
114          -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1,
115          -1, -1, -1, -1, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
116          15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
117          29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
118          49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
119          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
120          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
121          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
122          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
123          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
124          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
125      };
126  
127      if (str.size() % 4 != 0) return {};
128      /* One or two = characters at the end are permitted. */
129      if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
130      if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
131  
132      std::vector<unsigned char> ret;
133      ret.reserve((str.size() * 3) / 4);
134      bool valid = ConvertBits<6, 8, false>(
135          [&](unsigned char c) { ret.push_back(c); },
136          str.begin(), str.end(),
137          [](char c) { return decode64_table[uint8_t(c)]; }
138      );
139      if (!valid) return {};
140  
141      return ret;
142  }
143  
144  std::string EncodeBase32(std::span<const unsigned char> input, bool pad)
145  {
146      static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567";
147  
148      std::string str;
149      str.reserve(CeilDiv(input.size(), 5u) * 8);
150      ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(), input.end());
151      if (pad) {
152          while (str.size() % 8) {
153              str += '=';
154          }
155      }
156      return str;
157  }
158  
159  std::string EncodeBase32(std::string_view str, bool pad)
160  {
161      return EncodeBase32(MakeUCharSpan(str), pad);
162  }
163  
164  std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str)
165  {
166      static const int8_t decode32_table[256]{
167          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
168          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
169          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1,
170          -1, -1, -1, -1, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
171          15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1,  0,  1,  2,
172           3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
173          23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
174          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
175          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
176          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
177          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
178          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
179          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
180      };
181  
182      if (str.size() % 8 != 0) return {};
183      /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */
184      if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
185      if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
186      if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
187      if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
188  
189      std::vector<unsigned char> ret;
190      ret.reserve((str.size() * 5) / 8);
191      bool valid = ConvertBits<5, 8, false>(
192          [&](unsigned char c) { ret.push_back(c); },
193          str.begin(), str.end(),
194          [](char c) { return decode32_table[uint8_t(c)]; }
195      );
196  
197      if (!valid) return {};
198  
199      return ret;
200  }
201  
/**
 * Word-wrap `in` so that output lines are at most `width` characters where a
 * break point exists, indenting continuation lines by `indent` spaces.
 *
 * Existing '\n' characters are kept and end the current paragraph, so the
 * line that follows one is not indented. A word longer than the available
 * width is emitted unbroken.
 *
 * @param[in] in      Text to wrap.
 * @param[in] width   Maximum line width; must be >= indent (asserted).
 * @param[in] indent  Number of spaces written after each inserted line break.
 */
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
{
    assert(width >= indent);
    std::string wrapped;
    size_t pos = 0;        // start of the text that has not been emitted yet
    size_t cur_indent = 0; // indentation already written on the current output line
    while (pos < in.size()) {
        size_t eol = in.find('\n', pos);
        if (eol == std::string_view::npos) eol = in.size();

        const size_t line_len = eol - pos;
        const size_t avail = width - cur_indent;

        if (line_len <= avail) {
            // The rest of this input line fits: copy it with its newline, if any.
            wrapped += in.substr(pos, line_len + 1);
            pos = eol + 1;
            cur_indent = 0;
            continue;
        }

        // Look for the last break opportunity that still fits on this line.
        size_t brk = in.find_last_of(" \n", pos + avail);
        if (brk == std::string_view::npos || brk < pos) {
            // No place to break; just include the entire word and move on
            brk = in.find_first_of("\n ", pos);
            if (brk == std::string_view::npos) {
                // End of the string, just add it and break
                wrapped += in.substr(pos);
                break;
            }
        }

        wrapped += in.substr(pos, brk - pos);
        wrapped += "\n";
        if (in[brk] == '\n') {
            // The break coincides with a newline from the input: no indent.
            cur_indent = 0;
        } else if (indent) {
            wrapped.append(indent, ' ');
            cur_indent = indent;
        }
        pos = brk + 1;
    }
    return wrapped;
}
243  
/** Upper bound for mantissa.
 * 10^18-1 is the largest arbitrary decimal that will fit in a signed 64-bit integer.
 * Larger integers cannot consist of arbitrary combinations of 0-9:
 *
 *   999999999999999999  10^18-1
 *  9223372036854775807  (1<<63)-1  (max int64_t)
 *  9999999999999999999  10^19-1    (would overflow)
 */
static const int64_t UPPER_BOUND = 1000000000000000000LL - 1LL;

/**
 * Helper function for ParseFixedPoint: consume one decimal digit.
 *
 * A '0' is only counted in `mantissa_tzeros`. Any other digit first scales
 * `mantissa` by ten once for each pending zero and once for itself, then is
 * added, and the pending-zero count is reset.
 *
 * @return false if scaling would push `mantissa` past UPPER_BOUND.
 */
static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
{
    if (ch == '0') {
        ++mantissa_tzeros;
        return true;
    }

    for (int shift = 0; shift <= mantissa_tzeros; ++shift) {
        if (mantissa > (UPPER_BOUND / 10LL)) {
            return false; /* overflow */
        }
        mantissa *= 10;
    }
    mantissa += ch - '0';
    mantissa_tzeros = 0;
    return true;
}
270  
271  bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
272  {
273      int64_t mantissa = 0;
274      int64_t exponent = 0;
275      int mantissa_tzeros = 0;
276      bool mantissa_sign = false;
277      bool exponent_sign = false;
278      int ptr = 0;
279      int end = val.size();
280      int point_ofs = 0;
281  
282      if (ptr < end && val[ptr] == '-') {
283          mantissa_sign = true;
284          ++ptr;
285      }
286      if (ptr < end)
287      {
288          if (val[ptr] == '0') {
289              /* pass single 0 */
290              ++ptr;
291          } else if (val[ptr] >= '1' && val[ptr] <= '9') {
292              while (ptr < end && IsDigit(val[ptr])) {
293                  if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
294                      return false; /* overflow */
295                  ++ptr;
296              }
297          } else return false; /* missing expected digit */
298      } else return false; /* empty string or loose '-' */
299      if (ptr < end && val[ptr] == '.')
300      {
301          ++ptr;
302          if (ptr < end && IsDigit(val[ptr]))
303          {
304              while (ptr < end && IsDigit(val[ptr])) {
305                  if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
306                      return false; /* overflow */
307                  ++ptr;
308                  ++point_ofs;
309              }
310          } else return false; /* missing expected digit */
311      }
312      if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E'))
313      {
314          ++ptr;
315          if (ptr < end && val[ptr] == '+')
316              ++ptr;
317          else if (ptr < end && val[ptr] == '-') {
318              exponent_sign = true;
319              ++ptr;
320          }
321          if (ptr < end && IsDigit(val[ptr])) {
322              while (ptr < end && IsDigit(val[ptr])) {
323                  if (exponent > (UPPER_BOUND / 10LL))
324                      return false; /* overflow */
325                  exponent = exponent * 10 + val[ptr] - '0';
326                  ++ptr;
327              }
328          } else return false; /* missing expected digit */
329      }
330      if (ptr != end)
331          return false; /* trailing garbage */
332  
333      /* finalize exponent */
334      if (exponent_sign)
335          exponent = -exponent;
336      exponent = exponent - point_ofs + mantissa_tzeros;
337  
338      /* finalize mantissa */
339      if (mantissa_sign)
340          mantissa = -mantissa;
341  
342      /* convert to one 64-bit fixed-point value */
343      exponent += decimals;
344      if (exponent < 0)
345          return false; /* cannot represent values smaller than 10^-decimals */
346      if (exponent >= 18)
347          return false; /* cannot represent values larger than or equal to 10^(18-decimals) */
348  
349      for (int i=0; i < exponent; ++i) {
350          if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL))
351              return false; /* overflow */
352          mantissa *= 10;
353      }
354      if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND)
355          return false; /* overflow */
356  
357      if (amount_out)
358          *amount_out = mantissa;
359  
360      return true;
361  }
362  
363  std::string ToLower(std::string_view str)
364  {
365      std::string r;
366      r.reserve(str.size());
367      for (auto ch : str) r += ToLower(ch);
368      return r;
369  }
370  
371  std::string ToUpper(std::string_view str)
372  {
373      std::string r;
374      r.reserve(str.size());
375      for (auto ch : str) r += ToUpper(ch);
376      return r;
377  }
378  
379  std::string Capitalize(std::string str)
380  {
381      if (str.empty()) return str;
382      str[0] = ToUpper(str.front());
383      return str;
384  }
385  
386  std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
387  {
388      if (str.empty()) {
389          return std::nullopt;
390      }
391      auto multiplier = default_multiplier;
392      char unit = str.back();
393      switch (unit) {
394      case 'k':
395          multiplier = ByteUnit::k;
396          break;
397      case 'K':
398          multiplier = ByteUnit::K;
399          break;
400      case 'm':
401          multiplier = ByteUnit::m;
402          break;
403      case 'M':
404          multiplier = ByteUnit::M;
405          break;
406      case 'g':
407          multiplier = ByteUnit::g;
408          break;
409      case 'G':
410          multiplier = ByteUnit::G;
411          break;
412      case 't':
413          multiplier = ByteUnit::t;
414          break;
415      case 'T':
416          multiplier = ByteUnit::T;
417          break;
418      default:
419          unit = 0;
420          break;
421      }
422  
423      uint64_t unit_amount = static_cast<uint64_t>(multiplier);
424      auto parsed_num = ToIntegral<uint64_t>(unit ? str.substr(0, str.size() - 1) : str);
425      if (!parsed_num || parsed_num > std::numeric_limits<uint64_t>::max() / unit_amount) { // check overflow
426          return std::nullopt;
427      }
428      return *parsed_num * unit_amount;
429  }
430  
/**
 * Compare two strings for equality, treating the ASCII letters 'A'-'Z' as
 * equal to 'a'-'z'. All other characters must match exactly.
 */
bool CaseInsensitiveEqual(std::string_view s1, std::string_view s2)
{
    if (s1.size() != s2.size()) return false;

    // Fold ASCII uppercase onto lowercase; leave every other character alone.
    const auto fold = [](char ch) -> char {
        if (ch >= 'A' && ch <= 'Z') ch -= ('A' - 'a');
        return ch;
    };

    for (size_t idx = 0; idx < s1.size(); ++idx) {
        if (fold(s1[idx]) != fold(s2[idx])) return false;
    }
    return true;
}