/ src / util / strencodings.cpp
strencodings.cpp
  1  // Copyright (c) 2009-2010 Satoshi Nakamoto
  2  // Copyright (c) 2009-present The Bitcoin Core developers
  3  // Distributed under the MIT software license, see the accompanying
  4  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  5  
  6  #include <util/strencodings.h>
  7  
  8  #include <crypto/hex_base.h>
  9  #include <span.h>
 10  #include <util/overflow.h>
 11  
 12  #include <array>
 13  #include <cassert>
 14  #include <cstring>
 15  #include <limits>
 16  #include <optional>
 17  #include <ostream>
 18  #include <string>
 19  #include <vector>
 20  
 21  static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
 22  
 23  static const std::string SAFE_CHARS[] =
 24  {
 25      CHARS_ALPHA_NUM + " .,;-_/:?@()", // SAFE_CHARS_DEFAULT
 26      CHARS_ALPHA_NUM + " .,;-_?@", // SAFE_CHARS_UA_COMMENT
 27      CHARS_ALPHA_NUM + ".-_", // SAFE_CHARS_FILENAME
 28      CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI
 29  };
 30  
 31  std::string SanitizeString(std::string_view str, int rule)
 32  {
 33      std::string result;
 34      for (char c : str) {
 35          if (SAFE_CHARS[rule].find(c) != std::string::npos) {
 36              result.push_back(c);
 37          }
 38      }
 39      return result;
 40  }
 41  
 42  bool IsHex(std::string_view str)
 43  {
 44      for (char c : str) {
 45          if (HexDigit(c) < 0) return false;
 46      }
 47      return (str.size() > 0) && (str.size()%2 == 0);
 48  }
 49  
 50  template <typename Byte>
 51  std::optional<std::vector<Byte>> TryParseHex(std::string_view str)
 52  {
 53      std::vector<Byte> vch;
 54      vch.reserve(str.size() / 2); // two hex characters form a single byte
 55  
 56      auto it = str.begin();
 57      while (it != str.end()) {
 58          if (IsSpace(*it)) {
 59              ++it;
 60              continue;
 61          }
 62          auto c1 = HexDigit(*(it++));
 63          if (it == str.end()) return std::nullopt;
 64          auto c2 = HexDigit(*(it++));
 65          if (c1 < 0 || c2 < 0) return std::nullopt;
 66          vch.push_back(Byte(c1 << 4) | Byte(c2));
 67      }
 68      return vch;
 69  }
 70  template std::optional<std::vector<std::byte>> TryParseHex(std::string_view);
 71  template std::optional<std::vector<uint8_t>> TryParseHex(std::string_view);
 72  
 73  bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut)
 74  {
 75      bool valid = false;
 76      size_t colon = in.find_last_of(':');
 77      // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator
 78      bool fHaveColon = colon != in.npos;
 79      bool fBracketed = fHaveColon && (in[0] == '[' && in[colon - 1] == ']'); // if there is a colon, and in[0]=='[', colon is not 0, so in[colon-1] is safe
 80      bool fMultiColon{fHaveColon && colon != 0 && (in.find_last_of(':', colon - 1) != in.npos)};
 81      if (fHaveColon && (colon == 0 || fBracketed || !fMultiColon)) {
 82          if (const auto n{ToIntegral<uint16_t>(in.substr(colon + 1))}) {
 83              in = in.substr(0, colon);
 84              portOut = *n;
 85              valid = (portOut != 0);
 86          }
 87      } else {
 88          valid = true;
 89      }
 90      if (in.size() > 0 && in[0] == '[' && in[in.size() - 1] == ']') {
 91          hostOut = in.substr(1, in.size() - 2);
 92      } else {
 93          hostOut = in;
 94      }
 95  
 96      return valid;
 97  }
 98  
 99  std::string EncodeBase64(std::span<const unsigned char> input)
100  {
101      static const char *pbase64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
102  
103      std::string str;
104      str.reserve(CeilDiv(input.size(), 3u) * 4);
105      ConvertBits<8, 6, true>([&](int v) { str += pbase64[v]; }, input.begin(), input.end());
106      while (str.size() % 4) str += '=';
107      return str;
108  }
109  
110  std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str)
111  {
112      static const int8_t decode64_table[256]{
113          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
114          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
115          -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1,
116          -1, -1, -1, -1, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
117          15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28,
118          29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
119          49, 50, 51, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
120          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
121          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
122          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
123          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
124          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
125          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
126      };
127  
128      if (str.size() % 4 != 0) return {};
129      /* One or two = characters at the end are permitted. */
130      if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
131      if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
132  
133      std::vector<unsigned char> ret;
134      ret.reserve((str.size() * 3) / 4);
135      bool valid = ConvertBits<6, 8, false>(
136          [&](unsigned char c) { ret.push_back(c); },
137          str.begin(), str.end(),
138          [](char c) { return decode64_table[uint8_t(c)]; }
139      );
140      if (!valid) return {};
141  
142      return ret;
143  }
144  
145  std::string EncodeBase32(std::span<const unsigned char> input, bool pad)
146  {
147      static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567";
148  
149      std::string str;
150      str.reserve(CeilDiv(input.size(), 5u) * 8);
151      ConvertBits<8, 5, true>([&](int v) { str += pbase32[v]; }, input.begin(), input.end());
152      if (pad) {
153          while (str.size() % 8) {
154              str += '=';
155          }
156      }
157      return str;
158  }
159  
160  std::string EncodeBase32(std::string_view str, bool pad)
161  {
162      return EncodeBase32(MakeUCharSpan(str), pad);
163  }
164  
165  std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str)
166  {
167      static const int8_t decode32_table[256]{
168          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
169          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
170          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1,
171          -1, -1, -1, -1, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
172          15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1,  0,  1,  2,
173           3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
174          23, 24, 25, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
175          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
176          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
177          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
178          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
179          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
180          -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
181      };
182  
183      if (str.size() % 8 != 0) return {};
184      /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */
185      if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
186      if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
187      if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1);
188      if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2);
189  
190      std::vector<unsigned char> ret;
191      ret.reserve((str.size() * 5) / 8);
192      bool valid = ConvertBits<5, 8, false>(
193          [&](unsigned char c) { ret.push_back(c); },
194          str.begin(), str.end(),
195          [](char c) { return decode32_table[uint8_t(c)]; }
196      );
197  
198      if (!valid) return {};
199  
200      return ret;
201  }
202  
/**
 * Word-wrap `in` so that lines break at spaces or newlines once they would
 * exceed `width` characters.
 *
 * @param in      Text to wrap; existing '\n' characters are kept as line ends.
 * @param width   Maximum line width. Must be >= indent (asserted).
 * @param indent  Number of spaces inserted after each break made at a space.
 *                Breaks at an existing '\n' are not indented.
 * @return The wrapped text. A word longer than the available width is emitted
 *         unbroken on its own line.
 */
std::string FormatParagraph(std::string_view in, size_t width, size_t indent)
{
    assert(width >= indent);
    std::string wrapped;
    size_t pos{0};        // start of the text not yet emitted
    size_t cur_indent{0}; // indentation already written on the current output line
    while (pos < in.size()) {
        size_t eol{in.find('\n', pos)};
        if (eol == std::string_view::npos) eol = in.size();
        const size_t line_len{eol - pos};
        const size_t avail{width - cur_indent};

        if (line_len <= avail) {
            // The rest of this input line fits: copy it, including its '\n' if any.
            wrapped += in.substr(pos, line_len + 1);
            pos = eol + 1;
            cur_indent = 0;
            continue;
        }

        // Too long: look for the last break opportunity inside the available width.
        size_t brk{in.find_last_of(" \n", pos + avail)};
        if (brk == std::string_view::npos || brk < pos) {
            // No place to break; take the entire word and break after it instead.
            brk = in.find_first_of("\n ", pos);
            if (brk == std::string_view::npos) {
                // The word runs to the end of the input: emit it and stop.
                wrapped += in.substr(pos);
                break;
            }
        }
        wrapped += in.substr(pos, brk - pos);
        wrapped += "\n";
        if (in[brk] == '\n') {
            cur_indent = 0;
        } else if (indent != 0) {
            wrapped.append(indent, ' ');
            cur_indent = indent;
        }
        pos = brk + 1; // skip the space/newline that was broken at
    }
    return wrapped;
}
244  
/** Upper bound for mantissa.
 * 10^18-1 is the largest number whose decimal digits can be chosen freely
 * while still fitting in a signed 64-bit integer. With 19 digits that no
 * longer holds, because not every combination of 0-9 fits:
 *
 *   999999999999999999  10^18-1
 *  9223372036854775807  (1<<63)-1  (max int64_t)
 *  9999999999999999999  10^19-1    (would overflow)
 */
static constexpr int64_t UPPER_BOUND{999'999'999'999'999'999LL};
254  
255  /** Helper function for ParseFixedPoint */
256  static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantissa_tzeros)
257  {
258      if(ch == '0')
259          ++mantissa_tzeros;
260      else {
261          for (int i=0; i<=mantissa_tzeros; ++i) {
262              if (mantissa > (UPPER_BOUND / 10LL))
263                  return false; /* overflow */
264              mantissa *= 10;
265          }
266          mantissa += ch - '0';
267          mantissa_tzeros = 0;
268      }
269      return true;
270  }
271  
272  bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out)
273  {
274      int64_t mantissa = 0;
275      int64_t exponent = 0;
276      int mantissa_tzeros = 0;
277      bool mantissa_sign = false;
278      bool exponent_sign = false;
279      int ptr = 0;
280      int end = val.size();
281      int point_ofs = 0;
282  
283      if (ptr < end && val[ptr] == '-') {
284          mantissa_sign = true;
285          ++ptr;
286      }
287      if (ptr < end)
288      {
289          if (val[ptr] == '0') {
290              /* pass single 0 */
291              ++ptr;
292          } else if (val[ptr] >= '1' && val[ptr] <= '9') {
293              while (ptr < end && IsDigit(val[ptr])) {
294                  if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
295                      return false; /* overflow */
296                  ++ptr;
297              }
298          } else return false; /* missing expected digit */
299      } else return false; /* empty string or loose '-' */
300      if (ptr < end && val[ptr] == '.')
301      {
302          ++ptr;
303          if (ptr < end && IsDigit(val[ptr]))
304          {
305              while (ptr < end && IsDigit(val[ptr])) {
306                  if (!ProcessMantissaDigit(val[ptr], mantissa, mantissa_tzeros))
307                      return false; /* overflow */
308                  ++ptr;
309                  ++point_ofs;
310              }
311          } else return false; /* missing expected digit */
312      }
313      if (ptr < end && (val[ptr] == 'e' || val[ptr] == 'E'))
314      {
315          ++ptr;
316          if (ptr < end && val[ptr] == '+')
317              ++ptr;
318          else if (ptr < end && val[ptr] == '-') {
319              exponent_sign = true;
320              ++ptr;
321          }
322          if (ptr < end && IsDigit(val[ptr])) {
323              while (ptr < end && IsDigit(val[ptr])) {
324                  if (exponent > (UPPER_BOUND / 10LL))
325                      return false; /* overflow */
326                  exponent = exponent * 10 + val[ptr] - '0';
327                  ++ptr;
328              }
329          } else return false; /* missing expected digit */
330      }
331      if (ptr != end)
332          return false; /* trailing garbage */
333  
334      /* finalize exponent */
335      if (exponent_sign)
336          exponent = -exponent;
337      exponent = exponent - point_ofs + mantissa_tzeros;
338  
339      /* finalize mantissa */
340      if (mantissa_sign)
341          mantissa = -mantissa;
342  
343      /* convert to one 64-bit fixed-point value */
344      exponent += decimals;
345      if (exponent < 0)
346          return false; /* cannot represent values smaller than 10^-decimals */
347      if (exponent >= 18)
348          return false; /* cannot represent values larger than or equal to 10^(18-decimals) */
349  
350      for (int i=0; i < exponent; ++i) {
351          if (mantissa > (UPPER_BOUND / 10LL) || mantissa < -(UPPER_BOUND / 10LL))
352              return false; /* overflow */
353          mantissa *= 10;
354      }
355      if (mantissa > UPPER_BOUND || mantissa < -UPPER_BOUND)
356          return false; /* overflow */
357  
358      if (amount_out)
359          *amount_out = mantissa;
360  
361      return true;
362  }
363  
364  std::string ToLower(std::string_view str)
365  {
366      std::string r;
367      r.reserve(str.size());
368      for (auto ch : str) r += ToLower(ch);
369      return r;
370  }
371  
372  std::string ToUpper(std::string_view str)
373  {
374      std::string r;
375      r.reserve(str.size());
376      for (auto ch : str) r += ToUpper(ch);
377      return r;
378  }
379  
380  std::string Capitalize(std::string str)
381  {
382      if (str.empty()) return str;
383      str[0] = ToUpper(str.front());
384      return str;
385  }
386  
387  std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier)
388  {
389      if (str.empty()) {
390          return std::nullopt;
391      }
392      auto multiplier = default_multiplier;
393      char unit = str.back();
394      switch (unit) {
395      case 'k':
396          multiplier = ByteUnit::k;
397          break;
398      case 'K':
399          multiplier = ByteUnit::K;
400          break;
401      case 'm':
402          multiplier = ByteUnit::m;
403          break;
404      case 'M':
405          multiplier = ByteUnit::M;
406          break;
407      case 'g':
408          multiplier = ByteUnit::g;
409          break;
410      case 'G':
411          multiplier = ByteUnit::G;
412          break;
413      case 't':
414          multiplier = ByteUnit::t;
415          break;
416      case 'T':
417          multiplier = ByteUnit::T;
418          break;
419      default:
420          unit = 0;
421          break;
422      }
423  
424      uint64_t unit_amount = static_cast<uint64_t>(multiplier);
425      auto parsed_num = ToIntegral<uint64_t>(unit ? str.substr(0, str.size() - 1) : str);
426      if (!parsed_num || parsed_num > std::numeric_limits<uint64_t>::max() / unit_amount) { // check overflow
427          return std::nullopt;
428      }
429      return *parsed_num * unit_amount;
430  }