/ src / util / string.h
string.h
  1  // Copyright (c) 2019-present The Bitcoin Core developers
  2  // Distributed under the MIT software license, see the accompanying
  3  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  4  
  5  #ifndef BITCOIN_UTIL_STRING_H
  6  #define BITCOIN_UTIL_STRING_H
  7  
  8  #include <algorithm>
  9  #include <array>
 10  #include <cstddef>
 11  #include <cstdint>
 12  #include <initializer_list>
 13  #include <locale>
 14  #include <optional>
 15  #include <span>
 16  #include <sstream>
 17  #include <string>
 18  #include <string_view>
 19  #include <vector>
 20  
 21  namespace util {
 22  namespace detail {
 23  template <unsigned num_params>
 24  constexpr static void CheckNumFormatSpecifiers(const char* str)
 25  {
 26      unsigned count_normal{0}; // Number of "normal" specifiers, like %s
 27      unsigned count_pos{0};    // Max number in positional specifier, like %8$s
 28      for (auto it{str}; *it != '\0'; ++it) {
 29          if (*it != '%' || *++it == '%') continue; // Skip escaped %%
 30  
 31          auto add_arg = [&] {
 32              unsigned maybe_num{0};
 33              while ('0' <= *it && *it <= '9') {
 34                  maybe_num *= 10;
 35                  maybe_num += *it - '0';
 36                  ++it;
 37              }
 38  
 39              if (*it == '$') {
 40                  ++it;
 41                  // Positional specifier, like %8$s
 42                  if (maybe_num == 0) throw "Positional format specifier must have position of at least 1";
 43                  count_pos = std::max(count_pos, maybe_num);
 44              } else {
 45                  // Non-positional specifier, like %s
 46                  ++count_normal;
 47              }
 48          };
 49  
 50          // Increase argument count and consume positional specifier, if present.
 51          add_arg();
 52  
 53          // Consume flags.
 54          while (*it == '#' || *it == '0' || *it == '-' || *it == ' ' || *it == '+') ++it;
 55  
 56          auto parse_size = [&] {
 57              if (*it == '*') {
 58                  ++it;
 59                  add_arg();
 60              } else {
 61                  while ('0' <= *it && *it <= '9') ++it;
 62              }
 63          };
 64  
 65          // Consume dynamic or static width value.
 66          parse_size();
 67  
 68          // Consume dynamic or static precision value.
 69          if (*it == '.') {
 70              ++it;
 71              parse_size();
 72          }
 73  
 74          if (*it == '\0') throw "Format specifier incorrectly terminated by end of string";
 75  
 76          // Length and type in "[flags][width][.precision][length]type"
 77          // is not checked. Parsing continues with the next '%'.
 78      }
 79      if (count_normal && count_pos) throw "Format specifiers must be all positional or all non-positional!";
 80      unsigned count{count_normal | count_pos};
 81      if (num_params != count) throw "Format specifier count must match the argument count!";
 82  }
 83  } // namespace detail
 84  
 85  /**
 86   * @brief A wrapper for a compile-time partially validated format string
 87   *
 88   * This struct can be used to enforce partial compile-time validation of format
 89   * strings, to reduce the likelihood of tinyformat throwing exceptions at
 90   * run-time. Validation is partial to try and prevent the most common errors
 91   * while avoiding re-implementing the entire parsing logic.
 92   */
 93  template <unsigned num_params>
 94  struct ConstevalFormatString {
 95      const char* const fmt;
 96      consteval ConstevalFormatString(const char* str) : fmt{str} { detail::CheckNumFormatSpecifiers<num_params>(fmt); }
 97  };
 98  
 99  void ReplaceAll(std::string& in_out, const std::string& search, const std::string& substitute);
100  
101  /** Split a string on any char found in separators, returning a vector.
102   *
103   * If sep does not occur in sp, a singleton with the entirety of sp is returned.
104   *
105   * @param[in] include_sep Whether to include the separator at the end of the left side of the splits.
106   *
107   * Note that this function does not care about braces, so splitting
108   * "foo(bar(1),2),3) on ',' will return {"foo(bar(1)", "2)", "3)"}.
109   *
110   * If include_sep == true, splitting "foo(bar(1),2),3) on ','
111   * will return:
112   *  - foo(bar(1),
113   *  - 2),
114   *  - 3)
115   */
116  template <typename T = std::span<const char>>
117  std::vector<T> Split(const std::span<const char>& sp, std::string_view separators, bool include_sep = false)
118  {
119      std::vector<T> ret;
120      auto it = sp.begin();
121      auto start = it;
122      while (it != sp.end()) {
123          if (separators.find(*it) != std::string::npos) {
124              if (include_sep) {
125                  ret.emplace_back(start, it + 1);
126              } else {
127                  ret.emplace_back(start, it);
128              }
129              start = it + 1;
130          }
131          ++it;
132      }
133      ret.emplace_back(start, it);
134      return ret;
135  }
136  
137  /** Split a string on every instance of sep, returning a vector.
138   *
139   * If sep does not occur in sp, a singleton with the entirety of sp is returned.
140   *
141   * Note that this function does not care about braces, so splitting
142   * "foo(bar(1),2),3) on ',' will return {"foo(bar(1)", "2)", "3)"}.
143   */
144  template <typename T = std::span<const char>>
145  std::vector<T> Split(const std::span<const char>& sp, char sep, bool include_sep = false)
146  {
147      return Split<T>(sp, std::string_view{&sep, 1}, include_sep);
148  }
149  
150  [[nodiscard]] inline std::vector<std::string> SplitString(std::string_view str, char sep)
151  {
152      return Split<std::string>(str, sep);
153  }
154  
155  [[nodiscard]] inline std::vector<std::string> SplitString(std::string_view str, std::string_view separators)
156  {
157      return Split<std::string>(str, separators);
158  }
159  
/**
 * Strip leading and trailing characters that appear in pattern.
 *
 * @param[in] str     The text to trim; the result is a view into the same storage.
 * @param[in] pattern Set of characters to strip (ASCII whitespace by default).
 * @returns the trimmed view, or a default-constructed view if str consists
 *          only of pattern characters (or is empty).
 */
[[nodiscard]] inline std::string_view TrimStringView(std::string_view str, std::string_view pattern = " \f\n\r\t\v")
{
    const auto first_kept = str.find_first_not_of(pattern);
    if (first_kept == std::string_view::npos) {
        return std::string_view{};
    }
    // A kept character exists, so find_last_not_of cannot return npos here.
    const auto last_kept = str.find_last_not_of(pattern);
    str.remove_suffix(str.size() - 1 - last_kept);
    str.remove_prefix(first_kept);
    return str;
}
169  
170  [[nodiscard]] inline std::string TrimString(std::string_view str, std::string_view pattern = " \f\n\r\t\v")
171  {
172      return std::string(TrimStringView(str, pattern));
173  }
174  
175  [[nodiscard]] inline std::string_view RemoveSuffixView(std::string_view str, std::string_view suffix)
176  {
177      if (str.ends_with(suffix)) {
178          return str.substr(0, str.size() - suffix.size());
179      }
180      return str;
181  }
182  
183  [[nodiscard]] inline std::string_view RemovePrefixView(std::string_view str, std::string_view prefix)
184  {
185      if (str.starts_with(prefix)) {
186          return str.substr(prefix.size());
187      }
188      return str;
189  }
190  
191  [[nodiscard]] inline std::string RemovePrefix(std::string_view str, std::string_view prefix)
192  {
193      return std::string(RemovePrefixView(str, prefix));
194  }
195  
/**
 * Concatenate the transformed items of a container, placing the separator
 * between consecutive items. Typically used for strings, but works for any
 * element type as long as unary_op yields something that supports +=.
 *
 * @param container The items to join
 * @param separator Inserted between items (never before the first or after the last)
 * @param unary_op  Applied to each item; its return type is the result type
 * @returns the joined value; value-initialisation aside, a default-constructed
 *          result if the container is empty.
 */
template <typename C, typename S, typename UnaryOp>
// NOLINTNEXTLINE(misc-no-recursion)
auto Join(const C& container, const S& separator, UnaryOp unary_op)
{
    using Result = decltype(unary_op(*container.begin()));
    Result joined;
    bool need_separator{false};
    for (const auto& element : container) {
        if (need_separator) {
            joined += separator;
        }
        joined += unary_op(element);
        need_separator = true;
    }
    return joined;
}

/**
 * Join the items of a container as they are, i.e. with an operator that
 * returns a copy of each item.
 */
template <typename C, typename S>
auto Join(const C& container, const S& separator)
{
    const auto copy_item = [](const auto& element) { return element; };
    return Join(container, separator, copy_item);
}
223  
224  /**
225   * Create an unordered multi-line list of items.
226   */
227  inline std::string MakeUnorderedList(const std::vector<std::string>& items)
228  {
229      return Join(items, "\n", [](const std::string& item) { return "- " + item; });
230  }
231  
/**
 * Check that a string is free of embedded NUL (\0) characters.
 *
 * @returns true if no character of str equals NUL (which includes the empty
 *          string), false otherwise.
 */
[[nodiscard]] inline bool ContainsNoNUL(std::string_view str) noexcept
{
    return str.find('\0') == std::string_view::npos;
}
242  
/**
 * Locale-independent counterpart of std::to_string.
 *
 * Streams t into a string stream that has been imbued with the classic "C"
 * locale and returns the text it produced.
 */
template <typename T>
std::string ToString(const T& t)
{
    std::ostringstream stream;
    // Pin the formatting to the classic locale rather than the global one.
    stream.imbue(std::locale::classic());
    stream << t;
    return stream.str();
}
254  
/**
 * Check whether a container begins with the given prefix.
 *
 * @param[in] obj    Container providing size() and usable with std::begin.
 * @param[in] prefix Fixed-size byte sequence to look for at the start of obj.
 * @returns true if obj holds at least PREFIX_LEN elements and the first
 *          PREFIX_LEN of them compare equal to prefix.
 */
template <typename T1, size_t PREFIX_LEN>
[[nodiscard]] inline bool HasPrefix(const T1& obj,
                                const std::array<uint8_t, PREFIX_LEN>& prefix)
{
    // Too short to contain the prefix; also keeps std::equal within bounds.
    if (obj.size() < PREFIX_LEN) return false;
    return std::equal(prefix.begin(), prefix.end(), std::begin(obj));
}
265  
266  struct LineReader {
267      const std::span<const std::byte>::iterator start;
268      const std::span<const std::byte>::iterator end;
269      const size_t max_line_length;
270      std::span<const std::byte>::iterator it;
271  
272      explicit LineReader(std::span<const std::byte> buffer, size_t max_line_length);
273      explicit LineReader(std::string_view str, size_t max_line_length) : LineReader{std::as_bytes(std::span{str}), max_line_length} {}
274  
275      /**
276       * Returns a string from current iterator position up to (but not including) next \n
277       * and advances iterator to the character following the \n on success.
278       * Will not return a line longer than max_line_length.
279       * @returns the next string from the buffer.
280       *          std::nullopt if end of buffer is reached without finding a \n.
281       * @throws a std::runtime_error if max_line_length + 1 bytes are read without finding \n.
282       */
283      std::optional<std::string> ReadLine();
284  
285      /**
286       * Returns string from current iterator position of specified length
287       * if possible and advances iterator on success.
288       * May exceed max_line_length but will not read past end of buffer.
289       * @param[in]   len     The number of bytes to read from the buffer
290       * @returns a string of the expected length.
291       * @throws a std::runtime_error if there is not enough data in the buffer.
292       */
293      std::string ReadLength(size_t len);
294  
295      /**
296       * Returns remaining size of bytes in buffer
297       */
298      size_t Remaining() const;
299  
300      /**
301       * Returns number of bytes already read from buffer
302       */
303      size_t Consumed() const;
304  };
305  } // namespace util
306  
307  #endif // BITCOIN_UTIL_STRING_H