string.h
1 // Copyright (c) 2019-present The Bitcoin Core developers 2 // Distributed under the MIT software license, see the accompanying 3 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 4 5 #ifndef BITCOIN_UTIL_STRING_H 6 #define BITCOIN_UTIL_STRING_H 7 8 #include <algorithm> 9 #include <array> 10 #include <cstddef> 11 #include <cstdint> 12 #include <initializer_list> 13 #include <locale> 14 #include <optional> 15 #include <span> 16 #include <sstream> 17 #include <string> 18 #include <string_view> 19 #include <vector> 20 21 namespace util { 22 namespace detail { 23 template <unsigned num_params> 24 constexpr static void CheckNumFormatSpecifiers(const char* str) 25 { 26 unsigned count_normal{0}; // Number of "normal" specifiers, like %s 27 unsigned count_pos{0}; // Max number in positional specifier, like %8$s 28 for (auto it{str}; *it != '\0'; ++it) { 29 if (*it != '%' || *++it == '%') continue; // Skip escaped %% 30 31 auto add_arg = [&] { 32 unsigned maybe_num{0}; 33 while ('0' <= *it && *it <= '9') { 34 maybe_num *= 10; 35 maybe_num += *it - '0'; 36 ++it; 37 } 38 39 if (*it == '$') { 40 ++it; 41 // Positional specifier, like %8$s 42 if (maybe_num == 0) throw "Positional format specifier must have position of at least 1"; 43 count_pos = std::max(count_pos, maybe_num); 44 } else { 45 // Non-positional specifier, like %s 46 ++count_normal; 47 } 48 }; 49 50 // Increase argument count and consume positional specifier, if present. 51 add_arg(); 52 53 // Consume flags. 54 while (*it == '#' || *it == '0' || *it == '-' || *it == ' ' || *it == '+') ++it; 55 56 auto parse_size = [&] { 57 if (*it == '*') { 58 ++it; 59 add_arg(); 60 } else { 61 while ('0' <= *it && *it <= '9') ++it; 62 } 63 }; 64 65 // Consume dynamic or static width value. 66 parse_size(); 67 68 // Consume dynamic or static precision value. 69 if (*it == '.') { 70 ++it; 71 parse_size(); 72 } 73 74 if (*it == '\0') throw "Format specifier incorrectly terminated by end of string"; 75 76 // Length and type in "[flags][width][.precision][length]type" 77 // is not checked. Parsing continues with the next '%'. 78 } 79 if (count_normal && count_pos) throw "Format specifiers must be all positional or all non-positional!"; 80 unsigned count{count_normal | count_pos}; 81 if (num_params != count) throw "Format specifier count must match the argument count!"; 82 } 83 } // namespace detail 84 85 /** 86 * @brief A wrapper for a compile-time partially validated format string 87 * 88 * This struct can be used to enforce partial compile-time validation of format 89 * strings, to reduce the likelihood of tinyformat throwing exceptions at 90 * run-time. Validation is partial to try and prevent the most common errors 91 * while avoiding re-implementing the entire parsing logic. 92 */ 93 template <unsigned num_params> 94 struct ConstevalFormatString { 95 const char* const fmt; 96 consteval ConstevalFormatString(const char* str) : fmt{str} { detail::CheckNumFormatSpecifiers<num_params>(fmt); } 97 }; 98 99 void ReplaceAll(std::string& in_out, const std::string& search, const std::string& substitute); 100 101 /** Split a string on any char found in separators, returning a vector. 102 * 103 * If sep does not occur in sp, a singleton with the entirety of sp is returned. 104 * 105 * @param[in] include_sep Whether to include the separator at the end of the left side of the splits. 106 * 107 * Note that this function does not care about braces, so splitting 108 * "foo(bar(1),2),3) on ',' will return {"foo(bar(1)", "2)", "3)"}. 109 * 110 * If include_sep == true, splitting "foo(bar(1),2),3) on ',' 111 * will return: 112 * - foo(bar(1), 113 * - 2), 114 * - 3) 115 */ 116 template <typename T = std::span<const char>> 117 std::vector<T> Split(const std::span<const char>& sp, std::string_view separators, bool include_sep = false) 118 { 119 std::vector<T> ret; 120 auto it = sp.begin(); 121 auto start = it; 122 while (it != sp.end()) { 123 if (separators.find(*it) != std::string::npos) { 124 if (include_sep) { 125 ret.emplace_back(start, it + 1); 126 } else { 127 ret.emplace_back(start, it); 128 } 129 start = it + 1; 130 } 131 ++it; 132 } 133 ret.emplace_back(start, it); 134 return ret; 135 } 136 137 /** Split a string on every instance of sep, returning a vector. 138 * 139 * If sep does not occur in sp, a singleton with the entirety of sp is returned. 140 * 141 * Note that this function does not care about braces, so splitting 142 * "foo(bar(1),2),3) on ',' will return {"foo(bar(1)", "2)", "3)"}. 143 */ 144 template <typename T = std::span<const char>> 145 std::vector<T> Split(const std::span<const char>& sp, char sep, bool include_sep = false) 146 { 147 return Split<T>(sp, std::string_view{&sep, 1}, include_sep); 148 } 149 150 [[nodiscard]] inline std::vector<std::string> SplitString(std::string_view str, char sep) 151 { 152 return Split<std::string>(str, sep); 153 } 154 155 [[nodiscard]] inline std::vector<std::string> SplitString(std::string_view str, std::string_view separators) 156 { 157 return Split<std::string>(str, separators); 158 } 159 160 [[nodiscard]] inline std::string_view TrimStringView(std::string_view str, std::string_view pattern = " \f\n\r\t\v") 161 { 162 std::string::size_type front = str.find_first_not_of(pattern); 163 if (front == std::string::npos) { 164 return {}; 165 } 166 std::string::size_type end = str.find_last_not_of(pattern); 167 return str.substr(front, end - front + 1); 168 } 169 170 [[nodiscard]] inline std::string TrimString(std::string_view str, std::string_view pattern = " \f\n\r\t\v") 171 { 172 return std::string(TrimStringView(str, pattern)); 173 } 174 175 [[nodiscard]] inline std::string_view RemoveSuffixView(std::string_view str, std::string_view suffix) 176 { 177 if (str.ends_with(suffix)) { 178 return str.substr(0, str.size() - suffix.size()); 179 } 180 return str; 181 } 182 183 [[nodiscard]] inline std::string_view RemovePrefixView(std::string_view str, std::string_view prefix) 184 { 185 if (str.starts_with(prefix)) { 186 return str.substr(prefix.size()); 187 } 188 return str; 189 } 190 191 [[nodiscard]] inline std::string RemovePrefix(std::string_view str, std::string_view prefix) 192 { 193 return std::string(RemovePrefixView(str, prefix)); 194 } 195 196 /** 197 * Join all container items. Typically used to concatenate strings but accepts 198 * containers with elements of any type. 199 * 200 * @param container The items to join 201 * @param separator The separator 202 * @param unary_op Apply this operator to each item 203 */ 204 template <typename C, typename S, typename UnaryOp> 205 // NOLINTNEXTLINE(misc-no-recursion) 206 auto Join(const C& container, const S& separator, UnaryOp unary_op) 207 { 208 decltype(unary_op(*container.begin())) ret; 209 bool first{true}; 210 for (const auto& item : container) { 211 if (!first) ret += separator; 212 ret += unary_op(item); 213 first = false; 214 } 215 return ret; 216 } 217 218 template <typename C, typename S> 219 auto Join(const C& container, const S& separator) 220 { 221 return Join(container, separator, [](const auto& i) { return i; }); 222 } 223 224 /** 225 * Create an unordered multi-line list of items. 226 */ 227 inline std::string MakeUnorderedList(const std::vector<std::string>& items) 228 { 229 return Join(items, "\n", [](const std::string& item) { return "- " + item; }); 230 } 231 232 /** 233 * Check if a string does not contain any embedded NUL (\0) characters 234 */ 235 [[nodiscard]] inline bool ContainsNoNUL(std::string_view str) noexcept 236 { 237 for (auto c : str) { 238 if (c == 0) return false; 239 } 240 return true; 241 } 242 243 /** 244 * Locale-independent version of std::to_string 245 */ 246 template <typename T> 247 std::string ToString(const T& t) 248 { 249 std::ostringstream oss; 250 oss.imbue(std::locale::classic()); 251 oss << t; 252 return oss.str(); 253 } 254 255 /** 256 * Check whether a container begins with the given prefix. 257 */ 258 template <typename T1, size_t PREFIX_LEN> 259 [[nodiscard]] inline bool HasPrefix(const T1& obj, 260 const std::array<uint8_t, PREFIX_LEN>& prefix) 261 { 262 return obj.size() >= PREFIX_LEN && 263 std::equal(std::begin(prefix), std::end(prefix), std::begin(obj)); 264 } 265 266 struct LineReader { 267 const std::span<const std::byte>::iterator start; 268 const std::span<const std::byte>::iterator end; 269 const size_t max_line_length; 270 std::span<const std::byte>::iterator it; 271 272 explicit LineReader(std::span<const std::byte> buffer, size_t max_line_length); 273 explicit LineReader(std::string_view str, size_t max_line_length) : LineReader{std::as_bytes(std::span{str}), max_line_length} {} 274 275 /** 276 * Returns a string from current iterator position up to (but not including) next \n 277 * and advances iterator to the character following the \n on success. 278 * Will not return a line longer than max_line_length. 279 * @returns the next string from the buffer. 280 * std::nullopt if end of buffer is reached without finding a \n. 281 * @throws a std::runtime_error if max_line_length + 1 bytes are read without finding \n. 282 */ 283 std::optional<std::string> ReadLine(); 284 285 /** 286 * Returns string from current iterator position of specified length 287 * if possible and advances iterator on success. 288 * May exceed max_line_length but will not read past end of buffer. 289 * @param[in] len The number of bytes to read from the buffer 290 * @returns a string of the expected length. 291 * @throws a std::runtime_error if there is not enough data in the buffer. 292 */ 293 std::string ReadLength(size_t len); 294 295 /** 296 * Returns remaining size of bytes in buffer 297 */ 298 size_t Remaining() const; 299 300 /** 301 * Returns number of bytes already read from buffer 302 */ 303 size_t Consumed() const; 304 }; 305 } // namespace util 306 307 #endif // BITCOIN_UTIL_STRING_H