streams.h
1 // Copyright (c) 2009-2010 Satoshi Nakamoto 2 // Copyright (c) 2009-2022 The Bitcoin Core developers 3 // Distributed under the MIT software license, see the accompanying 4 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 5 6 #ifndef BITCOIN_STREAMS_H 7 #define BITCOIN_STREAMS_H 8 9 #include <serialize.h> 10 #include <span.h> 11 #include <support/allocators/zeroafterfree.h> 12 #include <util/overflow.h> 13 14 #include <algorithm> 15 #include <assert.h> 16 #include <cstddef> 17 #include <cstdio> 18 #include <ios> 19 #include <limits> 20 #include <optional> 21 #include <stdint.h> 22 #include <string.h> 23 #include <string> 24 #include <utility> 25 #include <vector> 26 27 namespace util { 28 inline void Xor(Span<std::byte> write, Span<const std::byte> key, size_t key_offset = 0) 29 { 30 if (key.size() == 0) { 31 return; 32 } 33 key_offset %= key.size(); 34 35 for (size_t i = 0, j = key_offset; i != write.size(); i++) { 36 write[i] ^= key[j++]; 37 38 // This potentially acts on very many bytes of data, so it's 39 // important that we calculate `j`, i.e. the `key` index in this 40 // way instead of doing a %, which would effectively be a division 41 // for each byte Xor'd -- much slower than need be. 42 if (j == key.size()) 43 j = 0; 44 } 45 } 46 } // namespace util 47 48 /* Minimal stream for overwriting and/or appending to an existing byte vector 49 * 50 * The referenced vector will grow as necessary 51 */ 52 class VectorWriter 53 { 54 public: 55 /* 56 * @param[in] vchDataIn Referenced byte vector to overwrite/append 57 * @param[in] nPosIn Starting position. Vector index where writes should start. The vector will initially 58 * grow as necessary to max(nPosIn, vec.size()). So to append, use vec.size(). 59 */ 60 VectorWriter(std::vector<unsigned char>& vchDataIn, size_t nPosIn) : vchData{vchDataIn}, nPos{nPosIn} 61 { 62 if(nPos > vchData.size()) 63 vchData.resize(nPos); 64 } 65 /* 66 * (other params same as above) 67 * @param[in] args A list of items to serialize starting at nPosIn. 68 */ 69 template <typename... Args> 70 VectorWriter(std::vector<unsigned char>& vchDataIn, size_t nPosIn, Args&&... args) : VectorWriter{vchDataIn, nPosIn} 71 { 72 ::SerializeMany(*this, std::forward<Args>(args)...); 73 } 74 void write(Span<const std::byte> src) 75 { 76 assert(nPos <= vchData.size()); 77 size_t nOverwrite = std::min(src.size(), vchData.size() - nPos); 78 if (nOverwrite) { 79 memcpy(vchData.data() + nPos, src.data(), nOverwrite); 80 } 81 if (nOverwrite < src.size()) { 82 vchData.insert(vchData.end(), UCharCast(src.data()) + nOverwrite, UCharCast(src.end())); 83 } 84 nPos += src.size(); 85 } 86 template <typename T> 87 VectorWriter& operator<<(const T& obj) 88 { 89 ::Serialize(*this, obj); 90 return (*this); 91 } 92 93 private: 94 std::vector<unsigned char>& vchData; 95 size_t nPos; 96 }; 97 98 /** Minimal stream for reading from an existing byte array by Span. 99 */ 100 class SpanReader 101 { 102 private: 103 Span<const unsigned char> m_data; 104 105 public: 106 /** 107 * @param[in] data Referenced byte vector to overwrite/append 108 */ 109 explicit SpanReader(Span<const unsigned char> data) : m_data{data} {} 110 111 template<typename T> 112 SpanReader& operator>>(T&& obj) 113 { 114 ::Unserialize(*this, obj); 115 return (*this); 116 } 117 118 size_t size() const { return m_data.size(); } 119 bool empty() const { return m_data.empty(); } 120 121 void read(Span<std::byte> dst) 122 { 123 if (dst.size() == 0) { 124 return; 125 } 126 127 // Read from the beginning of the buffer 128 if (dst.size() > m_data.size()) { 129 throw std::ios_base::failure("SpanReader::read(): end of data"); 130 } 131 memcpy(dst.data(), m_data.data(), dst.size()); 132 m_data = m_data.subspan(dst.size()); 133 } 134 135 void ignore(size_t n) 136 { 137 m_data = m_data.subspan(n); 138 } 139 }; 140 141 /** Double ended buffer combining vector and stream-like interfaces. 142 * 143 * >> and << read and write unformatted data using the above serialization templates. 144 * Fills with data in linear time; some stringstream implementations take N^2 time. 145 */ 146 class DataStream 147 { 148 protected: 149 using vector_type = SerializeData; 150 vector_type vch; 151 vector_type::size_type m_read_pos{0}; 152 153 public: 154 typedef vector_type::allocator_type allocator_type; 155 typedef vector_type::size_type size_type; 156 typedef vector_type::difference_type difference_type; 157 typedef vector_type::reference reference; 158 typedef vector_type::const_reference const_reference; 159 typedef vector_type::value_type value_type; 160 typedef vector_type::iterator iterator; 161 typedef vector_type::const_iterator const_iterator; 162 typedef vector_type::reverse_iterator reverse_iterator; 163 164 explicit DataStream() {} 165 explicit DataStream(Span<const uint8_t> sp) : DataStream{AsBytes(sp)} {} 166 explicit DataStream(Span<const value_type> sp) : vch(sp.data(), sp.data() + sp.size()) {} 167 168 std::string str() const 169 { 170 return std::string{UCharCast(data()), UCharCast(data() + size())}; 171 } 172 173 174 // 175 // Vector subset 176 // 177 const_iterator begin() const { return vch.begin() + m_read_pos; } 178 iterator begin() { return vch.begin() + m_read_pos; } 179 const_iterator end() const { return vch.end(); } 180 iterator end() { return vch.end(); } 181 size_type size() const { return vch.size() - m_read_pos; } 182 bool empty() const { return vch.size() == m_read_pos; } 183 void resize(size_type n, value_type c = value_type{}) { vch.resize(n + m_read_pos, c); } 184 void reserve(size_type n) { vch.reserve(n + m_read_pos); } 185 const_reference operator[](size_type pos) const { return vch[pos + m_read_pos]; } 186 reference operator[](size_type pos) { return vch[pos + m_read_pos]; } 187 void clear() { vch.clear(); m_read_pos = 0; } 188 value_type* data() { return vch.data() + m_read_pos; } 189 const value_type* data() const { return vch.data() + m_read_pos; } 190 191 inline void Compact() 192 { 193 vch.erase(vch.begin(), vch.begin() + m_read_pos); 194 m_read_pos = 0; 195 } 196 197 bool Rewind(std::optional<size_type> n = std::nullopt) 198 { 199 // Total rewind if no size is passed 200 if (!n) { 201 m_read_pos = 0; 202 return true; 203 } 204 // Rewind by n characters if the buffer hasn't been compacted yet 205 if (*n > m_read_pos) 206 return false; 207 m_read_pos -= *n; 208 return true; 209 } 210 211 212 // 213 // Stream subset 214 // 215 bool eof() const { return size() == 0; } 216 int in_avail() const { return size(); } 217 218 void read(Span<value_type> dst) 219 { 220 if (dst.size() == 0) return; 221 222 // Read from the beginning of the buffer 223 auto next_read_pos{CheckedAdd(m_read_pos, dst.size())}; 224 if (!next_read_pos.has_value() || next_read_pos.value() > vch.size()) { 225 throw std::ios_base::failure("DataStream::read(): end of data"); 226 } 227 memcpy(dst.data(), &vch[m_read_pos], dst.size()); 228 if (next_read_pos.value() == vch.size()) { 229 m_read_pos = 0; 230 vch.clear(); 231 return; 232 } 233 m_read_pos = next_read_pos.value(); 234 } 235 236 void ignore(size_t num_ignore) 237 { 238 // Ignore from the beginning of the buffer 239 auto next_read_pos{CheckedAdd(m_read_pos, num_ignore)}; 240 if (!next_read_pos.has_value() || next_read_pos.value() > vch.size()) { 241 throw std::ios_base::failure("DataStream::ignore(): end of data"); 242 } 243 if (next_read_pos.value() == vch.size()) { 244 m_read_pos = 0; 245 vch.clear(); 246 return; 247 } 248 m_read_pos = next_read_pos.value(); 249 } 250 251 void write(Span<const value_type> src) 252 { 253 // Write to the end of the buffer 254 vch.insert(vch.end(), src.begin(), src.end()); 255 } 256 257 template<typename T> 258 DataStream& operator<<(const T& obj) 259 { 260 ::Serialize(*this, obj); 261 return (*this); 262 } 263 264 template<typename T> 265 DataStream& operator>>(T&& obj) 266 { 267 ::Unserialize(*this, obj); 268 return (*this); 269 } 270 271 /** 272 * XOR the contents of this stream with a certain key. 273 * 274 * @param[in] key The key used to XOR the data in this stream. 275 */ 276 void Xor(const std::vector<unsigned char>& key) 277 { 278 util::Xor(MakeWritableByteSpan(*this), MakeByteSpan(key)); 279 } 280 }; 281 282 template <typename IStream> 283 class BitStreamReader 284 { 285 private: 286 IStream& m_istream; 287 288 /// Buffered byte read in from the input stream. A new byte is read into the 289 /// buffer when m_offset reaches 8. 290 uint8_t m_buffer{0}; 291 292 /// Number of high order bits in m_buffer already returned by previous 293 /// Read() calls. The next bit to be returned is at this offset from the 294 /// most significant bit position. 295 int m_offset{8}; 296 297 public: 298 explicit BitStreamReader(IStream& istream) : m_istream(istream) {} 299 300 /** Read the specified number of bits from the stream. The data is returned 301 * in the nbits least significant bits of a 64-bit uint. 302 */ 303 uint64_t Read(int nbits) { 304 if (nbits < 0 || nbits > 64) { 305 throw std::out_of_range("nbits must be between 0 and 64"); 306 } 307 308 uint64_t data = 0; 309 while (nbits > 0) { 310 if (m_offset == 8) { 311 m_istream >> m_buffer; 312 m_offset = 0; 313 } 314 315 int bits = std::min(8 - m_offset, nbits); 316 data <<= bits; 317 data |= static_cast<uint8_t>(m_buffer << m_offset) >> (8 - bits); 318 m_offset += bits; 319 nbits -= bits; 320 } 321 return data; 322 } 323 }; 324 325 template <typename OStream> 326 class BitStreamWriter 327 { 328 private: 329 OStream& m_ostream; 330 331 /// Buffered byte waiting to be written to the output stream. The byte is 332 /// written buffer when m_offset reaches 8 or Flush() is called. 333 uint8_t m_buffer{0}; 334 335 /// Number of high order bits in m_buffer already written by previous 336 /// Write() calls and not yet flushed to the stream. The next bit to be 337 /// written to is at this offset from the most significant bit position. 338 int m_offset{0}; 339 340 public: 341 explicit BitStreamWriter(OStream& ostream) : m_ostream(ostream) {} 342 343 ~BitStreamWriter() 344 { 345 Flush(); 346 } 347 348 /** Write the nbits least significant bits of a 64-bit int to the output 349 * stream. Data is buffered until it completes an octet. 350 */ 351 void Write(uint64_t data, int nbits) { 352 if (nbits < 0 || nbits > 64) { 353 throw std::out_of_range("nbits must be between 0 and 64"); 354 } 355 356 while (nbits > 0) { 357 int bits = std::min(8 - m_offset, nbits); 358 m_buffer |= (data << (64 - nbits)) >> (64 - 8 + m_offset); 359 m_offset += bits; 360 nbits -= bits; 361 362 if (m_offset == 8) { 363 Flush(); 364 } 365 } 366 } 367 368 /** Flush any unwritten bits to the output stream, padding with 0's to the 369 * next byte boundary. 370 */ 371 void Flush() { 372 if (m_offset == 0) { 373 return; 374 } 375 376 m_ostream << m_buffer; 377 m_buffer = 0; 378 m_offset = 0; 379 } 380 }; 381 382 /** Non-refcounted RAII wrapper for FILE* 383 * 384 * Will automatically close the file when it goes out of scope if not null. 385 * If you're returning the file pointer, return file.release(). 386 * If you need to close the file early, use file.fclose() instead of fclose(file). 387 */ 388 class AutoFile 389 { 390 protected: 391 std::FILE* m_file; 392 std::vector<std::byte> m_xor; 393 394 public: 395 explicit AutoFile(std::FILE* file, std::vector<std::byte> data_xor={}) : m_file{file}, m_xor{std::move(data_xor)} {} 396 397 ~AutoFile() { fclose(); } 398 399 // Disallow copies 400 AutoFile(const AutoFile&) = delete; 401 AutoFile& operator=(const AutoFile&) = delete; 402 403 bool feof() const { return std::feof(m_file); } 404 405 int fclose() 406 { 407 if (auto rel{release()}) return std::fclose(rel); 408 return 0; 409 } 410 411 /** Get wrapped FILE* with transfer of ownership. 412 * @note This will invalidate the AutoFile object, and makes it the responsibility of the caller 413 * of this function to clean up the returned FILE*. 414 */ 415 std::FILE* release() 416 { 417 std::FILE* ret{m_file}; 418 m_file = nullptr; 419 return ret; 420 } 421 422 /** Get wrapped FILE* without transfer of ownership. 423 * @note Ownership of the FILE* will remain with this class. Use this only if the scope of the 424 * AutoFile outlives use of the passed pointer. 425 */ 426 std::FILE* Get() const { return m_file; } 427 428 /** Return true if the wrapped FILE* is nullptr, false otherwise. 429 */ 430 bool IsNull() const { return m_file == nullptr; } 431 432 /** Continue with a different XOR key */ 433 void SetXor(std::vector<std::byte> data_xor) { m_xor = data_xor; } 434 435 /** Implementation detail, only used internally. */ 436 std::size_t detail_fread(Span<std::byte> dst); 437 438 // 439 // Stream subset 440 // 441 void read(Span<std::byte> dst); 442 void ignore(size_t nSize); 443 void write(Span<const std::byte> src); 444 445 template <typename T> 446 AutoFile& operator<<(const T& obj) 447 { 448 ::Serialize(*this, obj); 449 return *this; 450 } 451 452 template <typename T> 453 AutoFile& operator>>(T&& obj) 454 { 455 ::Unserialize(*this, obj); 456 return *this; 457 } 458 }; 459 460 /** Wrapper around an AutoFile& that implements a ring buffer to 461 * deserialize from. It guarantees the ability to rewind a given number of bytes. 462 * 463 * Will automatically close the file when it goes out of scope if not null. 464 * If you need to close the file early, use file.fclose() instead of fclose(file). 465 */ 466 class BufferedFile 467 { 468 private: 469 AutoFile& m_src; 470 uint64_t nSrcPos{0}; //!< how many bytes have been read from source 471 uint64_t m_read_pos{0}; //!< how many bytes have been read from this 472 uint64_t nReadLimit; //!< up to which position we're allowed to read 473 uint64_t nRewind; //!< how many bytes we guarantee to rewind 474 std::vector<std::byte> vchBuf; //!< the buffer 475 476 //! read data from the source to fill the buffer 477 bool Fill() { 478 unsigned int pos = nSrcPos % vchBuf.size(); 479 unsigned int readNow = vchBuf.size() - pos; 480 unsigned int nAvail = vchBuf.size() - (nSrcPos - m_read_pos) - nRewind; 481 if (nAvail < readNow) 482 readNow = nAvail; 483 if (readNow == 0) 484 return false; 485 size_t nBytes{m_src.detail_fread(Span{vchBuf}.subspan(pos, readNow))}; 486 if (nBytes == 0) { 487 throw std::ios_base::failure{m_src.feof() ? "BufferedFile::Fill: end of file" : "BufferedFile::Fill: fread failed"}; 488 } 489 nSrcPos += nBytes; 490 return true; 491 } 492 493 //! Advance the stream's read pointer (m_read_pos) by up to 'length' bytes, 494 //! filling the buffer from the file so that at least one byte is available. 495 //! Return a pointer to the available buffer data and the number of bytes 496 //! (which may be less than the requested length) that may be accessed 497 //! beginning at that pointer. 498 std::pair<std::byte*, size_t> AdvanceStream(size_t length) 499 { 500 assert(m_read_pos <= nSrcPos); 501 if (m_read_pos + length > nReadLimit) { 502 throw std::ios_base::failure("Attempt to position past buffer limit"); 503 } 504 // If there are no bytes available, read from the file. 505 if (m_read_pos == nSrcPos && length > 0) Fill(); 506 507 size_t buffer_offset{static_cast<size_t>(m_read_pos % vchBuf.size())}; 508 size_t buffer_available{static_cast<size_t>(vchBuf.size() - buffer_offset)}; 509 size_t bytes_until_source_pos{static_cast<size_t>(nSrcPos - m_read_pos)}; 510 size_t advance{std::min({length, buffer_available, bytes_until_source_pos})}; 511 m_read_pos += advance; 512 return std::make_pair(&vchBuf[buffer_offset], advance); 513 } 514 515 public: 516 BufferedFile(AutoFile& file, uint64_t nBufSize, uint64_t nRewindIn) 517 : m_src{file}, nReadLimit{std::numeric_limits<uint64_t>::max()}, nRewind{nRewindIn}, vchBuf(nBufSize, std::byte{0}) 518 { 519 if (nRewindIn >= nBufSize) 520 throw std::ios_base::failure("Rewind limit must be less than buffer size"); 521 } 522 523 //! check whether we're at the end of the source file 524 bool eof() const { 525 return m_read_pos == nSrcPos && m_src.feof(); 526 } 527 528 //! read a number of bytes 529 void read(Span<std::byte> dst) 530 { 531 while (dst.size() > 0) { 532 auto [buffer_pointer, length]{AdvanceStream(dst.size())}; 533 memcpy(dst.data(), buffer_pointer, length); 534 dst = dst.subspan(length); 535 } 536 } 537 538 //! Move the read position ahead in the stream to the given position. 539 //! Use SetPos() to back up in the stream, not SkipTo(). 540 void SkipTo(const uint64_t file_pos) 541 { 542 assert(file_pos >= m_read_pos); 543 while (m_read_pos < file_pos) AdvanceStream(file_pos - m_read_pos); 544 } 545 546 //! return the current reading position 547 uint64_t GetPos() const { 548 return m_read_pos; 549 } 550 551 //! rewind to a given reading position 552 bool SetPos(uint64_t nPos) { 553 size_t bufsize = vchBuf.size(); 554 if (nPos + bufsize < nSrcPos) { 555 // rewinding too far, rewind as far as possible 556 m_read_pos = nSrcPos - bufsize; 557 return false; 558 } 559 if (nPos > nSrcPos) { 560 // can't go this far forward, go as far as possible 561 m_read_pos = nSrcPos; 562 return false; 563 } 564 m_read_pos = nPos; 565 return true; 566 } 567 568 //! prevent reading beyond a certain position 569 //! no argument removes the limit 570 bool SetLimit(uint64_t nPos = std::numeric_limits<uint64_t>::max()) { 571 if (nPos < m_read_pos) 572 return false; 573 nReadLimit = nPos; 574 return true; 575 } 576 577 template<typename T> 578 BufferedFile& operator>>(T&& obj) { 579 ::Unserialize(*this, obj); 580 return (*this); 581 } 582 583 //! search for a given byte in the stream, and remain positioned on it 584 void FindByte(std::byte byte) 585 { 586 // For best performance, avoid mod operation within the loop. 587 size_t buf_offset{size_t(m_read_pos % uint64_t(vchBuf.size()))}; 588 while (true) { 589 if (m_read_pos == nSrcPos) { 590 // No more bytes available; read from the file into the buffer, 591 // setting nSrcPos to one beyond the end of the new data. 592 // Throws exception if end-of-file reached. 593 Fill(); 594 } 595 const size_t len{std::min<size_t>(vchBuf.size() - buf_offset, nSrcPos - m_read_pos)}; 596 const auto it_start{vchBuf.begin() + buf_offset}; 597 const auto it_find{std::find(it_start, it_start + len, byte)}; 598 const size_t inc{size_t(std::distance(it_start, it_find))}; 599 m_read_pos += inc; 600 if (inc < len) break; 601 buf_offset += inc; 602 if (buf_offset >= vchBuf.size()) buf_offset = 0; 603 } 604 } 605 }; 606 607 #endif // BITCOIN_STREAMS_H