streams.h
1 // Copyright (c) 2009-2010 Satoshi Nakamoto 2 // Copyright (c) 2009-present The Bitcoin Core developers 3 // Distributed under the MIT software license, see the accompanying 4 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 5 6 #ifndef BITCOIN_STREAMS_H 7 #define BITCOIN_STREAMS_H 8 9 #include <serialize.h> 10 #include <span.h> 11 #include <support/allocators/zeroafterfree.h> 12 #include <util/check.h> 13 #include <util/log.h> 14 #include <util/obfuscation.h> 15 #include <util/overflow.h> 16 #include <util/syserror.h> 17 18 #include <algorithm> 19 #include <cassert> 20 #include <cstddef> 21 #include <cstdint> 22 #include <cstdio> 23 #include <cstring> 24 #include <ios> 25 #include <limits> 26 #include <optional> 27 #include <string> 28 #include <vector> 29 30 /* Minimal stream for overwriting and/or appending to an existing byte vector 31 * 32 * The referenced vector will grow as necessary 33 */ 34 class VectorWriter 35 { 36 public: 37 /* 38 * @param[in] vchDataIn Referenced byte vector to overwrite/append 39 * @param[in] nPosIn Starting position. Vector index where writes should start. The vector will initially 40 * grow as necessary to max(nPosIn, vec.size()). So to append, use vec.size(). 41 */ 42 VectorWriter(std::vector<unsigned char>& vchDataIn, size_t nPosIn) : vchData{vchDataIn}, nPos{nPosIn} 43 { 44 if(nPos > vchData.size()) 45 vchData.resize(nPos); 46 } 47 /* 48 * (other params same as above) 49 * @param[in] args A list of items to serialize starting at nPosIn. 50 */ 51 template <typename... Args> 52 VectorWriter(std::vector<unsigned char>& vchDataIn, size_t nPosIn, Args&&... args) : VectorWriter{vchDataIn, nPosIn} 53 { 54 ::SerializeMany(*this, std::forward<Args>(args)...); 55 } 56 void write(std::span<const std::byte> src) 57 { 58 assert(nPos <= vchData.size()); 59 size_t nOverwrite = std::min(src.size(), vchData.size() - nPos); 60 if (nOverwrite) { 61 memcpy(vchData.data() + nPos, src.data(), nOverwrite); 62 } 63 if (nOverwrite < src.size()) { 64 vchData.insert(vchData.end(), UCharCast(src.data()) + nOverwrite, UCharCast(src.data() + src.size())); 65 } 66 nPos += src.size(); 67 } 68 template <typename T> 69 VectorWriter& operator<<(const T& obj) 70 { 71 ::Serialize(*this, obj); 72 return (*this); 73 } 74 75 private: 76 std::vector<unsigned char>& vchData; 77 size_t nPos; 78 }; 79 80 /** Minimal stream for reading from an existing byte array by std::span. 81 */ 82 class SpanReader 83 { 84 private: 85 std::span<const std::byte> m_data; 86 87 public: 88 /** 89 * @param[in] data Referenced byte vector to overwrite/append 90 */ 91 explicit SpanReader(std::span<const unsigned char> data) : m_data{std::as_bytes(data)} {} 92 explicit SpanReader(std::span<const std::byte> data) : m_data{data} {} 93 94 template<typename T> 95 SpanReader& operator>>(T&& obj) 96 { 97 ::Unserialize(*this, obj); 98 return (*this); 99 } 100 101 size_t size() const { return m_data.size(); } 102 bool empty() const { return m_data.empty(); } 103 104 void read(std::span<std::byte> dst) 105 { 106 if (dst.size() == 0) { 107 return; 108 } 109 110 // Read from the beginning of the buffer 111 if (dst.size() > m_data.size()) { 112 throw std::ios_base::failure("SpanReader::read(): end of data"); 113 } 114 memcpy(dst.data(), m_data.data(), dst.size()); 115 m_data = m_data.subspan(dst.size()); 116 } 117 118 void ignore(size_t n) 119 { 120 if (n > m_data.size()) { 121 throw std::ios_base::failure("SpanReader::ignore(): end of data"); 122 } 123 m_data = m_data.subspan(n); 124 } 125 }; 126 127 /** Double ended buffer combining vector and stream-like interfaces. 128 * 129 * >> and << read and write unformatted data using the above serialization templates. 130 * Fills with data in linear time; some stringstream implementations take N^2 time. 131 */ 132 class DataStream 133 { 134 protected: 135 using vector_type = SerializeData; 136 vector_type vch; 137 vector_type::size_type m_read_pos{0}; 138 139 public: 140 typedef vector_type::allocator_type allocator_type; 141 typedef vector_type::size_type size_type; 142 typedef vector_type::difference_type difference_type; 143 typedef vector_type::reference reference; 144 typedef vector_type::const_reference const_reference; 145 typedef vector_type::value_type value_type; 146 typedef vector_type::iterator iterator; 147 typedef vector_type::const_iterator const_iterator; 148 typedef vector_type::reverse_iterator reverse_iterator; 149 150 explicit DataStream() = default; 151 explicit DataStream(std::span<const uint8_t> sp) : DataStream{std::as_bytes(sp)} {} 152 explicit DataStream(std::span<const value_type> sp) : vch(sp.data(), sp.data() + sp.size()) {} 153 154 std::string str() const 155 { 156 return std::string{UCharCast(data()), UCharCast(data() + size())}; 157 } 158 159 160 // 161 // Vector subset 162 // 163 const_iterator begin() const { return vch.begin() + m_read_pos; } 164 iterator begin() { return vch.begin() + m_read_pos; } 165 const_iterator end() const { return vch.end(); } 166 iterator end() { return vch.end(); } 167 size_type size() const { return vch.size() - m_read_pos; } 168 bool empty() const { return vch.size() == m_read_pos; } 169 void resize(size_type n, value_type c = value_type{}) { vch.resize(n + m_read_pos, c); } 170 void reserve(size_type n) { vch.reserve(n + m_read_pos); } 171 const_reference operator[](size_type pos) const { return vch[pos + m_read_pos]; } 172 reference operator[](size_type pos) { return vch[pos + m_read_pos]; } 173 void clear() { vch.clear(); m_read_pos = 0; } 174 value_type* data() { return vch.data() + m_read_pos; } 175 const value_type* data() const { return vch.data() + m_read_pos; } 176 177 inline void Compact() 178 { 179 vch.erase(vch.begin(), vch.begin() + m_read_pos); 180 m_read_pos = 0; 181 } 182 183 bool Rewind(std::optional<size_type> n = std::nullopt) 184 { 185 // Total rewind if no size is passed 186 if (!n) { 187 m_read_pos = 0; 188 return true; 189 } 190 // Rewind by n characters if the buffer hasn't been compacted yet 191 if (*n > m_read_pos) 192 return false; 193 m_read_pos -= *n; 194 return true; 195 } 196 197 198 // 199 // Stream subset 200 // 201 int in_avail() const { return size(); } 202 203 void read(std::span<value_type> dst) 204 { 205 if (dst.size() == 0) return; 206 207 // Read from the beginning of the buffer 208 auto next_read_pos{CheckedAdd(m_read_pos, dst.size())}; 209 if (!next_read_pos.has_value() || next_read_pos.value() > vch.size()) { 210 throw std::ios_base::failure("DataStream::read(): end of data"); 211 } 212 memcpy(dst.data(), &vch[m_read_pos], dst.size()); 213 if (next_read_pos.value() == vch.size()) { 214 m_read_pos = 0; 215 vch.clear(); 216 return; 217 } 218 m_read_pos = next_read_pos.value(); 219 } 220 221 void ignore(size_t num_ignore) 222 { 223 // Ignore from the beginning of the buffer 224 auto next_read_pos{CheckedAdd(m_read_pos, num_ignore)}; 225 if (!next_read_pos.has_value() || next_read_pos.value() > vch.size()) { 226 throw std::ios_base::failure("DataStream::ignore(): end of data"); 227 } 228 if (next_read_pos.value() == vch.size()) { 229 m_read_pos = 0; 230 vch.clear(); 231 return; 232 } 233 m_read_pos = next_read_pos.value(); 234 } 235 236 void write(std::span<const value_type> src) 237 { 238 // Write to the end of the buffer 239 vch.insert(vch.end(), src.begin(), src.end()); 240 } 241 242 template<typename T> 243 DataStream& operator<<(const T& obj) 244 { 245 ::Serialize(*this, obj); 246 return (*this); 247 } 248 249 template <typename T> 250 DataStream& operator>>(T&& obj) 251 { 252 ::Unserialize(*this, obj); 253 return (*this); 254 } 255 256 /** Compute total memory usage of this object (own memory + any dynamic memory). */ 257 size_t GetMemoryUsage() const noexcept; 258 }; 259 260 template <typename IStream> 261 class BitStreamReader 262 { 263 private: 264 IStream& m_istream; 265 266 /// Buffered byte read in from the input stream. A new byte is read into the 267 /// buffer when m_offset reaches 8. 268 uint8_t m_buffer{0}; 269 270 /// Number of high order bits in m_buffer already returned by previous 271 /// Read() calls. The next bit to be returned is at this offset from the 272 /// most significant bit position. 273 int m_offset{8}; 274 275 public: 276 explicit BitStreamReader(IStream& istream) : m_istream(istream) {} 277 278 /** Read the specified number of bits from the stream. The data is returned 279 * in the nbits least significant bits of a 64-bit uint. 280 */ 281 uint64_t Read(int nbits) { 282 if (nbits < 0 || nbits > 64) { 283 throw std::out_of_range("nbits must be between 0 and 64"); 284 } 285 286 uint64_t data = 0; 287 while (nbits > 0) { 288 if (m_offset == 8) { 289 m_istream >> m_buffer; 290 m_offset = 0; 291 } 292 293 int bits = std::min(8 - m_offset, nbits); 294 data <<= bits; 295 data |= static_cast<uint8_t>(m_buffer << m_offset) >> (8 - bits); 296 m_offset += bits; 297 nbits -= bits; 298 } 299 return data; 300 } 301 }; 302 303 template <typename OStream> 304 class BitStreamWriter 305 { 306 private: 307 OStream& m_ostream; 308 309 /// Buffered byte waiting to be written to the output stream. The byte is 310 /// written buffer when m_offset reaches 8 or Flush() is called. 311 uint8_t m_buffer{0}; 312 313 /// Number of high order bits in m_buffer already written by previous 314 /// Write() calls and not yet flushed to the stream. The next bit to be 315 /// written to is at this offset from the most significant bit position. 316 int m_offset{0}; 317 318 public: 319 explicit BitStreamWriter(OStream& ostream) : m_ostream(ostream) {} 320 321 ~BitStreamWriter() 322 { 323 Flush(); 324 } 325 326 /** Write the nbits least significant bits of a 64-bit int to the output 327 * stream. Data is buffered until it completes an octet. 328 */ 329 void Write(uint64_t data, int nbits) { 330 if (nbits < 0 || nbits > 64) { 331 throw std::out_of_range("nbits must be between 0 and 64"); 332 } 333 334 while (nbits > 0) { 335 int bits = std::min(8 - m_offset, nbits); 336 m_buffer |= (data << (64 - nbits)) >> (64 - 8 + m_offset); 337 m_offset += bits; 338 nbits -= bits; 339 340 if (m_offset == 8) { 341 Flush(); 342 } 343 } 344 } 345 346 /** Flush any unwritten bits to the output stream, padding with 0's to the 347 * next byte boundary. 348 */ 349 void Flush() { 350 if (m_offset == 0) { 351 return; 352 } 353 354 m_ostream << m_buffer; 355 m_buffer = 0; 356 m_offset = 0; 357 } 358 }; 359 360 /** Non-refcounted RAII wrapper for FILE* 361 * 362 * Will automatically close the file when it goes out of scope if not null. 363 * If you're returning the file pointer, return file.release(). 364 * If you need to close the file early, use autofile.fclose() instead of fclose(underlying_FILE). 365 * 366 * @note If the file has been written to, then the caller must close it 367 * explicitly with the `fclose()` method, check if it returns an error and treat 368 * such an error as if the `write()` method failed. The OS's `fclose(3)` may 369 * fail to flush to disk data that has been previously written, rendering the 370 * file corrupt. 371 */ 372 class AutoFile 373 { 374 protected: 375 std::FILE* m_file; 376 Obfuscation m_obfuscation; 377 std::optional<int64_t> m_position; 378 bool m_was_written{false}; 379 380 public: 381 explicit AutoFile(std::FILE* file, const Obfuscation& obfuscation = {}); 382 383 ~AutoFile() 384 { 385 if (m_was_written) { 386 // Callers that wrote to the file must have closed it explicitly 387 // with the fclose() method and checked that the close succeeded. 388 // This is because here in the destructor we have no way to signal 389 // errors from fclose() which, after write, could mean the file is 390 // corrupted and must be handled properly at the call site. 391 // Destructors in C++ cannot signal an error to the callers because 392 // they do not return a value and are not allowed to throw exceptions. 393 Assume(IsNull()); 394 } 395 396 if (fclose() != 0) { 397 LogError("Failed to close file: %s", SysErrorString(errno)); 398 } 399 } 400 401 // Disallow copies 402 AutoFile(const AutoFile&) = delete; 403 AutoFile& operator=(const AutoFile&) = delete; 404 405 bool feof() const { return std::feof(m_file); } 406 407 [[nodiscard]] int fclose() 408 { 409 if (auto rel{release()}) return std::fclose(rel); 410 return 0; 411 } 412 413 /** Get wrapped FILE* with transfer of ownership. 414 * @note This will invalidate the AutoFile object, and makes it the responsibility of the caller 415 * of this function to clean up the returned FILE*. 416 */ 417 std::FILE* release() 418 { 419 std::FILE* ret{m_file}; 420 m_file = nullptr; 421 return ret; 422 } 423 424 /** Return true if the wrapped FILE* is nullptr, false otherwise. 425 */ 426 bool IsNull() const { return m_file == nullptr; } 427 428 /** Continue with a different XOR key */ 429 void SetObfuscation(const Obfuscation& obfuscation) { m_obfuscation = obfuscation; } 430 431 /** Implementation detail, only used internally. */ 432 std::size_t detail_fread(std::span<std::byte> dst); 433 434 /** Wrapper around fseek(). Will throw if seeking is not possible. */ 435 void seek(int64_t offset, int origin); 436 437 /** Find position within the file. Will throw if unknown. */ 438 int64_t tell(); 439 440 /** Return the size of the file. Will throw if unknown. */ 441 int64_t size(); 442 443 /** Wrapper around FileCommit(). */ 444 bool Commit(); 445 446 /** Wrapper around TruncateFile(). */ 447 bool Truncate(unsigned size); 448 449 //! Write a mutable buffer more efficiently than write(), obfuscating the buffer in-place. 450 void write_buffer(std::span<std::byte> src); 451 452 // 453 // Stream subset 454 // 455 void read(std::span<std::byte> dst); 456 void ignore(size_t nSize); 457 void write(std::span<const std::byte> src); 458 459 template <typename T> 460 AutoFile& operator<<(const T& obj) 461 { 462 ::Serialize(*this, obj); 463 return *this; 464 } 465 466 template <typename T> 467 AutoFile& operator>>(T&& obj) 468 { 469 ::Unserialize(*this, obj); 470 return *this; 471 } 472 }; 473 474 using DataBuffer = std::vector<std::byte>; 475 476 /** Wrapper around an AutoFile& that implements a ring buffer to 477 * deserialize from. It guarantees the ability to rewind a given number of bytes. 478 * 479 * Will automatically close the file when it goes out of scope if not null. 480 * If you need to close the file early, use file.fclose() instead of fclose(file). 481 */ 482 class BufferedFile 483 { 484 private: 485 AutoFile& m_src; 486 uint64_t nSrcPos{0}; //!< how many bytes have been read from source 487 uint64_t m_read_pos{0}; //!< how many bytes have been read from this 488 uint64_t nReadLimit; //!< up to which position we're allowed to read 489 uint64_t nRewind; //!< how many bytes we guarantee to rewind 490 DataBuffer vchBuf; 491 492 //! read data from the source to fill the buffer 493 bool Fill() { 494 unsigned int pos = nSrcPos % vchBuf.size(); 495 unsigned int readNow = vchBuf.size() - pos; 496 unsigned int nAvail = vchBuf.size() - (nSrcPos - m_read_pos) - nRewind; 497 if (nAvail < readNow) 498 readNow = nAvail; 499 if (readNow == 0) 500 return false; 501 size_t nBytes{m_src.detail_fread(std::span{vchBuf}.subspan(pos, readNow))}; 502 if (nBytes == 0) { 503 throw std::ios_base::failure{m_src.feof() ? "BufferedFile::Fill: end of file" : "BufferedFile::Fill: fread failed"}; 504 } 505 nSrcPos += nBytes; 506 return true; 507 } 508 509 //! Advance the stream's read pointer (m_read_pos) by up to 'length' bytes, 510 //! filling the buffer from the file so that at least one byte is available. 511 //! Return a pointer to the available buffer data and the number of bytes 512 //! (which may be less than the requested length) that may be accessed 513 //! beginning at that pointer. 514 std::pair<std::byte*, size_t> AdvanceStream(size_t length) 515 { 516 assert(m_read_pos <= nSrcPos); 517 if (m_read_pos + length > nReadLimit) { 518 throw std::ios_base::failure("Attempt to position past buffer limit"); 519 } 520 // If there are no bytes available, read from the file. 521 if (m_read_pos == nSrcPos && length > 0) Fill(); 522 523 size_t buffer_offset{static_cast<size_t>(m_read_pos % vchBuf.size())}; 524 size_t buffer_available{static_cast<size_t>(vchBuf.size() - buffer_offset)}; 525 size_t bytes_until_source_pos{static_cast<size_t>(nSrcPos - m_read_pos)}; 526 size_t advance{std::min({length, buffer_available, bytes_until_source_pos})}; 527 m_read_pos += advance; 528 return std::make_pair(&vchBuf[buffer_offset], advance); 529 } 530 531 public: 532 BufferedFile(AutoFile& file LIFETIMEBOUND, uint64_t nBufSize, uint64_t nRewindIn) 533 : m_src{file}, nReadLimit{std::numeric_limits<uint64_t>::max()}, nRewind{nRewindIn}, vchBuf(nBufSize, std::byte{0}) 534 { 535 if (nRewindIn >= nBufSize) 536 throw std::ios_base::failure("Rewind limit must be less than buffer size"); 537 } 538 539 //! check whether we're at the end of the source file 540 bool eof() const { 541 return m_read_pos == nSrcPos && m_src.feof(); 542 } 543 544 //! read a number of bytes 545 void read(std::span<std::byte> dst) 546 { 547 while (dst.size() > 0) { 548 auto [buffer_pointer, length]{AdvanceStream(dst.size())}; 549 memcpy(dst.data(), buffer_pointer, length); 550 dst = dst.subspan(length); 551 } 552 } 553 554 //! Move the read position ahead in the stream to the given position. 555 //! Use SetPos() to back up in the stream, not SkipTo(). 556 void SkipTo(const uint64_t file_pos) 557 { 558 assert(file_pos >= m_read_pos); 559 while (m_read_pos < file_pos) AdvanceStream(file_pos - m_read_pos); 560 } 561 562 //! return the current reading position 563 uint64_t GetPos() const { 564 return m_read_pos; 565 } 566 567 //! rewind to a given reading position 568 bool SetPos(uint64_t nPos) { 569 size_t bufsize = vchBuf.size(); 570 if (nPos + bufsize < nSrcPos) { 571 // rewinding too far, rewind as far as possible 572 m_read_pos = nSrcPos - bufsize; 573 return false; 574 } 575 if (nPos > nSrcPos) { 576 // can't go this far forward, go as far as possible 577 m_read_pos = nSrcPos; 578 return false; 579 } 580 m_read_pos = nPos; 581 return true; 582 } 583 584 //! prevent reading beyond a certain position 585 //! no argument removes the limit 586 bool SetLimit(uint64_t nPos = std::numeric_limits<uint64_t>::max()) { 587 if (nPos < m_read_pos) 588 return false; 589 nReadLimit = nPos; 590 return true; 591 } 592 593 template<typename T> 594 BufferedFile& operator>>(T&& obj) { 595 ::Unserialize(*this, obj); 596 return (*this); 597 } 598 599 //! search for a given byte in the stream, and remain positioned on it 600 void FindByte(std::byte byte) 601 { 602 // For best performance, avoid mod operation within the loop. 603 size_t buf_offset{size_t(m_read_pos % uint64_t(vchBuf.size()))}; 604 while (true) { 605 if (m_read_pos == nSrcPos) { 606 // No more bytes available; read from the file into the buffer, 607 // setting nSrcPos to one beyond the end of the new data. 608 // Throws exception if end-of-file reached. 609 Fill(); 610 } 611 const size_t len{std::min<size_t>(vchBuf.size() - buf_offset, nSrcPos - m_read_pos)}; 612 const auto it_start{vchBuf.begin() + buf_offset}; 613 const auto it_find{std::find(it_start, it_start + len, byte)}; 614 const size_t inc{size_t(std::distance(it_start, it_find))}; 615 m_read_pos += inc; 616 if (inc < len) break; 617 buf_offset += inc; 618 if (buf_offset >= vchBuf.size()) buf_offset = 0; 619 } 620 } 621 }; 622 623 /** 624 * Wrapper that buffers reads from an underlying stream. 625 * Requires underlying stream to support read() and detail_fread() calls 626 * to support fixed-size and variable-sized reads, respectively. 627 */ 628 template <typename S> 629 class BufferedReader 630 { 631 S& m_src; 632 DataBuffer m_buf; 633 size_t m_buf_pos; 634 635 public: 636 //! Requires stream ownership to prevent leaving the stream at an unexpected position after buffered reads. 637 explicit BufferedReader(S&& stream LIFETIMEBOUND, size_t size = 1 << 16) 638 requires std::is_rvalue_reference_v<S&&> 639 : m_src{stream}, m_buf(size), m_buf_pos{size} {} 640 641 void read(std::span<std::byte> dst) 642 { 643 if (const auto available{std::min(dst.size(), m_buf.size() - m_buf_pos)}) { 644 std::copy_n(m_buf.begin() + m_buf_pos, available, dst.begin()); 645 m_buf_pos += available; 646 dst = dst.subspan(available); 647 } 648 if (dst.size()) { 649 assert(m_buf_pos == m_buf.size()); 650 m_src.read(dst); 651 652 m_buf_pos = 0; 653 m_buf.resize(m_src.detail_fread(m_buf)); 654 } 655 } 656 657 template <typename T> 658 BufferedReader& operator>>(T&& obj) 659 { 660 Unserialize(*this, obj); 661 return *this; 662 } 663 }; 664 665 /** 666 * Wrapper that buffers writes to an underlying stream. 667 * Requires underlying stream to support write_buffer() method 668 * for efficient buffer flushing and obfuscation. 669 */ 670 template <typename S> 671 class BufferedWriter 672 { 673 S& m_dst; 674 DataBuffer m_buf; 675 size_t m_buf_pos{0}; 676 677 public: 678 explicit BufferedWriter(S& stream LIFETIMEBOUND, size_t size = 1 << 16) : m_dst{stream}, m_buf(size) {} 679 680 ~BufferedWriter() { flush(); } 681 682 void flush() 683 { 684 if (m_buf_pos) m_dst.write_buffer(std::span{m_buf}.first(m_buf_pos)); 685 m_buf_pos = 0; 686 } 687 688 void write(std::span<const std::byte> src) 689 { 690 while (const auto available{std::min(src.size(), m_buf.size() - m_buf_pos)}) { 691 std::copy_n(src.begin(), available, m_buf.begin() + m_buf_pos); 692 m_buf_pos += available; 693 if (m_buf_pos == m_buf.size()) flush(); 694 src = src.subspan(available); 695 } 696 } 697 698 template <typename T> 699 BufferedWriter& operator<<(const T& obj) 700 { 701 Serialize(*this, obj); 702 return *this; 703 } 704 }; 705 706 #endif // BITCOIN_STREAMS_H