/ src / streams.h
streams.h
  1  // Copyright (c) 2009-2010 Satoshi Nakamoto
  2  // Copyright (c) 2009-present The Bitcoin Core developers
  3  // Distributed under the MIT software license, see the accompanying
  4  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  5  
  6  #ifndef BITCOIN_STREAMS_H
  7  #define BITCOIN_STREAMS_H
  8  
  9  #include <serialize.h>
 10  #include <span.h>
 11  #include <support/allocators/zeroafterfree.h>
 12  #include <util/check.h>
 13  #include <util/log.h>
 14  #include <util/obfuscation.h>
 15  #include <util/overflow.h>
 16  #include <util/syserror.h>
 17  
 18  #include <algorithm>
 19  #include <cassert>
 20  #include <cstddef>
 21  #include <cstdint>
 22  #include <cstdio>
 23  #include <cstring>
 24  #include <ios>
 25  #include <limits>
 26  #include <optional>
 27  #include <string>
 28  #include <vector>
 29  
 30  /* Minimal stream for overwriting and/or appending to an existing byte vector
 31   *
 32   * The referenced vector will grow as necessary
 33   */
 34  class VectorWriter
 35  {
 36  public:
 37  /*
 38   * @param[in]  vchDataIn  Referenced byte vector to overwrite/append
 39   * @param[in]  nPosIn Starting position. Vector index where writes should start. The vector will initially
 40   *                    grow as necessary to max(nPosIn, vec.size()). So to append, use vec.size().
 41  */
 42      VectorWriter(std::vector<unsigned char>& vchDataIn, size_t nPosIn) : vchData{vchDataIn}, nPos{nPosIn}
 43      {
 44          if(nPos > vchData.size())
 45              vchData.resize(nPos);
 46      }
 47  /*
 48   * (other params same as above)
 49   * @param[in]  args  A list of items to serialize starting at nPosIn.
 50  */
 51      template <typename... Args>
 52      VectorWriter(std::vector<unsigned char>& vchDataIn, size_t nPosIn, Args&&... args) : VectorWriter{vchDataIn, nPosIn}
 53      {
 54          ::SerializeMany(*this, std::forward<Args>(args)...);
 55      }
 56      void write(std::span<const std::byte> src)
 57      {
 58          assert(nPos <= vchData.size());
 59          size_t nOverwrite = std::min(src.size(), vchData.size() - nPos);
 60          if (nOverwrite) {
 61              memcpy(vchData.data() + nPos, src.data(), nOverwrite);
 62          }
 63          if (nOverwrite < src.size()) {
 64              vchData.insert(vchData.end(), UCharCast(src.data()) + nOverwrite, UCharCast(src.data() + src.size()));
 65          }
 66          nPos += src.size();
 67      }
 68      template <typename T>
 69      VectorWriter& operator<<(const T& obj)
 70      {
 71          ::Serialize(*this, obj);
 72          return (*this);
 73      }
 74  
 75  private:
 76      std::vector<unsigned char>& vchData;
 77      size_t nPos;
 78  };
 79  
 80  /** Minimal stream for reading from an existing byte array by std::span.
 81   */
 82  class SpanReader
 83  {
 84  private:
 85      std::span<const std::byte> m_data;
 86  
 87  public:
 88      /**
 89       * @param[in]  data Referenced byte vector to overwrite/append
 90       */
 91      explicit SpanReader(std::span<const unsigned char> data) : m_data{std::as_bytes(data)} {}
 92      explicit SpanReader(std::span<const std::byte> data) : m_data{data} {}
 93  
 94      template<typename T>
 95      SpanReader& operator>>(T&& obj)
 96      {
 97          ::Unserialize(*this, obj);
 98          return (*this);
 99      }
100  
101      size_t size() const { return m_data.size(); }
102      bool empty() const { return m_data.empty(); }
103  
104      void read(std::span<std::byte> dst)
105      {
106          if (dst.size() == 0) {
107              return;
108          }
109  
110          // Read from the beginning of the buffer
111          if (dst.size() > m_data.size()) {
112              throw std::ios_base::failure("SpanReader::read(): end of data");
113          }
114          memcpy(dst.data(), m_data.data(), dst.size());
115          m_data = m_data.subspan(dst.size());
116      }
117  
118      void ignore(size_t n)
119      {
120          if (n > m_data.size()) {
121              throw std::ios_base::failure("SpanReader::ignore(): end of data");
122          }
123          m_data = m_data.subspan(n);
124      }
125  };
126  
127  /** Double ended buffer combining vector and stream-like interfaces.
128   *
129   * >> and << read and write unformatted data using the above serialization templates.
130   * Fills with data in linear time; some stringstream implementations take N^2 time.
131   */
132  class DataStream
133  {
134  protected:
135      using vector_type = SerializeData;
136      vector_type vch;
137      vector_type::size_type m_read_pos{0};
138  
139  public:
140      typedef vector_type::allocator_type   allocator_type;
141      typedef vector_type::size_type        size_type;
142      typedef vector_type::difference_type  difference_type;
143      typedef vector_type::reference        reference;
144      typedef vector_type::const_reference  const_reference;
145      typedef vector_type::value_type       value_type;
146      typedef vector_type::iterator         iterator;
147      typedef vector_type::const_iterator   const_iterator;
148      typedef vector_type::reverse_iterator reverse_iterator;
149  
150      explicit DataStream() = default;
151      explicit DataStream(std::span<const uint8_t> sp) : DataStream{std::as_bytes(sp)} {}
152      explicit DataStream(std::span<const value_type> sp) : vch(sp.data(), sp.data() + sp.size()) {}
153  
154      std::string str() const
155      {
156          return std::string{UCharCast(data()), UCharCast(data() + size())};
157      }
158  
159  
160      //
161      // Vector subset
162      //
163      const_iterator begin() const                     { return vch.begin() + m_read_pos; }
164      iterator begin()                                 { return vch.begin() + m_read_pos; }
165      const_iterator end() const                       { return vch.end(); }
166      iterator end()                                   { return vch.end(); }
167      size_type size() const                           { return vch.size() - m_read_pos; }
168      bool empty() const                               { return vch.size() == m_read_pos; }
169      void resize(size_type n, value_type c = value_type{}) { vch.resize(n + m_read_pos, c); }
170      void reserve(size_type n)                        { vch.reserve(n + m_read_pos); }
171      const_reference operator[](size_type pos) const  { return vch[pos + m_read_pos]; }
172      reference operator[](size_type pos)              { return vch[pos + m_read_pos]; }
173      void clear()                                     { vch.clear(); m_read_pos = 0; }
174      value_type* data()                               { return vch.data() + m_read_pos; }
175      const value_type* data() const                   { return vch.data() + m_read_pos; }
176  
177      inline void Compact()
178      {
179          vch.erase(vch.begin(), vch.begin() + m_read_pos);
180          m_read_pos = 0;
181      }
182  
183      bool Rewind(std::optional<size_type> n = std::nullopt)
184      {
185          // Total rewind if no size is passed
186          if (!n) {
187              m_read_pos = 0;
188              return true;
189          }
190          // Rewind by n characters if the buffer hasn't been compacted yet
191          if (*n > m_read_pos)
192              return false;
193          m_read_pos -= *n;
194          return true;
195      }
196  
197  
198      //
199      // Stream subset
200      //
201      int in_avail() const         { return size(); }
202  
203      void read(std::span<value_type> dst)
204      {
205          if (dst.size() == 0) return;
206  
207          // Read from the beginning of the buffer
208          auto next_read_pos{CheckedAdd(m_read_pos, dst.size())};
209          if (!next_read_pos.has_value() || next_read_pos.value() > vch.size()) {
210              throw std::ios_base::failure("DataStream::read(): end of data");
211          }
212          memcpy(dst.data(), &vch[m_read_pos], dst.size());
213          if (next_read_pos.value() == vch.size()) {
214              m_read_pos = 0;
215              vch.clear();
216              return;
217          }
218          m_read_pos = next_read_pos.value();
219      }
220  
221      void ignore(size_t num_ignore)
222      {
223          // Ignore from the beginning of the buffer
224          auto next_read_pos{CheckedAdd(m_read_pos, num_ignore)};
225          if (!next_read_pos.has_value() || next_read_pos.value() > vch.size()) {
226              throw std::ios_base::failure("DataStream::ignore(): end of data");
227          }
228          if (next_read_pos.value() == vch.size()) {
229              m_read_pos = 0;
230              vch.clear();
231              return;
232          }
233          m_read_pos = next_read_pos.value();
234      }
235  
236      void write(std::span<const value_type> src)
237      {
238          // Write to the end of the buffer
239          vch.insert(vch.end(), src.begin(), src.end());
240      }
241  
242      template<typename T>
243      DataStream& operator<<(const T& obj)
244      {
245          ::Serialize(*this, obj);
246          return (*this);
247      }
248  
249      template <typename T>
250      DataStream& operator>>(T&& obj)
251      {
252          ::Unserialize(*this, obj);
253          return (*this);
254      }
255  
256      /** Compute total memory usage of this object (own memory + any dynamic memory). */
257      size_t GetMemoryUsage() const noexcept;
258  };
259  
template <typename IStream>
class BitStreamReader
{
private:
    IStream& m_istream;

    /// Most recent byte pulled from the input stream. It is replaced by the
    /// next byte once all of its bits have been handed out (m_offset == 8).
    uint8_t m_buffer{0};

    /// How many of m_buffer's bits, counted from the most significant end,
    /// earlier Read() calls already returned. Starts at 8 so that the first
    /// Read() fetches a byte.
    int m_offset{8};

public:
    explicit BitStreamReader(IStream& istream) : m_istream(istream) {}

    /** Read the specified number of bits from the stream. The data is returned
     * in the nbits least significant bits of a 64-bit uint.
     *
     * @throws std::out_of_range if nbits is not within [0, 64].
     */
    uint64_t Read(int nbits) {
        if (nbits < 0 || nbits > 64) {
            throw std::out_of_range("nbits must be between 0 and 64");
        }

        uint64_t result{0};
        for (int remaining{nbits}; remaining > 0;) {
            if (m_offset == 8) {
                // Current byte is used up, fetch the next one.
                m_istream >> m_buffer;
                m_offset = 0;
            }

            const int take{std::min(8 - m_offset, remaining)};
            // Shift the consumed bits out at the top, then right-align the
            // `take` bits that are wanted now.
            const uint8_t unread{static_cast<uint8_t>(m_buffer << m_offset)};
            result = (result << take) | static_cast<uint64_t>(unread >> (8 - take));

            m_offset += take;
            remaining -= take;
        }
        return result;
    }
};
302  
template <typename OStream>
class BitStreamWriter
{
private:
    OStream& m_ostream;

    /// Partially filled byte waiting to be written to the output stream. It is
    /// written to the stream when m_offset reaches 8 or Flush() is called.
    uint8_t m_buffer{0};

    /// How many bits of m_buffer, counted from the most significant end, are
    /// already occupied by previous Write() calls and not yet flushed. The
    /// next bit goes at this offset from the most significant bit position.
    int m_offset{0};

public:
    explicit BitStreamWriter(OStream& ostream) : m_ostream(ostream) {}

    /** Emits any pending partial byte before the writer goes away. */
    ~BitStreamWriter()
    {
        Flush();
    }

    /** Write the nbits least significant bits of a 64-bit int to the output
     * stream. Data is buffered until it completes an octet.
     *
     * @throws std::out_of_range if nbits is not within [0, 64].
     */
    void Write(uint64_t data, int nbits) {
        if (nbits < 0 || nbits > 64) {
            throw std::out_of_range("nbits must be between 0 and 64");
        }

        for (int remaining{nbits}; remaining > 0;) {
            const int take{std::min(8 - m_offset, remaining)};
            // Move the still-unwritten bits to the top of the word, then drop
            // them into the byte right after the bits that are already there.
            const uint64_t left_aligned{data << (64 - remaining)};
            m_buffer |= left_aligned >> (64 - 8 + m_offset);
            m_offset += take;
            remaining -= take;

            if (m_offset == 8) {
                Flush();
            }
        }
    }

    /** Flush any unwritten bits to the output stream, padding with 0's to the
     * next byte boundary. Does nothing when no bits are pending.
     */
    void Flush() {
        if (m_offset != 0) {
            m_ostream << m_buffer;
            m_buffer = 0;
            m_offset = 0;
        }
    }
};
359  
360  /** Non-refcounted RAII wrapper for FILE*
361   *
362   * Will automatically close the file when it goes out of scope if not null.
363   * If you're returning the file pointer, return file.release().
364   * If you need to close the file early, use autofile.fclose() instead of fclose(underlying_FILE).
365   *
366   * @note If the file has been written to, then the caller must close it
367   * explicitly with the `fclose()` method, check if it returns an error and treat
368   * such an error as if the `write()` method failed. The OS's `fclose(3)` may
369   * fail to flush to disk data that has been previously written, rendering the
370   * file corrupt.
371   */
372  class AutoFile
373  {
374  protected:
375      std::FILE* m_file;
376      Obfuscation m_obfuscation;
377      std::optional<int64_t> m_position;
378      bool m_was_written{false};
379  
380  public:
381      explicit AutoFile(std::FILE* file, const Obfuscation& obfuscation = {});
382  
383      ~AutoFile()
384      {
385          if (m_was_written) {
386              // Callers that wrote to the file must have closed it explicitly
387              // with the fclose() method and checked that the close succeeded.
388              // This is because here in the destructor we have no way to signal
389              // errors from fclose() which, after write, could mean the file is
390              // corrupted and must be handled properly at the call site.
391              // Destructors in C++ cannot signal an error to the callers because
392              // they do not return a value and are not allowed to throw exceptions.
393              Assume(IsNull());
394          }
395  
396          if (fclose() != 0) {
397              LogError("Failed to close file: %s", SysErrorString(errno));
398          }
399      }
400  
401      // Disallow copies
402      AutoFile(const AutoFile&) = delete;
403      AutoFile& operator=(const AutoFile&) = delete;
404  
405      bool feof() const { return std::feof(m_file); }
406  
407      [[nodiscard]] int fclose()
408      {
409          if (auto rel{release()}) return std::fclose(rel);
410          return 0;
411      }
412  
413      /** Get wrapped FILE* with transfer of ownership.
414       * @note This will invalidate the AutoFile object, and makes it the responsibility of the caller
415       * of this function to clean up the returned FILE*.
416       */
417      std::FILE* release()
418      {
419          std::FILE* ret{m_file};
420          m_file = nullptr;
421          return ret;
422      }
423  
424      /** Return true if the wrapped FILE* is nullptr, false otherwise.
425       */
426      bool IsNull() const { return m_file == nullptr; }
427  
428      /** Continue with a different XOR key */
429      void SetObfuscation(const Obfuscation& obfuscation) { m_obfuscation = obfuscation; }
430  
431      /** Implementation detail, only used internally. */
432      std::size_t detail_fread(std::span<std::byte> dst);
433  
434      /** Wrapper around fseek(). Will throw if seeking is not possible. */
435      void seek(int64_t offset, int origin);
436  
437      /** Find position within the file. Will throw if unknown. */
438      int64_t tell();
439  
440      /** Return the size of the file. Will throw if unknown. */
441      int64_t size();
442  
443      /** Wrapper around FileCommit(). */
444      bool Commit();
445  
446      /** Wrapper around TruncateFile(). */
447      bool Truncate(unsigned size);
448  
449      //! Write a mutable buffer more efficiently than write(), obfuscating the buffer in-place.
450      void write_buffer(std::span<std::byte> src);
451  
452      //
453      // Stream subset
454      //
455      void read(std::span<std::byte> dst);
456      void ignore(size_t nSize);
457      void write(std::span<const std::byte> src);
458  
459      template <typename T>
460      AutoFile& operator<<(const T& obj)
461      {
462          ::Serialize(*this, obj);
463          return *this;
464      }
465  
466      template <typename T>
467      AutoFile& operator>>(T&& obj)
468      {
469          ::Unserialize(*this, obj);
470          return *this;
471      }
472  };
473  
//! Resizable byte buffer; used as the internal buffer type of the buffered stream wrappers below.
using DataBuffer = std::vector<std::byte>;
475  
/** Wrapper around an AutoFile& that implements a ring buffer to
 *  deserialize from. It guarantees the ability to rewind a given number of bytes.
 *
 *  BufferedFile only holds a reference to the AutoFile and never closes it:
 *  the referenced AutoFile remains responsible for closing the underlying file
 *  and must outlive this object.
 */
482  class BufferedFile
483  {
484  private:
485      AutoFile& m_src;
486      uint64_t nSrcPos{0};  //!< how many bytes have been read from source
487      uint64_t m_read_pos{0}; //!< how many bytes have been read from this
488      uint64_t nReadLimit;  //!< up to which position we're allowed to read
489      uint64_t nRewind;     //!< how many bytes we guarantee to rewind
490      DataBuffer vchBuf;
491  
492      //! read data from the source to fill the buffer
493      bool Fill() {
494          unsigned int pos = nSrcPos % vchBuf.size();
495          unsigned int readNow = vchBuf.size() - pos;
496          unsigned int nAvail = vchBuf.size() - (nSrcPos - m_read_pos) - nRewind;
497          if (nAvail < readNow)
498              readNow = nAvail;
499          if (readNow == 0)
500              return false;
501          size_t nBytes{m_src.detail_fread(std::span{vchBuf}.subspan(pos, readNow))};
502          if (nBytes == 0) {
503              throw std::ios_base::failure{m_src.feof() ? "BufferedFile::Fill: end of file" : "BufferedFile::Fill: fread failed"};
504          }
505          nSrcPos += nBytes;
506          return true;
507      }
508  
509      //! Advance the stream's read pointer (m_read_pos) by up to 'length' bytes,
510      //! filling the buffer from the file so that at least one byte is available.
511      //! Return a pointer to the available buffer data and the number of bytes
512      //! (which may be less than the requested length) that may be accessed
513      //! beginning at that pointer.
514      std::pair<std::byte*, size_t> AdvanceStream(size_t length)
515      {
516          assert(m_read_pos <= nSrcPos);
517          if (m_read_pos + length > nReadLimit) {
518              throw std::ios_base::failure("Attempt to position past buffer limit");
519          }
520          // If there are no bytes available, read from the file.
521          if (m_read_pos == nSrcPos && length > 0) Fill();
522  
523          size_t buffer_offset{static_cast<size_t>(m_read_pos % vchBuf.size())};
524          size_t buffer_available{static_cast<size_t>(vchBuf.size() - buffer_offset)};
525          size_t bytes_until_source_pos{static_cast<size_t>(nSrcPos - m_read_pos)};
526          size_t advance{std::min({length, buffer_available, bytes_until_source_pos})};
527          m_read_pos += advance;
528          return std::make_pair(&vchBuf[buffer_offset], advance);
529      }
530  
531  public:
532      BufferedFile(AutoFile& file LIFETIMEBOUND, uint64_t nBufSize, uint64_t nRewindIn)
533          : m_src{file}, nReadLimit{std::numeric_limits<uint64_t>::max()}, nRewind{nRewindIn}, vchBuf(nBufSize, std::byte{0})
534      {
535          if (nRewindIn >= nBufSize)
536              throw std::ios_base::failure("Rewind limit must be less than buffer size");
537      }
538  
539      //! check whether we're at the end of the source file
540      bool eof() const {
541          return m_read_pos == nSrcPos && m_src.feof();
542      }
543  
544      //! read a number of bytes
545      void read(std::span<std::byte> dst)
546      {
547          while (dst.size() > 0) {
548              auto [buffer_pointer, length]{AdvanceStream(dst.size())};
549              memcpy(dst.data(), buffer_pointer, length);
550              dst = dst.subspan(length);
551          }
552      }
553  
554      //! Move the read position ahead in the stream to the given position.
555      //! Use SetPos() to back up in the stream, not SkipTo().
556      void SkipTo(const uint64_t file_pos)
557      {
558          assert(file_pos >= m_read_pos);
559          while (m_read_pos < file_pos) AdvanceStream(file_pos - m_read_pos);
560      }
561  
562      //! return the current reading position
563      uint64_t GetPos() const {
564          return m_read_pos;
565      }
566  
567      //! rewind to a given reading position
568      bool SetPos(uint64_t nPos) {
569          size_t bufsize = vchBuf.size();
570          if (nPos + bufsize < nSrcPos) {
571              // rewinding too far, rewind as far as possible
572              m_read_pos = nSrcPos - bufsize;
573              return false;
574          }
575          if (nPos > nSrcPos) {
576              // can't go this far forward, go as far as possible
577              m_read_pos = nSrcPos;
578              return false;
579          }
580          m_read_pos = nPos;
581          return true;
582      }
583  
584      //! prevent reading beyond a certain position
585      //! no argument removes the limit
586      bool SetLimit(uint64_t nPos = std::numeric_limits<uint64_t>::max()) {
587          if (nPos < m_read_pos)
588              return false;
589          nReadLimit = nPos;
590          return true;
591      }
592  
593      template<typename T>
594      BufferedFile& operator>>(T&& obj) {
595          ::Unserialize(*this, obj);
596          return (*this);
597      }
598  
599      //! search for a given byte in the stream, and remain positioned on it
600      void FindByte(std::byte byte)
601      {
602          // For best performance, avoid mod operation within the loop.
603          size_t buf_offset{size_t(m_read_pos % uint64_t(vchBuf.size()))};
604          while (true) {
605              if (m_read_pos == nSrcPos) {
606                  // No more bytes available; read from the file into the buffer,
607                  // setting nSrcPos to one beyond the end of the new data.
608                  // Throws exception if end-of-file reached.
609                  Fill();
610              }
611              const size_t len{std::min<size_t>(vchBuf.size() - buf_offset, nSrcPos - m_read_pos)};
612              const auto it_start{vchBuf.begin() + buf_offset};
613              const auto it_find{std::find(it_start, it_start + len, byte)};
614              const size_t inc{size_t(std::distance(it_start, it_find))};
615              m_read_pos += inc;
616              if (inc < len) break;
617              buf_offset += inc;
618              if (buf_offset >= vchBuf.size()) buf_offset = 0;
619          }
620      }
621  };
622  
623  /**
624   * Wrapper that buffers reads from an underlying stream.
625   * Requires underlying stream to support read() and detail_fread() calls
626   * to support fixed-size and variable-sized reads, respectively.
627   */
628  template <typename S>
629  class BufferedReader
630  {
631      S& m_src;
632      DataBuffer m_buf;
633      size_t m_buf_pos;
634  
635  public:
636      //! Requires stream ownership to prevent leaving the stream at an unexpected position after buffered reads.
637      explicit BufferedReader(S&& stream LIFETIMEBOUND, size_t size = 1 << 16)
638          requires std::is_rvalue_reference_v<S&&>
639          : m_src{stream}, m_buf(size), m_buf_pos{size} {}
640  
641      void read(std::span<std::byte> dst)
642      {
643          if (const auto available{std::min(dst.size(), m_buf.size() - m_buf_pos)}) {
644              std::copy_n(m_buf.begin() + m_buf_pos, available, dst.begin());
645              m_buf_pos += available;
646              dst = dst.subspan(available);
647          }
648          if (dst.size()) {
649              assert(m_buf_pos == m_buf.size());
650              m_src.read(dst);
651  
652              m_buf_pos = 0;
653              m_buf.resize(m_src.detail_fread(m_buf));
654          }
655      }
656  
657      template <typename T>
658      BufferedReader& operator>>(T&& obj)
659      {
660          Unserialize(*this, obj);
661          return *this;
662      }
663  };
664  
665  /**
666   * Wrapper that buffers writes to an underlying stream.
667   * Requires underlying stream to support write_buffer() method
668   * for efficient buffer flushing and obfuscation.
669   */
670  template <typename S>
671  class BufferedWriter
672  {
673      S& m_dst;
674      DataBuffer m_buf;
675      size_t m_buf_pos{0};
676  
677  public:
678      explicit BufferedWriter(S& stream LIFETIMEBOUND, size_t size = 1 << 16) : m_dst{stream}, m_buf(size) {}
679  
680      ~BufferedWriter() { flush(); }
681  
682      void flush()
683      {
684          if (m_buf_pos) m_dst.write_buffer(std::span{m_buf}.first(m_buf_pos));
685          m_buf_pos = 0;
686      }
687  
688      void write(std::span<const std::byte> src)
689      {
690          while (const auto available{std::min(src.size(), m_buf.size() - m_buf_pos)}) {
691              std::copy_n(src.begin(), available, m_buf.begin() + m_buf_pos);
692              m_buf_pos += available;
693              if (m_buf_pos == m_buf.size()) flush();
694              src = src.subspan(available);
695          }
696      }
697  
698      template <typename T>
699      BufferedWriter& operator<<(const T& obj)
700      {
701          Serialize(*this, obj);
702          return *this;
703      }
704  };
705  
706  #endif // BITCOIN_STREAMS_H