// src/streams.h
  1  // Copyright (c) 2009-2010 Satoshi Nakamoto
  2  // Copyright (c) 2009-2022 The Bitcoin Core developers
  3  // Distributed under the MIT software license, see the accompanying
  4  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  5  
  6  #ifndef BITCOIN_STREAMS_H
  7  #define BITCOIN_STREAMS_H
  8  
  9  #include <serialize.h>
 10  #include <span.h>
 11  #include <support/allocators/zeroafterfree.h>
 12  #include <util/overflow.h>
 13  
 14  #include <algorithm>
 15  #include <assert.h>
 16  #include <cstddef>
 17  #include <cstdio>
 18  #include <ios>
 19  #include <limits>
 20  #include <optional>
 21  #include <stdint.h>
 22  #include <string.h>
 23  #include <string>
 24  #include <utility>
 25  #include <vector>
 26  
 27  namespace util {
 28  inline void Xor(Span<std::byte> write, Span<const std::byte> key, size_t key_offset = 0)
 29  {
 30      if (key.size() == 0) {
 31          return;
 32      }
 33      key_offset %= key.size();
 34  
 35      for (size_t i = 0, j = key_offset; i != write.size(); i++) {
 36          write[i] ^= key[j++];
 37  
 38          // This potentially acts on very many bytes of data, so it's
 39          // important that we calculate `j`, i.e. the `key` index in this
 40          // way instead of doing a %, which would effectively be a division
 41          // for each byte Xor'd -- much slower than need be.
 42          if (j == key.size())
 43              j = 0;
 44      }
 45  }
 46  } // namespace util
 47  
 48  /* Minimal stream for overwriting and/or appending to an existing byte vector
 49   *
 50   * The referenced vector will grow as necessary
 51   */
 52  class VectorWriter
 53  {
 54  public:
 55  /*
 56   * @param[in]  vchDataIn  Referenced byte vector to overwrite/append
 57   * @param[in]  nPosIn Starting position. Vector index where writes should start. The vector will initially
 58   *                    grow as necessary to max(nPosIn, vec.size()). So to append, use vec.size().
 59  */
 60      VectorWriter(std::vector<unsigned char>& vchDataIn, size_t nPosIn) : vchData{vchDataIn}, nPos{nPosIn}
 61      {
 62          if(nPos > vchData.size())
 63              vchData.resize(nPos);
 64      }
 65  /*
 66   * (other params same as above)
 67   * @param[in]  args  A list of items to serialize starting at nPosIn.
 68  */
 69      template <typename... Args>
 70      VectorWriter(std::vector<unsigned char>& vchDataIn, size_t nPosIn, Args&&... args) : VectorWriter{vchDataIn, nPosIn}
 71      {
 72          ::SerializeMany(*this, std::forward<Args>(args)...);
 73      }
 74      void write(Span<const std::byte> src)
 75      {
 76          assert(nPos <= vchData.size());
 77          size_t nOverwrite = std::min(src.size(), vchData.size() - nPos);
 78          if (nOverwrite) {
 79              memcpy(vchData.data() + nPos, src.data(), nOverwrite);
 80          }
 81          if (nOverwrite < src.size()) {
 82              vchData.insert(vchData.end(), UCharCast(src.data()) + nOverwrite, UCharCast(src.end()));
 83          }
 84          nPos += src.size();
 85      }
 86      template <typename T>
 87      VectorWriter& operator<<(const T& obj)
 88      {
 89          ::Serialize(*this, obj);
 90          return (*this);
 91      }
 92  
 93  private:
 94      std::vector<unsigned char>& vchData;
 95      size_t nPos;
 96  };
 97  
 98  /** Minimal stream for reading from an existing byte array by Span.
 99   */
100  class SpanReader
101  {
102  private:
103      Span<const unsigned char> m_data;
104  
105  public:
106      /**
107       * @param[in]  data Referenced byte vector to overwrite/append
108       */
109      explicit SpanReader(Span<const unsigned char> data) : m_data{data} {}
110  
111      template<typename T>
112      SpanReader& operator>>(T&& obj)
113      {
114          ::Unserialize(*this, obj);
115          return (*this);
116      }
117  
118      size_t size() const { return m_data.size(); }
119      bool empty() const { return m_data.empty(); }
120  
121      void read(Span<std::byte> dst)
122      {
123          if (dst.size() == 0) {
124              return;
125          }
126  
127          // Read from the beginning of the buffer
128          if (dst.size() > m_data.size()) {
129              throw std::ios_base::failure("SpanReader::read(): end of data");
130          }
131          memcpy(dst.data(), m_data.data(), dst.size());
132          m_data = m_data.subspan(dst.size());
133      }
134  
135      void ignore(size_t n)
136      {
137          m_data = m_data.subspan(n);
138      }
139  };
140  
141  /** Double ended buffer combining vector and stream-like interfaces.
142   *
143   * >> and << read and write unformatted data using the above serialization templates.
144   * Fills with data in linear time; some stringstream implementations take N^2 time.
145   */
146  class DataStream
147  {
148  protected:
149      using vector_type = SerializeData;
150      vector_type vch;
151      vector_type::size_type m_read_pos{0};
152  
153  public:
154      typedef vector_type::allocator_type   allocator_type;
155      typedef vector_type::size_type        size_type;
156      typedef vector_type::difference_type  difference_type;
157      typedef vector_type::reference        reference;
158      typedef vector_type::const_reference  const_reference;
159      typedef vector_type::value_type       value_type;
160      typedef vector_type::iterator         iterator;
161      typedef vector_type::const_iterator   const_iterator;
162      typedef vector_type::reverse_iterator reverse_iterator;
163  
164      explicit DataStream() {}
165      explicit DataStream(Span<const uint8_t> sp) : DataStream{AsBytes(sp)} {}
166      explicit DataStream(Span<const value_type> sp) : vch(sp.data(), sp.data() + sp.size()) {}
167  
168      std::string str() const
169      {
170          return std::string{UCharCast(data()), UCharCast(data() + size())};
171      }
172  
173  
174      //
175      // Vector subset
176      //
177      const_iterator begin() const                     { return vch.begin() + m_read_pos; }
178      iterator begin()                                 { return vch.begin() + m_read_pos; }
179      const_iterator end() const                       { return vch.end(); }
180      iterator end()                                   { return vch.end(); }
181      size_type size() const                           { return vch.size() - m_read_pos; }
182      bool empty() const                               { return vch.size() == m_read_pos; }
183      void resize(size_type n, value_type c = value_type{}) { vch.resize(n + m_read_pos, c); }
184      void reserve(size_type n)                        { vch.reserve(n + m_read_pos); }
185      const_reference operator[](size_type pos) const  { return vch[pos + m_read_pos]; }
186      reference operator[](size_type pos)              { return vch[pos + m_read_pos]; }
187      void clear()                                     { vch.clear(); m_read_pos = 0; }
188      value_type* data()                               { return vch.data() + m_read_pos; }
189      const value_type* data() const                   { return vch.data() + m_read_pos; }
190  
191      inline void Compact()
192      {
193          vch.erase(vch.begin(), vch.begin() + m_read_pos);
194          m_read_pos = 0;
195      }
196  
197      bool Rewind(std::optional<size_type> n = std::nullopt)
198      {
199          // Total rewind if no size is passed
200          if (!n) {
201              m_read_pos = 0;
202              return true;
203          }
204          // Rewind by n characters if the buffer hasn't been compacted yet
205          if (*n > m_read_pos)
206              return false;
207          m_read_pos -= *n;
208          return true;
209      }
210  
211  
212      //
213      // Stream subset
214      //
215      bool eof() const             { return size() == 0; }
216      int in_avail() const         { return size(); }
217  
218      void read(Span<value_type> dst)
219      {
220          if (dst.size() == 0) return;
221  
222          // Read from the beginning of the buffer
223          auto next_read_pos{CheckedAdd(m_read_pos, dst.size())};
224          if (!next_read_pos.has_value() || next_read_pos.value() > vch.size()) {
225              throw std::ios_base::failure("DataStream::read(): end of data");
226          }
227          memcpy(dst.data(), &vch[m_read_pos], dst.size());
228          if (next_read_pos.value() == vch.size()) {
229              m_read_pos = 0;
230              vch.clear();
231              return;
232          }
233          m_read_pos = next_read_pos.value();
234      }
235  
236      void ignore(size_t num_ignore)
237      {
238          // Ignore from the beginning of the buffer
239          auto next_read_pos{CheckedAdd(m_read_pos, num_ignore)};
240          if (!next_read_pos.has_value() || next_read_pos.value() > vch.size()) {
241              throw std::ios_base::failure("DataStream::ignore(): end of data");
242          }
243          if (next_read_pos.value() == vch.size()) {
244              m_read_pos = 0;
245              vch.clear();
246              return;
247          }
248          m_read_pos = next_read_pos.value();
249      }
250  
251      void write(Span<const value_type> src)
252      {
253          // Write to the end of the buffer
254          vch.insert(vch.end(), src.begin(), src.end());
255      }
256  
257      template<typename T>
258      DataStream& operator<<(const T& obj)
259      {
260          ::Serialize(*this, obj);
261          return (*this);
262      }
263  
264      template<typename T>
265      DataStream& operator>>(T&& obj)
266      {
267          ::Unserialize(*this, obj);
268          return (*this);
269      }
270  
271      /**
272       * XOR the contents of this stream with a certain key.
273       *
274       * @param[in] key    The key used to XOR the data in this stream.
275       */
276      void Xor(const std::vector<unsigned char>& key)
277      {
278          util::Xor(MakeWritableByteSpan(*this), MakeByteSpan(key));
279      }
280  };
281  
/** Reads an input stream bit by bit, most significant bit of each byte first.
 *
 * Bytes are pulled from the wrapped stream with operator>> one at a time, as
 * soon as the previously buffered byte has been used up.
 */
template <typename IStream>
class BitStreamReader
{
private:
    IStream& m_istream;

    /// Buffered byte read in from the input stream. A new byte is read into the
    /// buffer when m_offset reaches 8.
    uint8_t m_buffer{0};

    /// Number of high order bits in m_buffer already returned by previous
    /// Read() calls. The next bit to be returned is at this offset from the
    /// most significant bit position.
    int m_offset{8};

public:
    explicit BitStreamReader(IStream& istream) : m_istream(istream) {}

    /** Read the specified number of bits from the stream. The data is returned
     * in the nbits least significant bits of a 64-bit uint.
     *
     * @throws std::out_of_range if nbits is not within [0, 64].
     */
    uint64_t Read(int nbits)
    {
        if (!(0 <= nbits && nbits <= 64)) {
            throw std::out_of_range("nbits must be between 0 and 64");
        }

        uint64_t result{0};
        for (int remaining{nbits}; remaining > 0;) {
            // Buffered byte exhausted: fetch the next one.
            if (m_offset == 8) {
                m_istream >> m_buffer;
                m_offset = 0;
            }

            const int take{std::min(8 - m_offset, remaining)};
            // Shift the already returned high bits out of the byte, then move
            // the `take` bits that are wanted down to the bottom.
            const uint8_t unread{static_cast<uint8_t>(m_buffer << m_offset)};
            result <<= take;
            result |= unread >> (8 - take);

            m_offset += take;
            remaining -= take;
        }
        return result;
    }
};
324  
/** Writes to an output stream bit by bit, filling each byte from its most
 * significant bit downwards.
 *
 * Completed bytes are pushed to the wrapped stream with operator<<. A partly
 * filled byte is pushed, zero padded, by Flush() or by the destructor.
 */
template <typename OStream>
class BitStreamWriter
{
private:
    OStream& m_ostream;

    /// Buffered byte waiting to be written to the output stream. The byte is
    /// written to the stream when m_offset reaches 8 or Flush() is called.
    uint8_t m_buffer{0};

    /// Number of high order bits in m_buffer already written by previous
    /// Write() calls and not yet flushed to the stream. The next bit to be
    /// written to is at this offset from the most significant bit position.
    int m_offset{0};

public:
    explicit BitStreamWriter(OStream& ostream) : m_ostream(ostream) {}

    ~BitStreamWriter()
    {
        Flush();
    }

    /** Write the nbits least significant bits of a 64-bit int to the output
     * stream. Data is buffered until it completes an octet.
     *
     * @throws std::out_of_range if nbits is not within [0, 64].
     */
    void Write(uint64_t data, int nbits)
    {
        if (!(0 <= nbits && nbits <= 64)) {
            throw std::out_of_range("nbits must be between 0 and 64");
        }

        for (int remaining{nbits}; remaining > 0;) {
            const int take{std::min(8 - m_offset, remaining)};
            // Left-align the bits still to be written (dropping everything
            // above them), then bring the leading ones down so they start at
            // the first free bit of the buffered byte.
            const uint64_t left_aligned{data << (64 - remaining)};
            m_buffer |= left_aligned >> (64 - 8 + m_offset);

            m_offset += take;
            remaining -= take;

            if (m_offset == 8) {
                Flush();
            }
        }
    }

    /** Flush any unwritten bits to the output stream, padding with 0's to the
     * next byte boundary.
     */
    void Flush()
    {
        if (m_offset != 0) {
            m_ostream << m_buffer;
            m_buffer = 0;
            m_offset = 0;
        }
    }
};
381  
382  /** Non-refcounted RAII wrapper for FILE*
383   *
384   * Will automatically close the file when it goes out of scope if not null.
385   * If you're returning the file pointer, return file.release().
386   * If you need to close the file early, use file.fclose() instead of fclose(file).
387   */
388  class AutoFile
389  {
390  protected:
391      std::FILE* m_file;
392      std::vector<std::byte> m_xor;
393  
394  public:
395      explicit AutoFile(std::FILE* file, std::vector<std::byte> data_xor={}) : m_file{file}, m_xor{std::move(data_xor)} {}
396  
397      ~AutoFile() { fclose(); }
398  
399      // Disallow copies
400      AutoFile(const AutoFile&) = delete;
401      AutoFile& operator=(const AutoFile&) = delete;
402  
403      bool feof() const { return std::feof(m_file); }
404  
405      int fclose()
406      {
407          if (auto rel{release()}) return std::fclose(rel);
408          return 0;
409      }
410  
411      /** Get wrapped FILE* with transfer of ownership.
412       * @note This will invalidate the AutoFile object, and makes it the responsibility of the caller
413       * of this function to clean up the returned FILE*.
414       */
415      std::FILE* release()
416      {
417          std::FILE* ret{m_file};
418          m_file = nullptr;
419          return ret;
420      }
421  
422      /** Get wrapped FILE* without transfer of ownership.
423       * @note Ownership of the FILE* will remain with this class. Use this only if the scope of the
424       * AutoFile outlives use of the passed pointer.
425       */
426      std::FILE* Get() const { return m_file; }
427  
428      /** Return true if the wrapped FILE* is nullptr, false otherwise.
429       */
430      bool IsNull() const { return m_file == nullptr; }
431  
432      /** Continue with a different XOR key */
433      void SetXor(std::vector<std::byte> data_xor) { m_xor = data_xor; }
434  
435      /** Implementation detail, only used internally. */
436      std::size_t detail_fread(Span<std::byte> dst);
437  
438      //
439      // Stream subset
440      //
441      void read(Span<std::byte> dst);
442      void ignore(size_t nSize);
443      void write(Span<const std::byte> src);
444  
445      template <typename T>
446      AutoFile& operator<<(const T& obj)
447      {
448          ::Serialize(*this, obj);
449          return *this;
450      }
451  
452      template <typename T>
453      AutoFile& operator>>(T&& obj)
454      {
455          ::Unserialize(*this, obj);
456          return *this;
457      }
458  };
459  
460  /** Wrapper around an AutoFile& that implements a ring buffer to
461   *  deserialize from. It guarantees the ability to rewind a given number of bytes.
462   *
463   *  Will automatically close the file when it goes out of scope if not null.
464   *  If you need to close the file early, use file.fclose() instead of fclose(file).
465   */
466  class BufferedFile
467  {
468  private:
469      AutoFile& m_src;
470      uint64_t nSrcPos{0};  //!< how many bytes have been read from source
471      uint64_t m_read_pos{0}; //!< how many bytes have been read from this
472      uint64_t nReadLimit;  //!< up to which position we're allowed to read
473      uint64_t nRewind;     //!< how many bytes we guarantee to rewind
474      std::vector<std::byte> vchBuf; //!< the buffer
475  
476      //! read data from the source to fill the buffer
477      bool Fill() {
478          unsigned int pos = nSrcPos % vchBuf.size();
479          unsigned int readNow = vchBuf.size() - pos;
480          unsigned int nAvail = vchBuf.size() - (nSrcPos - m_read_pos) - nRewind;
481          if (nAvail < readNow)
482              readNow = nAvail;
483          if (readNow == 0)
484              return false;
485          size_t nBytes{m_src.detail_fread(Span{vchBuf}.subspan(pos, readNow))};
486          if (nBytes == 0) {
487              throw std::ios_base::failure{m_src.feof() ? "BufferedFile::Fill: end of file" : "BufferedFile::Fill: fread failed"};
488          }
489          nSrcPos += nBytes;
490          return true;
491      }
492  
493      //! Advance the stream's read pointer (m_read_pos) by up to 'length' bytes,
494      //! filling the buffer from the file so that at least one byte is available.
495      //! Return a pointer to the available buffer data and the number of bytes
496      //! (which may be less than the requested length) that may be accessed
497      //! beginning at that pointer.
498      std::pair<std::byte*, size_t> AdvanceStream(size_t length)
499      {
500          assert(m_read_pos <= nSrcPos);
501          if (m_read_pos + length > nReadLimit) {
502              throw std::ios_base::failure("Attempt to position past buffer limit");
503          }
504          // If there are no bytes available, read from the file.
505          if (m_read_pos == nSrcPos && length > 0) Fill();
506  
507          size_t buffer_offset{static_cast<size_t>(m_read_pos % vchBuf.size())};
508          size_t buffer_available{static_cast<size_t>(vchBuf.size() - buffer_offset)};
509          size_t bytes_until_source_pos{static_cast<size_t>(nSrcPos - m_read_pos)};
510          size_t advance{std::min({length, buffer_available, bytes_until_source_pos})};
511          m_read_pos += advance;
512          return std::make_pair(&vchBuf[buffer_offset], advance);
513      }
514  
515  public:
516      BufferedFile(AutoFile& file, uint64_t nBufSize, uint64_t nRewindIn)
517          : m_src{file}, nReadLimit{std::numeric_limits<uint64_t>::max()}, nRewind{nRewindIn}, vchBuf(nBufSize, std::byte{0})
518      {
519          if (nRewindIn >= nBufSize)
520              throw std::ios_base::failure("Rewind limit must be less than buffer size");
521      }
522  
523      //! check whether we're at the end of the source file
524      bool eof() const {
525          return m_read_pos == nSrcPos && m_src.feof();
526      }
527  
528      //! read a number of bytes
529      void read(Span<std::byte> dst)
530      {
531          while (dst.size() > 0) {
532              auto [buffer_pointer, length]{AdvanceStream(dst.size())};
533              memcpy(dst.data(), buffer_pointer, length);
534              dst = dst.subspan(length);
535          }
536      }
537  
538      //! Move the read position ahead in the stream to the given position.
539      //! Use SetPos() to back up in the stream, not SkipTo().
540      void SkipTo(const uint64_t file_pos)
541      {
542          assert(file_pos >= m_read_pos);
543          while (m_read_pos < file_pos) AdvanceStream(file_pos - m_read_pos);
544      }
545  
546      //! return the current reading position
547      uint64_t GetPos() const {
548          return m_read_pos;
549      }
550  
551      //! rewind to a given reading position
552      bool SetPos(uint64_t nPos) {
553          size_t bufsize = vchBuf.size();
554          if (nPos + bufsize < nSrcPos) {
555              // rewinding too far, rewind as far as possible
556              m_read_pos = nSrcPos - bufsize;
557              return false;
558          }
559          if (nPos > nSrcPos) {
560              // can't go this far forward, go as far as possible
561              m_read_pos = nSrcPos;
562              return false;
563          }
564          m_read_pos = nPos;
565          return true;
566      }
567  
568      //! prevent reading beyond a certain position
569      //! no argument removes the limit
570      bool SetLimit(uint64_t nPos = std::numeric_limits<uint64_t>::max()) {
571          if (nPos < m_read_pos)
572              return false;
573          nReadLimit = nPos;
574          return true;
575      }
576  
577      template<typename T>
578      BufferedFile& operator>>(T&& obj) {
579          ::Unserialize(*this, obj);
580          return (*this);
581      }
582  
583      //! search for a given byte in the stream, and remain positioned on it
584      void FindByte(std::byte byte)
585      {
586          // For best performance, avoid mod operation within the loop.
587          size_t buf_offset{size_t(m_read_pos % uint64_t(vchBuf.size()))};
588          while (true) {
589              if (m_read_pos == nSrcPos) {
590                  // No more bytes available; read from the file into the buffer,
591                  // setting nSrcPos to one beyond the end of the new data.
592                  // Throws exception if end-of-file reached.
593                  Fill();
594              }
595              const size_t len{std::min<size_t>(vchBuf.size() - buf_offset, nSrcPos - m_read_pos)};
596              const auto it_start{vchBuf.begin() + buf_offset};
597              const auto it_find{std::find(it_start, it_start + len, byte)};
598              const size_t inc{size_t(std::distance(it_start, it_find))};
599              m_read_pos += inc;
600              if (inc < len) break;
601              buf_offset += inc;
602              if (buf_offset >= vchBuf.size()) buf_offset = 0;
603          }
604      }
605  };
606  
607  #endif // BITCOIN_STREAMS_H