/ src / node / blockstorage.h
blockstorage.h
  1  // Copyright (c) 2011-present The Bitcoin Core developers
  2  // Distributed under the MIT software license, see the accompanying
  3  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  4  
  5  #ifndef BITCOIN_NODE_BLOCKSTORAGE_H
  6  #define BITCOIN_NODE_BLOCKSTORAGE_H
  7  
  8  #include <attributes.h>
  9  #include <chain.h>
 10  #include <dbwrapper.h>
 11  #include <flatfile.h>
 12  #include <kernel/blockmanager_opts.h>
 13  #include <kernel/chainparams.h>
 14  #include <kernel/cs_main.h>
 15  #include <kernel/messagestartchars.h>
 16  #include <primitives/block.h>
 17  #include <serialize.h>
 18  #include <streams.h>
 19  #include <sync.h>
 20  #include <uint256.h>
 21  #include <util/byte_units.h> // IWYU pragma: keep
 22  #include <util/expected.h>
 23  #include <util/fs.h>
 24  #include <util/hasher.h>
 25  #include <util/obfuscation.h>
 26  
 27  #include <algorithm>
 28  #include <array>
 29  #include <atomic>
 30  #include <cstddef>
 31  #include <cstdint>
 32  #include <functional>
 33  #include <iosfwd>
 34  #include <limits>
 35  #include <map>
 36  #include <memory>
 37  #include <optional>
 38  #include <set>
 39  #include <span>
 40  #include <string>
 41  #include <unordered_map>
 42  #include <utility>
 43  #include <vector>
 44  
 45  class BlockValidationState;
 46  class CBlockUndo;
 47  class Chainstate;
 48  class ChainstateManager;
 49  namespace Consensus {
 50  struct Params;
 51  }
 52  namespace util {
 53  class SignalInterrupt;
 54  } // namespace util
 55  
 56  namespace kernel {
 57  class CBlockFileInfo
 58  {
 59  public:
 60      uint32_t nBlocks{};      //!< number of blocks stored in file
 61      uint32_t nSize{};        //!< number of used bytes of block file
 62      uint32_t nUndoSize{};    //!< number of used bytes in the undo file
 63      uint32_t nHeightFirst{}; //!< lowest height of block in file
 64      uint32_t nHeightLast{};  //!< highest height of block in file
 65      uint64_t nTimeFirst{};   //!< earliest time of block in file
 66      uint64_t nTimeLast{};    //!< latest time of block in file
 67  
 68      SERIALIZE_METHODS(CBlockFileInfo, obj)
 69      {
 70          READWRITE(VARINT(obj.nBlocks));
 71          READWRITE(VARINT(obj.nSize));
 72          READWRITE(VARINT(obj.nUndoSize));
 73          READWRITE(VARINT(obj.nHeightFirst));
 74          READWRITE(VARINT(obj.nHeightLast));
 75          READWRITE(VARINT(obj.nTimeFirst));
 76          READWRITE(VARINT(obj.nTimeLast));
 77      }
 78  
 79      CBlockFileInfo() = default;
 80  
 81      std::string ToString() const;
 82  
 83      /** update statistics (does not update nSize) */
 84      void AddBlock(unsigned int nHeightIn, uint64_t nTimeIn)
 85      {
 86          if (nBlocks == 0 || nHeightFirst > nHeightIn)
 87              nHeightFirst = nHeightIn;
 88          if (nBlocks == 0 || nTimeFirst > nTimeIn)
 89              nTimeFirst = nTimeIn;
 90          nBlocks++;
 91          if (nHeightIn > nHeightLast)
 92              nHeightLast = nHeightIn;
 93          if (nTimeIn > nTimeLast)
 94              nTimeLast = nTimeIn;
 95      }
 96  };
 97  
 98  /** Access to the block database (blocks/index/) */
 99  class BlockTreeDB : public CDBWrapper
100  {
101  public:
102      using CDBWrapper::CDBWrapper;
103      void WriteBatchSync(const std::vector<std::pair<int, const CBlockFileInfo*>>& fileInfo, int nLastFile, const std::vector<const CBlockIndex*>& blockinfo);
104      bool ReadBlockFileInfo(int nFile, CBlockFileInfo& info);
105      bool ReadLastBlockFile(int& nFile);
106      void WriteReindexing(bool fReindexing);
107      void ReadReindexing(bool& fReindexing);
108      void WriteFlag(const std::string& name, bool fValue);
109      bool ReadFlag(const std::string& name, bool& fValue);
110      bool LoadBlockIndexGuts(const Consensus::Params& consensusParams, std::function<CBlockIndex*(const uint256&)> insertBlockIndex, const util::SignalInterrupt& interrupt)
111          EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
112  };
113  } // namespace kernel
114  
115  namespace node {
116  using kernel::CBlockFileInfo;
117  using kernel::BlockTreeDB;
118  
119  /** The pre-allocation chunk size for blk?????.dat files (since 0.8) */
120  static const unsigned int BLOCKFILE_CHUNK_SIZE{16_MiB};
121  /** The pre-allocation chunk size for rev?????.dat files (since 0.8) */
122  static const unsigned int UNDOFILE_CHUNK_SIZE{1_MiB};
123  /** The maximum size of a blk?????.dat file (since 0.8) */
124  static const unsigned int MAX_BLOCKFILE_SIZE{128_MiB};
125  
126  /** Size of header written by WriteBlock before a serialized CBlock (8 bytes) */
127  static constexpr uint32_t STORAGE_HEADER_BYTES{std::tuple_size_v<MessageStartChars> + sizeof(unsigned int)};
128  
129  /** Total overhead when writing undo data: header (8 bytes) plus checksum (32 bytes) */
130  static constexpr uint32_t UNDO_DATA_DISK_OVERHEAD{STORAGE_HEADER_BYTES + uint256::size()};
131  
132  // Because validation code takes pointers to the map's CBlockIndex objects, if
133  // we ever switch to another associative container, we need to either use a
134  // container that has stable addressing (true of all std associative
135  // containers), or make the key a `std::unique_ptr<CBlockIndex>`
136  using BlockMap = std::unordered_map<uint256, CBlockIndex, BlockHasher>;
137  
138  struct CBlockIndexWorkComparator {
139      bool operator()(const CBlockIndex* pa, const CBlockIndex* pb) const;
140      using is_transparent = void;
141  };
142  
143  struct CBlockIndexHeightOnlyComparator {
144      /* Only compares the height of two block indices, doesn't try to tie-break */
145      bool operator()(const CBlockIndex* pa, const CBlockIndex* pb) const;
146  };
147  
148  struct PruneLockInfo {
149      /// Height of earliest block that should be kept and not pruned
150      int height_first{std::numeric_limits<int>::max()};
151  };
152  
153  enum BlockfileType {
154      // Values used as array indexes - do not change carelessly.
155      NORMAL = 0,
156      ASSUMED = 1,
157      NUM_TYPES = 2,
158  };
159  
160  std::ostream& operator<<(std::ostream& os, const BlockfileType& type);
161  
162  struct BlockfileCursor {
163      // The latest blockfile number.
164      int file_num{0};
165  
166      // Track the height of the highest block in file_num whose undo
167      // data has been written. Block data is written to block files in download
168      // order, but is written to undo files in validation order, which is
169      // usually in order by height. To avoid wasting disk space, undo files will
170      // be trimmed whenever the corresponding block file is finalized and
171      // the height of the highest block written to the block file equals the
172      // height of the highest block written to the undo file. This is a
173      // heuristic and can sometimes preemptively trim undo files that will write
174      // more data later, and sometimes fail to trim undo files that can't have
175      // more data written later.
176      int undo_height{0};
177  };
178  
179  std::ostream& operator<<(std::ostream& os, const BlockfileCursor& cursor);
180  
181  enum class ReadRawError {
182      IO,
183      BadPartRange,
184  };
185  
186  /**
187   * Maintains a tree of blocks (stored in `m_block_index`) which is consulted
188   * to determine where the most-work tip is.
189   *
190   * This data is used mostly in `Chainstate` - information about, e.g.,
191   * candidate tips is not maintained here.
192   */
193  class BlockManager
194  {
195      friend Chainstate;
196      friend ChainstateManager;
197  
198  private:
199      const CChainParams& GetParams() const { return m_opts.chainparams; }
200      const Consensus::Params& GetConsensus() const { return m_opts.chainparams.GetConsensus(); }
201      /**
202       * Load the blocktree off disk and into memory. Populate certain metadata
203       * per index entry (nStatus, nChainWork, nTimeMax, etc.) as well as peripheral
204       * collections like m_dirty_blockindex.
205       */
206      bool LoadBlockIndex(const std::optional<uint256>& snapshot_blockhash)
207          EXCLUSIVE_LOCKS_REQUIRED(cs_main);
208  
209      /** Return false if block file or undo file flushing fails. */
210      [[nodiscard]] bool FlushBlockFile(int blockfile_num, bool fFinalize, bool finalize_undo);
211  
212      /** Return false if undo file flushing fails. */
213      [[nodiscard]] bool FlushUndoFile(int block_file, bool finalize = false);
214  
215      /**
216       * Helper function performing various preparations before a block can be saved to disk:
217       * Returns the correct position for the block to be saved, which may be in the current or a new
218       * block file depending on nAddSize. May flush the previous blockfile to disk if full, updates
219       * blockfile info, and checks if there is enough disk space to save the block.
220       *
221       * The nAddSize argument passed to this function should include not just the size of the serialized CBlock, but also the size of
222       * separator fields (STORAGE_HEADER_BYTES).
223       */
224      [[nodiscard]] FlatFilePos FindNextBlockPos(unsigned int nAddSize, unsigned int nHeight, uint64_t nTime);
225      [[nodiscard]] bool FlushChainstateBlockFile(int tip_height);
226      bool FindUndoPos(BlockValidationState& state, int nFile, FlatFilePos& pos, unsigned int nAddSize);
227  
228      AutoFile OpenUndoFile(const FlatFilePos& pos, bool fReadOnly = false) const;
229  
230      /* Calculate the block/rev files to delete based on height specified by user with RPC command pruneblockchain */
231      void FindFilesToPruneManual(
232          std::set<int>& setFilesToPrune,
233          int nManualPruneHeight,
234          const Chainstate& chain);
235  
236      /**
237       * Prune block and undo files (blk???.dat and rev???.dat) so that the disk space used is less than a user-defined target.
238       * The user sets the target (in MB) on the command line or in config file.  This will be run on startup and whenever new
239       * space is allocated in a block or undo file, staying below the target. Changing back to unpruned requires a reindex
240       * (which in this case means the blockchain must be re-downloaded.)
241       *
242       * Pruning functions are called from FlushStateToDisk when the m_check_for_pruning flag has been set.
243       * Block and undo files are deleted in lock-step (when blk00003.dat is deleted, so is rev00003.dat.)
244       * Pruning cannot take place until the longest chain is at least a certain length (CChainParams::nPruneAfterHeight).
245       * Pruning will never delete a block within a defined distance (currently 288) from the active chain's tip.
246       * The block index is updated by unsetting HAVE_DATA and HAVE_UNDO for any blocks that were stored in the deleted files.
247       * A db flag records the fact that at least some block files have been pruned.
248       *
249       * @param[out]   setFilesToPrune   The set of file indices that can be unlinked will be returned
250       * @param        last_prune        The last height we're able to prune, according to the prune locks
251       */
252      void FindFilesToPrune(
253          std::set<int>& setFilesToPrune,
254          int last_prune,
255          const Chainstate& chain,
256          ChainstateManager& chainman);
257  
258      RecursiveMutex cs_LastBlockFile;
259  
260      //! Since assumedvalid chainstates may be syncing a range of the chain that is very
261      //! far away from the normal/background validation process, we should segment blockfiles
262      //! for assumed chainstates. Otherwise, we might have wildly different height ranges
263      //! mixed into the same block files, which would impair our ability to prune
264      //! effectively.
265      //!
266      //! This data structure maintains separate blockfile number cursors for each
267      //! BlockfileType. The ASSUMED state is initialized, when necessary, in FindNextBlockPos().
268      //!
269      //! The first element is the NORMAL cursor, second is ASSUMED.
270      std::array<std::optional<BlockfileCursor>, BlockfileType::NUM_TYPES>
271          m_blockfile_cursors GUARDED_BY(cs_LastBlockFile) = {
272              BlockfileCursor{},
273              std::nullopt,
274      };
275      int MaxBlockfileNum() const EXCLUSIVE_LOCKS_REQUIRED(cs_LastBlockFile)
276      {
277          static const BlockfileCursor empty_cursor;
278          const auto& normal = m_blockfile_cursors[BlockfileType::NORMAL].value_or(empty_cursor);
279          const auto& assumed = m_blockfile_cursors[BlockfileType::ASSUMED].value_or(empty_cursor);
280          return std::max(normal.file_num, assumed.file_num);
281      }
282  
283      /** Global flag to indicate we should check to see if there are
284       *  block/undo files that should be deleted.  Set on startup
285       *  or if we allocate more file space when we're in prune mode
286       */
287      bool m_check_for_pruning = false;
288  
289      const bool m_prune_mode;
290  
291      const Obfuscation m_obfuscation;
292  
293      /**
294       * Map from external index name to oldest block that must not be pruned.
295       *
296       * @note Internally, only blocks at height (height_first - PRUNE_LOCK_BUFFER - 1) and
297       * below will be pruned, but callers should avoid assuming any particular buffer size.
298       */
299      std::unordered_map<std::string, PruneLockInfo> m_prune_locks GUARDED_BY(::cs_main);
300  
301      BlockfileType BlockfileTypeForHeight(int height);
302  
303      const kernel::BlockManagerOpts m_opts;
304  
305      const FlatFileSeq m_block_file_seq;
306      const FlatFileSeq m_undo_file_seq;
307  
308  protected:
309      std::vector<CBlockFileInfo> m_blockfile_info;
310  
311      /** Dirty block index entries. */
312      std::set<CBlockIndex*> m_dirty_blockindex;
313  
314      /** Dirty block file entries. */
315      std::set<int> m_dirty_fileinfo;
316  
317  public:
318      using Options = kernel::BlockManagerOpts;
319      using ReadRawBlockResult = util::Expected<std::vector<std::byte>, ReadRawError>;
320  
321      explicit BlockManager(const util::SignalInterrupt& interrupt, Options opts);
322  
323      const util::SignalInterrupt& m_interrupt;
324      std::atomic<bool> m_importing{false};
325  
326      /**
327       * Whether all blockfiles have been added to the block tree database.
328       * Normally true, but set to false when a reindex is requested and the
329       * database is wiped. The value is persisted in the database across restarts
330       * and will be false until reindexing completes.
331       */
332      std::atomic_bool m_blockfiles_indexed{true};
333  
334      BlockMap m_block_index GUARDED_BY(cs_main);
335  
336      /**
337       * The height of the base block of an assumeutxo snapshot, if one is in use.
338       *
339       * This controls how blockfiles are segmented by chainstate type to avoid
340       * comingling different height regions of the chain when an assumedvalid chainstate
341       * is in use. If heights are drastically different in the same blockfile, pruning
342       * suffers.
343       *
344       * This is set during ActivateSnapshot() or upon LoadBlockIndex() if a snapshot
345       * had been previously loaded. After the snapshot is validated, this is unset to
346       * restore normal LoadBlockIndex behavior.
347       */
348      std::optional<int> m_snapshot_height;
349  
350      std::vector<CBlockIndex*> GetAllBlockIndices() EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
351  
352      /**
353       * All pairs A->B, where A (or one of its ancestors) misses transactions, but B has transactions.
354       * Pruned nodes may have entries where B is missing data.
355       */
356      std::multimap<CBlockIndex*, CBlockIndex*> m_blocks_unlinked;
357  
358      std::unique_ptr<BlockTreeDB> m_block_tree_db GUARDED_BY(::cs_main);
359  
360      void WriteBlockIndexDB() EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
361      bool LoadBlockIndexDB(const std::optional<uint256>& snapshot_blockhash)
362          EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
363  
364      /**
365       * Remove any pruned block & undo files that are still on disk.
366       * This could happen on some systems if the file was still being read while unlinked,
367       * or if we crash before unlinking.
368       */
369      void ScanAndUnlinkAlreadyPrunedFiles() EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
370  
371      CBlockIndex* AddToBlockIndex(const CBlockHeader& block, CBlockIndex*& best_header) EXCLUSIVE_LOCKS_REQUIRED(cs_main);
372      /** Create a new block index entry for a given block hash */
373      CBlockIndex* InsertBlockIndex(const uint256& hash) EXCLUSIVE_LOCKS_REQUIRED(cs_main);
374  
375      //! Mark one block file as pruned (modify associated database entries)
376      void PruneOneBlockFile(int fileNumber) EXCLUSIVE_LOCKS_REQUIRED(cs_main);
377  
378      CBlockIndex* LookupBlockIndex(const uint256& hash) EXCLUSIVE_LOCKS_REQUIRED(cs_main);
379      const CBlockIndex* LookupBlockIndex(const uint256& hash) const EXCLUSIVE_LOCKS_REQUIRED(cs_main);
380  
381      /** Get block file info entry for one block file */
382      CBlockFileInfo* GetBlockFileInfo(size_t n);
383  
384      bool WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationState& state, CBlockIndex& block)
385          EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
386  
387      /** Store block on disk and update block file statistics.
388       *
389       * @param[in]  block        the block to be stored
390       * @param[in]  nHeight      the height of the block
391       *
392       * @returns in case of success, the position to which the block was written to
393       *          in case of an error, an empty FlatFilePos
394       */
395      FlatFilePos WriteBlock(const CBlock& block, int nHeight);
396  
397      /** Update blockfile info while processing a block during reindex. The block must be available on disk.
398       *
399       * @param[in]  block        the block being processed
400       * @param[in]  nHeight      the height of the block
401       * @param[in]  pos          the position of the serialized CBlock on disk
402       */
403      void UpdateBlockInfo(const CBlock& block, unsigned int nHeight, const FlatFilePos& pos);
404  
405      /** Whether running in -prune mode. */
406      [[nodiscard]] bool IsPruneMode() const { return m_prune_mode; }
407  
408      /** Attempt to stay below this number of bytes of block files. */
409      [[nodiscard]] uint64_t GetPruneTarget() const { return m_opts.prune_target; }
410      static constexpr auto PRUNE_TARGET_MANUAL{std::numeric_limits<uint64_t>::max()};
411  
412      [[nodiscard]] bool LoadingBlocks() const { return m_importing || !m_blockfiles_indexed; }
413  
414      /** Calculate the amount of disk space the block & undo files currently use */
415      uint64_t CalculateCurrentUsage();
416  
417      //! Check if all blocks in the [upper_block, lower_block] range have data available as
418      //! defined by the status mask.
419      //! The caller is responsible for ensuring that lower_block is an ancestor of upper_block
420      //! (part of the same chain).
421      bool CheckBlockDataAvailability(const CBlockIndex& upper_block, const CBlockIndex& lower_block, BlockStatus block_status = BLOCK_HAVE_DATA) EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
422  
423      /**
424       * @brief Returns the earliest block with specified `status_mask` flags set after
425       * the latest block _not_ having those flags.
426       *
427       * This function starts from `upper_block`, which must have all
428       * `status_mask` flags set, and iterates backwards through its ancestors. It
429       * continues as long as each block has all `status_mask` flags set, until
430       * reaching the oldest ancestor or `lower_block`.
431       *
432       * @pre `upper_block` must have all `status_mask` flags set.
433       * @pre `lower_block` must be null or an ancestor of `upper_block`
434       *
435       * @param upper_block The starting block for the search, which must have all
436       *                    `status_mask` flags set.
437       * @param status_mask Bitmask specifying required status flags.
438       * @param lower_block The earliest possible block to return. If null, the
439       *                    search can extend to the genesis block.
440       *
441       * @return A reference to the earliest block between `upper_block`
442       *         and `lower_block`, inclusive, such that every block between the
443       *         returned block and `upper_block` has `status_mask` flags set.
444       */
445      const CBlockIndex& GetFirstBlock(
446          const CBlockIndex& upper_block LIFETIMEBOUND,
447          uint32_t status_mask,
448          const CBlockIndex* lower_block LIFETIMEBOUND = nullptr
449      ) const EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
450  
451      /** True if any block files have ever been pruned. */
452      bool m_have_pruned = false;
453  
454      //! Check whether the block associated with this index entry is pruned or not.
455      bool IsBlockPruned(const CBlockIndex& block) const EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
456  
457      //! Create or update a prune lock identified by its name
458      void UpdatePruneLock(const std::string& name, const PruneLockInfo& lock_info) EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
459  
460      //! Delete a prune lock identified by its name. Returns true if the lock existed.
461      bool DeletePruneLock(const std::string& name) EXCLUSIVE_LOCKS_REQUIRED(::cs_main);
462  
463      /** Open a block file (blk?????.dat) */
464      AutoFile OpenBlockFile(const FlatFilePos& pos, bool fReadOnly) const;
465  
466      /** Translation to a filesystem path */
467      fs::path GetBlockPosFilename(const FlatFilePos& pos) const;
468  
469      /**
470       *  Actually unlink the specified files
471       */
472      void UnlinkPrunedFiles(const std::set<int>& setFilesToPrune) const;
473  
474      /** Functions for disk access for blocks */
475      bool ReadBlock(CBlock& block, const FlatFilePos& pos, const std::optional<uint256>& expected_hash) const;
476      bool ReadBlock(CBlock& block, const CBlockIndex& index) const;
477      ReadRawBlockResult ReadRawBlock(const FlatFilePos& pos, std::optional<std::pair<size_t, size_t>> block_part = std::nullopt) const;
478  
479      bool ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index) const;
480  
481      void CleanupBlockRevFiles() const;
482  };
483  
484  // Calls ActivateBestChain() even if no blocks are imported.
485  void ImportBlocks(ChainstateManager& chainman, std::span<const fs::path> import_paths);
486  } // namespace node
487  
488  #endif // BITCOIN_NODE_BLOCKSTORAGE_H