blockstorage.h
1 // Copyright (c) 2011-present The Bitcoin Core developers 2 // Distributed under the MIT software license, see the accompanying 3 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 4 5 #ifndef BITCOIN_NODE_BLOCKSTORAGE_H 6 #define BITCOIN_NODE_BLOCKSTORAGE_H 7 8 #include <attributes.h> 9 #include <chain.h> 10 #include <dbwrapper.h> 11 #include <flatfile.h> 12 #include <kernel/blockmanager_opts.h> 13 #include <kernel/chainparams.h> 14 #include <kernel/cs_main.h> 15 #include <kernel/messagestartchars.h> 16 #include <primitives/block.h> 17 #include <serialize.h> 18 #include <streams.h> 19 #include <sync.h> 20 #include <uint256.h> 21 #include <util/byte_units.h> // IWYU pragma: keep 22 #include <util/expected.h> 23 #include <util/fs.h> 24 #include <util/hasher.h> 25 #include <util/obfuscation.h> 26 27 #include <algorithm> 28 #include <array> 29 #include <atomic> 30 #include <cstddef> 31 #include <cstdint> 32 #include <functional> 33 #include <iosfwd> 34 #include <limits> 35 #include <map> 36 #include <memory> 37 #include <optional> 38 #include <set> 39 #include <span> 40 #include <string> 41 #include <unordered_map> 42 #include <utility> 43 #include <vector> 44 45 class BlockValidationState; 46 class CBlockUndo; 47 class Chainstate; 48 class ChainstateManager; 49 namespace Consensus { 50 struct Params; 51 } 52 namespace util { 53 class SignalInterrupt; 54 } // namespace util 55 56 namespace kernel { 57 class CBlockFileInfo 58 { 59 public: 60 uint32_t nBlocks{}; //!< number of blocks stored in file 61 uint32_t nSize{}; //!< number of used bytes of block file 62 uint32_t nUndoSize{}; //!< number of used bytes in the undo file 63 uint32_t nHeightFirst{}; //!< lowest height of block in file 64 uint32_t nHeightLast{}; //!< highest height of block in file 65 uint64_t nTimeFirst{}; //!< earliest time of block in file 66 uint64_t nTimeLast{}; //!< latest time of block in file 67 68 SERIALIZE_METHODS(CBlockFileInfo, obj) 69 { 70 READWRITE(VARINT(obj.nBlocks)); 71 READWRITE(VARINT(obj.nSize)); 72 READWRITE(VARINT(obj.nUndoSize)); 73 READWRITE(VARINT(obj.nHeightFirst)); 74 READWRITE(VARINT(obj.nHeightLast)); 75 READWRITE(VARINT(obj.nTimeFirst)); 76 READWRITE(VARINT(obj.nTimeLast)); 77 } 78 79 CBlockFileInfo() = default; 80 81 std::string ToString() const; 82 83 /** update statistics (does not update nSize) */ 84 void AddBlock(unsigned int nHeightIn, uint64_t nTimeIn) 85 { 86 if (nBlocks == 0 || nHeightFirst > nHeightIn) 87 nHeightFirst = nHeightIn; 88 if (nBlocks == 0 || nTimeFirst > nTimeIn) 89 nTimeFirst = nTimeIn; 90 nBlocks++; 91 if (nHeightIn > nHeightLast) 92 nHeightLast = nHeightIn; 93 if (nTimeIn > nTimeLast) 94 nTimeLast = nTimeIn; 95 } 96 }; 97 98 /** Access to the block database (blocks/index/) */ 99 class BlockTreeDB : public CDBWrapper 100 { 101 public: 102 using CDBWrapper::CDBWrapper; 103 void WriteBatchSync(const std::vector<std::pair<int, const CBlockFileInfo*>>& fileInfo, int nLastFile, const std::vector<const CBlockIndex*>& blockinfo); 104 bool ReadBlockFileInfo(int nFile, CBlockFileInfo& info); 105 bool ReadLastBlockFile(int& nFile); 106 void WriteReindexing(bool fReindexing); 107 void ReadReindexing(bool& fReindexing); 108 void WriteFlag(const std::string& name, bool fValue); 109 bool ReadFlag(const std::string& name, bool& fValue); 110 bool LoadBlockIndexGuts(const Consensus::Params& consensusParams, std::function<CBlockIndex*(const uint256&)> insertBlockIndex, const util::SignalInterrupt& interrupt) 111 EXCLUSIVE_LOCKS_REQUIRED(::cs_main); 112 }; 113 } // namespace kernel 114 115 namespace node { 116 using kernel::CBlockFileInfo; 117 using kernel::BlockTreeDB; 118 119 /** The pre-allocation chunk size for blk?????.dat files (since 0.8) */ 120 static const unsigned int BLOCKFILE_CHUNK_SIZE{16_MiB}; 121 /** The pre-allocation chunk size for rev?????.dat files (since 0.8) */ 122 static const unsigned int UNDOFILE_CHUNK_SIZE{1_MiB}; 123 /** The maximum size of a blk?????.dat file (since 0.8) */ 124 static const unsigned int MAX_BLOCKFILE_SIZE{128_MiB}; 125 126 /** Size of header written by WriteBlock before a serialized CBlock (8 bytes) */ 127 static constexpr uint32_t STORAGE_HEADER_BYTES{std::tuple_size_v<MessageStartChars> + sizeof(unsigned int)}; 128 129 /** Total overhead when writing undo data: header (8 bytes) plus checksum (32 bytes) */ 130 static constexpr uint32_t UNDO_DATA_DISK_OVERHEAD{STORAGE_HEADER_BYTES + uint256::size()}; 131 132 // Because validation code takes pointers to the map's CBlockIndex objects, if 133 // we ever switch to another associative container, we need to either use a 134 // container that has stable addressing (true of all std associative 135 // containers), or make the key a `std::unique_ptr<CBlockIndex>` 136 using BlockMap = std::unordered_map<uint256, CBlockIndex, BlockHasher>; 137 138 struct CBlockIndexWorkComparator { 139 bool operator()(const CBlockIndex* pa, const CBlockIndex* pb) const; 140 using is_transparent = void; 141 }; 142 143 struct CBlockIndexHeightOnlyComparator { 144 /* Only compares the height of two block indices, doesn't try to tie-break */ 145 bool operator()(const CBlockIndex* pa, const CBlockIndex* pb) const; 146 }; 147 148 struct PruneLockInfo { 149 /// Height of earliest block that should be kept and not pruned 150 int height_first{std::numeric_limits<int>::max()}; 151 }; 152 153 enum BlockfileType { 154 // Values used as array indexes - do not change carelessly. 155 NORMAL = 0, 156 ASSUMED = 1, 157 NUM_TYPES = 2, 158 }; 159 160 std::ostream& operator<<(std::ostream& os, const BlockfileType& type); 161 162 struct BlockfileCursor { 163 // The latest blockfile number. 164 int file_num{0}; 165 166 // Track the height of the highest block in file_num whose undo 167 // data has been written. Block data is written to block files in download 168 // order, but is written to undo files in validation order, which is 169 // usually in order by height. To avoid wasting disk space, undo files will 170 // be trimmed whenever the corresponding block file is finalized and 171 // the height of the highest block written to the block file equals the 172 // height of the highest block written to the undo file. This is a 173 // heuristic and can sometimes preemptively trim undo files that will write 174 // more data later, and sometimes fail to trim undo files that can't have 175 // more data written later. 176 int undo_height{0}; 177 }; 178 179 std::ostream& operator<<(std::ostream& os, const BlockfileCursor& cursor); 180 181 enum class ReadRawError { 182 IO, 183 BadPartRange, 184 }; 185 186 /** 187 * Maintains a tree of blocks (stored in `m_block_index`) which is consulted 188 * to determine where the most-work tip is. 189 * 190 * This data is used mostly in `Chainstate` - information about, e.g., 191 * candidate tips is not maintained here. 192 */ 193 class BlockManager 194 { 195 friend Chainstate; 196 friend ChainstateManager; 197 198 private: 199 const CChainParams& GetParams() const { return m_opts.chainparams; } 200 const Consensus::Params& GetConsensus() const { return m_opts.chainparams.GetConsensus(); } 201 /** 202 * Load the blocktree off disk and into memory. Populate certain metadata 203 * per index entry (nStatus, nChainWork, nTimeMax, etc.) as well as peripheral 204 * collections like m_dirty_blockindex. 205 */ 206 bool LoadBlockIndex(const std::optional<uint256>& snapshot_blockhash) 207 EXCLUSIVE_LOCKS_REQUIRED(cs_main); 208 209 /** Return false if block file or undo file flushing fails. */ 210 [[nodiscard]] bool FlushBlockFile(int blockfile_num, bool fFinalize, bool finalize_undo); 211 212 /** Return false if undo file flushing fails. */ 213 [[nodiscard]] bool FlushUndoFile(int block_file, bool finalize = false); 214 215 /** 216 * Helper function performing various preparations before a block can be saved to disk: 217 * Returns the correct position for the block to be saved, which may be in the current or a new 218 * block file depending on nAddSize. May flush the previous blockfile to disk if full, updates 219 * blockfile info, and checks if there is enough disk space to save the block. 220 * 221 * The nAddSize argument passed to this function should include not just the size of the serialized CBlock, but also the size of 222 * separator fields (STORAGE_HEADER_BYTES). 223 */ 224 [[nodiscard]] FlatFilePos FindNextBlockPos(unsigned int nAddSize, unsigned int nHeight, uint64_t nTime); 225 [[nodiscard]] bool FlushChainstateBlockFile(int tip_height); 226 bool FindUndoPos(BlockValidationState& state, int nFile, FlatFilePos& pos, unsigned int nAddSize); 227 228 AutoFile OpenUndoFile(const FlatFilePos& pos, bool fReadOnly = false) const; 229 230 /* Calculate the block/rev files to delete based on height specified by user with RPC command pruneblockchain */ 231 void FindFilesToPruneManual( 232 std::set<int>& setFilesToPrune, 233 int nManualPruneHeight, 234 const Chainstate& chain); 235 236 /** 237 * Prune block and undo files (blk???.dat and rev???.dat) so that the disk space used is less than a user-defined target. 238 * The user sets the target (in MB) on the command line or in config file. This will be run on startup and whenever new 239 * space is allocated in a block or undo file, staying below the target. Changing back to unpruned requires a reindex 240 * (which in this case means the blockchain must be re-downloaded.) 241 * 242 * Pruning functions are called from FlushStateToDisk when the m_check_for_pruning flag has been set. 243 * Block and undo files are deleted in lock-step (when blk00003.dat is deleted, so is rev00003.dat.) 244 * Pruning cannot take place until the longest chain is at least a certain length (CChainParams::nPruneAfterHeight). 245 * Pruning will never delete a block within a defined distance (currently 288) from the active chain's tip. 246 * The block index is updated by unsetting HAVE_DATA and HAVE_UNDO for any blocks that were stored in the deleted files. 247 * A db flag records the fact that at least some block files have been pruned. 248 * 249 * @param[out] setFilesToPrune The set of file indices that can be unlinked will be returned 250 * @param last_prune The last height we're able to prune, according to the prune locks 251 */ 252 void FindFilesToPrune( 253 std::set<int>& setFilesToPrune, 254 int last_prune, 255 const Chainstate& chain, 256 ChainstateManager& chainman); 257 258 RecursiveMutex cs_LastBlockFile; 259 260 //! Since assumedvalid chainstates may be syncing a range of the chain that is very 261 //! far away from the normal/background validation process, we should segment blockfiles 262 //! for assumed chainstates. Otherwise, we might have wildly different height ranges 263 //! mixed into the same block files, which would impair our ability to prune 264 //! effectively. 265 //! 266 //! This data structure maintains separate blockfile number cursors for each 267 //! BlockfileType. The ASSUMED state is initialized, when necessary, in FindNextBlockPos(). 268 //! 269 //! The first element is the NORMAL cursor, second is ASSUMED. 270 std::array<std::optional<BlockfileCursor>, BlockfileType::NUM_TYPES> 271 m_blockfile_cursors GUARDED_BY(cs_LastBlockFile) = { 272 BlockfileCursor{}, 273 std::nullopt, 274 }; 275 int MaxBlockfileNum() const EXCLUSIVE_LOCKS_REQUIRED(cs_LastBlockFile) 276 { 277 static const BlockfileCursor empty_cursor; 278 const auto& normal = m_blockfile_cursors[BlockfileType::NORMAL].value_or(empty_cursor); 279 const auto& assumed = m_blockfile_cursors[BlockfileType::ASSUMED].value_or(empty_cursor); 280 return std::max(normal.file_num, assumed.file_num); 281 } 282 283 /** Global flag to indicate we should check to see if there are 284 * block/undo files that should be deleted. Set on startup 285 * or if we allocate more file space when we're in prune mode 286 */ 287 bool m_check_for_pruning = false; 288 289 const bool m_prune_mode; 290 291 const Obfuscation m_obfuscation; 292 293 /** 294 * Map from external index name to oldest block that must not be pruned. 295 * 296 * @note Internally, only blocks at height (height_first - PRUNE_LOCK_BUFFER - 1) and 297 * below will be pruned, but callers should avoid assuming any particular buffer size. 298 */ 299 std::unordered_map<std::string, PruneLockInfo> m_prune_locks GUARDED_BY(::cs_main); 300 301 BlockfileType BlockfileTypeForHeight(int height); 302 303 const kernel::BlockManagerOpts m_opts; 304 305 const FlatFileSeq m_block_file_seq; 306 const FlatFileSeq m_undo_file_seq; 307 308 protected: 309 std::vector<CBlockFileInfo> m_blockfile_info; 310 311 /** Dirty block index entries. */ 312 std::set<CBlockIndex*> m_dirty_blockindex; 313 314 /** Dirty block file entries. */ 315 std::set<int> m_dirty_fileinfo; 316 317 public: 318 using Options = kernel::BlockManagerOpts; 319 using ReadRawBlockResult = util::Expected<std::vector<std::byte>, ReadRawError>; 320 321 explicit BlockManager(const util::SignalInterrupt& interrupt, Options opts); 322 323 const util::SignalInterrupt& m_interrupt; 324 std::atomic<bool> m_importing{false}; 325 326 /** 327 * Whether all blockfiles have been added to the block tree database. 328 * Normally true, but set to false when a reindex is requested and the 329 * database is wiped. The value is persisted in the database across restarts 330 * and will be false until reindexing completes. 331 */ 332 std::atomic_bool m_blockfiles_indexed{true}; 333 334 BlockMap m_block_index GUARDED_BY(cs_main); 335 336 /** 337 * The height of the base block of an assumeutxo snapshot, if one is in use. 338 * 339 * This controls how blockfiles are segmented by chainstate type to avoid 340 * comingling different height regions of the chain when an assumedvalid chainstate 341 * is in use. If heights are drastically different in the same blockfile, pruning 342 * suffers. 343 * 344 * This is set during ActivateSnapshot() or upon LoadBlockIndex() if a snapshot 345 * had been previously loaded. After the snapshot is validated, this is unset to 346 * restore normal LoadBlockIndex behavior. 347 */ 348 std::optional<int> m_snapshot_height; 349 350 std::vector<CBlockIndex*> GetAllBlockIndices() EXCLUSIVE_LOCKS_REQUIRED(::cs_main); 351 352 /** 353 * All pairs A->B, where A (or one of its ancestors) misses transactions, but B has transactions. 354 * Pruned nodes may have entries where B is missing data. 355 */ 356 std::multimap<CBlockIndex*, CBlockIndex*> m_blocks_unlinked; 357 358 std::unique_ptr<BlockTreeDB> m_block_tree_db GUARDED_BY(::cs_main); 359 360 void WriteBlockIndexDB() EXCLUSIVE_LOCKS_REQUIRED(::cs_main); 361 bool LoadBlockIndexDB(const std::optional<uint256>& snapshot_blockhash) 362 EXCLUSIVE_LOCKS_REQUIRED(::cs_main); 363 364 /** 365 * Remove any pruned block & undo files that are still on disk. 366 * This could happen on some systems if the file was still being read while unlinked, 367 * or if we crash before unlinking. 368 */ 369 void ScanAndUnlinkAlreadyPrunedFiles() EXCLUSIVE_LOCKS_REQUIRED(::cs_main); 370 371 CBlockIndex* AddToBlockIndex(const CBlockHeader& block, CBlockIndex*& best_header) EXCLUSIVE_LOCKS_REQUIRED(cs_main); 372 /** Create a new block index entry for a given block hash */ 373 CBlockIndex* InsertBlockIndex(const uint256& hash) EXCLUSIVE_LOCKS_REQUIRED(cs_main); 374 375 //! Mark one block file as pruned (modify associated database entries) 376 void PruneOneBlockFile(int fileNumber) EXCLUSIVE_LOCKS_REQUIRED(cs_main); 377 378 CBlockIndex* LookupBlockIndex(const uint256& hash) EXCLUSIVE_LOCKS_REQUIRED(cs_main); 379 const CBlockIndex* LookupBlockIndex(const uint256& hash) const EXCLUSIVE_LOCKS_REQUIRED(cs_main); 380 381 /** Get block file info entry for one block file */ 382 CBlockFileInfo* GetBlockFileInfo(size_t n); 383 384 bool WriteBlockUndo(const CBlockUndo& blockundo, BlockValidationState& state, CBlockIndex& block) 385 EXCLUSIVE_LOCKS_REQUIRED(::cs_main); 386 387 /** Store block on disk and update block file statistics. 388 * 389 * @param[in] block the block to be stored 390 * @param[in] nHeight the height of the block 391 * 392 * @returns in case of success, the position to which the block was written to 393 * in case of an error, an empty FlatFilePos 394 */ 395 FlatFilePos WriteBlock(const CBlock& block, int nHeight); 396 397 /** Update blockfile info while processing a block during reindex. The block must be available on disk. 398 * 399 * @param[in] block the block being processed 400 * @param[in] nHeight the height of the block 401 * @param[in] pos the position of the serialized CBlock on disk 402 */ 403 void UpdateBlockInfo(const CBlock& block, unsigned int nHeight, const FlatFilePos& pos); 404 405 /** Whether running in -prune mode. */ 406 [[nodiscard]] bool IsPruneMode() const { return m_prune_mode; } 407 408 /** Attempt to stay below this number of bytes of block files. */ 409 [[nodiscard]] uint64_t GetPruneTarget() const { return m_opts.prune_target; } 410 static constexpr auto PRUNE_TARGET_MANUAL{std::numeric_limits<uint64_t>::max()}; 411 412 [[nodiscard]] bool LoadingBlocks() const { return m_importing || !m_blockfiles_indexed; } 413 414 /** Calculate the amount of disk space the block & undo files currently use */ 415 uint64_t CalculateCurrentUsage(); 416 417 //! Check if all blocks in the [upper_block, lower_block] range have data available as 418 //! defined by the status mask. 419 //! The caller is responsible for ensuring that lower_block is an ancestor of upper_block 420 //! (part of the same chain). 421 bool CheckBlockDataAvailability(const CBlockIndex& upper_block, const CBlockIndex& lower_block, BlockStatus block_status = BLOCK_HAVE_DATA) EXCLUSIVE_LOCKS_REQUIRED(::cs_main); 422 423 /** 424 * @brief Returns the earliest block with specified `status_mask` flags set after 425 * the latest block _not_ having those flags. 426 * 427 * This function starts from `upper_block`, which must have all 428 * `status_mask` flags set, and iterates backwards through its ancestors. It 429 * continues as long as each block has all `status_mask` flags set, until 430 * reaching the oldest ancestor or `lower_block`. 431 * 432 * @pre `upper_block` must have all `status_mask` flags set. 433 * @pre `lower_block` must be null or an ancestor of `upper_block` 434 * 435 * @param upper_block The starting block for the search, which must have all 436 * `status_mask` flags set. 437 * @param status_mask Bitmask specifying required status flags. 438 * @param lower_block The earliest possible block to return. If null, the 439 * search can extend to the genesis block. 440 * 441 * @return A reference to the earliest block between `upper_block` 442 * and `lower_block`, inclusive, such that every block between the 443 * returned block and `upper_block` has `status_mask` flags set. 444 */ 445 const CBlockIndex& GetFirstBlock( 446 const CBlockIndex& upper_block LIFETIMEBOUND, 447 uint32_t status_mask, 448 const CBlockIndex* lower_block LIFETIMEBOUND = nullptr 449 ) const EXCLUSIVE_LOCKS_REQUIRED(::cs_main); 450 451 /** True if any block files have ever been pruned. */ 452 bool m_have_pruned = false; 453 454 //! Check whether the block associated with this index entry is pruned or not. 455 bool IsBlockPruned(const CBlockIndex& block) const EXCLUSIVE_LOCKS_REQUIRED(::cs_main); 456 457 //! Create or update a prune lock identified by its name 458 void UpdatePruneLock(const std::string& name, const PruneLockInfo& lock_info) EXCLUSIVE_LOCKS_REQUIRED(::cs_main); 459 460 //! Delete a prune lock identified by its name. Returns true if the lock existed. 461 bool DeletePruneLock(const std::string& name) EXCLUSIVE_LOCKS_REQUIRED(::cs_main); 462 463 /** Open a block file (blk?????.dat) */ 464 AutoFile OpenBlockFile(const FlatFilePos& pos, bool fReadOnly) const; 465 466 /** Translation to a filesystem path */ 467 fs::path GetBlockPosFilename(const FlatFilePos& pos) const; 468 469 /** 470 * Actually unlink the specified files 471 */ 472 void UnlinkPrunedFiles(const std::set<int>& setFilesToPrune) const; 473 474 /** Functions for disk access for blocks */ 475 bool ReadBlock(CBlock& block, const FlatFilePos& pos, const std::optional<uint256>& expected_hash) const; 476 bool ReadBlock(CBlock& block, const CBlockIndex& index) const; 477 ReadRawBlockResult ReadRawBlock(const FlatFilePos& pos, std::optional<std::pair<size_t, size_t>> block_part = std::nullopt) const; 478 479 bool ReadBlockUndo(CBlockUndo& blockundo, const CBlockIndex& index) const; 480 481 void CleanupBlockRevFiles() const; 482 }; 483 484 // Calls ActivateBestChain() even if no blocks are imported. 485 void ImportBlocks(ChainstateManager& chainman, std::span<const fs::path> import_paths); 486 } // namespace node 487 488 #endif // BITCOIN_NODE_BLOCKSTORAGE_H