// blockfilterindex.cpp
1 // Copyright (c) 2018-2022 The Bitcoin Core developers 2 // Distributed under the MIT software license, see the accompanying 3 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 4 5 #include <map> 6 7 #include <clientversion.h> 8 #include <common/args.h> 9 #include <dbwrapper.h> 10 #include <hash.h> 11 #include <index/blockfilterindex.h> 12 #include <logging.h> 13 #include <node/blockstorage.h> 14 #include <undo.h> 15 #include <util/fs_helpers.h> 16 #include <validation.h> 17 18 /* The index database stores three items for each block: the disk location of the encoded filter, 19 * its dSHA256 hash, and the header. Those belonging to blocks on the active chain are indexed by 20 * height, and those belonging to blocks that have been reorganized out of the active chain are 21 * indexed by block hash. This ensures that filter data for any block that becomes part of the 22 * active chain can always be retrieved, alleviating timing concerns. 23 * 24 * The filters themselves are stored in flat files and referenced by the LevelDB entries. This 25 * minimizes the amount of data written to LevelDB and keeps the database values constant size. The 26 * disk location of the next block filter to be written (represented as a FlatFilePos) is stored 27 * under the DB_FILTER_POS key. 28 * 29 * Keys for the height index have the type [DB_BLOCK_HEIGHT, uint32 (BE)]. The height is represented 30 * as big-endian so that sequential reads of filters by height are fast. 31 * Keys for the hash index have the type [DB_BLOCK_HASH, uint256]. 
32 */ 33 constexpr uint8_t DB_BLOCK_HASH{'s'}; 34 constexpr uint8_t DB_BLOCK_HEIGHT{'t'}; 35 constexpr uint8_t DB_FILTER_POS{'P'}; 36 37 constexpr unsigned int MAX_FLTR_FILE_SIZE = 0x1000000; // 16 MiB 38 /** The pre-allocation chunk size for fltr?????.dat files */ 39 constexpr unsigned int FLTR_FILE_CHUNK_SIZE = 0x100000; // 1 MiB 40 /** Maximum size of the cfheaders cache 41 * We have a limit to prevent a bug in filling this cache 42 * potentially turning into an OOM. At 2000 entries, this cache 43 * is big enough for a 2,000,000 length block chain, which 44 * we should be enough until ~2047. */ 45 constexpr size_t CF_HEADERS_CACHE_MAX_SZ{2000}; 46 47 namespace { 48 49 struct DBVal { 50 uint256 hash; 51 uint256 header; 52 FlatFilePos pos; 53 54 SERIALIZE_METHODS(DBVal, obj) { READWRITE(obj.hash, obj.header, obj.pos); } 55 }; 56 57 struct DBHeightKey { 58 int height; 59 60 explicit DBHeightKey(int height_in) : height(height_in) {} 61 62 template<typename Stream> 63 void Serialize(Stream& s) const 64 { 65 ser_writedata8(s, DB_BLOCK_HEIGHT); 66 ser_writedata32be(s, height); 67 } 68 69 template<typename Stream> 70 void Unserialize(Stream& s) 71 { 72 const uint8_t prefix{ser_readdata8(s)}; 73 if (prefix != DB_BLOCK_HEIGHT) { 74 throw std::ios_base::failure("Invalid format for block filter index DB height key"); 75 } 76 height = ser_readdata32be(s); 77 } 78 }; 79 80 struct DBHashKey { 81 uint256 hash; 82 83 explicit DBHashKey(const uint256& hash_in) : hash(hash_in) {} 84 85 SERIALIZE_METHODS(DBHashKey, obj) { 86 uint8_t prefix{DB_BLOCK_HASH}; 87 READWRITE(prefix); 88 if (prefix != DB_BLOCK_HASH) { 89 throw std::ios_base::failure("Invalid format for block filter index DB hash key"); 90 } 91 92 READWRITE(obj.hash); 93 } 94 }; 95 96 }; // namespace 97 98 static std::map<BlockFilterType, BlockFilterIndex> g_filter_indexes; 99 100 BlockFilterIndex::BlockFilterIndex(std::unique_ptr<interfaces::Chain> chain, BlockFilterType filter_type, 101 size_t n_cache_size, bool f_memory, 
bool f_wipe) 102 : BaseIndex(std::move(chain), BlockFilterTypeName(filter_type) + " block filter index") 103 , m_filter_type(filter_type) 104 { 105 const std::string& filter_name = BlockFilterTypeName(filter_type); 106 if (filter_name.empty()) throw std::invalid_argument("unknown filter_type"); 107 108 fs::path path = gArgs.GetDataDirNet() / "indexes" / "blockfilter" / fs::u8path(filter_name); 109 fs::create_directories(path); 110 111 m_db = std::make_unique<BaseIndex::DB>(path / "db", n_cache_size, f_memory, f_wipe); 112 m_filter_fileseq = std::make_unique<FlatFileSeq>(std::move(path), "fltr", FLTR_FILE_CHUNK_SIZE); 113 } 114 115 bool BlockFilterIndex::CustomInit(const std::optional<interfaces::BlockKey>& block) 116 { 117 if (!m_db->Read(DB_FILTER_POS, m_next_filter_pos)) { 118 // Check that the cause of the read failure is that the key does not exist. Any other errors 119 // indicate database corruption or a disk failure, and starting the index would cause 120 // further corruption. 121 if (m_db->Exists(DB_FILTER_POS)) { 122 LogError("%s: Cannot read current %s state; index may be corrupted\n", 123 __func__, GetName()); 124 return false; 125 } 126 127 // If the DB_FILTER_POS is not set, then initialize to the first location. 128 m_next_filter_pos.nFile = 0; 129 m_next_filter_pos.nPos = 0; 130 } 131 132 if (block) { 133 auto op_last_header = ReadFilterHeader(block->height, block->hash); 134 if (!op_last_header) { 135 LogError("Cannot read last block filter header; index may be corrupted\n"); 136 return false; 137 } 138 m_last_header = *op_last_header; 139 } 140 141 return true; 142 } 143 144 bool BlockFilterIndex::CustomCommit(CDBBatch& batch) 145 { 146 const FlatFilePos& pos = m_next_filter_pos; 147 148 // Flush current filter file to disk. 
149 AutoFile file{m_filter_fileseq->Open(pos)}; 150 if (file.IsNull()) { 151 LogError("%s: Failed to open filter file %d\n", __func__, pos.nFile); 152 return false; 153 } 154 if (!FileCommit(file.Get())) { 155 LogError("%s: Failed to commit filter file %d\n", __func__, pos.nFile); 156 return false; 157 } 158 159 batch.Write(DB_FILTER_POS, pos); 160 return true; 161 } 162 163 bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const 164 { 165 AutoFile filein{m_filter_fileseq->Open(pos, true)}; 166 if (filein.IsNull()) { 167 return false; 168 } 169 170 // Check that the hash of the encoded_filter matches the one stored in the db. 171 uint256 block_hash; 172 std::vector<uint8_t> encoded_filter; 173 try { 174 filein >> block_hash >> encoded_filter; 175 if (Hash(encoded_filter) != hash) { 176 LogError("Checksum mismatch in filter decode.\n"); 177 return false; 178 } 179 filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter), /*skip_decode_check=*/true); 180 } 181 catch (const std::exception& e) { 182 LogError("%s: Failed to deserialize block filter from disk: %s\n", __func__, e.what()); 183 return false; 184 } 185 186 return true; 187 } 188 189 size_t BlockFilterIndex::WriteFilterToDisk(FlatFilePos& pos, const BlockFilter& filter) 190 { 191 assert(filter.GetFilterType() == GetFilterType()); 192 193 size_t data_size = 194 GetSerializeSize(filter.GetBlockHash()) + 195 GetSerializeSize(filter.GetEncodedFilter()); 196 197 // If writing the filter would overflow the file, flush and move to the next one. 
198 if (pos.nPos + data_size > MAX_FLTR_FILE_SIZE) { 199 AutoFile last_file{m_filter_fileseq->Open(pos)}; 200 if (last_file.IsNull()) { 201 LogPrintf("%s: Failed to open filter file %d\n", __func__, pos.nFile); 202 return 0; 203 } 204 if (!TruncateFile(last_file.Get(), pos.nPos)) { 205 LogPrintf("%s: Failed to truncate filter file %d\n", __func__, pos.nFile); 206 return 0; 207 } 208 if (!FileCommit(last_file.Get())) { 209 LogPrintf("%s: Failed to commit filter file %d\n", __func__, pos.nFile); 210 return 0; 211 } 212 213 pos.nFile++; 214 pos.nPos = 0; 215 } 216 217 // Pre-allocate sufficient space for filter data. 218 bool out_of_space; 219 m_filter_fileseq->Allocate(pos, data_size, out_of_space); 220 if (out_of_space) { 221 LogPrintf("%s: out of disk space\n", __func__); 222 return 0; 223 } 224 225 AutoFile fileout{m_filter_fileseq->Open(pos)}; 226 if (fileout.IsNull()) { 227 LogPrintf("%s: Failed to open filter file %d\n", __func__, pos.nFile); 228 return 0; 229 } 230 231 fileout << filter.GetBlockHash() << filter.GetEncodedFilter(); 232 return data_size; 233 } 234 235 std::optional<uint256> BlockFilterIndex::ReadFilterHeader(int height, const uint256& expected_block_hash) 236 { 237 std::pair<uint256, DBVal> read_out; 238 if (!m_db->Read(DBHeightKey(height), read_out)) { 239 return std::nullopt; 240 } 241 242 if (read_out.first != expected_block_hash) { 243 LogError("%s: previous block header belongs to unexpected block %s; expected %s\n", 244 __func__, read_out.first.ToString(), expected_block_hash.ToString()); 245 return std::nullopt; 246 } 247 248 return read_out.second.header; 249 } 250 251 bool BlockFilterIndex::CustomAppend(const interfaces::BlockInfo& block) 252 { 253 CBlockUndo block_undo; 254 255 if (block.height > 0) { 256 // pindex variable gives indexing code access to node internals. 
It 257 // will be removed in upcoming commit 258 const CBlockIndex* pindex = WITH_LOCK(cs_main, return m_chainstate->m_blockman.LookupBlockIndex(block.hash)); 259 if (!m_chainstate->m_blockman.UndoReadFromDisk(block_undo, *pindex)) { 260 return false; 261 } 262 } 263 264 BlockFilter filter(m_filter_type, *Assert(block.data), block_undo); 265 266 const uint256& header = filter.ComputeHeader(m_last_header); 267 bool res = Write(filter, block.height, header); 268 if (res) m_last_header = header; // update last header 269 return res; 270 } 271 272 bool BlockFilterIndex::Write(const BlockFilter& filter, uint32_t block_height, const uint256& filter_header) 273 { 274 size_t bytes_written = WriteFilterToDisk(m_next_filter_pos, filter); 275 if (bytes_written == 0) return false; 276 277 std::pair<uint256, DBVal> value; 278 value.first = filter.GetBlockHash(); 279 value.second.hash = filter.GetHash(); 280 value.second.header = filter_header; 281 value.second.pos = m_next_filter_pos; 282 283 if (!m_db->Write(DBHeightKey(block_height), value)) { 284 return false; 285 } 286 287 m_next_filter_pos.nPos += bytes_written; 288 return true; 289 } 290 291 [[nodiscard]] static bool CopyHeightIndexToHashIndex(CDBIterator& db_it, CDBBatch& batch, 292 const std::string& index_name, 293 int start_height, int stop_height) 294 { 295 DBHeightKey key(start_height); 296 db_it.Seek(key); 297 298 for (int height = start_height; height <= stop_height; ++height) { 299 if (!db_it.GetKey(key) || key.height != height) { 300 LogError("%s: unexpected key in %s: expected (%c, %d)\n", 301 __func__, index_name, DB_BLOCK_HEIGHT, height); 302 return false; 303 } 304 305 std::pair<uint256, DBVal> value; 306 if (!db_it.GetValue(value)) { 307 LogError("%s: unable to read value in %s at key (%c, %d)\n", 308 __func__, index_name, DB_BLOCK_HEIGHT, height); 309 return false; 310 } 311 312 batch.Write(DBHashKey(value.first), std::move(value.second)); 313 314 db_it.Next(); 315 } 316 return true; 317 } 318 319 bool 
BlockFilterIndex::CustomRewind(const interfaces::BlockKey& current_tip, const interfaces::BlockKey& new_tip) 320 { 321 CDBBatch batch(*m_db); 322 std::unique_ptr<CDBIterator> db_it(m_db->NewIterator()); 323 324 // During a reorg, we need to copy all filters for blocks that are getting disconnected from the 325 // height index to the hash index so we can still find them when the height index entries are 326 // overwritten. 327 if (!CopyHeightIndexToHashIndex(*db_it, batch, m_name, new_tip.height, current_tip.height)) { 328 return false; 329 } 330 331 // The latest filter position gets written in Commit by the call to the BaseIndex::Rewind. 332 // But since this creates new references to the filter, the position should get updated here 333 // atomically as well in case Commit fails. 334 batch.Write(DB_FILTER_POS, m_next_filter_pos); 335 if (!m_db->WriteBatch(batch)) return false; 336 337 // Update cached header 338 m_last_header = *Assert(ReadFilterHeader(new_tip.height, new_tip.hash)); 339 return true; 340 } 341 342 static bool LookupOne(const CDBWrapper& db, const CBlockIndex* block_index, DBVal& result) 343 { 344 // First check if the result is stored under the height index and the value there matches the 345 // block hash. This should be the case if the block is on the active chain. 346 std::pair<uint256, DBVal> read_out; 347 if (!db.Read(DBHeightKey(block_index->nHeight), read_out)) { 348 return false; 349 } 350 if (read_out.first == block_index->GetBlockHash()) { 351 result = std::move(read_out.second); 352 return true; 353 } 354 355 // If value at the height index corresponds to an different block, the result will be stored in 356 // the hash index. 
357 return db.Read(DBHashKey(block_index->GetBlockHash()), result); 358 } 359 360 static bool LookupRange(CDBWrapper& db, const std::string& index_name, int start_height, 361 const CBlockIndex* stop_index, std::vector<DBVal>& results) 362 { 363 if (start_height < 0) { 364 LogError("%s: start height (%d) is negative\n", __func__, start_height); 365 return false; 366 } 367 if (start_height > stop_index->nHeight) { 368 LogError("%s: start height (%d) is greater than stop height (%d)\n", 369 __func__, start_height, stop_index->nHeight); 370 return false; 371 } 372 373 size_t results_size = static_cast<size_t>(stop_index->nHeight - start_height + 1); 374 std::vector<std::pair<uint256, DBVal>> values(results_size); 375 376 DBHeightKey key(start_height); 377 std::unique_ptr<CDBIterator> db_it(db.NewIterator()); 378 db_it->Seek(DBHeightKey(start_height)); 379 for (int height = start_height; height <= stop_index->nHeight; ++height) { 380 if (!db_it->Valid() || !db_it->GetKey(key) || key.height != height) { 381 return false; 382 } 383 384 size_t i = static_cast<size_t>(height - start_height); 385 if (!db_it->GetValue(values[i])) { 386 LogError("%s: unable to read value in %s at key (%c, %d)\n", 387 __func__, index_name, DB_BLOCK_HEIGHT, height); 388 return false; 389 } 390 391 db_it->Next(); 392 } 393 394 results.resize(results_size); 395 396 // Iterate backwards through block indexes collecting results in order to access the block hash 397 // of each entry in case we need to look it up in the hash index. 
398 for (const CBlockIndex* block_index = stop_index; 399 block_index && block_index->nHeight >= start_height; 400 block_index = block_index->pprev) { 401 uint256 block_hash = block_index->GetBlockHash(); 402 403 size_t i = static_cast<size_t>(block_index->nHeight - start_height); 404 if (block_hash == values[i].first) { 405 results[i] = std::move(values[i].second); 406 continue; 407 } 408 409 if (!db.Read(DBHashKey(block_hash), results[i])) { 410 LogError("%s: unable to read value in %s at key (%c, %s)\n", 411 __func__, index_name, DB_BLOCK_HASH, block_hash.ToString()); 412 return false; 413 } 414 } 415 416 return true; 417 } 418 419 bool BlockFilterIndex::LookupFilter(const CBlockIndex* block_index, BlockFilter& filter_out) const 420 { 421 DBVal entry; 422 if (!LookupOne(*m_db, block_index, entry)) { 423 return false; 424 } 425 426 return ReadFilterFromDisk(entry.pos, entry.hash, filter_out); 427 } 428 429 bool BlockFilterIndex::LookupFilterHeader(const CBlockIndex* block_index, uint256& header_out) 430 { 431 LOCK(m_cs_headers_cache); 432 433 bool is_checkpoint{block_index->nHeight % CFCHECKPT_INTERVAL == 0}; 434 435 if (is_checkpoint) { 436 // Try to find the block in the headers cache if this is a checkpoint height. 437 auto header = m_headers_cache.find(block_index->GetBlockHash()); 438 if (header != m_headers_cache.end()) { 439 header_out = header->second; 440 return true; 441 } 442 } 443 444 DBVal entry; 445 if (!LookupOne(*m_db, block_index, entry)) { 446 return false; 447 } 448 449 if (is_checkpoint && 450 m_headers_cache.size() < CF_HEADERS_CACHE_MAX_SZ) { 451 // Add to the headers cache if this is a checkpoint height. 
452 m_headers_cache.emplace(block_index->GetBlockHash(), entry.header); 453 } 454 455 header_out = entry.header; 456 return true; 457 } 458 459 bool BlockFilterIndex::LookupFilterRange(int start_height, const CBlockIndex* stop_index, 460 std::vector<BlockFilter>& filters_out) const 461 { 462 std::vector<DBVal> entries; 463 if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) { 464 return false; 465 } 466 467 filters_out.resize(entries.size()); 468 auto filter_pos_it = filters_out.begin(); 469 for (const auto& entry : entries) { 470 if (!ReadFilterFromDisk(entry.pos, entry.hash, *filter_pos_it)) { 471 return false; 472 } 473 ++filter_pos_it; 474 } 475 476 return true; 477 } 478 479 bool BlockFilterIndex::LookupFilterHashRange(int start_height, const CBlockIndex* stop_index, 480 std::vector<uint256>& hashes_out) const 481 482 { 483 std::vector<DBVal> entries; 484 if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) { 485 return false; 486 } 487 488 hashes_out.clear(); 489 hashes_out.reserve(entries.size()); 490 for (const auto& entry : entries) { 491 hashes_out.push_back(entry.hash); 492 } 493 return true; 494 } 495 496 BlockFilterIndex* GetBlockFilterIndex(BlockFilterType filter_type) 497 { 498 auto it = g_filter_indexes.find(filter_type); 499 return it != g_filter_indexes.end() ? 
&it->second : nullptr; 500 } 501 502 void ForEachBlockFilterIndex(std::function<void (BlockFilterIndex&)> fn) 503 { 504 for (auto& entry : g_filter_indexes) fn(entry.second); 505 } 506 507 bool InitBlockFilterIndex(std::function<std::unique_ptr<interfaces::Chain>()> make_chain, BlockFilterType filter_type, 508 size_t n_cache_size, bool f_memory, bool f_wipe) 509 { 510 auto result = g_filter_indexes.emplace(std::piecewise_construct, 511 std::forward_as_tuple(filter_type), 512 std::forward_as_tuple(make_chain(), filter_type, 513 n_cache_size, f_memory, f_wipe)); 514 return result.second; 515 } 516 517 bool DestroyBlockFilterIndex(BlockFilterType filter_type) 518 { 519 return g_filter_indexes.erase(filter_type); 520 } 521 522 void DestroyAllBlockFilterIndexes() 523 { 524 g_filter_indexes.clear(); 525 }