// blockfilterindex.cpp
// Copyright (c) 2018-2022 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

#include <index/blockfilterindex.h>

#include <blockfilter.h>
#include <chain.h>
#include <common/args.h>
#include <dbwrapper.h>
#include <flatfile.h>
#include <hash.h>
#include <index/base.h>
#include <interfaces/chain.h>
#include <interfaces/types.h>
#include <logging.h>
#include <serialize.h>
#include <streams.h>
#include <sync.h>
#include <uint256.h>
#include <util/check.h>
#include <util/fs.h>
#include <util/hasher.h>
#include <util/syserror.h>

#include <cerrno>
#include <exception>
#include <ios>
#include <map>
#include <optional>
#include <span>
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

/* The index database stores three items for each block: the disk location of the encoded filter,
 * its dSHA256 hash, and the header. Those belonging to blocks on the active chain are indexed by
 * height, and those belonging to blocks that have been reorganized out of the active chain are
 * indexed by block hash. This ensures that filter data for any block that becomes part of the
 * active chain can always be retrieved, alleviating timing concerns.
 *
 * The filters themselves are stored in flat files and referenced by the LevelDB entries. This
 * minimizes the amount of data written to LevelDB and keeps the database values constant size. The
 * disk location of the next block filter to be written (represented as a FlatFilePos) is stored
 * under the DB_FILTER_POS key.
 *
 * Keys for the height index have the type [DB_BLOCK_HEIGHT, uint32 (BE)]. The height is represented
 * as big-endian so that sequential reads of filters by height are fast.
51 * Keys for the hash index have the type [DB_BLOCK_HASH, uint256]. 52 */ 53 constexpr uint8_t DB_BLOCK_HASH{'s'}; 54 constexpr uint8_t DB_BLOCK_HEIGHT{'t'}; 55 constexpr uint8_t DB_FILTER_POS{'P'}; 56 57 constexpr unsigned int MAX_FLTR_FILE_SIZE = 0x1000000; // 16 MiB 58 /** The pre-allocation chunk size for fltr?????.dat files */ 59 constexpr unsigned int FLTR_FILE_CHUNK_SIZE = 0x100000; // 1 MiB 60 /** Maximum size of the cfheaders cache 61 * We have a limit to prevent a bug in filling this cache 62 * potentially turning into an OOM. At 2000 entries, this cache 63 * is big enough for a 2,000,000 length block chain, which 64 * we should be enough until ~2047. */ 65 constexpr size_t CF_HEADERS_CACHE_MAX_SZ{2000}; 66 67 namespace { 68 69 struct DBVal { 70 uint256 hash; 71 uint256 header; 72 FlatFilePos pos; 73 74 SERIALIZE_METHODS(DBVal, obj) { READWRITE(obj.hash, obj.header, obj.pos); } 75 }; 76 77 struct DBHeightKey { 78 int height; 79 80 explicit DBHeightKey(int height_in) : height(height_in) {} 81 82 template<typename Stream> 83 void Serialize(Stream& s) const 84 { 85 ser_writedata8(s, DB_BLOCK_HEIGHT); 86 ser_writedata32be(s, height); 87 } 88 89 template<typename Stream> 90 void Unserialize(Stream& s) 91 { 92 const uint8_t prefix{ser_readdata8(s)}; 93 if (prefix != DB_BLOCK_HEIGHT) { 94 throw std::ios_base::failure("Invalid format for block filter index DB height key"); 95 } 96 height = ser_readdata32be(s); 97 } 98 }; 99 100 struct DBHashKey { 101 uint256 hash; 102 103 explicit DBHashKey(const uint256& hash_in) : hash(hash_in) {} 104 105 SERIALIZE_METHODS(DBHashKey, obj) { 106 uint8_t prefix{DB_BLOCK_HASH}; 107 READWRITE(prefix); 108 if (prefix != DB_BLOCK_HASH) { 109 throw std::ios_base::failure("Invalid format for block filter index DB hash key"); 110 } 111 112 READWRITE(obj.hash); 113 } 114 }; 115 116 }; // namespace 117 118 static std::map<BlockFilterType, BlockFilterIndex> g_filter_indexes; 119 120 
BlockFilterIndex::BlockFilterIndex(std::unique_ptr<interfaces::Chain> chain, BlockFilterType filter_type, 121 size_t n_cache_size, bool f_memory, bool f_wipe) 122 : BaseIndex(std::move(chain), BlockFilterTypeName(filter_type) + " block filter index") 123 , m_filter_type(filter_type) 124 { 125 const std::string& filter_name = BlockFilterTypeName(filter_type); 126 if (filter_name.empty()) throw std::invalid_argument("unknown filter_type"); 127 128 fs::path path = gArgs.GetDataDirNet() / "indexes" / "blockfilter" / fs::u8path(filter_name); 129 fs::create_directories(path); 130 131 m_db = std::make_unique<BaseIndex::DB>(path / "db", n_cache_size, f_memory, f_wipe); 132 m_filter_fileseq = std::make_unique<FlatFileSeq>(std::move(path), "fltr", FLTR_FILE_CHUNK_SIZE); 133 } 134 135 interfaces::Chain::NotifyOptions BlockFilterIndex::CustomOptions() 136 { 137 interfaces::Chain::NotifyOptions options; 138 options.connect_undo_data = true; 139 return options; 140 } 141 142 bool BlockFilterIndex::CustomInit(const std::optional<interfaces::BlockRef>& block) 143 { 144 if (!m_db->Read(DB_FILTER_POS, m_next_filter_pos)) { 145 // Check that the cause of the read failure is that the key does not exist. Any other errors 146 // indicate database corruption or a disk failure, and starting the index would cause 147 // further corruption. 148 if (m_db->Exists(DB_FILTER_POS)) { 149 LogError("Cannot read current %s state; index may be corrupted", 150 GetName()); 151 return false; 152 } 153 154 // If the DB_FILTER_POS is not set, then initialize to the first location. 
155 m_next_filter_pos.nFile = 0; 156 m_next_filter_pos.nPos = 0; 157 } 158 159 if (block) { 160 auto op_last_header = ReadFilterHeader(block->height, block->hash); 161 if (!op_last_header) { 162 LogError("Cannot read last block filter header; index may be corrupted"); 163 return false; 164 } 165 m_last_header = *op_last_header; 166 } 167 168 return true; 169 } 170 171 bool BlockFilterIndex::CustomCommit(CDBBatch& batch) 172 { 173 const FlatFilePos& pos = m_next_filter_pos; 174 175 // Flush current filter file to disk. 176 AutoFile file{m_filter_fileseq->Open(pos)}; 177 if (file.IsNull()) { 178 LogError("Failed to open filter file %d", pos.nFile); 179 return false; 180 } 181 if (!file.Commit()) { 182 LogError("Failed to commit filter file %d", pos.nFile); 183 (void)file.fclose(); 184 return false; 185 } 186 if (file.fclose() != 0) { 187 LogError("Failed to close filter file %d after commit: %s", pos.nFile, SysErrorString(errno)); 188 return false; 189 } 190 191 batch.Write(DB_FILTER_POS, pos); 192 return true; 193 } 194 195 bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const 196 { 197 AutoFile filein{m_filter_fileseq->Open(pos, true)}; 198 if (filein.IsNull()) { 199 return false; 200 } 201 202 // Check that the hash of the encoded_filter matches the one stored in the db. 
203 uint256 block_hash; 204 std::vector<uint8_t> encoded_filter; 205 try { 206 filein >> block_hash >> encoded_filter; 207 if (Hash(encoded_filter) != hash) { 208 LogError("Checksum mismatch in filter decode."); 209 return false; 210 } 211 filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter), /*skip_decode_check=*/true); 212 } 213 catch (const std::exception& e) { 214 LogError("Failed to deserialize block filter from disk: %s", e.what()); 215 return false; 216 } 217 218 return true; 219 } 220 221 size_t BlockFilterIndex::WriteFilterToDisk(FlatFilePos& pos, const BlockFilter& filter) 222 { 223 assert(filter.GetFilterType() == GetFilterType()); 224 225 uint64_t data_size{ 226 GetSerializeSize(filter.GetBlockHash()) + 227 GetSerializeSize(filter.GetEncodedFilter())}; 228 229 // If writing the filter would overflow the file, flush and move to the next one. 230 if (pos.nPos + data_size > MAX_FLTR_FILE_SIZE) { 231 AutoFile last_file{m_filter_fileseq->Open(pos)}; 232 if (last_file.IsNull()) { 233 LogError("Failed to open filter file %d", pos.nFile); 234 return 0; 235 } 236 if (!last_file.Truncate(pos.nPos)) { 237 LogError("Failed to truncate filter file %d", pos.nFile); 238 return 0; 239 } 240 if (!last_file.Commit()) { 241 LogError("Failed to commit filter file %d", pos.nFile); 242 (void)last_file.fclose(); 243 return 0; 244 } 245 if (last_file.fclose() != 0) { 246 LogError("Failed to close filter file %d after commit: %s", pos.nFile, SysErrorString(errno)); 247 return 0; 248 } 249 250 pos.nFile++; 251 pos.nPos = 0; 252 } 253 254 // Pre-allocate sufficient space for filter data. 
255 bool out_of_space; 256 m_filter_fileseq->Allocate(pos, data_size, out_of_space); 257 if (out_of_space) { 258 LogError("out of disk space"); 259 return 0; 260 } 261 262 AutoFile fileout{m_filter_fileseq->Open(pos)}; 263 if (fileout.IsNull()) { 264 LogError("Failed to open filter file %d", pos.nFile); 265 return 0; 266 } 267 268 fileout << filter.GetBlockHash() << filter.GetEncodedFilter(); 269 270 if (fileout.fclose() != 0) { 271 LogError("Failed to close filter file %d: %s", pos.nFile, SysErrorString(errno)); 272 return 0; 273 } 274 275 return data_size; 276 } 277 278 std::optional<uint256> BlockFilterIndex::ReadFilterHeader(int height, const uint256& expected_block_hash) 279 { 280 std::pair<uint256, DBVal> read_out; 281 if (!m_db->Read(DBHeightKey(height), read_out)) { 282 return std::nullopt; 283 } 284 285 if (read_out.first != expected_block_hash) { 286 LogError("previous block header belongs to unexpected block %s; expected %s", 287 read_out.first.ToString(), expected_block_hash.ToString()); 288 return std::nullopt; 289 } 290 291 return read_out.second.header; 292 } 293 294 bool BlockFilterIndex::CustomAppend(const interfaces::BlockInfo& block) 295 { 296 BlockFilter filter(m_filter_type, *Assert(block.data), *Assert(block.undo_data)); 297 const uint256& header = filter.ComputeHeader(m_last_header); 298 bool res = Write(filter, block.height, header); 299 if (res) m_last_header = header; // update last header 300 return res; 301 } 302 303 bool BlockFilterIndex::Write(const BlockFilter& filter, uint32_t block_height, const uint256& filter_header) 304 { 305 size_t bytes_written = WriteFilterToDisk(m_next_filter_pos, filter); 306 if (bytes_written == 0) return false; 307 308 std::pair<uint256, DBVal> value; 309 value.first = filter.GetBlockHash(); 310 value.second.hash = filter.GetHash(); 311 value.second.header = filter_header; 312 value.second.pos = m_next_filter_pos; 313 314 m_db->Write(DBHeightKey(block_height), value); 315 316 m_next_filter_pos.nPos += 
bytes_written; 317 return true; 318 } 319 320 [[nodiscard]] static bool CopyHeightIndexToHashIndex(CDBIterator& db_it, CDBBatch& batch, 321 const std::string& index_name, int height) 322 { 323 DBHeightKey key(height); 324 db_it.Seek(key); 325 326 if (!db_it.GetKey(key) || key.height != height) { 327 LogError("unexpected key in %s: expected (%c, %d)", 328 index_name, DB_BLOCK_HEIGHT, height); 329 return false; 330 } 331 332 std::pair<uint256, DBVal> value; 333 if (!db_it.GetValue(value)) { 334 LogError("unable to read value in %s at key (%c, %d)", 335 index_name, DB_BLOCK_HEIGHT, height); 336 return false; 337 } 338 339 batch.Write(DBHashKey(value.first), std::move(value.second)); 340 return true; 341 } 342 343 bool BlockFilterIndex::CustomRemove(const interfaces::BlockInfo& block) 344 { 345 CDBBatch batch(*m_db); 346 std::unique_ptr<CDBIterator> db_it(m_db->NewIterator()); 347 348 // During a reorg, we need to copy block filter that is getting disconnected from the 349 // height index to the hash index so we can still find it when the height index entry 350 // is overwritten. 351 if (!CopyHeightIndexToHashIndex(*db_it, batch, m_name, block.height)) { 352 return false; 353 } 354 355 // The latest filter position gets written in Commit by the call to the BaseIndex::Rewind. 356 // But since this creates new references to the filter, the position should get updated here 357 // atomically as well in case Commit fails. 358 batch.Write(DB_FILTER_POS, m_next_filter_pos); 359 m_db->WriteBatch(batch); 360 361 // Update cached header to the previous block hash 362 m_last_header = *Assert(ReadFilterHeader(block.height - 1, *Assert(block.prev_hash))); 363 return true; 364 } 365 366 static bool LookupOne(const CDBWrapper& db, const CBlockIndex* block_index, DBVal& result) 367 { 368 // First check if the result is stored under the height index and the value there matches the 369 // block hash. This should be the case if the block is on the active chain. 
370 std::pair<uint256, DBVal> read_out; 371 if (!db.Read(DBHeightKey(block_index->nHeight), read_out)) { 372 return false; 373 } 374 if (read_out.first == block_index->GetBlockHash()) { 375 result = std::move(read_out.second); 376 return true; 377 } 378 379 // If value at the height index corresponds to an different block, the result will be stored in 380 // the hash index. 381 return db.Read(DBHashKey(block_index->GetBlockHash()), result); 382 } 383 384 static bool LookupRange(CDBWrapper& db, const std::string& index_name, int start_height, 385 const CBlockIndex* stop_index, std::vector<DBVal>& results) 386 { 387 if (start_height < 0) { 388 LogError("start height (%d) is negative", start_height); 389 return false; 390 } 391 if (start_height > stop_index->nHeight) { 392 LogError("start height (%d) is greater than stop height (%d)", 393 start_height, stop_index->nHeight); 394 return false; 395 } 396 397 size_t results_size = static_cast<size_t>(stop_index->nHeight - start_height + 1); 398 std::vector<std::pair<uint256, DBVal>> values(results_size); 399 400 DBHeightKey key(start_height); 401 std::unique_ptr<CDBIterator> db_it(db.NewIterator()); 402 db_it->Seek(DBHeightKey(start_height)); 403 for (int height = start_height; height <= stop_index->nHeight; ++height) { 404 if (!db_it->Valid() || !db_it->GetKey(key) || key.height != height) { 405 return false; 406 } 407 408 size_t i = static_cast<size_t>(height - start_height); 409 if (!db_it->GetValue(values[i])) { 410 LogError("unable to read value in %s at key (%c, %d)", 411 index_name, DB_BLOCK_HEIGHT, height); 412 return false; 413 } 414 415 db_it->Next(); 416 } 417 418 results.resize(results_size); 419 420 // Iterate backwards through block indexes collecting results in order to access the block hash 421 // of each entry in case we need to look it up in the hash index. 
422 for (const CBlockIndex* block_index = stop_index; 423 block_index && block_index->nHeight >= start_height; 424 block_index = block_index->pprev) { 425 uint256 block_hash = block_index->GetBlockHash(); 426 427 size_t i = static_cast<size_t>(block_index->nHeight - start_height); 428 if (block_hash == values[i].first) { 429 results[i] = std::move(values[i].second); 430 continue; 431 } 432 433 if (!db.Read(DBHashKey(block_hash), results[i])) { 434 LogError("unable to read value in %s at key (%c, %s)", 435 index_name, DB_BLOCK_HASH, block_hash.ToString()); 436 return false; 437 } 438 } 439 440 return true; 441 } 442 443 bool BlockFilterIndex::LookupFilter(const CBlockIndex* block_index, BlockFilter& filter_out) const 444 { 445 DBVal entry; 446 if (!LookupOne(*m_db, block_index, entry)) { 447 return false; 448 } 449 450 return ReadFilterFromDisk(entry.pos, entry.hash, filter_out); 451 } 452 453 bool BlockFilterIndex::LookupFilterHeader(const CBlockIndex* block_index, uint256& header_out) 454 { 455 LOCK(m_cs_headers_cache); 456 457 bool is_checkpoint{block_index->nHeight % CFCHECKPT_INTERVAL == 0}; 458 459 if (is_checkpoint) { 460 // Try to find the block in the headers cache if this is a checkpoint height. 461 auto header = m_headers_cache.find(block_index->GetBlockHash()); 462 if (header != m_headers_cache.end()) { 463 header_out = header->second; 464 return true; 465 } 466 } 467 468 DBVal entry; 469 if (!LookupOne(*m_db, block_index, entry)) { 470 return false; 471 } 472 473 if (is_checkpoint && 474 m_headers_cache.size() < CF_HEADERS_CACHE_MAX_SZ) { 475 // Add to the headers cache if this is a checkpoint height. 
476 m_headers_cache.emplace(block_index->GetBlockHash(), entry.header); 477 } 478 479 header_out = entry.header; 480 return true; 481 } 482 483 bool BlockFilterIndex::LookupFilterRange(int start_height, const CBlockIndex* stop_index, 484 std::vector<BlockFilter>& filters_out) const 485 { 486 std::vector<DBVal> entries; 487 if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) { 488 return false; 489 } 490 491 filters_out.resize(entries.size()); 492 auto filter_pos_it = filters_out.begin(); 493 for (const auto& entry : entries) { 494 if (!ReadFilterFromDisk(entry.pos, entry.hash, *filter_pos_it)) { 495 return false; 496 } 497 ++filter_pos_it; 498 } 499 500 return true; 501 } 502 503 bool BlockFilterIndex::LookupFilterHashRange(int start_height, const CBlockIndex* stop_index, 504 std::vector<uint256>& hashes_out) const 505 506 { 507 std::vector<DBVal> entries; 508 if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) { 509 return false; 510 } 511 512 hashes_out.clear(); 513 hashes_out.reserve(entries.size()); 514 for (const auto& entry : entries) { 515 hashes_out.push_back(entry.hash); 516 } 517 return true; 518 } 519 520 BlockFilterIndex* GetBlockFilterIndex(BlockFilterType filter_type) 521 { 522 auto it = g_filter_indexes.find(filter_type); 523 return it != g_filter_indexes.end() ? 
&it->second : nullptr; 524 } 525 526 void ForEachBlockFilterIndex(std::function<void (BlockFilterIndex&)> fn) 527 { 528 for (auto& entry : g_filter_indexes) fn(entry.second); 529 } 530 531 bool InitBlockFilterIndex(std::function<std::unique_ptr<interfaces::Chain>()> make_chain, BlockFilterType filter_type, 532 size_t n_cache_size, bool f_memory, bool f_wipe) 533 { 534 auto result = g_filter_indexes.emplace(std::piecewise_construct, 535 std::forward_as_tuple(filter_type), 536 std::forward_as_tuple(make_chain(), filter_type, 537 n_cache_size, f_memory, f_wipe)); 538 return result.second; 539 } 540 541 bool DestroyBlockFilterIndex(BlockFilterType filter_type) 542 { 543 return g_filter_indexes.erase(filter_type); 544 } 545 546 void DestroyAllBlockFilterIndexes() 547 { 548 g_filter_indexes.clear(); 549 }