blockfilterindex.cpp
1 // Copyright (c) 2018-present The Bitcoin Core developers 2 // Distributed under the MIT software license, see the accompanying 3 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 4 5 #include <index/blockfilterindex.h> 6 7 #include <blockfilter.h> 8 #include <chain.h> 9 #include <common/args.h> 10 #include <dbwrapper.h> 11 #include <flatfile.h> 12 #include <hash.h> 13 #include <index/base.h> 14 #include <index/db_key.h> 15 #include <interfaces/chain.h> 16 #include <interfaces/types.h> 17 #include <serialize.h> 18 #include <streams.h> 19 #include <sync.h> 20 #include <uint256.h> 21 #include <util/check.h> 22 #include <util/fs.h> 23 #include <util/hasher.h> 24 #include <util/log.h> 25 #include <util/syserror.h> 26 27 #include <cerrno> 28 #include <exception> 29 #include <map> 30 #include <optional> 31 #include <span> 32 #include <stdexcept> 33 #include <string> 34 #include <tuple> 35 #include <utility> 36 #include <vector> 37 38 /* The index database stores three items for each block: the disk location of the encoded filter, 39 * its dSHA256 hash, and the header. Those belonging to blocks on the active chain are indexed by 40 * height, and those belonging to blocks that have been reorganized out of the active chain are 41 * indexed by block hash. This ensures that filter data for any block that becomes part of the 42 * active chain can always be retrieved, alleviating timing concerns. 43 * 44 * The filters themselves are stored in flat files and referenced by the LevelDB entries. This 45 * minimizes the amount of data written to LevelDB and keeps the database values constant size. The 46 * disk location of the next block filter to be written (represented as a FlatFilePos) is stored 47 * under the DB_FILTER_POS key. 48 * 49 * The logic for keys is shared with other indexes, see index/db_key.h. 50 */ 51 constexpr uint8_t DB_FILTER_POS{'P'}; 52 53 constexpr unsigned int MAX_FLTR_FILE_SIZE = 0x1000000; // 16 MiB 54 /** The pre-allocation chunk size for fltr?????.dat files */ 55 constexpr unsigned int FLTR_FILE_CHUNK_SIZE = 0x100000; // 1 MiB 56 /** Maximum size of the cfheaders cache 57 * We have a limit to prevent a bug in filling this cache 58 * potentially turning into an OOM. At 2000 entries, this cache 59 * is big enough for a 2,000,000 length block chain, which 60 * we should be enough until ~2047. */ 61 constexpr size_t CF_HEADERS_CACHE_MAX_SZ{2000}; 62 63 namespace { 64 65 struct DBVal { 66 uint256 hash; 67 uint256 header; 68 FlatFilePos pos; 69 70 SERIALIZE_METHODS(DBVal, obj) { READWRITE(obj.hash, obj.header, obj.pos); } 71 }; 72 73 }; // namespace 74 75 static std::map<BlockFilterType, BlockFilterIndex> g_filter_indexes; 76 77 BlockFilterIndex::BlockFilterIndex(std::unique_ptr<interfaces::Chain> chain, BlockFilterType filter_type, 78 size_t n_cache_size, bool f_memory, bool f_wipe) 79 : BaseIndex(std::move(chain), BlockFilterTypeName(filter_type) + " block filter index") 80 , m_filter_type(filter_type) 81 { 82 const std::string& filter_name = BlockFilterTypeName(filter_type); 83 if (filter_name.empty()) throw std::invalid_argument("unknown filter_type"); 84 85 fs::path path = gArgs.GetDataDirNet() / "indexes" / "blockfilter" / fs::u8path(filter_name); 86 fs::create_directories(path); 87 88 m_db = std::make_unique<BaseIndex::DB>(path / "db", n_cache_size, f_memory, f_wipe); 89 m_filter_fileseq = std::make_unique<FlatFileSeq>(std::move(path), "fltr", FLTR_FILE_CHUNK_SIZE); 90 } 91 92 interfaces::Chain::NotifyOptions BlockFilterIndex::CustomOptions() 93 { 94 interfaces::Chain::NotifyOptions options; 95 options.connect_undo_data = true; 96 return options; 97 } 98 99 bool BlockFilterIndex::CustomInit(const std::optional<interfaces::BlockRef>& block) 100 { 101 if (!m_db->Read(DB_FILTER_POS, m_next_filter_pos)) { 102 // Check that the cause of the read failure is that the key does not exist. Any other errors 103 // indicate database corruption or a disk failure, and starting the index would cause 104 // further corruption. 105 if (m_db->Exists(DB_FILTER_POS)) { 106 LogError("Cannot read current %s state; index may be corrupted", 107 GetName()); 108 return false; 109 } 110 111 // If the DB_FILTER_POS is not set, then initialize to the first location. 112 m_next_filter_pos.nFile = 0; 113 m_next_filter_pos.nPos = 0; 114 } 115 116 if (block) { 117 auto op_last_header = ReadFilterHeader(block->height, block->hash); 118 if (!op_last_header) { 119 LogError("Cannot read last block filter header; index may be corrupted"); 120 return false; 121 } 122 m_last_header = *op_last_header; 123 } 124 125 return true; 126 } 127 128 bool BlockFilterIndex::CustomCommit(CDBBatch& batch) 129 { 130 const FlatFilePos& pos = m_next_filter_pos; 131 132 // Flush current filter file to disk. 133 AutoFile file{m_filter_fileseq->Open(pos)}; 134 if (file.IsNull()) { 135 LogError("Failed to open filter file %d", pos.nFile); 136 return false; 137 } 138 if (!file.Commit()) { 139 LogError("Failed to commit filter file %d", pos.nFile); 140 (void)file.fclose(); 141 return false; 142 } 143 if (file.fclose() != 0) { 144 LogError("Failed to close filter file %d after commit: %s", pos.nFile, SysErrorString(errno)); 145 return false; 146 } 147 148 batch.Write(DB_FILTER_POS, pos); 149 return true; 150 } 151 152 bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const 153 { 154 AutoFile filein{m_filter_fileseq->Open(pos, true)}; 155 if (filein.IsNull()) { 156 return false; 157 } 158 159 // Check that the hash of the encoded_filter matches the one stored in the db. 160 uint256 block_hash; 161 std::vector<uint8_t> encoded_filter; 162 try { 163 filein >> block_hash >> encoded_filter; 164 if (Hash(encoded_filter) != hash) { 165 LogError("Checksum mismatch in filter decode."); 166 return false; 167 } 168 filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter), /*skip_decode_check=*/true); 169 } 170 catch (const std::exception& e) { 171 LogError("Failed to deserialize block filter from disk: %s", e.what()); 172 return false; 173 } 174 175 return true; 176 } 177 178 size_t BlockFilterIndex::WriteFilterToDisk(FlatFilePos& pos, const BlockFilter& filter) 179 { 180 assert(filter.GetFilterType() == GetFilterType()); 181 182 uint64_t data_size{ 183 GetSerializeSize(filter.GetBlockHash()) + 184 GetSerializeSize(filter.GetEncodedFilter())}; 185 186 // If writing the filter would overflow the file, flush and move to the next one. 187 if (pos.nPos + data_size > MAX_FLTR_FILE_SIZE) { 188 AutoFile last_file{m_filter_fileseq->Open(pos)}; 189 if (last_file.IsNull()) { 190 LogError("Failed to open filter file %d", pos.nFile); 191 return 0; 192 } 193 if (!last_file.Truncate(pos.nPos)) { 194 LogError("Failed to truncate filter file %d", pos.nFile); 195 return 0; 196 } 197 if (!last_file.Commit()) { 198 LogError("Failed to commit filter file %d", pos.nFile); 199 (void)last_file.fclose(); 200 return 0; 201 } 202 if (last_file.fclose() != 0) { 203 LogError("Failed to close filter file %d after commit: %s", pos.nFile, SysErrorString(errno)); 204 return 0; 205 } 206 207 pos.nFile++; 208 pos.nPos = 0; 209 } 210 211 // Pre-allocate sufficient space for filter data. 212 bool out_of_space; 213 m_filter_fileseq->Allocate(pos, data_size, out_of_space); 214 if (out_of_space) { 215 LogError("out of disk space"); 216 return 0; 217 } 218 219 AutoFile fileout{m_filter_fileseq->Open(pos)}; 220 if (fileout.IsNull()) { 221 LogError("Failed to open filter file %d", pos.nFile); 222 return 0; 223 } 224 225 fileout << filter.GetBlockHash() << filter.GetEncodedFilter(); 226 227 if (fileout.fclose() != 0) { 228 LogError("Failed to close filter file %d: %s", pos.nFile, SysErrorString(errno)); 229 return 0; 230 } 231 232 return data_size; 233 } 234 235 std::optional<uint256> BlockFilterIndex::ReadFilterHeader(int height, const uint256& expected_block_hash) 236 { 237 std::pair<uint256, DBVal> read_out; 238 if (!m_db->Read(index_util::DBHeightKey(height), read_out)) { 239 return std::nullopt; 240 } 241 242 if (read_out.first != expected_block_hash) { 243 LogError("previous block header belongs to unexpected block %s; expected %s", 244 read_out.first.ToString(), expected_block_hash.ToString()); 245 return std::nullopt; 246 } 247 248 return read_out.second.header; 249 } 250 251 bool BlockFilterIndex::CustomAppend(const interfaces::BlockInfo& block) 252 { 253 BlockFilter filter(m_filter_type, *Assert(block.data), *Assert(block.undo_data)); 254 const uint256& header = filter.ComputeHeader(m_last_header); 255 bool res = Write(filter, block.height, header); 256 if (res) m_last_header = header; // update last header 257 return res; 258 } 259 260 bool BlockFilterIndex::Write(const BlockFilter& filter, uint32_t block_height, const uint256& filter_header) 261 { 262 size_t bytes_written = WriteFilterToDisk(m_next_filter_pos, filter); 263 if (bytes_written == 0) return false; 264 265 std::pair<uint256, DBVal> value; 266 value.first = filter.GetBlockHash(); 267 value.second.hash = filter.GetHash(); 268 value.second.header = filter_header; 269 value.second.pos = m_next_filter_pos; 270 271 m_db->Write(index_util::DBHeightKey(block_height), value); 272 273 m_next_filter_pos.nPos += bytes_written; 274 return true; 275 } 276 277 bool BlockFilterIndex::CustomRemove(const interfaces::BlockInfo& block) 278 { 279 CDBBatch batch(*m_db); 280 std::unique_ptr<CDBIterator> db_it(m_db->NewIterator()); 281 282 // During a reorg, we need to copy block filter that is getting disconnected from the 283 // height index to the hash index so we can still find it when the height index entry 284 // is overwritten. 285 if (!index_util::CopyHeightIndexToHashIndex<DBVal>(*db_it, batch, m_name, block.height)) { 286 return false; 287 } 288 289 // The latest filter position gets written in Commit by the call to the BaseIndex::Rewind. 290 // But since this creates new references to the filter, the position should get updated here 291 // atomically as well in case Commit fails. 292 batch.Write(DB_FILTER_POS, m_next_filter_pos); 293 m_db->WriteBatch(batch); 294 295 // Update cached header to the previous block hash 296 m_last_header = *Assert(ReadFilterHeader(block.height - 1, *Assert(block.prev_hash))); 297 return true; 298 } 299 300 static bool LookupRange(CDBWrapper& db, const std::string& index_name, int start_height, 301 const CBlockIndex* stop_index, std::vector<DBVal>& results) 302 { 303 if (start_height < 0) { 304 LogError("start height (%d) is negative", start_height); 305 return false; 306 } 307 if (start_height > stop_index->nHeight) { 308 LogError("start height (%d) is greater than stop height (%d)", 309 start_height, stop_index->nHeight); 310 return false; 311 } 312 313 size_t results_size = static_cast<size_t>(stop_index->nHeight - start_height + 1); 314 std::vector<std::pair<uint256, DBVal>> values(results_size); 315 316 index_util::DBHeightKey key(start_height); 317 std::unique_ptr<CDBIterator> db_it(db.NewIterator()); 318 db_it->Seek(index_util::DBHeightKey(start_height)); 319 for (int height = start_height; height <= stop_index->nHeight; ++height) { 320 if (!db_it->Valid() || !db_it->GetKey(key) || key.height != height) { 321 return false; 322 } 323 324 size_t i = static_cast<size_t>(height - start_height); 325 if (!db_it->GetValue(values[i])) { 326 LogError("unable to read value in %s at key (%c, %d)", 327 index_name, index_util::DB_BLOCK_HEIGHT, height); 328 return false; 329 } 330 331 db_it->Next(); 332 } 333 334 results.resize(results_size); 335 336 // Iterate backwards through block indexes collecting results in order to access the block hash 337 // of each entry in case we need to look it up in the hash index. 338 for (const CBlockIndex* block_index = stop_index; 339 block_index && block_index->nHeight >= start_height; 340 block_index = block_index->pprev) { 341 uint256 block_hash = block_index->GetBlockHash(); 342 343 size_t i = static_cast<size_t>(block_index->nHeight - start_height); 344 if (block_hash == values[i].first) { 345 results[i] = std::move(values[i].second); 346 continue; 347 } 348 349 if (!db.Read(index_util::DBHashKey(block_hash), results[i])) { 350 LogError("unable to read value in %s at key (%c, %s)", 351 index_name, index_util::DB_BLOCK_HASH, block_hash.ToString()); 352 return false; 353 } 354 } 355 356 return true; 357 } 358 359 bool BlockFilterIndex::LookupFilter(const CBlockIndex* block_index, BlockFilter& filter_out) const 360 { 361 DBVal entry; 362 if (!index_util::LookUpOne(*m_db, {block_index->GetBlockHash(), block_index->nHeight}, entry)) { 363 return false; 364 } 365 366 return ReadFilterFromDisk(entry.pos, entry.hash, filter_out); 367 } 368 369 bool BlockFilterIndex::LookupFilterHeader(const CBlockIndex* block_index, uint256& header_out) 370 { 371 LOCK(m_cs_headers_cache); 372 373 bool is_checkpoint{block_index->nHeight % CFCHECKPT_INTERVAL == 0}; 374 375 if (is_checkpoint) { 376 // Try to find the block in the headers cache if this is a checkpoint height. 377 auto header = m_headers_cache.find(block_index->GetBlockHash()); 378 if (header != m_headers_cache.end()) { 379 header_out = header->second; 380 return true; 381 } 382 } 383 384 DBVal entry; 385 if (!index_util::LookUpOne(*m_db, {block_index->GetBlockHash(), block_index->nHeight}, entry)) { 386 return false; 387 } 388 389 if (is_checkpoint && 390 m_headers_cache.size() < CF_HEADERS_CACHE_MAX_SZ) { 391 // Add to the headers cache if this is a checkpoint height. 392 m_headers_cache.emplace(block_index->GetBlockHash(), entry.header); 393 } 394 395 header_out = entry.header; 396 return true; 397 } 398 399 bool BlockFilterIndex::LookupFilterRange(int start_height, const CBlockIndex* stop_index, 400 std::vector<BlockFilter>& filters_out) const 401 { 402 std::vector<DBVal> entries; 403 if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) { 404 return false; 405 } 406 407 filters_out.resize(entries.size()); 408 auto filter_pos_it = filters_out.begin(); 409 for (const auto& entry : entries) { 410 if (!ReadFilterFromDisk(entry.pos, entry.hash, *filter_pos_it)) { 411 return false; 412 } 413 ++filter_pos_it; 414 } 415 416 return true; 417 } 418 419 bool BlockFilterIndex::LookupFilterHashRange(int start_height, const CBlockIndex* stop_index, 420 std::vector<uint256>& hashes_out) const 421 422 { 423 std::vector<DBVal> entries; 424 if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) { 425 return false; 426 } 427 428 hashes_out.clear(); 429 hashes_out.reserve(entries.size()); 430 for (const auto& entry : entries) { 431 hashes_out.push_back(entry.hash); 432 } 433 return true; 434 } 435 436 BlockFilterIndex* GetBlockFilterIndex(BlockFilterType filter_type) 437 { 438 auto it = g_filter_indexes.find(filter_type); 439 return it != g_filter_indexes.end() ? &it->second : nullptr; 440 } 441 442 void ForEachBlockFilterIndex(std::function<void (BlockFilterIndex&)> fn) 443 { 444 for (auto& entry : g_filter_indexes) fn(entry.second); 445 } 446 447 bool InitBlockFilterIndex(std::function<std::unique_ptr<interfaces::Chain>()> make_chain, BlockFilterType filter_type, 448 size_t n_cache_size, bool f_memory, bool f_wipe) 449 { 450 auto result = g_filter_indexes.emplace(std::piecewise_construct, 451 std::forward_as_tuple(filter_type), 452 std::forward_as_tuple(make_chain(), filter_type, 453 n_cache_size, f_memory, f_wipe)); 454 return result.second; 455 } 456 457 bool DestroyBlockFilterIndex(BlockFilterType filter_type) 458 { 459 return g_filter_indexes.erase(filter_type); 460 } 461 462 void DestroyAllBlockFilterIndexes() 463 { 464 g_filter_indexes.clear(); 465 }