blockfilterindex.cpp
1 // Copyright (c) 2018-present The Bitcoin Core developers 2 // Distributed under the MIT software license, see the accompanying 3 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 4 5 #include <index/blockfilterindex.h> 6 7 #include <blockfilter.h> 8 #include <chain.h> 9 #include <common/args.h> 10 #include <dbwrapper.h> 11 #include <flatfile.h> 12 #include <hash.h> 13 #include <index/base.h> 14 #include <index/db_key.h> 15 #include <interfaces/chain.h> 16 #include <interfaces/types.h> 17 #include <serialize.h> 18 #include <streams.h> 19 #include <sync.h> 20 #include <uint256.h> 21 #include <util/check.h> 22 #include <util/fs.h> 23 #include <util/hasher.h> 24 #include <util/log.h> 25 #include <util/syserror.h> 26 27 #include <cerrno> 28 #include <exception> 29 #include <map> 30 #include <optional> 31 #include <stdexcept> 32 #include <string> 33 #include <tuple> 34 #include <utility> 35 #include <vector> 36 37 /* The index database stores three items for each block: the disk location of the encoded filter, 38 * its dSHA256 hash, and the header. Those belonging to blocks on the active chain are indexed by 39 * height, and those belonging to blocks that have been reorganized out of the active chain are 40 * indexed by block hash. This ensures that filter data for any block that becomes part of the 41 * active chain can always be retrieved, alleviating timing concerns. 42 * 43 * The filters themselves are stored in flat files and referenced by the LevelDB entries. This 44 * minimizes the amount of data written to LevelDB and keeps the database values constant size. The 45 * disk location of the next block filter to be written (represented as a FlatFilePos) is stored 46 * under the DB_FILTER_POS key. 47 * 48 * The logic for keys is shared with other indexes, see index/db_key.h. 49 */ 50 constexpr uint8_t DB_FILTER_POS{'P'}; 51 52 constexpr unsigned int MAX_FLTR_FILE_SIZE{16_MiB}; 53 /** The pre-allocation chunk size for fltr?????.dat files */ 54 constexpr unsigned int FLTR_FILE_CHUNK_SIZE{1_MiB}; 55 /** Maximum size of the cfheaders cache 56 * We have a limit to prevent a bug in filling this cache 57 * potentially turning into an OOM. At 2000 entries, this cache 58 * is big enough for a 2,000,000 length block chain, which 59 * we should be enough until ~2047. */ 60 constexpr size_t CF_HEADERS_CACHE_MAX_SZ{2000}; 61 62 namespace { 63 64 struct DBVal { 65 uint256 hash; 66 uint256 header; 67 FlatFilePos pos; 68 69 SERIALIZE_METHODS(DBVal, obj) { READWRITE(obj.hash, obj.header, obj.pos); } 70 }; 71 72 }; // namespace 73 74 static std::map<BlockFilterType, BlockFilterIndex> g_filter_indexes; 75 76 BlockFilterIndex::BlockFilterIndex(std::unique_ptr<interfaces::Chain> chain, BlockFilterType filter_type, 77 size_t n_cache_size, bool f_memory, bool f_wipe) 78 : BaseIndex(std::move(chain), BlockFilterTypeName(filter_type) + " block filter index") 79 , m_filter_type(filter_type) 80 { 81 const std::string& filter_name = BlockFilterTypeName(filter_type); 82 if (filter_name.empty()) throw std::invalid_argument("unknown filter_type"); 83 84 fs::path path = gArgs.GetDataDirNet() / "indexes" / "blockfilter" / fs::u8path(filter_name); 85 fs::create_directories(path); 86 87 m_db = std::make_unique<BaseIndex::DB>(path / "db", n_cache_size, f_memory, f_wipe); 88 m_filter_fileseq = std::make_unique<FlatFileSeq>(std::move(path), "fltr", FLTR_FILE_CHUNK_SIZE); 89 } 90 91 interfaces::Chain::NotifyOptions BlockFilterIndex::CustomOptions() 92 { 93 interfaces::Chain::NotifyOptions options; 94 options.connect_undo_data = true; 95 return options; 96 } 97 98 bool BlockFilterIndex::CustomInit(const std::optional<interfaces::BlockRef>& block) 99 { 100 if (!m_db->Read(DB_FILTER_POS, m_next_filter_pos)) { 101 // Check that the cause of the read failure is that the key does not exist. Any other errors 102 // indicate database corruption or a disk failure, and starting the index would cause 103 // further corruption. 104 if (m_db->Exists(DB_FILTER_POS)) { 105 LogError("Cannot read current %s state; index may be corrupted", 106 GetName()); 107 return false; 108 } 109 110 // If the DB_FILTER_POS is not set, then initialize to the first location. 111 m_next_filter_pos.nFile = 0; 112 m_next_filter_pos.nPos = 0; 113 } 114 115 if (block) { 116 auto op_last_header = ReadFilterHeader(block->height, block->hash); 117 if (!op_last_header) { 118 LogError("Cannot read last block filter header; index may be corrupted"); 119 return false; 120 } 121 m_last_header = *op_last_header; 122 } 123 124 return true; 125 } 126 127 bool BlockFilterIndex::CustomCommit(CDBBatch& batch) 128 { 129 const FlatFilePos& pos = m_next_filter_pos; 130 131 // Flush current filter file to disk. 132 AutoFile file{m_filter_fileseq->Open(pos)}; 133 if (file.IsNull()) { 134 LogError("Failed to open filter file %d", pos.nFile); 135 return false; 136 } 137 if (!file.Commit()) { 138 LogError("Failed to commit filter file %d", pos.nFile); 139 (void)file.fclose(); 140 return false; 141 } 142 if (file.fclose() != 0) { 143 LogError("Failed to close filter file %d after commit: %s", pos.nFile, SysErrorString(errno)); 144 return false; 145 } 146 147 batch.Write(DB_FILTER_POS, pos); 148 return true; 149 } 150 151 bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const 152 { 153 AutoFile filein{m_filter_fileseq->Open(pos, true)}; 154 if (filein.IsNull()) { 155 return false; 156 } 157 158 // Check that the hash of the encoded_filter matches the one stored in the db. 159 uint256 block_hash; 160 std::vector<uint8_t> encoded_filter; 161 try { 162 filein >> block_hash >> encoded_filter; 163 if (Hash(encoded_filter) != hash) { 164 LogError("Checksum mismatch in filter decode."); 165 return false; 166 } 167 filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter), /*skip_decode_check=*/true); 168 } 169 catch (const std::exception& e) { 170 LogError("Failed to deserialize block filter from disk: %s", e.what()); 171 return false; 172 } 173 174 return true; 175 } 176 177 size_t BlockFilterIndex::WriteFilterToDisk(FlatFilePos& pos, const BlockFilter& filter) 178 { 179 assert(filter.GetFilterType() == GetFilterType()); 180 181 uint64_t data_size{ 182 GetSerializeSize(filter.GetBlockHash()) + 183 GetSerializeSize(filter.GetEncodedFilter())}; 184 185 // If writing the filter would overflow the file, flush and move to the next one. 186 if (pos.nPos + data_size > MAX_FLTR_FILE_SIZE) { 187 AutoFile last_file{m_filter_fileseq->Open(pos)}; 188 if (last_file.IsNull()) { 189 LogError("Failed to open filter file %d", pos.nFile); 190 return 0; 191 } 192 if (!last_file.Truncate(pos.nPos)) { 193 LogError("Failed to truncate filter file %d", pos.nFile); 194 return 0; 195 } 196 if (!last_file.Commit()) { 197 LogError("Failed to commit filter file %d", pos.nFile); 198 (void)last_file.fclose(); 199 return 0; 200 } 201 if (last_file.fclose() != 0) { 202 LogError("Failed to close filter file %d after commit: %s", pos.nFile, SysErrorString(errno)); 203 return 0; 204 } 205 206 pos.nFile++; 207 pos.nPos = 0; 208 } 209 210 // Pre-allocate sufficient space for filter data. 211 bool out_of_space; 212 m_filter_fileseq->Allocate(pos, data_size, out_of_space); 213 if (out_of_space) { 214 LogError("out of disk space"); 215 return 0; 216 } 217 218 AutoFile fileout{m_filter_fileseq->Open(pos)}; 219 if (fileout.IsNull()) { 220 LogError("Failed to open filter file %d", pos.nFile); 221 return 0; 222 } 223 224 fileout << filter.GetBlockHash() << filter.GetEncodedFilter(); 225 226 if (fileout.fclose() != 0) { 227 LogError("Failed to close filter file %d: %s", pos.nFile, SysErrorString(errno)); 228 return 0; 229 } 230 231 return data_size; 232 } 233 234 std::optional<uint256> BlockFilterIndex::ReadFilterHeader(int height, const uint256& expected_block_hash) 235 { 236 std::pair<uint256, DBVal> read_out; 237 if (!m_db->Read(index_util::DBHeightKey(height), read_out)) { 238 return std::nullopt; 239 } 240 241 if (read_out.first != expected_block_hash) { 242 LogError("previous block header belongs to unexpected block %s; expected %s", 243 read_out.first.ToString(), expected_block_hash.ToString()); 244 return std::nullopt; 245 } 246 247 return read_out.second.header; 248 } 249 250 bool BlockFilterIndex::CustomAppend(const interfaces::BlockInfo& block) 251 { 252 BlockFilter filter(m_filter_type, *Assert(block.data), *Assert(block.undo_data)); 253 const uint256& header = filter.ComputeHeader(m_last_header); 254 bool res = Write(filter, block.height, header); 255 if (res) m_last_header = header; // update last header 256 return res; 257 } 258 259 bool BlockFilterIndex::Write(const BlockFilter& filter, uint32_t block_height, const uint256& filter_header) 260 { 261 size_t bytes_written = WriteFilterToDisk(m_next_filter_pos, filter); 262 if (bytes_written == 0) return false; 263 264 std::pair<uint256, DBVal> value; 265 value.first = filter.GetBlockHash(); 266 value.second.hash = filter.GetHash(); 267 value.second.header = filter_header; 268 value.second.pos = m_next_filter_pos; 269 270 m_db->Write(index_util::DBHeightKey(block_height), value); 271 272 m_next_filter_pos.nPos += bytes_written; 273 return true; 274 } 275 276 bool BlockFilterIndex::CustomRemove(const interfaces::BlockInfo& block) 277 { 278 CDBBatch batch(*m_db); 279 std::unique_ptr<CDBIterator> db_it(m_db->NewIterator()); 280 281 // During a reorg, we need to copy block filter that is getting disconnected from the 282 // height index to the hash index so we can still find it when the height index entry 283 // is overwritten. 284 if (!index_util::CopyHeightIndexToHashIndex<DBVal>(*db_it, batch, m_name, block.height)) { 285 return false; 286 } 287 288 // The latest filter position gets written in Commit by the call to the BaseIndex::Rewind. 289 // But since this creates new references to the filter, the position should get updated here 290 // atomically as well in case Commit fails. 291 batch.Write(DB_FILTER_POS, m_next_filter_pos); 292 m_db->WriteBatch(batch); 293 294 // Update cached header to the previous block hash 295 m_last_header = *Assert(ReadFilterHeader(block.height - 1, *Assert(block.prev_hash))); 296 return true; 297 } 298 299 static bool LookupRange(CDBWrapper& db, const std::string& index_name, int start_height, 300 const CBlockIndex* stop_index, std::vector<DBVal>& results) 301 { 302 if (start_height < 0) { 303 LogError("start height (%d) is negative", start_height); 304 return false; 305 } 306 if (start_height > stop_index->nHeight) { 307 LogError("start height (%d) is greater than stop height (%d)", 308 start_height, stop_index->nHeight); 309 return false; 310 } 311 312 size_t results_size = static_cast<size_t>(stop_index->nHeight - start_height + 1); 313 std::vector<std::pair<uint256, DBVal>> values(results_size); 314 315 index_util::DBHeightKey key(start_height); 316 std::unique_ptr<CDBIterator> db_it(db.NewIterator()); 317 db_it->Seek(index_util::DBHeightKey(start_height)); 318 for (int height = start_height; height <= stop_index->nHeight; ++height) { 319 if (!db_it->Valid() || !db_it->GetKey(key) || key.height != height) { 320 return false; 321 } 322 323 size_t i = static_cast<size_t>(height - start_height); 324 if (!db_it->GetValue(values[i])) { 325 LogError("unable to read value in %s at key (%c, %d)", 326 index_name, index_util::DB_BLOCK_HEIGHT, height); 327 return false; 328 } 329 330 db_it->Next(); 331 } 332 333 results.resize(results_size); 334 335 // Iterate backwards through block indexes collecting results in order to access the block hash 336 // of each entry in case we need to look it up in the hash index. 337 for (const CBlockIndex* block_index = stop_index; 338 block_index && block_index->nHeight >= start_height; 339 block_index = block_index->pprev) { 340 uint256 block_hash = block_index->GetBlockHash(); 341 342 size_t i = static_cast<size_t>(block_index->nHeight - start_height); 343 if (block_hash == values[i].first) { 344 results[i] = std::move(values[i].second); 345 continue; 346 } 347 348 if (!db.Read(index_util::DBHashKey(block_hash), results[i])) { 349 LogError("unable to read value in %s at key (%c, %s)", 350 index_name, index_util::DB_BLOCK_HASH, block_hash.ToString()); 351 return false; 352 } 353 } 354 355 return true; 356 } 357 358 bool BlockFilterIndex::LookupFilter(const CBlockIndex* block_index, BlockFilter& filter_out) const 359 { 360 DBVal entry; 361 if (!index_util::LookUpOne(*m_db, {block_index->GetBlockHash(), block_index->nHeight}, entry)) { 362 return false; 363 } 364 365 return ReadFilterFromDisk(entry.pos, entry.hash, filter_out); 366 } 367 368 bool BlockFilterIndex::LookupFilterHeader(const CBlockIndex* block_index, uint256& header_out) 369 { 370 LOCK(m_cs_headers_cache); 371 372 bool is_checkpoint{block_index->nHeight % CFCHECKPT_INTERVAL == 0}; 373 374 if (is_checkpoint) { 375 // Try to find the block in the headers cache if this is a checkpoint height. 376 auto header = m_headers_cache.find(block_index->GetBlockHash()); 377 if (header != m_headers_cache.end()) { 378 header_out = header->second; 379 return true; 380 } 381 } 382 383 DBVal entry; 384 if (!index_util::LookUpOne(*m_db, {block_index->GetBlockHash(), block_index->nHeight}, entry)) { 385 return false; 386 } 387 388 if (is_checkpoint && 389 m_headers_cache.size() < CF_HEADERS_CACHE_MAX_SZ) { 390 // Add to the headers cache if this is a checkpoint height. 391 m_headers_cache.emplace(block_index->GetBlockHash(), entry.header); 392 } 393 394 header_out = entry.header; 395 return true; 396 } 397 398 bool BlockFilterIndex::LookupFilterRange(int start_height, const CBlockIndex* stop_index, 399 std::vector<BlockFilter>& filters_out) const 400 { 401 std::vector<DBVal> entries; 402 if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) { 403 return false; 404 } 405 406 filters_out.resize(entries.size()); 407 auto filter_pos_it = filters_out.begin(); 408 for (const auto& entry : entries) { 409 if (!ReadFilterFromDisk(entry.pos, entry.hash, *filter_pos_it)) { 410 return false; 411 } 412 ++filter_pos_it; 413 } 414 415 return true; 416 } 417 418 bool BlockFilterIndex::LookupFilterHashRange(int start_height, const CBlockIndex* stop_index, 419 std::vector<uint256>& hashes_out) const 420 421 { 422 std::vector<DBVal> entries; 423 if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) { 424 return false; 425 } 426 427 hashes_out.clear(); 428 hashes_out.reserve(entries.size()); 429 for (const auto& entry : entries) { 430 hashes_out.push_back(entry.hash); 431 } 432 return true; 433 } 434 435 BlockFilterIndex* GetBlockFilterIndex(BlockFilterType filter_type) 436 { 437 auto it = g_filter_indexes.find(filter_type); 438 return it != g_filter_indexes.end() ? &it->second : nullptr; 439 } 440 441 void ForEachBlockFilterIndex(std::function<void (BlockFilterIndex&)> fn) 442 { 443 for (auto& entry : g_filter_indexes) fn(entry.second); 444 } 445 446 bool InitBlockFilterIndex(std::function<std::unique_ptr<interfaces::Chain>()> make_chain, BlockFilterType filter_type, 447 size_t n_cache_size, bool f_memory, bool f_wipe) 448 { 449 auto result = g_filter_indexes.emplace(std::piecewise_construct, 450 std::forward_as_tuple(filter_type), 451 std::forward_as_tuple(make_chain(), filter_type, 452 n_cache_size, f_memory, f_wipe)); 453 return result.second; 454 } 455 456 bool DestroyBlockFilterIndex(BlockFilterType filter_type) 457 { 458 return g_filter_indexes.erase(filter_type); 459 } 460 461 void DestroyAllBlockFilterIndexes() 462 { 463 g_filter_indexes.clear(); 464 }