/ src / index / blockfilterindex.cpp
blockfilterindex.cpp
  1  // Copyright (c) 2018-2022 The Bitcoin Core developers
  2  // Distributed under the MIT software license, see the accompanying
  3  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  4  
  5  #include <map>
  6  
  7  #include <clientversion.h>
  8  #include <common/args.h>
  9  #include <dbwrapper.h>
 10  #include <hash.h>
 11  #include <index/blockfilterindex.h>
 12  #include <logging.h>
 13  #include <node/blockstorage.h>
 14  #include <undo.h>
 15  #include <util/fs_helpers.h>
 16  #include <validation.h>
 17  
 18  /* The index database stores three items for each block: the disk location of the encoded filter,
 19   * its dSHA256 hash, and the header. Those belonging to blocks on the active chain are indexed by
 20   * height, and those belonging to blocks that have been reorganized out of the active chain are
 21   * indexed by block hash. This ensures that filter data for any block that becomes part of the
 22   * active chain can always be retrieved, alleviating timing concerns.
 23   *
 24   * The filters themselves are stored in flat files and referenced by the LevelDB entries. This
 25   * minimizes the amount of data written to LevelDB and keeps the database values constant size. The
 26   * disk location of the next block filter to be written (represented as a FlatFilePos) is stored
 27   * under the DB_FILTER_POS key.
 28   *
 29   * Keys for the height index have the type [DB_BLOCK_HEIGHT, uint32 (BE)]. The height is represented
 30   * as big-endian so that sequential reads of filters by height are fast.
 31   * Keys for the hash index have the type [DB_BLOCK_HASH, uint256].
 32   */
 33  constexpr uint8_t DB_BLOCK_HASH{'s'};
 34  constexpr uint8_t DB_BLOCK_HEIGHT{'t'};
 35  constexpr uint8_t DB_FILTER_POS{'P'};
 36  
 37  constexpr unsigned int MAX_FLTR_FILE_SIZE = 0x1000000; // 16 MiB
 38  /** The pre-allocation chunk size for fltr?????.dat files */
 39  constexpr unsigned int FLTR_FILE_CHUNK_SIZE = 0x100000; // 1 MiB
 40  /** Maximum size of the cfheaders cache
 41   *  We have a limit to prevent a bug in filling this cache
 42   *  potentially turning into an OOM. At 2000 entries, this cache
 43   *  is big enough for a 2,000,000 length block chain, which
 44   *  we should be enough until ~2047. */
 45  constexpr size_t CF_HEADERS_CACHE_MAX_SZ{2000};
 46  
 47  namespace {
 48  
 49  struct DBVal {
 50      uint256 hash;
 51      uint256 header;
 52      FlatFilePos pos;
 53  
 54      SERIALIZE_METHODS(DBVal, obj) { READWRITE(obj.hash, obj.header, obj.pos); }
 55  };
 56  
 57  struct DBHeightKey {
 58      int height;
 59  
 60      explicit DBHeightKey(int height_in) : height(height_in) {}
 61  
 62      template<typename Stream>
 63      void Serialize(Stream& s) const
 64      {
 65          ser_writedata8(s, DB_BLOCK_HEIGHT);
 66          ser_writedata32be(s, height);
 67      }
 68  
 69      template<typename Stream>
 70      void Unserialize(Stream& s)
 71      {
 72          const uint8_t prefix{ser_readdata8(s)};
 73          if (prefix != DB_BLOCK_HEIGHT) {
 74              throw std::ios_base::failure("Invalid format for block filter index DB height key");
 75          }
 76          height = ser_readdata32be(s);
 77      }
 78  };
 79  
 80  struct DBHashKey {
 81      uint256 hash;
 82  
 83      explicit DBHashKey(const uint256& hash_in) : hash(hash_in) {}
 84  
 85      SERIALIZE_METHODS(DBHashKey, obj) {
 86          uint8_t prefix{DB_BLOCK_HASH};
 87          READWRITE(prefix);
 88          if (prefix != DB_BLOCK_HASH) {
 89              throw std::ios_base::failure("Invalid format for block filter index DB hash key");
 90          }
 91  
 92          READWRITE(obj.hash);
 93      }
 94  };
 95  
 96  }; // namespace
 97  
 98  static std::map<BlockFilterType, BlockFilterIndex> g_filter_indexes;
 99  
100  BlockFilterIndex::BlockFilterIndex(std::unique_ptr<interfaces::Chain> chain, BlockFilterType filter_type,
101                                     size_t n_cache_size, bool f_memory, bool f_wipe)
102      : BaseIndex(std::move(chain), BlockFilterTypeName(filter_type) + " block filter index")
103      , m_filter_type(filter_type)
104  {
105      const std::string& filter_name = BlockFilterTypeName(filter_type);
106      if (filter_name.empty()) throw std::invalid_argument("unknown filter_type");
107  
108      fs::path path = gArgs.GetDataDirNet() / "indexes" / "blockfilter" / fs::u8path(filter_name);
109      fs::create_directories(path);
110  
111      m_db = std::make_unique<BaseIndex::DB>(path / "db", n_cache_size, f_memory, f_wipe);
112      m_filter_fileseq = std::make_unique<FlatFileSeq>(std::move(path), "fltr", FLTR_FILE_CHUNK_SIZE);
113  }
114  
115  bool BlockFilterIndex::CustomInit(const std::optional<interfaces::BlockRef>& block)
116  {
117      if (!m_db->Read(DB_FILTER_POS, m_next_filter_pos)) {
118          // Check that the cause of the read failure is that the key does not exist. Any other errors
119          // indicate database corruption or a disk failure, and starting the index would cause
120          // further corruption.
121          if (m_db->Exists(DB_FILTER_POS)) {
122              LogError("%s: Cannot read current %s state; index may be corrupted\n",
123                           __func__, GetName());
124              return false;
125          }
126  
127          // If the DB_FILTER_POS is not set, then initialize to the first location.
128          m_next_filter_pos.nFile = 0;
129          m_next_filter_pos.nPos = 0;
130      }
131  
132      if (block) {
133          auto op_last_header = ReadFilterHeader(block->height, block->hash);
134          if (!op_last_header) {
135              LogError("Cannot read last block filter header; index may be corrupted\n");
136              return false;
137          }
138          m_last_header = *op_last_header;
139      }
140  
141      return true;
142  }
143  
144  bool BlockFilterIndex::CustomCommit(CDBBatch& batch)
145  {
146      const FlatFilePos& pos = m_next_filter_pos;
147  
148      // Flush current filter file to disk.
149      AutoFile file{m_filter_fileseq->Open(pos)};
150      if (file.IsNull()) {
151          LogError("%s: Failed to open filter file %d\n", __func__, pos.nFile);
152          return false;
153      }
154      if (!file.Commit()) {
155          LogError("%s: Failed to commit filter file %d\n", __func__, pos.nFile);
156          return false;
157      }
158  
159      batch.Write(DB_FILTER_POS, pos);
160      return true;
161  }
162  
163  bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const
164  {
165      AutoFile filein{m_filter_fileseq->Open(pos, true)};
166      if (filein.IsNull()) {
167          return false;
168      }
169  
170      // Check that the hash of the encoded_filter matches the one stored in the db.
171      uint256 block_hash;
172      std::vector<uint8_t> encoded_filter;
173      try {
174          filein >> block_hash >> encoded_filter;
175          if (Hash(encoded_filter) != hash) {
176              LogError("Checksum mismatch in filter decode.\n");
177              return false;
178          }
179          filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter), /*skip_decode_check=*/true);
180      }
181      catch (const std::exception& e) {
182          LogError("%s: Failed to deserialize block filter from disk: %s\n", __func__, e.what());
183          return false;
184      }
185  
186      return true;
187  }
188  
189  size_t BlockFilterIndex::WriteFilterToDisk(FlatFilePos& pos, const BlockFilter& filter)
190  {
191      assert(filter.GetFilterType() == GetFilterType());
192  
193      size_t data_size =
194          GetSerializeSize(filter.GetBlockHash()) +
195          GetSerializeSize(filter.GetEncodedFilter());
196  
197      // If writing the filter would overflow the file, flush and move to the next one.
198      if (pos.nPos + data_size > MAX_FLTR_FILE_SIZE) {
199          AutoFile last_file{m_filter_fileseq->Open(pos)};
200          if (last_file.IsNull()) {
201              LogPrintf("%s: Failed to open filter file %d\n", __func__, pos.nFile);
202              return 0;
203          }
204          if (!last_file.Truncate(pos.nPos)) {
205              LogPrintf("%s: Failed to truncate filter file %d\n", __func__, pos.nFile);
206              return 0;
207          }
208          if (!last_file.Commit()) {
209              LogPrintf("%s: Failed to commit filter file %d\n", __func__, pos.nFile);
210              return 0;
211          }
212  
213          pos.nFile++;
214          pos.nPos = 0;
215      }
216  
217      // Pre-allocate sufficient space for filter data.
218      bool out_of_space;
219      m_filter_fileseq->Allocate(pos, data_size, out_of_space);
220      if (out_of_space) {
221          LogPrintf("%s: out of disk space\n", __func__);
222          return 0;
223      }
224  
225      AutoFile fileout{m_filter_fileseq->Open(pos)};
226      if (fileout.IsNull()) {
227          LogPrintf("%s: Failed to open filter file %d\n", __func__, pos.nFile);
228          return 0;
229      }
230  
231      fileout << filter.GetBlockHash() << filter.GetEncodedFilter();
232      return data_size;
233  }
234  
235  std::optional<uint256> BlockFilterIndex::ReadFilterHeader(int height, const uint256& expected_block_hash)
236  {
237      std::pair<uint256, DBVal> read_out;
238      if (!m_db->Read(DBHeightKey(height), read_out)) {
239          return std::nullopt;
240      }
241  
242      if (read_out.first != expected_block_hash) {
243          LogError("%s: previous block header belongs to unexpected block %s; expected %s\n",
244                           __func__, read_out.first.ToString(), expected_block_hash.ToString());
245          return std::nullopt;
246      }
247  
248      return read_out.second.header;
249  }
250  
251  bool BlockFilterIndex::CustomAppend(const interfaces::BlockInfo& block)
252  {
253      CBlockUndo block_undo;
254  
255      if (block.height > 0) {
256          // pindex variable gives indexing code access to node internals. It
257          // will be removed in upcoming commit
258          const CBlockIndex* pindex = WITH_LOCK(cs_main, return m_chainstate->m_blockman.LookupBlockIndex(block.hash));
259          if (!m_chainstate->m_blockman.ReadBlockUndo(block_undo, *pindex)) {
260              return false;
261          }
262      }
263  
264      BlockFilter filter(m_filter_type, *Assert(block.data), block_undo);
265  
266      const uint256& header = filter.ComputeHeader(m_last_header);
267      bool res = Write(filter, block.height, header);
268      if (res) m_last_header = header; // update last header
269      return res;
270  }
271  
272  bool BlockFilterIndex::Write(const BlockFilter& filter, uint32_t block_height, const uint256& filter_header)
273  {
274      size_t bytes_written = WriteFilterToDisk(m_next_filter_pos, filter);
275      if (bytes_written == 0) return false;
276  
277      std::pair<uint256, DBVal> value;
278      value.first = filter.GetBlockHash();
279      value.second.hash = filter.GetHash();
280      value.second.header = filter_header;
281      value.second.pos = m_next_filter_pos;
282  
283      if (!m_db->Write(DBHeightKey(block_height), value)) {
284          return false;
285      }
286  
287      m_next_filter_pos.nPos += bytes_written;
288      return true;
289  }
290  
291  [[nodiscard]] static bool CopyHeightIndexToHashIndex(CDBIterator& db_it, CDBBatch& batch,
292                                         const std::string& index_name,
293                                         int start_height, int stop_height)
294  {
295      DBHeightKey key(start_height);
296      db_it.Seek(key);
297  
298      for (int height = start_height; height <= stop_height; ++height) {
299          if (!db_it.GetKey(key) || key.height != height) {
300              LogError("%s: unexpected key in %s: expected (%c, %d)\n",
301                           __func__, index_name, DB_BLOCK_HEIGHT, height);
302              return false;
303          }
304  
305          std::pair<uint256, DBVal> value;
306          if (!db_it.GetValue(value)) {
307              LogError("%s: unable to read value in %s at key (%c, %d)\n",
308                           __func__, index_name, DB_BLOCK_HEIGHT, height);
309              return false;
310          }
311  
312          batch.Write(DBHashKey(value.first), std::move(value.second));
313  
314          db_it.Next();
315      }
316      return true;
317  }
318  
319  bool BlockFilterIndex::CustomRewind(const interfaces::BlockRef& current_tip, const interfaces::BlockRef& new_tip)
320  {
321      CDBBatch batch(*m_db);
322      std::unique_ptr<CDBIterator> db_it(m_db->NewIterator());
323  
324      // During a reorg, we need to copy all filters for blocks that are getting disconnected from the
325      // height index to the hash index so we can still find them when the height index entries are
326      // overwritten.
327      if (!CopyHeightIndexToHashIndex(*db_it, batch, m_name, new_tip.height, current_tip.height)) {
328          return false;
329      }
330  
331      // The latest filter position gets written in Commit by the call to the BaseIndex::Rewind.
332      // But since this creates new references to the filter, the position should get updated here
333      // atomically as well in case Commit fails.
334      batch.Write(DB_FILTER_POS, m_next_filter_pos);
335      if (!m_db->WriteBatch(batch)) return false;
336  
337      // Update cached header
338      m_last_header = *Assert(ReadFilterHeader(new_tip.height, new_tip.hash));
339      return true;
340  }
341  
342  static bool LookupOne(const CDBWrapper& db, const CBlockIndex* block_index, DBVal& result)
343  {
344      // First check if the result is stored under the height index and the value there matches the
345      // block hash. This should be the case if the block is on the active chain.
346      std::pair<uint256, DBVal> read_out;
347      if (!db.Read(DBHeightKey(block_index->nHeight), read_out)) {
348          return false;
349      }
350      if (read_out.first == block_index->GetBlockHash()) {
351          result = std::move(read_out.second);
352          return true;
353      }
354  
355      // If value at the height index corresponds to an different block, the result will be stored in
356      // the hash index.
357      return db.Read(DBHashKey(block_index->GetBlockHash()), result);
358  }
359  
360  static bool LookupRange(CDBWrapper& db, const std::string& index_name, int start_height,
361                          const CBlockIndex* stop_index, std::vector<DBVal>& results)
362  {
363      if (start_height < 0) {
364          LogError("%s: start height (%d) is negative\n", __func__, start_height);
365          return false;
366      }
367      if (start_height > stop_index->nHeight) {
368          LogError("%s: start height (%d) is greater than stop height (%d)\n",
369                       __func__, start_height, stop_index->nHeight);
370          return false;
371      }
372  
373      size_t results_size = static_cast<size_t>(stop_index->nHeight - start_height + 1);
374      std::vector<std::pair<uint256, DBVal>> values(results_size);
375  
376      DBHeightKey key(start_height);
377      std::unique_ptr<CDBIterator> db_it(db.NewIterator());
378      db_it->Seek(DBHeightKey(start_height));
379      for (int height = start_height; height <= stop_index->nHeight; ++height) {
380          if (!db_it->Valid() || !db_it->GetKey(key) || key.height != height) {
381              return false;
382          }
383  
384          size_t i = static_cast<size_t>(height - start_height);
385          if (!db_it->GetValue(values[i])) {
386              LogError("%s: unable to read value in %s at key (%c, %d)\n",
387                           __func__, index_name, DB_BLOCK_HEIGHT, height);
388              return false;
389          }
390  
391          db_it->Next();
392      }
393  
394      results.resize(results_size);
395  
396      // Iterate backwards through block indexes collecting results in order to access the block hash
397      // of each entry in case we need to look it up in the hash index.
398      for (const CBlockIndex* block_index = stop_index;
399           block_index && block_index->nHeight >= start_height;
400           block_index = block_index->pprev) {
401          uint256 block_hash = block_index->GetBlockHash();
402  
403          size_t i = static_cast<size_t>(block_index->nHeight - start_height);
404          if (block_hash == values[i].first) {
405              results[i] = std::move(values[i].second);
406              continue;
407          }
408  
409          if (!db.Read(DBHashKey(block_hash), results[i])) {
410              LogError("%s: unable to read value in %s at key (%c, %s)\n",
411                           __func__, index_name, DB_BLOCK_HASH, block_hash.ToString());
412              return false;
413          }
414      }
415  
416      return true;
417  }
418  
419  bool BlockFilterIndex::LookupFilter(const CBlockIndex* block_index, BlockFilter& filter_out) const
420  {
421      DBVal entry;
422      if (!LookupOne(*m_db, block_index, entry)) {
423          return false;
424      }
425  
426      return ReadFilterFromDisk(entry.pos, entry.hash, filter_out);
427  }
428  
429  bool BlockFilterIndex::LookupFilterHeader(const CBlockIndex* block_index, uint256& header_out)
430  {
431      LOCK(m_cs_headers_cache);
432  
433      bool is_checkpoint{block_index->nHeight % CFCHECKPT_INTERVAL == 0};
434  
435      if (is_checkpoint) {
436          // Try to find the block in the headers cache if this is a checkpoint height.
437          auto header = m_headers_cache.find(block_index->GetBlockHash());
438          if (header != m_headers_cache.end()) {
439              header_out = header->second;
440              return true;
441          }
442      }
443  
444      DBVal entry;
445      if (!LookupOne(*m_db, block_index, entry)) {
446          return false;
447      }
448  
449      if (is_checkpoint &&
450          m_headers_cache.size() < CF_HEADERS_CACHE_MAX_SZ) {
451          // Add to the headers cache if this is a checkpoint height.
452          m_headers_cache.emplace(block_index->GetBlockHash(), entry.header);
453      }
454  
455      header_out = entry.header;
456      return true;
457  }
458  
459  bool BlockFilterIndex::LookupFilterRange(int start_height, const CBlockIndex* stop_index,
460                                           std::vector<BlockFilter>& filters_out) const
461  {
462      std::vector<DBVal> entries;
463      if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) {
464          return false;
465      }
466  
467      filters_out.resize(entries.size());
468      auto filter_pos_it = filters_out.begin();
469      for (const auto& entry : entries) {
470          if (!ReadFilterFromDisk(entry.pos, entry.hash, *filter_pos_it)) {
471              return false;
472          }
473          ++filter_pos_it;
474      }
475  
476      return true;
477  }
478  
479  bool BlockFilterIndex::LookupFilterHashRange(int start_height, const CBlockIndex* stop_index,
480                                               std::vector<uint256>& hashes_out) const
481  
482  {
483      std::vector<DBVal> entries;
484      if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) {
485          return false;
486      }
487  
488      hashes_out.clear();
489      hashes_out.reserve(entries.size());
490      for (const auto& entry : entries) {
491          hashes_out.push_back(entry.hash);
492      }
493      return true;
494  }
495  
496  BlockFilterIndex* GetBlockFilterIndex(BlockFilterType filter_type)
497  {
498      auto it = g_filter_indexes.find(filter_type);
499      return it != g_filter_indexes.end() ? &it->second : nullptr;
500  }
501  
502  void ForEachBlockFilterIndex(std::function<void (BlockFilterIndex&)> fn)
503  {
504      for (auto& entry : g_filter_indexes) fn(entry.second);
505  }
506  
507  bool InitBlockFilterIndex(std::function<std::unique_ptr<interfaces::Chain>()> make_chain, BlockFilterType filter_type,
508                            size_t n_cache_size, bool f_memory, bool f_wipe)
509  {
510      auto result = g_filter_indexes.emplace(std::piecewise_construct,
511                                             std::forward_as_tuple(filter_type),
512                                             std::forward_as_tuple(make_chain(), filter_type,
513                                                                   n_cache_size, f_memory, f_wipe));
514      return result.second;
515  }
516  
517  bool DestroyBlockFilterIndex(BlockFilterType filter_type)
518  {
519      return g_filter_indexes.erase(filter_type);
520  }
521  
522  void DestroyAllBlockFilterIndexes()
523  {
524      g_filter_indexes.clear();
525  }