// src/index/blockfilterindex.cpp
  1  // Copyright (c) 2018-2022 The Bitcoin Core developers
  2  // Distributed under the MIT software license, see the accompanying
  3  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  4  
  5  #include <index/blockfilterindex.h>
  6  
  7  #include <blockfilter.h>
  8  #include <chain.h>
  9  #include <common/args.h>
 10  #include <dbwrapper.h>
 11  #include <flatfile.h>
 12  #include <hash.h>
 13  #include <index/base.h>
 14  #include <interfaces/chain.h>
 15  #include <interfaces/types.h>
 16  #include <logging.h>
 17  #include <serialize.h>
 18  #include <streams.h>
 19  #include <sync.h>
 20  #include <uint256.h>
 21  #include <util/check.h>
 22  #include <util/fs.h>
 23  #include <util/hasher.h>
 24  #include <util/syserror.h>
 25  
 26  #include <cerrno>
 27  #include <exception>
 28  #include <ios>
 29  #include <map>
 30  #include <optional>
 31  #include <span>
 32  #include <stdexcept>
 33  #include <string>
 34  #include <tuple>
 35  #include <utility>
 36  #include <vector>
 37  
 38  /* The index database stores three items for each block: the disk location of the encoded filter,
 39   * its dSHA256 hash, and the header. Those belonging to blocks on the active chain are indexed by
 40   * height, and those belonging to blocks that have been reorganized out of the active chain are
 41   * indexed by block hash. This ensures that filter data for any block that becomes part of the
 42   * active chain can always be retrieved, alleviating timing concerns.
 43   *
 44   * The filters themselves are stored in flat files and referenced by the LevelDB entries. This
 45   * minimizes the amount of data written to LevelDB and keeps the database values constant size. The
 46   * disk location of the next block filter to be written (represented as a FlatFilePos) is stored
 47   * under the DB_FILTER_POS key.
 48   *
 49   * Keys for the height index have the type [DB_BLOCK_HEIGHT, uint32 (BE)]. The height is represented
 50   * as big-endian so that sequential reads of filters by height are fast.
 51   * Keys for the hash index have the type [DB_BLOCK_HASH, uint256].
 52   */
//! Key prefix for filter entries indexed by block hash (blocks reorged out of the active chain).
constexpr uint8_t DB_BLOCK_HASH{'s'};
//! Key prefix for filter entries indexed by height (blocks on the active chain); height is big-endian.
constexpr uint8_t DB_BLOCK_HEIGHT{'t'};
//! Key under which the disk location (FlatFilePos) of the next filter to be written is stored.
constexpr uint8_t DB_FILTER_POS{'P'};
 56  
/** Maximum size of a fltr?????.dat file before rolling over to the next one. */
constexpr unsigned int MAX_FLTR_FILE_SIZE = 0x1000000; // 16 MiB
/** The pre-allocation chunk size for fltr?????.dat files */
constexpr unsigned int FLTR_FILE_CHUNK_SIZE = 0x100000; // 1 MiB
/** Maximum size of the cfheaders cache
 *  We have a limit to prevent a bug in filling this cache
 *  potentially turning into an OOM. At 2000 entries, this cache
 *  is big enough for a 2,000,000 length block chain, which
 *  should be enough until ~2047. */
constexpr size_t CF_HEADERS_CACHE_MAX_SZ{2000};
 66  
namespace {

/** Value stored in the index DB for each block: the dSHA256 hash of the
 *  encoded filter, the filter header, and the flat-file location of the
 *  encoded filter data. */
struct DBVal {
    uint256 hash;
    uint256 header;
    FlatFilePos pos;

    SERIALIZE_METHODS(DBVal, obj) { READWRITE(obj.hash, obj.header, obj.pos); }
};

/** Key for height-indexed entries: [DB_BLOCK_HEIGHT, uint32 (BE)].
 *  Big-endian encoding keeps keys sorted by height for fast sequential reads. */
struct DBHeightKey {
    int height;

    explicit DBHeightKey(int height_in) : height(height_in) {}

    template<typename Stream>
    void Serialize(Stream& s) const
    {
        ser_writedata8(s, DB_BLOCK_HEIGHT);
        ser_writedata32be(s, height);
    }

    template<typename Stream>
    void Unserialize(Stream& s)
    {
        // Reject keys that do not carry the expected prefix byte.
        const uint8_t prefix{ser_readdata8(s)};
        if (prefix != DB_BLOCK_HEIGHT) {
            throw std::ios_base::failure("Invalid format for block filter index DB height key");
        }
        height = ser_readdata32be(s);
    }
};

/** Key for hash-indexed entries: [DB_BLOCK_HASH, uint256]. */
struct DBHashKey {
    uint256 hash;

    explicit DBHashKey(const uint256& hash_in) : hash(hash_in) {}

    SERIALIZE_METHODS(DBHashKey, obj) {
        // On write the local prefix is always DB_BLOCK_HASH; on read the
        // deserialized byte is validated against it.
        uint8_t prefix{DB_BLOCK_HASH};
        READWRITE(prefix);
        if (prefix != DB_BLOCK_HASH) {
            throw std::ios_base::failure("Invalid format for block filter index DB hash key");
        }

        READWRITE(obj.hash);
    }
};

}; // namespace
117  
118  static std::map<BlockFilterType, BlockFilterIndex> g_filter_indexes;
119  
BlockFilterIndex::BlockFilterIndex(std::unique_ptr<interfaces::Chain> chain, BlockFilterType filter_type,
                                   size_t n_cache_size, bool f_memory, bool f_wipe)
    : BaseIndex(std::move(chain), BlockFilterTypeName(filter_type) + " block filter index")
    , m_filter_type(filter_type)
{
    // An empty name indicates an unknown/unsupported filter type.
    const std::string& filter_name = BlockFilterTypeName(filter_type);
    if (filter_name.empty()) throw std::invalid_argument("unknown filter_type");

    // Each filter type gets its own subdirectory under <datadir>/indexes/blockfilter.
    fs::path path = gArgs.GetDataDirNet() / "indexes" / "blockfilter" / fs::u8path(filter_name);
    fs::create_directories(path);

    // LevelDB metadata lives in <path>/db; the filters themselves are stored
    // in flat fltr?????.dat files in the same directory.
    m_db = std::make_unique<BaseIndex::DB>(path / "db", n_cache_size, f_memory, f_wipe);
    m_filter_fileseq = std::make_unique<FlatFileSeq>(std::move(path), "fltr", FLTR_FILE_CHUNK_SIZE);
}
134  
135  interfaces::Chain::NotifyOptions BlockFilterIndex::CustomOptions()
136  {
137      interfaces::Chain::NotifyOptions options;
138      options.connect_undo_data = true;
139      return options;
140  }
141  
// Restore index state on startup: the next flat-file write position and the
// header of the last indexed block. Returns false on signs of corruption.
bool BlockFilterIndex::CustomInit(const std::optional<interfaces::BlockRef>& block)
{
    if (!m_db->Read(DB_FILTER_POS, m_next_filter_pos)) {
        // Check that the cause of the read failure is that the key does not exist. Any other errors
        // indicate database corruption or a disk failure, and starting the index would cause
        // further corruption.
        if (m_db->Exists(DB_FILTER_POS)) {
            LogError("Cannot read current %s state; index may be corrupted",
                      GetName());
            return false;
        }

        // If the DB_FILTER_POS is not set, then initialize to the first location.
        m_next_filter_pos.nFile = 0;
        m_next_filter_pos.nPos = 0;
    }

    if (block) {
        // Re-establish m_last_header so new filters chain correctly onto the
        // best indexed block. Failure means the height-index entry is missing
        // or belongs to a different block than expected.
        auto op_last_header = ReadFilterHeader(block->height, block->hash);
        if (!op_last_header) {
            LogError("Cannot read last block filter header; index may be corrupted");
            return false;
        }
        m_last_header = *op_last_header;
    }

    return true;
}
170  
// Persist the current filter file to disk and stage the next write position
// in the DB batch. The batch itself is committed by the caller (BaseIndex).
bool BlockFilterIndex::CustomCommit(CDBBatch& batch)
{
    const FlatFilePos& pos = m_next_filter_pos;

    // Flush current filter file to disk.
    AutoFile file{m_filter_fileseq->Open(pos)};
    if (file.IsNull()) {
        LogError("Failed to open filter file %d", pos.nFile);
        return false;
    }
    if (!file.Commit()) {
        LogError("Failed to commit filter file %d", pos.nFile);
        // Best-effort close; the Commit failure is what we report.
        (void)file.fclose();
        return false;
    }
    if (file.fclose() != 0) {
        LogError("Failed to close filter file %d after commit: %s", pos.nFile, SysErrorString(errno));
        return false;
    }

    // Only record DB_FILTER_POS after the file data is durable on disk.
    batch.Write(DB_FILTER_POS, pos);
    return true;
}
194  
// Load the encoded filter at `pos` from the flat files, verifying its dSHA256
// against `hash` (the checksum stored in the DB). Returns false on any
// open/read/checksum failure.
bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const
{
    AutoFile filein{m_filter_fileseq->Open(pos, true)};
    if (filein.IsNull()) {
        return false;
    }

    // Check that the hash of the encoded_filter matches the one stored in the db.
    uint256 block_hash;
    std::vector<uint8_t> encoded_filter;
    try {
        filein >> block_hash >> encoded_filter;
        if (Hash(encoded_filter) != hash) {
            LogError("Checksum mismatch in filter decode.");
            return false;
        }
        // The checksum already validated the bytes, so skip the (expensive)
        // decode check when reconstructing the filter object.
        filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter), /*skip_decode_check=*/true);
    }
    catch (const std::exception& e) {
        LogError("Failed to deserialize block filter from disk: %s", e.what());
        return false;
    }

    return true;
}
220  
// Append the serialized filter (block hash + encoded bytes) at `pos`, rolling
// over to a new flat file when the current one would overflow. Returns the
// number of bytes written, or 0 on failure. `pos` may be advanced to a new
// file even on a failure path.
size_t BlockFilterIndex::WriteFilterToDisk(FlatFilePos& pos, const BlockFilter& filter)
{
    assert(filter.GetFilterType() == GetFilterType());

    // Total serialized size of what will be written below.
    uint64_t data_size{
        GetSerializeSize(filter.GetBlockHash()) +
        GetSerializeSize(filter.GetEncodedFilter())};

    // If writing the filter would overflow the file, flush and move to the next one.
    if (pos.nPos + data_size > MAX_FLTR_FILE_SIZE) {
        AutoFile last_file{m_filter_fileseq->Open(pos)};
        if (last_file.IsNull()) {
            LogError("Failed to open filter file %d", pos.nFile);
            return 0;
        }
        // Trim any pre-allocated space past the last written byte.
        if (!last_file.Truncate(pos.nPos)) {
            LogError("Failed to truncate filter file %d", pos.nFile);
            return 0;
        }
        if (!last_file.Commit()) {
            LogError("Failed to commit filter file %d", pos.nFile);
            (void)last_file.fclose();
            return 0;
        }
        if (last_file.fclose() != 0) {
            LogError("Failed to close filter file %d after commit: %s", pos.nFile, SysErrorString(errno));
            return 0;
        }

        pos.nFile++;
        pos.nPos = 0;
    }

    // Pre-allocate sufficient space for filter data.
    bool out_of_space;
    m_filter_fileseq->Allocate(pos, data_size, out_of_space);
    if (out_of_space) {
        LogError("out of disk space");
        return 0;
    }

    AutoFile fileout{m_filter_fileseq->Open(pos)};
    if (fileout.IsNull()) {
        LogError("Failed to open filter file %d", pos.nFile);
        return 0;
    }

    fileout << filter.GetBlockHash() << filter.GetEncodedFilter();

    if (fileout.fclose() != 0) {
        LogError("Failed to close filter file %d: %s", pos.nFile, SysErrorString(errno));
        return 0;
    }

    return data_size;
}
277  
// Read the filter header stored at `height` in the height index, returning
// std::nullopt if the entry is missing or belongs to a block other than
// `expected_block_hash` (e.g. after a reorg).
std::optional<uint256> BlockFilterIndex::ReadFilterHeader(int height, const uint256& expected_block_hash)
{
    std::pair<uint256, DBVal> read_out;
    if (!m_db->Read(DBHeightKey(height), read_out)) {
        return std::nullopt;
    }

    // The height index stores the block hash alongside the value; verify it
    // matches the block the caller expects at this height.
    if (read_out.first != expected_block_hash) {
        LogError("previous block header belongs to unexpected block %s; expected %s",
                 read_out.first.ToString(), expected_block_hash.ToString());
        return std::nullopt;
    }

    return read_out.second.header;
}
293  
294  bool BlockFilterIndex::CustomAppend(const interfaces::BlockInfo& block)
295  {
296      BlockFilter filter(m_filter_type, *Assert(block.data), *Assert(block.undo_data));
297      const uint256& header = filter.ComputeHeader(m_last_header);
298      bool res = Write(filter, block.height, header);
299      if (res) m_last_header = header; // update last header
300      return res;
301  }
302  
// Persist one filter: write its bytes to the flat files, then record the
// (block hash, DBVal) pair under the height key and advance the write cursor.
bool BlockFilterIndex::Write(const BlockFilter& filter, uint32_t block_height, const uint256& filter_header)
{
    size_t bytes_written = WriteFilterToDisk(m_next_filter_pos, filter);
    if (bytes_written == 0) return false;

    std::pair<uint256, DBVal> value;
    value.first = filter.GetBlockHash();
    value.second.hash = filter.GetHash();
    value.second.header = filter_header;
    value.second.pos = m_next_filter_pos;

    m_db->Write(DBHeightKey(block_height), value);

    // Advance past the filter just written.
    m_next_filter_pos.nPos += bytes_written;
    return true;
}
319  
// Copy the height-index entry at `height` into the hash index (staged in
// `batch`) so the filter stays reachable after the height slot is rewritten
// during a reorg. Returns false if the expected entry cannot be read.
[[nodiscard]] static bool CopyHeightIndexToHashIndex(CDBIterator& db_it, CDBBatch& batch,
                                                     const std::string& index_name, int height)
{
    DBHeightKey key(height);
    db_it.Seek(key);

    // GetKey fails (or yields a different height) if no entry exists exactly
    // at the sought key.
    if (!db_it.GetKey(key) || key.height != height) {
        LogError("unexpected key in %s: expected (%c, %d)",
                  index_name, DB_BLOCK_HEIGHT, height);
        return false;
    }

    std::pair<uint256, DBVal> value;
    if (!db_it.GetValue(value)) {
        LogError("unable to read value in %s at key (%c, %d)",
                 index_name, DB_BLOCK_HEIGHT, height);
        return false;
    }

    // Re-key the entry by block hash (value.first is the block hash).
    batch.Write(DBHashKey(value.first), std::move(value.second));
    return true;
}
342  
// Handle a block disconnection: preserve its filter under the hash index and
// roll m_last_header back to the previous block's filter header.
bool BlockFilterIndex::CustomRemove(const interfaces::BlockInfo& block)
{
    CDBBatch batch(*m_db);
    std::unique_ptr<CDBIterator> db_it(m_db->NewIterator());

    // During a reorg, we need to copy block filter that is getting disconnected from the
    // height index to the hash index so we can still find it when the height index entry
    // is overwritten.
    if (!CopyHeightIndexToHashIndex(*db_it, batch, m_name, block.height)) {
        return false;
    }

    // The latest filter position gets written in Commit by the call to the BaseIndex::Rewind.
    // But since this creates new references to the filter, the position should get updated here
    // atomically as well in case Commit fails.
    batch.Write(DB_FILTER_POS, m_next_filter_pos);
    m_db->WriteBatch(batch);

    // Update cached header to the previous block hash
    m_last_header = *Assert(ReadFilterHeader(block.height - 1, *Assert(block.prev_hash)));
    return true;
}
365  
// Fetch the DBVal for a single block, consulting the height index first and
// falling back to the hash index for reorged-out blocks.
static bool LookupOne(const CDBWrapper& db, const CBlockIndex* block_index, DBVal& result)
{
    // First check if the result is stored under the height index and the value there matches the
    // block hash. This should be the case if the block is on the active chain.
    std::pair<uint256, DBVal> read_out;
    if (!db.Read(DBHeightKey(block_index->nHeight), read_out)) {
        return false;
    }
    if (read_out.first == block_index->GetBlockHash()) {
        result = std::move(read_out.second);
        return true;
    }

    // If value at the height index corresponds to an different block, the result will be stored in
    // the hash index.
    return db.Read(DBHashKey(block_index->GetBlockHash()), result);
}
383  
384  static bool LookupRange(CDBWrapper& db, const std::string& index_name, int start_height,
385                          const CBlockIndex* stop_index, std::vector<DBVal>& results)
386  {
387      if (start_height < 0) {
388          LogError("start height (%d) is negative", start_height);
389          return false;
390      }
391      if (start_height > stop_index->nHeight) {
392          LogError("start height (%d) is greater than stop height (%d)",
393                   start_height, stop_index->nHeight);
394          return false;
395      }
396  
397      size_t results_size = static_cast<size_t>(stop_index->nHeight - start_height + 1);
398      std::vector<std::pair<uint256, DBVal>> values(results_size);
399  
400      DBHeightKey key(start_height);
401      std::unique_ptr<CDBIterator> db_it(db.NewIterator());
402      db_it->Seek(DBHeightKey(start_height));
403      for (int height = start_height; height <= stop_index->nHeight; ++height) {
404          if (!db_it->Valid() || !db_it->GetKey(key) || key.height != height) {
405              return false;
406          }
407  
408          size_t i = static_cast<size_t>(height - start_height);
409          if (!db_it->GetValue(values[i])) {
410              LogError("unable to read value in %s at key (%c, %d)",
411                       index_name, DB_BLOCK_HEIGHT, height);
412              return false;
413          }
414  
415          db_it->Next();
416      }
417  
418      results.resize(results_size);
419  
420      // Iterate backwards through block indexes collecting results in order to access the block hash
421      // of each entry in case we need to look it up in the hash index.
422      for (const CBlockIndex* block_index = stop_index;
423           block_index && block_index->nHeight >= start_height;
424           block_index = block_index->pprev) {
425          uint256 block_hash = block_index->GetBlockHash();
426  
427          size_t i = static_cast<size_t>(block_index->nHeight - start_height);
428          if (block_hash == values[i].first) {
429              results[i] = std::move(values[i].second);
430              continue;
431          }
432  
433          if (!db.Read(DBHashKey(block_hash), results[i])) {
434              LogError("unable to read value in %s at key (%c, %s)",
435                       index_name, DB_BLOCK_HASH, block_hash.ToString());
436              return false;
437          }
438      }
439  
440      return true;
441  }
442  
443  bool BlockFilterIndex::LookupFilter(const CBlockIndex* block_index, BlockFilter& filter_out) const
444  {
445      DBVal entry;
446      if (!LookupOne(*m_db, block_index, entry)) {
447          return false;
448      }
449  
450      return ReadFilterFromDisk(entry.pos, entry.hash, filter_out);
451  }
452  
// Fetch the filter header for a block, consulting and (for checkpoint heights
// only) populating the in-memory headers cache under m_cs_headers_cache.
bool BlockFilterIndex::LookupFilterHeader(const CBlockIndex* block_index, uint256& header_out)
{
    LOCK(m_cs_headers_cache);

    bool is_checkpoint{block_index->nHeight % CFCHECKPT_INTERVAL == 0};

    if (is_checkpoint) {
        // Try to find the block in the headers cache if this is a checkpoint height.
        auto header = m_headers_cache.find(block_index->GetBlockHash());
        if (header != m_headers_cache.end()) {
            header_out = header->second;
            return true;
        }
    }

    DBVal entry;
    if (!LookupOne(*m_db, block_index, entry)) {
        return false;
    }

    // Cache only up to CF_HEADERS_CACHE_MAX_SZ entries to bound memory use.
    if (is_checkpoint &&
        m_headers_cache.size() < CF_HEADERS_CACHE_MAX_SZ) {
        // Add to the headers cache if this is a checkpoint height.
        m_headers_cache.emplace(block_index->GetBlockHash(), entry.header);
    }

    header_out = entry.header;
    return true;
}
482  
483  bool BlockFilterIndex::LookupFilterRange(int start_height, const CBlockIndex* stop_index,
484                                           std::vector<BlockFilter>& filters_out) const
485  {
486      std::vector<DBVal> entries;
487      if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) {
488          return false;
489      }
490  
491      filters_out.resize(entries.size());
492      auto filter_pos_it = filters_out.begin();
493      for (const auto& entry : entries) {
494          if (!ReadFilterFromDisk(entry.pos, entry.hash, *filter_pos_it)) {
495              return false;
496          }
497          ++filter_pos_it;
498      }
499  
500      return true;
501  }
502  
503  bool BlockFilterIndex::LookupFilterHashRange(int start_height, const CBlockIndex* stop_index,
504                                               std::vector<uint256>& hashes_out) const
505  
506  {
507      std::vector<DBVal> entries;
508      if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) {
509          return false;
510      }
511  
512      hashes_out.clear();
513      hashes_out.reserve(entries.size());
514      for (const auto& entry : entries) {
515          hashes_out.push_back(entry.hash);
516      }
517      return true;
518  }
519  
520  BlockFilterIndex* GetBlockFilterIndex(BlockFilterType filter_type)
521  {
522      auto it = g_filter_indexes.find(filter_type);
523      return it != g_filter_indexes.end() ? &it->second : nullptr;
524  }
525  
526  void ForEachBlockFilterIndex(std::function<void (BlockFilterIndex&)> fn)
527  {
528      for (auto& entry : g_filter_indexes) fn(entry.second);
529  }
530  
531  bool InitBlockFilterIndex(std::function<std::unique_ptr<interfaces::Chain>()> make_chain, BlockFilterType filter_type,
532                            size_t n_cache_size, bool f_memory, bool f_wipe)
533  {
534      auto result = g_filter_indexes.emplace(std::piecewise_construct,
535                                             std::forward_as_tuple(filter_type),
536                                             std::forward_as_tuple(make_chain(), filter_type,
537                                                                   n_cache_size, f_memory, f_wipe));
538      return result.second;
539  }
540  
541  bool DestroyBlockFilterIndex(BlockFilterType filter_type)
542  {
543      return g_filter_indexes.erase(filter_type);
544  }
545  
546  void DestroyAllBlockFilterIndexes()
547  {
548      g_filter_indexes.clear();
549  }