/ src / index / blockfilterindex.cpp
blockfilterindex.cpp
  1  // Copyright (c) 2018-present The Bitcoin Core developers
  2  // Distributed under the MIT software license, see the accompanying
  3  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  4  
  5  #include <index/blockfilterindex.h>
  6  
  7  #include <blockfilter.h>
  8  #include <chain.h>
  9  #include <common/args.h>
 10  #include <dbwrapper.h>
 11  #include <flatfile.h>
 12  #include <hash.h>
 13  #include <index/base.h>
 14  #include <index/db_key.h>
 15  #include <interfaces/chain.h>
 16  #include <interfaces/types.h>
 17  #include <serialize.h>
 18  #include <streams.h>
 19  #include <sync.h>
 20  #include <uint256.h>
 21  #include <util/check.h>
 22  #include <util/fs.h>
 23  #include <util/hasher.h>
 24  #include <util/log.h>
 25  #include <util/syserror.h>
 26  
 27  #include <cerrno>
 28  #include <exception>
 29  #include <map>
 30  #include <optional>
 31  #include <span>
 32  #include <stdexcept>
 33  #include <string>
 34  #include <tuple>
 35  #include <utility>
 36  #include <vector>
 37  
 38  /* The index database stores three items for each block: the disk location of the encoded filter,
 39   * its dSHA256 hash, and the header. Those belonging to blocks on the active chain are indexed by
 40   * height, and those belonging to blocks that have been reorganized out of the active chain are
 41   * indexed by block hash. This ensures that filter data for any block that becomes part of the
 42   * active chain can always be retrieved, alleviating timing concerns.
 43   *
 44   * The filters themselves are stored in flat files and referenced by the LevelDB entries. This
 45   * minimizes the amount of data written to LevelDB and keeps the database values constant size. The
 46   * disk location of the next block filter to be written (represented as a FlatFilePos) is stored
 47   * under the DB_FILTER_POS key.
 48   *
 49   * The logic for keys is shared with other indexes, see index/db_key.h.
 50   */
 51  constexpr uint8_t DB_FILTER_POS{'P'};
 52  
 53  constexpr unsigned int MAX_FLTR_FILE_SIZE = 0x1000000; // 16 MiB
 54  /** The pre-allocation chunk size for fltr?????.dat files */
 55  constexpr unsigned int FLTR_FILE_CHUNK_SIZE = 0x100000; // 1 MiB
 56  /** Maximum size of the cfheaders cache
 57   *  We have a limit to prevent a bug in filling this cache
 58   *  potentially turning into an OOM. At 2000 entries, this cache
 59   *  is big enough for a 2,000,000 length block chain, which
 60   *  we should be enough until ~2047. */
 61  constexpr size_t CF_HEADERS_CACHE_MAX_SZ{2000};
 62  
 63  namespace {
 64  
 65  struct DBVal {
 66      uint256 hash;
 67      uint256 header;
 68      FlatFilePos pos;
 69  
 70      SERIALIZE_METHODS(DBVal, obj) { READWRITE(obj.hash, obj.header, obj.pos); }
 71  };
 72  
 73  }; // namespace
 74  
 75  static std::map<BlockFilterType, BlockFilterIndex> g_filter_indexes;
 76  
 77  BlockFilterIndex::BlockFilterIndex(std::unique_ptr<interfaces::Chain> chain, BlockFilterType filter_type,
 78                                     size_t n_cache_size, bool f_memory, bool f_wipe)
 79      : BaseIndex(std::move(chain), BlockFilterTypeName(filter_type) + " block filter index")
 80      , m_filter_type(filter_type)
 81  {
 82      const std::string& filter_name = BlockFilterTypeName(filter_type);
 83      if (filter_name.empty()) throw std::invalid_argument("unknown filter_type");
 84  
 85      fs::path path = gArgs.GetDataDirNet() / "indexes" / "blockfilter" / fs::u8path(filter_name);
 86      fs::create_directories(path);
 87  
 88      m_db = std::make_unique<BaseIndex::DB>(path / "db", n_cache_size, f_memory, f_wipe);
 89      m_filter_fileseq = std::make_unique<FlatFileSeq>(std::move(path), "fltr", FLTR_FILE_CHUNK_SIZE);
 90  }
 91  
 92  interfaces::Chain::NotifyOptions BlockFilterIndex::CustomOptions()
 93  {
 94      interfaces::Chain::NotifyOptions options;
 95      options.connect_undo_data = true;
 96      return options;
 97  }
 98  
 99  bool BlockFilterIndex::CustomInit(const std::optional<interfaces::BlockRef>& block)
100  {
101      if (!m_db->Read(DB_FILTER_POS, m_next_filter_pos)) {
102          // Check that the cause of the read failure is that the key does not exist. Any other errors
103          // indicate database corruption or a disk failure, and starting the index would cause
104          // further corruption.
105          if (m_db->Exists(DB_FILTER_POS)) {
106              LogError("Cannot read current %s state; index may be corrupted",
107                        GetName());
108              return false;
109          }
110  
111          // If the DB_FILTER_POS is not set, then initialize to the first location.
112          m_next_filter_pos.nFile = 0;
113          m_next_filter_pos.nPos = 0;
114      }
115  
116      if (block) {
117          auto op_last_header = ReadFilterHeader(block->height, block->hash);
118          if (!op_last_header) {
119              LogError("Cannot read last block filter header; index may be corrupted");
120              return false;
121          }
122          m_last_header = *op_last_header;
123      }
124  
125      return true;
126  }
127  
128  bool BlockFilterIndex::CustomCommit(CDBBatch& batch)
129  {
130      const FlatFilePos& pos = m_next_filter_pos;
131  
132      // Flush current filter file to disk.
133      AutoFile file{m_filter_fileseq->Open(pos)};
134      if (file.IsNull()) {
135          LogError("Failed to open filter file %d", pos.nFile);
136          return false;
137      }
138      if (!file.Commit()) {
139          LogError("Failed to commit filter file %d", pos.nFile);
140          (void)file.fclose();
141          return false;
142      }
143      if (file.fclose() != 0) {
144          LogError("Failed to close filter file %d after commit: %s", pos.nFile, SysErrorString(errno));
145          return false;
146      }
147  
148      batch.Write(DB_FILTER_POS, pos);
149      return true;
150  }
151  
152  bool BlockFilterIndex::ReadFilterFromDisk(const FlatFilePos& pos, const uint256& hash, BlockFilter& filter) const
153  {
154      AutoFile filein{m_filter_fileseq->Open(pos, true)};
155      if (filein.IsNull()) {
156          return false;
157      }
158  
159      // Check that the hash of the encoded_filter matches the one stored in the db.
160      uint256 block_hash;
161      std::vector<uint8_t> encoded_filter;
162      try {
163          filein >> block_hash >> encoded_filter;
164          if (Hash(encoded_filter) != hash) {
165              LogError("Checksum mismatch in filter decode.");
166              return false;
167          }
168          filter = BlockFilter(GetFilterType(), block_hash, std::move(encoded_filter), /*skip_decode_check=*/true);
169      }
170      catch (const std::exception& e) {
171          LogError("Failed to deserialize block filter from disk: %s", e.what());
172          return false;
173      }
174  
175      return true;
176  }
177  
178  size_t BlockFilterIndex::WriteFilterToDisk(FlatFilePos& pos, const BlockFilter& filter)
179  {
180      assert(filter.GetFilterType() == GetFilterType());
181  
182      uint64_t data_size{
183          GetSerializeSize(filter.GetBlockHash()) +
184          GetSerializeSize(filter.GetEncodedFilter())};
185  
186      // If writing the filter would overflow the file, flush and move to the next one.
187      if (pos.nPos + data_size > MAX_FLTR_FILE_SIZE) {
188          AutoFile last_file{m_filter_fileseq->Open(pos)};
189          if (last_file.IsNull()) {
190              LogError("Failed to open filter file %d", pos.nFile);
191              return 0;
192          }
193          if (!last_file.Truncate(pos.nPos)) {
194              LogError("Failed to truncate filter file %d", pos.nFile);
195              return 0;
196          }
197          if (!last_file.Commit()) {
198              LogError("Failed to commit filter file %d", pos.nFile);
199              (void)last_file.fclose();
200              return 0;
201          }
202          if (last_file.fclose() != 0) {
203              LogError("Failed to close filter file %d after commit: %s", pos.nFile, SysErrorString(errno));
204              return 0;
205          }
206  
207          pos.nFile++;
208          pos.nPos = 0;
209      }
210  
211      // Pre-allocate sufficient space for filter data.
212      bool out_of_space;
213      m_filter_fileseq->Allocate(pos, data_size, out_of_space);
214      if (out_of_space) {
215          LogError("out of disk space");
216          return 0;
217      }
218  
219      AutoFile fileout{m_filter_fileseq->Open(pos)};
220      if (fileout.IsNull()) {
221          LogError("Failed to open filter file %d", pos.nFile);
222          return 0;
223      }
224  
225      fileout << filter.GetBlockHash() << filter.GetEncodedFilter();
226  
227      if (fileout.fclose() != 0) {
228          LogError("Failed to close filter file %d: %s", pos.nFile, SysErrorString(errno));
229          return 0;
230      }
231  
232      return data_size;
233  }
234  
235  std::optional<uint256> BlockFilterIndex::ReadFilterHeader(int height, const uint256& expected_block_hash)
236  {
237      std::pair<uint256, DBVal> read_out;
238      if (!m_db->Read(index_util::DBHeightKey(height), read_out)) {
239          return std::nullopt;
240      }
241  
242      if (read_out.first != expected_block_hash) {
243          LogError("previous block header belongs to unexpected block %s; expected %s",
244                   read_out.first.ToString(), expected_block_hash.ToString());
245          return std::nullopt;
246      }
247  
248      return read_out.second.header;
249  }
250  
251  bool BlockFilterIndex::CustomAppend(const interfaces::BlockInfo& block)
252  {
253      BlockFilter filter(m_filter_type, *Assert(block.data), *Assert(block.undo_data));
254      const uint256& header = filter.ComputeHeader(m_last_header);
255      bool res = Write(filter, block.height, header);
256      if (res) m_last_header = header; // update last header
257      return res;
258  }
259  
260  bool BlockFilterIndex::Write(const BlockFilter& filter, uint32_t block_height, const uint256& filter_header)
261  {
262      size_t bytes_written = WriteFilterToDisk(m_next_filter_pos, filter);
263      if (bytes_written == 0) return false;
264  
265      std::pair<uint256, DBVal> value;
266      value.first = filter.GetBlockHash();
267      value.second.hash = filter.GetHash();
268      value.second.header = filter_header;
269      value.second.pos = m_next_filter_pos;
270  
271      m_db->Write(index_util::DBHeightKey(block_height), value);
272  
273      m_next_filter_pos.nPos += bytes_written;
274      return true;
275  }
276  
277  bool BlockFilterIndex::CustomRemove(const interfaces::BlockInfo& block)
278  {
279      CDBBatch batch(*m_db);
280      std::unique_ptr<CDBIterator> db_it(m_db->NewIterator());
281  
282      // During a reorg, we need to copy block filter that is getting disconnected from the
283      // height index to the hash index so we can still find it when the height index entry
284      // is overwritten.
285      if (!index_util::CopyHeightIndexToHashIndex<DBVal>(*db_it, batch, m_name, block.height)) {
286          return false;
287      }
288  
289      // The latest filter position gets written in Commit by the call to the BaseIndex::Rewind.
290      // But since this creates new references to the filter, the position should get updated here
291      // atomically as well in case Commit fails.
292      batch.Write(DB_FILTER_POS, m_next_filter_pos);
293      m_db->WriteBatch(batch);
294  
295      // Update cached header to the previous block hash
296      m_last_header = *Assert(ReadFilterHeader(block.height - 1, *Assert(block.prev_hash)));
297      return true;
298  }
299  
300  static bool LookupRange(CDBWrapper& db, const std::string& index_name, int start_height,
301                          const CBlockIndex* stop_index, std::vector<DBVal>& results)
302  {
303      if (start_height < 0) {
304          LogError("start height (%d) is negative", start_height);
305          return false;
306      }
307      if (start_height > stop_index->nHeight) {
308          LogError("start height (%d) is greater than stop height (%d)",
309                   start_height, stop_index->nHeight);
310          return false;
311      }
312  
313      size_t results_size = static_cast<size_t>(stop_index->nHeight - start_height + 1);
314      std::vector<std::pair<uint256, DBVal>> values(results_size);
315  
316      index_util::DBHeightKey key(start_height);
317      std::unique_ptr<CDBIterator> db_it(db.NewIterator());
318      db_it->Seek(index_util::DBHeightKey(start_height));
319      for (int height = start_height; height <= stop_index->nHeight; ++height) {
320          if (!db_it->Valid() || !db_it->GetKey(key) || key.height != height) {
321              return false;
322          }
323  
324          size_t i = static_cast<size_t>(height - start_height);
325          if (!db_it->GetValue(values[i])) {
326              LogError("unable to read value in %s at key (%c, %d)",
327                       index_name, index_util::DB_BLOCK_HEIGHT, height);
328              return false;
329          }
330  
331          db_it->Next();
332      }
333  
334      results.resize(results_size);
335  
336      // Iterate backwards through block indexes collecting results in order to access the block hash
337      // of each entry in case we need to look it up in the hash index.
338      for (const CBlockIndex* block_index = stop_index;
339           block_index && block_index->nHeight >= start_height;
340           block_index = block_index->pprev) {
341          uint256 block_hash = block_index->GetBlockHash();
342  
343          size_t i = static_cast<size_t>(block_index->nHeight - start_height);
344          if (block_hash == values[i].first) {
345              results[i] = std::move(values[i].second);
346              continue;
347          }
348  
349          if (!db.Read(index_util::DBHashKey(block_hash), results[i])) {
350              LogError("unable to read value in %s at key (%c, %s)",
351                       index_name, index_util::DB_BLOCK_HASH, block_hash.ToString());
352              return false;
353          }
354      }
355  
356      return true;
357  }
358  
359  bool BlockFilterIndex::LookupFilter(const CBlockIndex* block_index, BlockFilter& filter_out) const
360  {
361      DBVal entry;
362      if (!index_util::LookUpOne(*m_db, {block_index->GetBlockHash(), block_index->nHeight}, entry)) {
363          return false;
364      }
365  
366      return ReadFilterFromDisk(entry.pos, entry.hash, filter_out);
367  }
368  
369  bool BlockFilterIndex::LookupFilterHeader(const CBlockIndex* block_index, uint256& header_out)
370  {
371      LOCK(m_cs_headers_cache);
372  
373      bool is_checkpoint{block_index->nHeight % CFCHECKPT_INTERVAL == 0};
374  
375      if (is_checkpoint) {
376          // Try to find the block in the headers cache if this is a checkpoint height.
377          auto header = m_headers_cache.find(block_index->GetBlockHash());
378          if (header != m_headers_cache.end()) {
379              header_out = header->second;
380              return true;
381          }
382      }
383  
384      DBVal entry;
385      if (!index_util::LookUpOne(*m_db, {block_index->GetBlockHash(), block_index->nHeight}, entry)) {
386          return false;
387      }
388  
389      if (is_checkpoint &&
390          m_headers_cache.size() < CF_HEADERS_CACHE_MAX_SZ) {
391          // Add to the headers cache if this is a checkpoint height.
392          m_headers_cache.emplace(block_index->GetBlockHash(), entry.header);
393      }
394  
395      header_out = entry.header;
396      return true;
397  }
398  
399  bool BlockFilterIndex::LookupFilterRange(int start_height, const CBlockIndex* stop_index,
400                                           std::vector<BlockFilter>& filters_out) const
401  {
402      std::vector<DBVal> entries;
403      if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) {
404          return false;
405      }
406  
407      filters_out.resize(entries.size());
408      auto filter_pos_it = filters_out.begin();
409      for (const auto& entry : entries) {
410          if (!ReadFilterFromDisk(entry.pos, entry.hash, *filter_pos_it)) {
411              return false;
412          }
413          ++filter_pos_it;
414      }
415  
416      return true;
417  }
418  
419  bool BlockFilterIndex::LookupFilterHashRange(int start_height, const CBlockIndex* stop_index,
420                                               std::vector<uint256>& hashes_out) const
421  
422  {
423      std::vector<DBVal> entries;
424      if (!LookupRange(*m_db, m_name, start_height, stop_index, entries)) {
425          return false;
426      }
427  
428      hashes_out.clear();
429      hashes_out.reserve(entries.size());
430      for (const auto& entry : entries) {
431          hashes_out.push_back(entry.hash);
432      }
433      return true;
434  }
435  
436  BlockFilterIndex* GetBlockFilterIndex(BlockFilterType filter_type)
437  {
438      auto it = g_filter_indexes.find(filter_type);
439      return it != g_filter_indexes.end() ? &it->second : nullptr;
440  }
441  
442  void ForEachBlockFilterIndex(std::function<void (BlockFilterIndex&)> fn)
443  {
444      for (auto& entry : g_filter_indexes) fn(entry.second);
445  }
446  
447  bool InitBlockFilterIndex(std::function<std::unique_ptr<interfaces::Chain>()> make_chain, BlockFilterType filter_type,
448                            size_t n_cache_size, bool f_memory, bool f_wipe)
449  {
450      auto result = g_filter_indexes.emplace(std::piecewise_construct,
451                                             std::forward_as_tuple(filter_type),
452                                             std::forward_as_tuple(make_chain(), filter_type,
453                                                                   n_cache_size, f_memory, f_wipe));
454      return result.second;
455  }
456  
457  bool DestroyBlockFilterIndex(BlockFilterType filter_type)
458  {
459      return g_filter_indexes.erase(filter_type);
460  }
461  
462  void DestroyAllBlockFilterIndexes()
463  {
464      g_filter_indexes.clear();
465  }