/ src / common / bloom.h
bloom.h
  1  // Copyright (c) 2012-present The Bitcoin Core developers
  2  // Distributed under the MIT software license, see the accompanying
  3  // file COPYING or http://www.opensource.org/licenses/mit-license.php.
  4  
  5  #ifndef BITCOIN_COMMON_BLOOM_H
  6  #define BITCOIN_COMMON_BLOOM_H
  7  
  8  #include <serialize.h>
  9  #include <span.h>
 10  
 11  #include <vector>
 12  
 13  class COutPoint;
 14  class CTransaction;
 15  
 16  //! 20,000 items with fp rate < 0.1% or 10,000 items and <0.0001%
 17  static constexpr unsigned int MAX_BLOOM_FILTER_SIZE = 36000; // bytes
 18  static constexpr unsigned int MAX_HASH_FUNCS = 50;
 19  
 20  /**
 21   * First two bits of nFlags control how much IsRelevantAndUpdate actually updates
 22   * The remaining bits are reserved
 23   */
 24  enum bloomflags
 25  {
 26      BLOOM_UPDATE_NONE = 0,
 27      BLOOM_UPDATE_ALL = 1,
 28      // Only adds outpoints to the filter if the output is a pay-to-pubkey/pay-to-multisig script
 29      BLOOM_UPDATE_P2PUBKEY_ONLY = 2,
 30      BLOOM_UPDATE_MASK = 3,
 31  };
 32  
 33  /**
 34   * BloomFilter is a probabilistic filter which SPV clients provide
 35   * so that we can filter the transactions we send them.
 36   *
 37   * This allows for significantly more efficient transaction and block downloads.
 38   *
 39   * Because bloom filters are probabilistic, a SPV node can increase the false-
 40   * positive rate, making us send it transactions which aren't actually its,
 41   * allowing clients to trade more bandwidth for more privacy by obfuscating which
 42   * keys are controlled by them.
 43   */
 44  class CBloomFilter
 45  {
 46  private:
 47      std::vector<unsigned char> vData;
 48      unsigned int nHashFuncs;
 49      unsigned int nTweak;
 50      unsigned char nFlags;
 51  
 52      unsigned int Hash(unsigned int nHashNum, std::span<const unsigned char> vDataToHash) const;
 53  
 54  public:
 55      /**
 56       * Creates a new bloom filter which will provide the given fp rate when filled with the given number of elements
 57       * Note that if the given parameters will result in a filter outside the bounds of the protocol limits,
 58       * the filter created will be as close to the given parameters as possible within the protocol limits.
 59       * This will apply if nFPRate is very low or nElements is unreasonably high.
 60       * nTweak is a constant which is added to the seed value passed to the hash function
 61       * It should generally always be a random value (and is largely only exposed for unit testing)
 62       * nFlags should be one of the BLOOM_UPDATE_* enums (not _MASK)
 63       */
 64      CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak, unsigned char nFlagsIn);
 65      CBloomFilter() : nHashFuncs(0), nTweak(0), nFlags(0) {}
 66  
 67      SERIALIZE_METHODS(CBloomFilter, obj) { READWRITE(obj.vData, obj.nHashFuncs, obj.nTweak, obj.nFlags); }
 68  
 69      void insert(std::span<const unsigned char> vKey);
 70      void insert(const COutPoint& outpoint);
 71  
 72      bool contains(std::span<const unsigned char> vKey) const;
 73      bool contains(const COutPoint& outpoint) const;
 74  
 75      //! True if the size is <= MAX_BLOOM_FILTER_SIZE and the number of hash functions is <= MAX_HASH_FUNCS
 76      //! (catch a filter which was just deserialized which was too big)
 77      bool IsWithinSizeConstraints() const;
 78  
 79      //! Also adds any outputs which match the filter to the filter (to match their spending txes)
 80      bool IsRelevantAndUpdate(const CTransaction& tx);
 81  };
 82  
 83  /**
 84   * RollingBloomFilter is a probabilistic "keep track of most recently inserted" set.
 85   * Construct it with the number of items to keep track of, and a false-positive
 86   * rate. Unlike CBloomFilter, by default nTweak is set to a cryptographically
 87   * secure random value for you. Similarly rather than clear() the method
 88   * reset() is provided, which also changes nTweak to decrease the impact of
 89   * false-positives.
 90   *
 91   * contains(item) will always return true if item was one of the last N to 1.5*N
 92   * insert()'ed ... but may also return true for items that were not inserted.
 93   *
 94   * It needs around 1.8 bytes per element per factor 0.1 of false positive rate.
 95   * For example, if we want 1000 elements, we'd need:
 96   * - ~1800 bytes for a false positive rate of 0.1
 97   * - ~3600 bytes for a false positive rate of 0.01
 98   * - ~5400 bytes for a false positive rate of 0.001
 99   *
100   * If we make these simplifying assumptions:
101   * - logFpRate / log(0.5) doesn't get rounded or clamped in the nHashFuncs calculation
102   * - nElements is even, so that nEntriesPerGeneration == nElements / 2
103   *
104   * Then we get a more accurate estimate for filter bytes:
105   *
106   *     3/(log(256)*log(2)) * log(1/fpRate) * nElements
107   */
108  class CRollingBloomFilter
109  {
110  public:
111      CRollingBloomFilter(unsigned int nElements, double nFPRate);
112  
113      void insert(std::span<const unsigned char> vKey);
114      bool contains(std::span<const unsigned char> vKey) const;
115  
116      void reset();
117  
118  private:
119      int nEntriesPerGeneration;
120      int nEntriesThisGeneration;
121      int nGeneration;
122      std::vector<uint64_t> data;
123      unsigned int nTweak;
124      int nHashFuncs;
125  };
126  
127  #endif // BITCOIN_COMMON_BLOOM_H