bloom.cpp
1 // Copyright (c) 2012-present The Bitcoin Core developers 2 // Distributed under the MIT software license, see the accompanying 3 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 4 5 #include <common/bloom.h> 6 7 #include <hash.h> 8 #include <primitives/transaction.h> 9 #include <random.h> 10 #include <script/script.h> 11 #include <script/solver.h> 12 #include <span.h> 13 #include <streams.h> 14 #include <util/fastrange.h> 15 #include <util/overflow.h> 16 17 #include <algorithm> 18 #include <cmath> 19 #include <cstdlib> 20 #include <limits> 21 #include <vector> 22 23 static constexpr double LN2SQUARED = 0.4804530139182014246671025263266649717305529515945455; 24 static constexpr double LN2 = 0.6931471805599453094172321214581765680755001343602552; 25 26 CBloomFilter::CBloomFilter(const unsigned int nElements, const double nFPRate, const unsigned int nTweakIn, unsigned char nFlagsIn) : 27 /** 28 * The ideal size for a bloom filter with a given number of elements and false positive rate is: 29 * - nElements * log(fp rate) / ln(2)^2 30 * We ignore filter parameters which will create a bloom filter larger than the protocol limits 31 */ 32 vData(std::min((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM_FILTER_SIZE * 8) / 8), 33 /** 34 * The ideal number of hash functions is filter size * ln(2) / number of elements 35 * Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits 36 * See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas 37 */ 38 nHashFuncs(std::min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)), 39 nTweak(nTweakIn), 40 nFlags(nFlagsIn) 41 { 42 } 43 44 inline unsigned int CBloomFilter::Hash(unsigned int nHashNum, std::span<const unsigned char> vDataToHash) const 45 { 46 // 0xFBA4C795 chosen as it guarantees a reasonable bit difference between nHashNum values. 47 return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash) % (vData.size() * 8); 48 } 49 50 void CBloomFilter::insert(std::span<const unsigned char> vKey) 51 { 52 if (vData.empty()) // Avoid divide-by-zero (CVE-2013-5700) 53 return; 54 for (unsigned int i = 0; i < nHashFuncs; i++) 55 { 56 unsigned int nIndex = Hash(i, vKey); 57 // Sets bit nIndex of vData 58 vData[nIndex >> 3] |= (1 << (7 & nIndex)); 59 } 60 } 61 62 void CBloomFilter::insert(const COutPoint& outpoint) 63 { 64 DataStream stream{}; 65 stream << outpoint; 66 insert(MakeUCharSpan(stream)); 67 } 68 69 bool CBloomFilter::contains(std::span<const unsigned char> vKey) const 70 { 71 if (vData.empty()) // Avoid divide-by-zero (CVE-2013-5700) 72 return true; 73 for (unsigned int i = 0; i < nHashFuncs; i++) 74 { 75 unsigned int nIndex = Hash(i, vKey); 76 // Checks bit nIndex of vData 77 if (!(vData[nIndex >> 3] & (1 << (7 & nIndex)))) 78 return false; 79 } 80 return true; 81 } 82 83 bool CBloomFilter::contains(const COutPoint& outpoint) const 84 { 85 DataStream stream{}; 86 stream << outpoint; 87 return contains(MakeUCharSpan(stream)); 88 } 89 90 bool CBloomFilter::IsWithinSizeConstraints() const 91 { 92 return vData.size() <= MAX_BLOOM_FILTER_SIZE && nHashFuncs <= MAX_HASH_FUNCS; 93 } 94 95 bool CBloomFilter::IsRelevantAndUpdate(const CTransaction& tx) 96 { 97 bool fFound = false; 98 // Match if the filter contains the hash of tx 99 // for finding tx when they appear in a block 100 if (vData.empty()) // zero-size = "match-all" filter 101 return true; 102 const Txid& hash = tx.GetHash(); 103 if (contains(hash.ToUint256())) 104 fFound = true; 105 106 for (unsigned int i = 0; i < tx.vout.size(); i++) 107 { 108 const CTxOut& txout = tx.vout[i]; 109 // Match if the filter contains any arbitrary script data element in any scriptPubKey in tx 110 // If this matches, also add the specific output that was matched. 111 // This means clients don't have to update the filter themselves when a new relevant tx 112 // is discovered in order to find spending transactions, which avoids round-tripping and race conditions. 113 CScript::const_iterator pc = txout.scriptPubKey.begin(); 114 std::vector<unsigned char> data; 115 while (pc < txout.scriptPubKey.end()) 116 { 117 opcodetype opcode; 118 if (!txout.scriptPubKey.GetOp(pc, opcode, data)) 119 break; 120 if (data.size() != 0 && contains(data)) 121 { 122 fFound = true; 123 if ((nFlags & BLOOM_UPDATE_MASK) == BLOOM_UPDATE_ALL) 124 insert(COutPoint(hash, i)); 125 else if ((nFlags & BLOOM_UPDATE_MASK) == BLOOM_UPDATE_P2PUBKEY_ONLY) 126 { 127 std::vector<std::vector<unsigned char> > vSolutions; 128 TxoutType type = Solver(txout.scriptPubKey, vSolutions); 129 if (type == TxoutType::PUBKEY || type == TxoutType::MULTISIG) { 130 insert(COutPoint(hash, i)); 131 } 132 } 133 break; 134 } 135 } 136 } 137 138 if (fFound) 139 return true; 140 141 for (const CTxIn& txin : tx.vin) 142 { 143 // Match if the filter contains an outpoint tx spends 144 if (contains(txin.prevout)) 145 return true; 146 147 // Match if the filter contains any arbitrary script data element in any scriptSig in tx 148 CScript::const_iterator pc = txin.scriptSig.begin(); 149 std::vector<unsigned char> data; 150 while (pc < txin.scriptSig.end()) 151 { 152 opcodetype opcode; 153 if (!txin.scriptSig.GetOp(pc, opcode, data)) 154 break; 155 if (data.size() != 0 && contains(data)) 156 return true; 157 } 158 } 159 160 return false; 161 } 162 163 CRollingBloomFilter::CRollingBloomFilter(const unsigned int nElements, const double fpRate) 164 { 165 double logFpRate = log(fpRate); 166 /* The optimal number of hash functions is log(fpRate) / log(0.5), but 167 * restrict it to the range 1-50. */ 168 nHashFuncs = std::max(1, std::min((int)round(logFpRate / log(0.5)), 50)); 169 /* In this rolling bloom filter, we'll store between 2 and 3 generations of nElements / 2 entries. */ 170 nEntriesPerGeneration = CeilDiv(nElements, 2u); 171 uint32_t nMaxElements = nEntriesPerGeneration * 3; 172 /* The maximum fpRate = pow(1.0 - exp(-nHashFuncs * nMaxElements / nFilterBits), nHashFuncs) 173 * => pow(fpRate, 1.0 / nHashFuncs) = 1.0 - exp(-nHashFuncs * nMaxElements / nFilterBits) 174 * => 1.0 - pow(fpRate, 1.0 / nHashFuncs) = exp(-nHashFuncs * nMaxElements / nFilterBits) 175 * => log(1.0 - pow(fpRate, 1.0 / nHashFuncs)) = -nHashFuncs * nMaxElements / nFilterBits 176 * => nFilterBits = -nHashFuncs * nMaxElements / log(1.0 - pow(fpRate, 1.0 / nHashFuncs)) 177 * => nFilterBits = -nHashFuncs * nMaxElements / log(1.0 - exp(logFpRate / nHashFuncs)) 178 */ 179 uint32_t nFilterBits = (uint32_t)ceil(-1.0 * nHashFuncs * nMaxElements / log(1.0 - exp(logFpRate / nHashFuncs))); 180 data.clear(); 181 /* For each data element we need to store 2 bits. If both bits are 0, the 182 * bit is treated as unset. If the bits are (01), (10), or (11), the bit is 183 * treated as set in generation 1, 2, or 3 respectively. 184 * These bits are stored in separate integers: position P corresponds to bit 185 * (P & 63) of the integers data[(P >> 6) * 2] and data[(P >> 6) * 2 + 1]. */ 186 data.resize(CeilDiv(nFilterBits, 64u) << 1); 187 reset(); 188 } 189 190 /* Similar to CBloomFilter::Hash */ 191 static inline uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak, std::span<const unsigned char> vDataToHash) 192 { 193 return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash); 194 } 195 196 void CRollingBloomFilter::insert(std::span<const unsigned char> vKey) 197 { 198 if (nEntriesThisGeneration == nEntriesPerGeneration) { 199 nEntriesThisGeneration = 0; 200 nGeneration++; 201 if (nGeneration == 4) { 202 nGeneration = 1; 203 } 204 uint64_t nGenerationMask1 = 0 - (uint64_t)(nGeneration & 1); 205 uint64_t nGenerationMask2 = 0 - (uint64_t)(nGeneration >> 1); 206 /* Wipe old entries that used this generation number. */ 207 for (uint32_t p = 0; p < data.size(); p += 2) { 208 uint64_t p1 = data[p], p2 = data[p + 1]; 209 uint64_t mask = (p1 ^ nGenerationMask1) | (p2 ^ nGenerationMask2); 210 data[p] = p1 & mask; 211 data[p + 1] = p2 & mask; 212 } 213 } 214 nEntriesThisGeneration++; 215 216 for (int n = 0; n < nHashFuncs; n++) { 217 uint32_t h = RollingBloomHash(n, nTweak, vKey); 218 int bit = h & 0x3F; 219 /* FastMod works with the upper bits of h, so it is safe to ignore that the lower bits of h are already used for bit. */ 220 uint32_t pos = FastRange32(h, data.size()); 221 /* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */ 222 data[pos & ~1U] = (data[pos & ~1U] & ~(uint64_t{1} << bit)) | (uint64_t(nGeneration & 1)) << bit; 223 data[pos | 1] = (data[pos | 1] & ~(uint64_t{1} << bit)) | (uint64_t(nGeneration >> 1)) << bit; 224 } 225 } 226 227 bool CRollingBloomFilter::contains(std::span<const unsigned char> vKey) const 228 { 229 for (int n = 0; n < nHashFuncs; n++) { 230 uint32_t h = RollingBloomHash(n, nTweak, vKey); 231 int bit = h & 0x3F; 232 uint32_t pos = FastRange32(h, data.size()); 233 /* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */ 234 if (!(((data[pos & ~1U] | data[pos | 1]) >> bit) & 1)) { 235 return false; 236 } 237 } 238 return true; 239 } 240 241 void CRollingBloomFilter::reset() 242 { 243 nTweak = FastRandomContext().rand<unsigned int>(); 244 nEntriesThisGeneration = 0; 245 nGeneration = 1; 246 std::fill(data.begin(), data.end(), 0); 247 }