/ src / leveldb / util / bloom.cc
bloom.cc
 1  // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
 2  // Use of this source code is governed by a BSD-style license that can be
 3  // found in the LICENSE file. See the AUTHORS file for names of contributors.
 4  
 5  #include "leveldb/filter_policy.h"
 6  
 7  #include "leveldb/slice.h"
 8  #include "util/hash.h"
 9  
10  namespace leveldb {
11  
12  namespace {
13  static uint32_t BloomHash(const Slice& key) {
14    return Hash(key.data(), key.size(), 0xbc9f1d34);
15  }
16  
17  class BloomFilterPolicy : public FilterPolicy {
18   public:
19    explicit BloomFilterPolicy(int bits_per_key) : bits_per_key_(bits_per_key) {
20      // We intentionally round down to reduce probing cost a little bit
21      k_ = static_cast<size_t>(bits_per_key * 0.69);  // 0.69 =~ ln(2)
22      if (k_ < 1) k_ = 1;
23      if (k_ > 30) k_ = 30;
24    }
25  
26    const char* Name() const override { return "leveldb.BuiltinBloomFilter2"; }
27  
28    void CreateFilter(const Slice* keys, int n, std::string* dst) const override {
29      // Compute bloom filter size (in both bits and bytes)
30      size_t bits = n * bits_per_key_;
31  
32      // For small n, we can see a very high false positive rate.  Fix it
33      // by enforcing a minimum bloom filter length.
34      if (bits < 64) bits = 64;
35  
36      size_t bytes = (bits + 7) / 8;
37      bits = bytes * 8;
38  
39      const size_t init_size = dst->size();
40      dst->resize(init_size + bytes, 0);
41      dst->push_back(static_cast<char>(k_));  // Remember # of probes in filter
42      char* array = &(*dst)[init_size];
43      for (int i = 0; i < n; i++) {
44        // Use double-hashing to generate a sequence of hash values.
45        // See analysis in [Kirsch,Mitzenmacher 2006].
46        uint32_t h = BloomHash(keys[i]);
47        const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
48        for (size_t j = 0; j < k_; j++) {
49          const uint32_t bitpos = h % bits;
50          array[bitpos / 8] |= (1 << (bitpos % 8));
51          h += delta;
52        }
53      }
54    }
55  
56    bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override {
57      const size_t len = bloom_filter.size();
58      if (len < 2) return false;
59  
60      const char* array = bloom_filter.data();
61      const size_t bits = (len - 1) * 8;
62  
63      // Use the encoded k so that we can read filters generated by
64      // bloom filters created using different parameters.
65      const size_t k = array[len - 1];
66      if (k > 30) {
67        // Reserved for potentially new encodings for short bloom filters.
68        // Consider it a match.
69        return true;
70      }
71  
72      uint32_t h = BloomHash(key);
73      const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
74      for (size_t j = 0; j < k; j++) {
75        const uint32_t bitpos = h % bits;
76        if ((array[bitpos / 8] & (1 << (bitpos % 8))) == 0) return false;
77        h += delta;
78      }
79      return true;
80    }
81  
82   private:
83    size_t bits_per_key_;
84    size_t k_;
85  };
86  }  // namespace
87  
88  const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
89    return new BloomFilterPolicy(bits_per_key);
90  }
91  
92  }  // namespace leveldb