/ src / leveldb / util / bloom_test.cc
bloom_test.cc
  1  // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
  2  // Use of this source code is governed by a BSD-style license that can be
  3  // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4  
  5  #include "leveldb/filter_policy.h"
  6  
  7  #include "util/coding.h"
  8  #include "util/logging.h"
  9  #include "util/testharness.h"
 10  #include "util/testutil.h"
 11  
 12  namespace leveldb {
 13  
 14  static const int kVerbose = 1;
 15  
 16  static Slice Key(int i, char* buffer) {
 17    EncodeFixed32(buffer, i);
 18    return Slice(buffer, sizeof(uint32_t));
 19  }
 20  
 21  class BloomTest {
 22   public:
 23    BloomTest() : policy_(NewBloomFilterPolicy(10)) {}
 24  
 25    ~BloomTest() { delete policy_; }
 26  
 27    void Reset() {
 28      keys_.clear();
 29      filter_.clear();
 30    }
 31  
 32    void Add(const Slice& s) { keys_.push_back(s.ToString()); }
 33  
 34    void Build() {
 35      std::vector<Slice> key_slices;
 36      for (size_t i = 0; i < keys_.size(); i++) {
 37        key_slices.push_back(Slice(keys_[i]));
 38      }
 39      filter_.clear();
 40      policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
 41                            &filter_);
 42      keys_.clear();
 43      if (kVerbose >= 2) DumpFilter();
 44    }
 45  
 46    size_t FilterSize() const { return filter_.size(); }
 47  
 48    void DumpFilter() {
 49      fprintf(stderr, "F(");
 50      for (size_t i = 0; i + 1 < filter_.size(); i++) {
 51        const unsigned int c = static_cast<unsigned int>(filter_[i]);
 52        for (int j = 0; j < 8; j++) {
 53          fprintf(stderr, "%c", (c & (1 << j)) ? '1' : '.');
 54        }
 55      }
 56      fprintf(stderr, ")\n");
 57    }
 58  
 59    bool Matches(const Slice& s) {
 60      if (!keys_.empty()) {
 61        Build();
 62      }
 63      return policy_->KeyMayMatch(s, filter_);
 64    }
 65  
 66    double FalsePositiveRate() {
 67      char buffer[sizeof(int)];
 68      int result = 0;
 69      for (int i = 0; i < 10000; i++) {
 70        if (Matches(Key(i + 1000000000, buffer))) {
 71          result++;
 72        }
 73      }
 74      return result / 10000.0;
 75    }
 76  
 77   private:
 78    const FilterPolicy* policy_;
 79    std::string filter_;
 80    std::vector<std::string> keys_;
 81  };
 82  
 83  TEST(BloomTest, EmptyFilter) {
 84    ASSERT_TRUE(!Matches("hello"));
 85    ASSERT_TRUE(!Matches("world"));
 86  }
 87  
 88  TEST(BloomTest, Small) {
 89    Add("hello");
 90    Add("world");
 91    ASSERT_TRUE(Matches("hello"));
 92    ASSERT_TRUE(Matches("world"));
 93    ASSERT_TRUE(!Matches("x"));
 94    ASSERT_TRUE(!Matches("foo"));
 95  }
 96  
 97  static int NextLength(int length) {
 98    if (length < 10) {
 99      length += 1;
100    } else if (length < 100) {
101      length += 10;
102    } else if (length < 1000) {
103      length += 100;
104    } else {
105      length += 1000;
106    }
107    return length;
108  }
109  
110  TEST(BloomTest, VaryingLengths) {
111    char buffer[sizeof(int)];
112  
113    // Count number of filters that significantly exceed the false positive rate
114    int mediocre_filters = 0;
115    int good_filters = 0;
116  
117    for (int length = 1; length <= 10000; length = NextLength(length)) {
118      Reset();
119      for (int i = 0; i < length; i++) {
120        Add(Key(i, buffer));
121      }
122      Build();
123  
124      ASSERT_LE(FilterSize(), static_cast<size_t>((length * 10 / 8) + 40))
125          << length;
126  
127      // All added keys must match
128      for (int i = 0; i < length; i++) {
129        ASSERT_TRUE(Matches(Key(i, buffer)))
130            << "Length " << length << "; key " << i;
131      }
132  
133      // Check false positive rate
134      double rate = FalsePositiveRate();
135      if (kVerbose >= 1) {
136        fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
137                rate * 100.0, length, static_cast<int>(FilterSize()));
138      }
139      ASSERT_LE(rate, 0.02);  // Must not be over 2%
140      if (rate > 0.0125)
141        mediocre_filters++;  // Allowed, but not too often
142      else
143        good_filters++;
144    }
145    if (kVerbose >= 1) {
146      fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters,
147              mediocre_filters);
148    }
149    ASSERT_LE(mediocre_filters, good_filters / 5);
150  }
151  
152  // Different bits-per-byte
153  
154  }  // namespace leveldb
155  
156  int main(int argc, char** argv) { return leveldb::test::RunAllTests(); }