bloom_test.cc
1 // Copyright (c) 2012 The LevelDB Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. See the AUTHORS file for names of contributors. 4 5 #include "leveldb/filter_policy.h" 6 7 #include "util/coding.h" 8 #include "util/logging.h" 9 #include "util/testharness.h" 10 #include "util/testutil.h" 11 12 namespace leveldb { 13 14 static const int kVerbose = 1; 15 16 static Slice Key(int i, char* buffer) { 17 EncodeFixed32(buffer, i); 18 return Slice(buffer, sizeof(uint32_t)); 19 } 20 21 class BloomTest { 22 public: 23 BloomTest() : policy_(NewBloomFilterPolicy(10)) {} 24 25 ~BloomTest() { delete policy_; } 26 27 void Reset() { 28 keys_.clear(); 29 filter_.clear(); 30 } 31 32 void Add(const Slice& s) { keys_.push_back(s.ToString()); } 33 34 void Build() { 35 std::vector<Slice> key_slices; 36 for (size_t i = 0; i < keys_.size(); i++) { 37 key_slices.push_back(Slice(keys_[i])); 38 } 39 filter_.clear(); 40 policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()), 41 &filter_); 42 keys_.clear(); 43 if (kVerbose >= 2) DumpFilter(); 44 } 45 46 size_t FilterSize() const { return filter_.size(); } 47 48 void DumpFilter() { 49 fprintf(stderr, "F("); 50 for (size_t i = 0; i + 1 < filter_.size(); i++) { 51 const unsigned int c = static_cast<unsigned int>(filter_[i]); 52 for (int j = 0; j < 8; j++) { 53 fprintf(stderr, "%c", (c & (1 << j)) ? '1' : '.'); 54 } 55 } 56 fprintf(stderr, ")\n"); 57 } 58 59 bool Matches(const Slice& s) { 60 if (!keys_.empty()) { 61 Build(); 62 } 63 return policy_->KeyMayMatch(s, filter_); 64 } 65 66 double FalsePositiveRate() { 67 char buffer[sizeof(int)]; 68 int result = 0; 69 for (int i = 0; i < 10000; i++) { 70 if (Matches(Key(i + 1000000000, buffer))) { 71 result++; 72 } 73 } 74 return result / 10000.0; 75 } 76 77 private: 78 const FilterPolicy* policy_; 79 std::string filter_; 80 std::vector<std::string> keys_; 81 }; 82 83 TEST(BloomTest, EmptyFilter) { 84 ASSERT_TRUE(!Matches("hello")); 85 ASSERT_TRUE(!Matches("world")); 86 } 87 88 TEST(BloomTest, Small) { 89 Add("hello"); 90 Add("world"); 91 ASSERT_TRUE(Matches("hello")); 92 ASSERT_TRUE(Matches("world")); 93 ASSERT_TRUE(!Matches("x")); 94 ASSERT_TRUE(!Matches("foo")); 95 } 96 97 static int NextLength(int length) { 98 if (length < 10) { 99 length += 1; 100 } else if (length < 100) { 101 length += 10; 102 } else if (length < 1000) { 103 length += 100; 104 } else { 105 length += 1000; 106 } 107 return length; 108 } 109 110 TEST(BloomTest, VaryingLengths) { 111 char buffer[sizeof(int)]; 112 113 // Count number of filters that significantly exceed the false positive rate 114 int mediocre_filters = 0; 115 int good_filters = 0; 116 117 for (int length = 1; length <= 10000; length = NextLength(length)) { 118 Reset(); 119 for (int i = 0; i < length; i++) { 120 Add(Key(i, buffer)); 121 } 122 Build(); 123 124 ASSERT_LE(FilterSize(), static_cast<size_t>((length * 10 / 8) + 40)) 125 << length; 126 127 // All added keys must match 128 for (int i = 0; i < length; i++) { 129 ASSERT_TRUE(Matches(Key(i, buffer))) 130 << "Length " << length << "; key " << i; 131 } 132 133 // Check false positive rate 134 double rate = FalsePositiveRate(); 135 if (kVerbose >= 1) { 136 fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n", 137 rate * 100.0, length, static_cast<int>(FilterSize())); 138 } 139 ASSERT_LE(rate, 0.02); // Must not be over 2% 140 if (rate > 0.0125) 141 mediocre_filters++; // Allowed, but not too often 142 else 143 good_filters++; 144 } 145 if (kVerbose >= 1) { 146 fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters, 147 mediocre_filters); 148 } 149 ASSERT_LE(mediocre_filters, good_filters / 5); 150 } 151 152 // Different bits-per-byte 153 154 } // namespace leveldb 155 156 int main(int argc, char** argv) { return leveldb::test::RunAllTests(); }