cuckoocache_tests.cpp
1 // Copyright (c) 2012-2021 The Bitcoin Core developers 2 // Distributed under the MIT software license, see the accompanying 3 // file COPYING or http://www.opensource.org/licenses/mit-license.php. 4 5 #include <cuckoocache.h> 6 #include <random.h> 7 #include <script/sigcache.h> 8 #include <test/util/random.h> 9 #include <test/util/setup_common.h> 10 11 #include <boost/test/unit_test.hpp> 12 13 #include <deque> 14 #include <mutex> 15 #include <shared_mutex> 16 #include <thread> 17 #include <vector> 18 19 /** Test Suite for CuckooCache 20 * 21 * 1. All tests should have a deterministic result (using insecure rand 22 * with deterministic seeds) 23 * 2. Some test methods are templated to allow for easier testing 24 * against new versions / comparing 25 * 3. Results should be treated as a regression test, i.e., did the behavior 26 * change significantly from what was expected. This can be OK, depending on 27 * the nature of the change, but requires updating the tests to reflect the new 28 * expected behavior. For example improving the hit rate may cause some tests 29 * using BOOST_CHECK_CLOSE to fail. 30 * 31 */ 32 BOOST_AUTO_TEST_SUITE(cuckoocache_tests); 33 34 /* Test that no values not inserted into the cache are read out of it. 35 * 36 * There are no repeats in the first 200000 insecure_GetRandHash calls 37 */ 38 BOOST_AUTO_TEST_CASE(test_cuckoocache_no_fakes) 39 { 40 SeedInsecureRand(SeedRand::ZEROS); 41 CuckooCache::cache<uint256, SignatureCacheHasher> cc{}; 42 size_t megabytes = 4; 43 cc.setup_bytes(megabytes << 20); 44 for (int x = 0; x < 100000; ++x) { 45 cc.insert(InsecureRand256()); 46 } 47 for (int x = 0; x < 100000; ++x) { 48 BOOST_CHECK(!cc.contains(InsecureRand256(), false)); 49 } 50 }; 51 52 /** This helper returns the hit rate when megabytes*load worth of entries are 53 * inserted into a megabytes sized cache 54 */ 55 template <typename Cache> 56 static double test_cache(size_t megabytes, double load) 57 { 58 SeedInsecureRand(SeedRand::ZEROS); 59 std::vector<uint256> hashes; 60 Cache set{}; 61 size_t bytes = megabytes * (1 << 20); 62 set.setup_bytes(bytes); 63 uint32_t n_insert = static_cast<uint32_t>(load * (bytes / sizeof(uint256))); 64 hashes.resize(n_insert); 65 for (uint32_t i = 0; i < n_insert; ++i) { 66 uint32_t* ptr = (uint32_t*)hashes[i].begin(); 67 for (uint8_t j = 0; j < 8; ++j) 68 *(ptr++) = InsecureRand32(); 69 } 70 /** We make a copy of the hashes because future optimizations of the 71 * cuckoocache may overwrite the inserted element, so the test is 72 * "future proofed". 73 */ 74 std::vector<uint256> hashes_insert_copy = hashes; 75 /** Do the insert */ 76 for (const uint256& h : hashes_insert_copy) 77 set.insert(h); 78 /** Count the hits */ 79 uint32_t count = 0; 80 for (const uint256& h : hashes) 81 count += set.contains(h, false); 82 double hit_rate = ((double)count) / ((double)n_insert); 83 return hit_rate; 84 } 85 86 /** The normalized hit rate for a given load. 87 * 88 * The semantics are a little confusing, so please see the below 89 * explanation. 90 * 91 * Examples: 92 * 93 * 1. at load 0.5, we expect a perfect hit rate, so we multiply by 94 * 1.0 95 * 2. at load 2.0, we expect to see half the entries, so a perfect hit rate 96 * would be 0.5. Therefore, if we see a hit rate of 0.4, 0.4*2.0 = 0.8 is the 97 * normalized hit rate. 98 * 99 * This is basically the right semantics, but has a bit of a glitch depending on 100 * how you measure around load 1.0 as after load 1.0 your normalized hit rate 101 * becomes effectively perfect, ignoring freshness. 102 */ 103 static double normalize_hit_rate(double hits, double load) 104 { 105 return hits * std::max(load, 1.0); 106 } 107 108 /** Check the hit rate on loads ranging from 0.1 to 1.6 */ 109 BOOST_AUTO_TEST_CASE(cuckoocache_hit_rate_ok) 110 { 111 /** Arbitrarily selected Hit Rate threshold that happens to work for this test 112 * as a lower bound on performance. 113 */ 114 double HitRateThresh = 0.98; 115 size_t megabytes = 4; 116 for (double load = 0.1; load < 2; load *= 2) { 117 double hits = test_cache<CuckooCache::cache<uint256, SignatureCacheHasher>>(megabytes, load); 118 BOOST_CHECK(normalize_hit_rate(hits, load) > HitRateThresh); 119 } 120 } 121 122 123 /** This helper checks that erased elements are preferentially inserted onto and 124 * that the hit rate of "fresher" keys is reasonable*/ 125 template <typename Cache> 126 static void test_cache_erase(size_t megabytes) 127 { 128 double load = 1; 129 SeedInsecureRand(SeedRand::ZEROS); 130 std::vector<uint256> hashes; 131 Cache set{}; 132 size_t bytes = megabytes * (1 << 20); 133 set.setup_bytes(bytes); 134 uint32_t n_insert = static_cast<uint32_t>(load * (bytes / sizeof(uint256))); 135 hashes.resize(n_insert); 136 for (uint32_t i = 0; i < n_insert; ++i) { 137 uint32_t* ptr = (uint32_t*)hashes[i].begin(); 138 for (uint8_t j = 0; j < 8; ++j) 139 *(ptr++) = InsecureRand32(); 140 } 141 /** We make a copy of the hashes because future optimizations of the 142 * cuckoocache may overwrite the inserted element, so the test is 143 * "future proofed". 144 */ 145 std::vector<uint256> hashes_insert_copy = hashes; 146 147 /** Insert the first half */ 148 for (uint32_t i = 0; i < (n_insert / 2); ++i) 149 set.insert(hashes_insert_copy[i]); 150 /** Erase the first quarter */ 151 for (uint32_t i = 0; i < (n_insert / 4); ++i) 152 BOOST_CHECK(set.contains(hashes[i], true)); 153 /** Insert the second half */ 154 for (uint32_t i = (n_insert / 2); i < n_insert; ++i) 155 set.insert(hashes_insert_copy[i]); 156 157 /** elements that we marked as erased but are still there */ 158 size_t count_erased_but_contained = 0; 159 /** elements that we did not erase but are older */ 160 size_t count_stale = 0; 161 /** elements that were most recently inserted */ 162 size_t count_fresh = 0; 163 164 for (uint32_t i = 0; i < (n_insert / 4); ++i) 165 count_erased_but_contained += set.contains(hashes[i], false); 166 for (uint32_t i = (n_insert / 4); i < (n_insert / 2); ++i) 167 count_stale += set.contains(hashes[i], false); 168 for (uint32_t i = (n_insert / 2); i < n_insert; ++i) 169 count_fresh += set.contains(hashes[i], false); 170 171 double hit_rate_erased_but_contained = double(count_erased_but_contained) / (double(n_insert) / 4.0); 172 double hit_rate_stale = double(count_stale) / (double(n_insert) / 4.0); 173 double hit_rate_fresh = double(count_fresh) / (double(n_insert) / 2.0); 174 175 // Check that our hit_rate_fresh is perfect 176 BOOST_CHECK_EQUAL(hit_rate_fresh, 1.0); 177 // Check that we have a more than 2x better hit rate on stale elements than 178 // erased elements. 179 BOOST_CHECK(hit_rate_stale > 2 * hit_rate_erased_but_contained); 180 } 181 182 BOOST_AUTO_TEST_CASE(cuckoocache_erase_ok) 183 { 184 size_t megabytes = 4; 185 test_cache_erase<CuckooCache::cache<uint256, SignatureCacheHasher>>(megabytes); 186 } 187 188 template <typename Cache> 189 static void test_cache_erase_parallel(size_t megabytes) 190 { 191 double load = 1; 192 SeedInsecureRand(SeedRand::ZEROS); 193 std::vector<uint256> hashes; 194 Cache set{}; 195 size_t bytes = megabytes * (1 << 20); 196 set.setup_bytes(bytes); 197 uint32_t n_insert = static_cast<uint32_t>(load * (bytes / sizeof(uint256))); 198 hashes.resize(n_insert); 199 for (uint32_t i = 0; i < n_insert; ++i) { 200 uint32_t* ptr = (uint32_t*)hashes[i].begin(); 201 for (uint8_t j = 0; j < 8; ++j) 202 *(ptr++) = InsecureRand32(); 203 } 204 /** We make a copy of the hashes because future optimizations of the 205 * cuckoocache may overwrite the inserted element, so the test is 206 * "future proofed". 207 */ 208 std::vector<uint256> hashes_insert_copy = hashes; 209 std::shared_mutex mtx; 210 211 { 212 /** Grab lock to make sure we release inserts */ 213 std::unique_lock<std::shared_mutex> l(mtx); 214 /** Insert the first half */ 215 for (uint32_t i = 0; i < (n_insert / 2); ++i) 216 set.insert(hashes_insert_copy[i]); 217 } 218 219 /** Spin up 3 threads to run contains with erase. 220 */ 221 std::vector<std::thread> threads; 222 /** Erase the first quarter */ 223 for (uint32_t x = 0; x < 3; ++x) 224 /** Each thread is emplaced with x copy-by-value 225 */ 226 threads.emplace_back([&, x] { 227 std::shared_lock<std::shared_mutex> l(mtx); 228 size_t ntodo = (n_insert/4)/3; 229 size_t start = ntodo*x; 230 size_t end = ntodo*(x+1); 231 for (uint32_t i = start; i < end; ++i) { 232 bool contains = set.contains(hashes[i], true); 233 assert(contains); 234 } 235 }); 236 237 /** Wait for all threads to finish 238 */ 239 for (std::thread& t : threads) 240 t.join(); 241 /** Grab lock to make sure we observe erases */ 242 std::unique_lock<std::shared_mutex> l(mtx); 243 /** Insert the second half */ 244 for (uint32_t i = (n_insert / 2); i < n_insert; ++i) 245 set.insert(hashes_insert_copy[i]); 246 247 /** elements that we marked erased but that are still there */ 248 size_t count_erased_but_contained = 0; 249 /** elements that we did not erase but are older */ 250 size_t count_stale = 0; 251 /** elements that were most recently inserted */ 252 size_t count_fresh = 0; 253 254 for (uint32_t i = 0; i < (n_insert / 4); ++i) 255 count_erased_but_contained += set.contains(hashes[i], false); 256 for (uint32_t i = (n_insert / 4); i < (n_insert / 2); ++i) 257 count_stale += set.contains(hashes[i], false); 258 for (uint32_t i = (n_insert / 2); i < n_insert; ++i) 259 count_fresh += set.contains(hashes[i], false); 260 261 double hit_rate_erased_but_contained = double(count_erased_but_contained) / (double(n_insert) / 4.0); 262 double hit_rate_stale = double(count_stale) / (double(n_insert) / 4.0); 263 double hit_rate_fresh = double(count_fresh) / (double(n_insert) / 2.0); 264 265 // Check that our hit_rate_fresh is perfect 266 BOOST_CHECK_EQUAL(hit_rate_fresh, 1.0); 267 // Check that we have a more than 2x better hit rate on stale elements than 268 // erased elements. 269 BOOST_CHECK(hit_rate_stale > 2 * hit_rate_erased_but_contained); 270 } 271 BOOST_AUTO_TEST_CASE(cuckoocache_erase_parallel_ok) 272 { 273 size_t megabytes = 4; 274 test_cache_erase_parallel<CuckooCache::cache<uint256, SignatureCacheHasher>>(megabytes); 275 } 276 277 278 template <typename Cache> 279 static void test_cache_generations() 280 { 281 // This test checks that for a simulation of network activity, the fresh hit 282 // rate is never below 99%, and the number of times that it is worse than 283 // 99.9% are less than 1% of the time. 284 double min_hit_rate = 0.99; 285 double tight_hit_rate = 0.999; 286 double max_rate_less_than_tight_hit_rate = 0.01; 287 // A cache that meets this specification is therefore shown to have a hit 288 // rate of at least tight_hit_rate * (1 - max_rate_less_than_tight_hit_rate) + 289 // min_hit_rate*max_rate_less_than_tight_hit_rate = 0.999*99%+0.99*1% == 99.89% 290 // hit rate with low variance. 291 292 // We use deterministic values, but this test has also passed on many 293 // iterations with non-deterministic values, so it isn't "overfit" to the 294 // specific entropy in FastRandomContext(true) and implementation of the 295 // cache. 296 SeedInsecureRand(SeedRand::ZEROS); 297 298 // block_activity models a chunk of network activity. n_insert elements are 299 // added to the cache. The first and last n/4 are stored for removal later 300 // and the middle n/2 are not stored. This models a network which uses half 301 // the signatures of recently (since the last block) added transactions 302 // immediately and never uses the other half. 303 struct block_activity { 304 std::vector<uint256> reads; 305 block_activity(uint32_t n_insert, Cache& c) : reads() 306 { 307 std::vector<uint256> inserts; 308 inserts.resize(n_insert); 309 reads.reserve(n_insert / 2); 310 for (uint32_t i = 0; i < n_insert; ++i) { 311 uint32_t* ptr = (uint32_t*)inserts[i].begin(); 312 for (uint8_t j = 0; j < 8; ++j) 313 *(ptr++) = InsecureRand32(); 314 } 315 for (uint32_t i = 0; i < n_insert / 4; ++i) 316 reads.push_back(inserts[i]); 317 for (uint32_t i = n_insert - (n_insert / 4); i < n_insert; ++i) 318 reads.push_back(inserts[i]); 319 for (const auto& h : inserts) 320 c.insert(h); 321 } 322 }; 323 324 const uint32_t BLOCK_SIZE = 1000; 325 // We expect window size 60 to perform reasonably given that each epoch 326 // stores 45% of the cache size (~472k). 327 const uint32_t WINDOW_SIZE = 60; 328 const uint32_t POP_AMOUNT = (BLOCK_SIZE / WINDOW_SIZE) / 2; 329 const double load = 10; 330 const size_t megabytes = 4; 331 const size_t bytes = megabytes * (1 << 20); 332 const uint32_t n_insert = static_cast<uint32_t>(load * (bytes / sizeof(uint256))); 333 334 std::vector<block_activity> hashes; 335 Cache set{}; 336 set.setup_bytes(bytes); 337 hashes.reserve(n_insert / BLOCK_SIZE); 338 std::deque<block_activity> last_few; 339 uint32_t out_of_tight_tolerance = 0; 340 uint32_t total = n_insert / BLOCK_SIZE; 341 // we use the deque last_few to model a sliding window of blocks. at each 342 // step, each of the last WINDOW_SIZE block_activities checks the cache for 343 // POP_AMOUNT of the hashes that they inserted, and marks these erased. 344 for (uint32_t i = 0; i < total; ++i) { 345 if (last_few.size() == WINDOW_SIZE) 346 last_few.pop_front(); 347 last_few.emplace_back(BLOCK_SIZE, set); 348 uint32_t count = 0; 349 for (auto& act : last_few) 350 for (uint32_t k = 0; k < POP_AMOUNT; ++k) { 351 count += set.contains(act.reads.back(), true); 352 act.reads.pop_back(); 353 } 354 // We use last_few.size() rather than WINDOW_SIZE for the correct 355 // behavior on the first WINDOW_SIZE iterations where the deque is not 356 // full yet. 357 double hit = (double(count)) / (last_few.size() * POP_AMOUNT); 358 // Loose Check that hit rate is above min_hit_rate 359 BOOST_CHECK(hit > min_hit_rate); 360 // Tighter check, count number of times we are less than tight_hit_rate 361 // (and implicitly, greater than min_hit_rate) 362 out_of_tight_tolerance += hit < tight_hit_rate; 363 } 364 // Check that being out of tolerance happens less than 365 // max_rate_less_than_tight_hit_rate of the time 366 BOOST_CHECK(double(out_of_tight_tolerance) / double(total) < max_rate_less_than_tight_hit_rate); 367 } 368 BOOST_AUTO_TEST_CASE(cuckoocache_generations) 369 { 370 test_cache_generations<CuckooCache::cache<uint256, SignatureCacheHasher>>(); 371 } 372 373 BOOST_AUTO_TEST_SUITE_END();