"""
dev/bloomfiltertest.py
======================

Benchmark two Bloom filter implementations against the PyBitmessage
inventory.

Every ``hash`` stored in the ``inventory`` table of ``messages.dat`` is
added to a ``pybloom`` filter and to a ``pybloomfilter`` filter.  The
script then prints, for each filter, its size relative to the raw hash
data, its capacity and error rate, and the cumulative time spent in
``add()``.
"""

import sqlite3
from os import getenv, path
from time import time

from pybloom import BloomFilter as BloomFilter1  # pylint: disable=import-error
from pybloomfilter import BloomFilter as BloomFilter2  # pylint: disable=import-error

# Ubuntu: apt-get install python-pybloomfiltermmap

conn = sqlite3.connect(path.join(getenv("HOME"), '.config/PyBitmessage/messages.dat'))

conn.text_factory = str
cur = conn.cursor()
rawlen = 0      # total length of all hashes read from the inventory
itemcount = 0   # number of hashes reported by the inventory table
bf1time = 0     # cumulative seconds spent in bf1.add()
bf2time = 0     # cumulative seconds spent in bf2.add()

try:
    # COUNT() always yields exactly one row, so fetchone() is sufficient.
    cur.execute('''SELECT COUNT(hash) FROM inventory''')
    itemcount = cur.fetchone()[0]

    # Round the capacity up to the next multiple of 1000 above itemcount.
    filtersize = 1000 * (itemcount // 1000 + 1)
    errorrate = 1.0 / 1000.0

    bf1 = BloomFilter1(capacity=filtersize, error_rate=errorrate)
    bf2 = BloomFilter2(capacity=filtersize, error_rate=errorrate)

    cur.execute('''SELECT hash FROM inventory''')
    # Iterate the cursor directly instead of materialising every row first.
    for row in cur:
        rawlen += len(row[0])
        try:
            started = time()
            bf1.add(row[0])
            switched = time()
            bf2.add(row[0])
            finished = time()
        except IndexError:
            # NOTE(review): presumably raised by a filter that has reached
            # its capacity -- confirm.  The item is skipped and its timing
            # is not counted for either filter.
            continue
        bf1time += switched - started
        bf2time += finished - switched
finally:
    # Release the database handle even if a query or an add() fails.
    conn.close()

# NOTE(review): leftover snippet for dumping a filter to disk; ``sb1`` is
# not defined anywhere in this script.
# f = open("/home/shurdeek/tmp/bloom.dat", "wb")
# sb1.tofile(f)
# f.close()

bf1_length = bf1.bitarray.buffer_info()[1]
bf2_length = bf2.num_bits / 8.0
# With an empty inventory there is no raw data to compare against; report
# NaN instead of raising ZeroDivisionError.
if rawlen:
    bf1_percent = 100.0 * bf1_length / rawlen
    bf2_percent = 100.0 * bf2_length / rawlen
else:
    bf1_percent = bf2_percent = float('nan')

print("Item count: %i" % itemcount)
print("Raw length: %i" % rawlen)
print("Bloom filter 1 length: %i, reduction to: %.2f%%" %
      (bf1_length, bf1_percent))
print("Bloom filter 1 capacity: %i and error rate: %.3f%%" % (bf1.capacity, 100.0 * bf1.error_rate))
print("Bloom filter 1 took %.2fs" % bf1time)
print("Bloom filter 2 length: %i, reduction to: %.3f%%" %
      (bf2_length, bf2_percent))
print("Bloom filter 2 capacity: %i and error rate: %.3f%%" % (bf2.capacity, 100.0 * bf2.error_rate))
print("Bloom filter 2 took %.2fs" % bf2time)