randomtrackingdict.py
1 """ 2 Track randomize ordered dict 3 """ 4 from threading import RLock 5 from time import time 6 7 try: 8 import helper_random 9 except ImportError: 10 import helper_random 11 12 13 class RandomTrackingDict(object): 14 """ 15 Dict with randomised order and tracking. 16 17 Keeps a track of how many items have been requested from the dict, 18 and timeouts. Resets after all objects have been retrieved and timed out. 19 The main purpose of this isn't as much putting related code together 20 as performance optimisation and anonymisation of downloading of objects 21 from other peers. If done using a standard dict or array, it takes 22 too much CPU (and looks convoluted). Randomisation helps with anonymity. 23 """ 24 # pylint: disable=too-many-instance-attributes 25 maxPending = 10 26 pendingTimeout = 60 27 28 def __init__(self): 29 self.dictionary = {} 30 self.indexDict = [] 31 self.len = 0 32 self.pendingLen = 0 33 self.lastPoll = 0 34 self.lastObject = 0 35 self.lock = RLock() 36 37 def __len__(self): 38 return self.len 39 40 def __contains__(self, key): 41 return key in self.dictionary 42 43 def __getitem__(self, key): 44 return self.dictionary[key][1] 45 46 def _swap(self, i1, i2): 47 with self.lock: 48 key1 = self.indexDict[i1] 49 key2 = self.indexDict[i2] 50 self.indexDict[i1] = key2 51 self.indexDict[i2] = key1 52 self.dictionary[key1][0] = i2 53 self.dictionary[key2][0] = i1 54 # for quick reassignment 55 return i2 56 57 def __setitem__(self, key, value): 58 with self.lock: 59 if key in self.dictionary: 60 self.dictionary[key][1] = value 61 else: 62 self.indexDict.append(key) 63 self.dictionary[key] = [self.len, value] 64 self._swap(self.len, self.len - self.pendingLen) 65 self.len += 1 66 67 def __delitem__(self, key): 68 if key not in self.dictionary: 69 raise KeyError 70 with self.lock: 71 index = self.dictionary[key][0] 72 # not pending 73 if index < self.len - self.pendingLen: 74 # left of pending part 75 index = self._swap(index, self.len - self.pendingLen - 1) 76 # pending 77 else: 78 self.pendingLen -= 1 79 # end 80 self._swap(index, self.len - 1) 81 # if the following del is batched, performance of this single 82 # operation can improve 4x, but it's already very fast so we'll 83 # ignore it for the time being 84 del self.indexDict[-1] 85 del self.dictionary[key] 86 self.len -= 1 87 88 def setMaxPending(self, maxPending): 89 """ 90 Sets maximum number of objects that can be retrieved from the class 91 simultaneously as long as there is no timeout 92 """ 93 self.maxPending = maxPending 94 95 def setPendingTimeout(self, pendingTimeout): 96 """Sets how long to wait for a timeout if max pending is reached 97 (or all objects have been retrieved)""" 98 self.pendingTimeout = pendingTimeout 99 100 def setLastObject(self): 101 """Update timestamp for tracking of received objects""" 102 self.lastObject = time() 103 104 def randomKeys(self, count=1): 105 """Retrieve count random keys from the dict 106 that haven't already been retrieved""" 107 if self.len == 0 or ( 108 (self.pendingLen >= self.maxPending or self.pendingLen == self.len) 109 and self.lastPoll + self.pendingTimeout > time()): 110 raise KeyError 111 112 # pylint: disable=redefined-outer-name 113 with self.lock: 114 # reset if we've requested all 115 # and if last object received too long time ago 116 if self.pendingLen == self.len and self.lastObject + \ 117 self.pendingTimeout < time(): 118 self.pendingLen = 0 119 self.setLastObject() 120 available = self.len - self.pendingLen 121 if count > available: 122 count = available 123 randomIndex = helper_random.randomsample( 124 list(range(self.len - self.pendingLen)), count) 125 retval = [self.indexDict[i] for i in randomIndex] 126 127 for i in sorted(randomIndex, reverse=True): 128 # swap with one below lowest pending 129 self._swap(i, self.len - self.pendingLen - 1) 130 self.pendingLen += 1 131 self.lastPoll = time() 132 return retval