From 0c4b00a8af3ed9aa552d70151822e3b3e4b2cba3 Mon Sep 17 00:00:00 2001 From: BreakWa11 Date: Sat, 19 Dec 2015 12:22:23 +0800 Subject: [PATCH] fix LRUCache --- shadowsocks/lru_cache.py | 51 ++++----- shadowsocks/ordereddict.py | 214 +++++++++++++++++++++++++++++++++++++ 2 files changed, 240 insertions(+), 25 deletions(-) create mode 100644 shadowsocks/ordereddict.py diff --git a/shadowsocks/lru_cache.py b/shadowsocks/lru_cache.py index 3b3c264..648b5a3 100644 --- a/shadowsocks/lru_cache.py +++ b/shadowsocks/lru_cache.py @@ -22,11 +22,22 @@ import collections import logging import time +if __name__ == '__main__': + import os, sys, inspect + file_path = os.path.dirname(os.path.realpath(inspect.getfile(inspect.currentframe()))) + sys.path.insert(0, os.path.join(file_path, '../')) + +try: + from collections import OrderedDict + print("loaded collections.OrderedDict") +except: + from shadowsocks.ordereddict import OrderedDict + # this LRUCache is optimized for concurrency, not QPS # n: concurrency, keys stored in the cache # m: visits not timed out, proportional to QPS * timeout -# get & set is O(log(n)), not O(n). thus we can support very large n -# sweep is O((n - m)*log(n)) or O(1024*log(n)) at most, +# get & set is O(1), not O(n). thus we can support very large n +# sweep is O((n - m)) or O(1024) at most, # no metter how large the cache or timeout value is SWEEP_MAX_ITEMS = 1024 @@ -38,39 +49,30 @@ class LRUCache(collections.MutableMapping): self.timeout = timeout self.close_callback = close_callback self._store = {} - self._time_to_keys = collections.OrderedDict() - self._keys_to_last_time = {} - self._visit_id = 0 + self._keys_to_last_time = OrderedDict() self.update(dict(*args, **kwargs)) # use the free update to set keys def __getitem__(self, key): - # O(log(n)) + # O(1) t = time.time() - last_t, vid = self._keys_to_last_time[key] - self._keys_to_last_time[key] = (t, vid) - if last_t != t: - del self._time_to_keys[(last_t, vid)] - self._time_to_keys[(t, vid)] = key + last_t = self._keys_to_last_time[key] + del self._keys_to_last_time[key] + self._keys_to_last_time[key] = t return self._store[key] def __setitem__(self, key, value): - # O(log(n)) + # O(1) t = time.time() if key in self._keys_to_last_time: - last_t, vid = self._keys_to_last_time[key] - del self._time_to_keys[(last_t, vid)] - vid = self._visit_id - self._visit_id += 1 - self._keys_to_last_time[key] = (t, vid) + del self._keys_to_last_time[key] + self._keys_to_last_time[key] = t self._store[key] = value - self._time_to_keys[(t, vid)] = key def __delitem__(self, key): - # O(log(n)) - last_t, vid = self._keys_to_last_time[key] + # O(1) + last_t = self._keys_to_last_time[key] del self._store[key] del self._keys_to_last_time[key] - del self._time_to_keys[(last_t, vid)] def __iter__(self): return iter(self._store) @@ -83,18 +85,17 @@ class LRUCache(collections.MutableMapping): now = time.time() c = 0 while c < SWEEP_MAX_ITEMS: - if len(self._time_to_keys) == 0: + if len(self._keys_to_last_time) == 0: break - last_t, vid = iter(self._time_to_keys).next() + key = iter(self._keys_to_last_time).next() + last_t = self._keys_to_last_time[key] if now - last_t <= self.timeout: break - key = self._time_to_keys[(last_t, vid)] value = self._store[key] if self.close_callback is not None: self.close_callback(value) del self._store[key] del self._keys_to_last_time[key] - del self._time_to_keys[(last_t, vid)] c += 1 if c: logging.debug('%d keys swept' % c) diff --git a/shadowsocks/ordereddict.py b/shadowsocks/ordereddict.py new file mode 100644 index 0000000..e1918f5 --- /dev/null +++ b/shadowsocks/ordereddict.py @@ -0,0 +1,214 @@ +import collections + +################################################################################ +### OrderedDict +################################################################################ + +class OrderedDict(dict): + 'Dictionary that remembers insertion order' + # An inherited dict maps keys to values. + # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as regular dictionaries. + + # The internal self.__map dict maps keys to links in a doubly linked list. + # The circular doubly linked list starts and ends with a sentinel element. + # The sentinel element never gets deleted (this simplifies the algorithm). + # Each link is stored as a list of length three: [PREV, NEXT, KEY]. + + def __init__(*args, **kwds): + '''Initialize an ordered dictionary. The signature is the same as + regular dictionaries, but keyword arguments are not recommended because + their insertion order is arbitrary. + + ''' + if not args: + raise TypeError("descriptor '__init__' of 'OrderedDict' object " + "needs an argument") + self = args[0] + args = args[1:] + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__root = root = [] # sentinel node + root[:] = [root, root, None] + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, dict_setitem=dict.__setitem__): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link at the end of the linked list, + # and the inherited dictionary is updated with the new key/value pair. + if key not in self: + root = self.__root + last = root[0] + last[1] = root[0] = self.__map[key] = [last, root, key] + return dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which gets + # removed by updating the links in the predecessor and successor nodes. + dict_delitem(self, key) + link_prev, link_next, _ = self.__map.pop(key) + link_prev[1] = link_next # update link_prev[NEXT] + link_next[0] = link_prev # update link_next[PREV] + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + # Traverse the linked list in order. + root = self.__root + curr = root[1] # start at the first node + while curr is not root: + yield curr[2] # yield the curr[KEY] + curr = curr[1] # move to next node + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + # Traverse the linked list in reverse order. + root = self.__root + curr = root[0] # start at the last node + while curr is not root: + yield curr[2] # yield the curr[KEY] + curr = curr[0] # move to previous node + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + root = self.__root + root[:] = [root, root, None] + self.__map.clear() + dict.clear(self) + + # -- the following methods do not depend on the internal structure -- + + def keys(self): + 'od.keys() -> list of keys in od' + return list(self) + + def values(self): + 'od.values() -> list of values in od' + return [self[key] for key in self] + + def items(self): + 'od.items() -> list of (key, value) pairs in od' + return [(key, self[key]) for key in self] + + def iterkeys(self): + 'od.iterkeys() -> an iterator over the keys in od' + return iter(self) + + def itervalues(self): + 'od.itervalues -> an iterator over the values in od' + for k in self: + yield self[k] + + def iteritems(self): + 'od.iteritems -> an iterator over the (key, value) pairs in od' + for k in self: + yield (k, self[k]) + + update = collections.MutableMapping.update + + __update = update # let subclasses override update without breaking __init__ + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the corresponding + value. If key is not found, d is returned if given, otherwise KeyError + is raised. + + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if false. + + ''' + if not self: + raise KeyError('dictionary is empty') + key = next(reversed(self) if last else iter(self)) + value = self.pop(key) + return key, value + + def __repr__(self, _repr_running={}): + 'od.__repr__() <==> repr(od)' + call_key = id(self), _get_ident() + if call_key in _repr_running: + return '...' + _repr_running[call_key] = 1 + try: + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, self.items()) + finally: + del _repr_running[call_key] + + def __reduce__(self): + 'Return state information for pickling' + items = [[k, self[k]] for k in self] + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S. + If not specified, the value defaults to None. + + ''' + self = cls() + for key in iterable: + self[key] = value + return self + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive + while comparison to a regular mapping is order-insensitive. + + ''' + if isinstance(other, OrderedDict): + return dict.__eq__(self, other) and all(_imap(_eq, self, other)) + return dict.__eq__(self, other) + + def __ne__(self, other): + 'od.__ne__(y) <==> od!=y' + return not self == other + + # -- the following methods support python 3.x style dictionary views -- + + def viewkeys(self): + "od.viewkeys() -> a set-like object providing a view on od's keys" + return KeysView(self) + + def viewvalues(self): + "od.viewvalues() -> an object providing a view on od's values" + return ValuesView(self) + + def viewitems(self): + "od.viewitems() -> a set-like object providing a view on od's items" + return ItemsView(self) +