From bd203ff05a2e5edda044c24bb54af37caed8384c Mon Sep 17 00:00:00 2001
From: Yoshinori Okuji <yo@nexedi.com>
Date: Tue, 30 Jan 2007 22:36:29 +0000
Subject: [PATCH] Fix some bugs. Add more tests and a profiler.

git-svn-id: https://svn.erp5.org/repos/neo/branches/prototype3@76 71dcc9de-d417-0410-9af5-da40c76e7ee4
---
 neo/client/mq.py | 551 +++++++++++++++++++++++++----------------------
 1 file changed, 289 insertions(+), 262 deletions(-)

diff --git a/neo/client/mq.py b/neo/client/mq.py
index 6bc3b49f..d2f17658 100644
--- a/neo/client/mq.py
+++ b/neo/client/mq.py
@@ -1,262 +1,289 @@
-##############################################################################
-#
-# Copyright (c) 2005 Nexedi SARL and Contributors. All Rights Reserved.
-# Yoshinori Okuji <yo@nexedi.com>
-#
-# WARNING: This program as such is intended to be used by professional
-# programmers who take the whole responsability of assessing all potential
-# consequences resulting from its eventual inadequacies and bugs
-# End users who are looking for a ready-to-use solution with commercial
-# garantees and support are strongly adviced to contract a Free Software
-# Service Company
-#
-# This program is Free Software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version 2
-# of the License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-#
-##############################################################################
-
-"""
-Multi-Queue Cache Algorithm.
-"""
-
-from math import log
-
-class Element:
-    """
-    This class defines an element of a FIFO buffer.
-    """
-    pass
-
-class FIFO:
-    """
-    This class implements a FIFO buffer.
-    """
-
-    def __init__(self):
-        self._head = None
-        self._tail = None
-        self._len = 0
-
-    def __len__(self):
-        return self._len
-
-    def append(self):
-        element = Element()
-        element.next = None
-        element.prev = self._tail
-        if self._tail is not None:
-            self._tail.next = element
-        self._tail = element
-        if self._head is None:
-            self._head = element
-        self._len += 1
-        return element
-
-    def head(self):
-        return self._head
-
-    def tail(self):
-        return self._tail
-
-    def shift(self):
-        element = self._head
-        if element is None:
-            return None
-        del self[element]
-        return element
-
-    def __delitem__(self, element):
-        if element.next is None:
-            self._tail = element.prev
-        else:
-            element.next.prev = element.prev
-
-        if element.prev is None:
-            self._head = element.next
-        else:
-            element.prev.next = element.next
-
-        self._len -= 1
-
-class Data:
-    """
-    Data for each element in a FIFO buffer.
-    """
-    pass
-
-class MQ:
-    """
-    This class manages cached data by a variant of Multi-Queue.
-
-    This class caches various sizes of objects. Here are some considerations:
-
-    - Expired objects are not really deleted immediately. But if GC is invoked too often,
-      it degrades the performance significantly.
-
-    - If large objects are cached, the number of cached objects decreases. This might affect
-      the cache hit ratio. It might be better to tweak a buffer level according to the size of
-      an object.
-
-    - Stored values must be strings.
-
-    - The size calculation is not accurate.
-    """
-
-    def __init__(self, life_time=10000, buffer_levels=9, max_history_size=100000, max_size=20*1024*1024):
-        self._history_buffer = FIFO()
-        self._cache_buffers = []
-        for level in range(buffer_levels):
-            self._cache_buffers.append(FIFO())
-        self._data = {}
-        self._time = 0
-        self._life_time = life_time
-        self._buffer_levels = buffer_levels
-        self._max_history_size = max_history_size
-        self._max_size = max_size
-        self._size = 0
-
-    def has_key(self, id):
-        if id in self._data:
-            data = self._data[id]
-            if data.level >= 0:
-                return 1
-        return 0
-
-    __contains__ = has_key
-
-    def fetch(self, id):
-        """
-        Fetch a value associated with the id.
-        """
-        if id in self._data:
-            data = self._data[id]
-            if data.level >= 0:
-                del self._cache_buffers[data.level][data.element]
-                value = data.value
-                self._size -= len(value) # XXX inaccurate
-                self.store(id, value)
-                return value
-        raise KeyError, "%s was not found in the cache" % id
-
-    __getitem__ = fetch
-
-    def get(self, id, d=None):
-        try:
-            return self.fetch(id)
-        except KeyError:
-            return d
-
-    def _evict(self, id):
-        """
-        Evict an element to the history buffer.
-        """
-        data = self._data[id]
-        self._size -= len(data.value) # XXX inaccurate
-        del self._cache_buffers[data.level][data.element]
-        element = self._history_buffer.append()
-        data.level = -1
-        data.element = element
-        delattr(data, 'value')
-        delattr(data, 'expire_time')
-        element.data = data
-        if len(self._history_buffer) > self._max_history_size:
-            element = self._history_buffer.shift()
-            del self._data[element.data.id]
-
-    def store(self, id, value):
-        if id in self._data:
-            data = self._data[id]
-            level, element, counter = data.level, data.element, data.counter + 1
-            if level >= 0:
-                del self._cache_buffers[level][element]
-            else:
-                del self._history_buffer[element]
-        else:
-            counter = 1
-
-        # XXX It might be better to adjust the level according to the object size.
-        level = int(log(counter, 2))
-        if level >= self._buffer_levels:
-            level = self._buffer_levels - 1
-        element = self._cache_buffers[level].append()
-        data = Data()
-        data.id = id
-        data.expire_time = self._time + self._life_time
-        data.level = level
-        data.element = element
-        data.value = value
-        data.counter = counter
-        element.data = data
-        self._data[id] = data
-        self._size += len(value) # XXX inaccurate
-
-        self._time += 1
-
-        # Expire old elements.
-        for level in range(self._buffer_levels):
-            cache_buffer = self._cache_buffers[level]
-            head = cache_buffer.head()
-            if head is not None and head.data.expire_time < self._time:
-                del cache_buffer[head]
-                data = head.data
-                if level > 0:
-                    new_level = level - 1
-                    element = cache_buffer[new_level].append()
-                    element.data = data
-                    data.expire_time = self._time + self._life_time
-                    data.level = new_level
-                    data.element = element
-                else:
-                    self._evict(data.id)
-
-        # Limit the size.
-        size = self._size
-        max_size = self._max_size
-        if size > max_size:
-            for cache_buffer in self._cache_buffers:
-                while size > max_size:
-                    element = cache_buffer.shift()
-                    if element is None:
-                        break
-                    data = element.data
-                    del self._data[data.id]
-                    size -= len(data.value) # XXX inaccurate
-                if size <= max_size:
-                    break
-            self._size = size
-
-    __setitem__ = store
-
-    def invalidate(self, id):
-        if id in self._data:
-            data = self._data[id]
-            if data.level >= 0:
-                del self._cache_buffers[data.level][data.element]
-                self._evict(id)
-                return
-        raise KeyError, "%s was not found in the cache" % id
-
-    __delitem__ = invalidate
-
-
-# Here is a test.
-if __name__ == '__main__':
-    cache = MQ()
-    cache[1] = "1"
-    cache[2] = "2"
-    assert cache.get(1) == "1", 'cannot get 1'
-    assert cache.get(2) == "2", 'cannot get 2'
-    assert cache.get(3) == None, 'can get 3!'
-    del cache[1]
-    assert cache.get(1) == None, 'can get 1!'
+##############################################################################
+#
+# Copyright (c) 2005 Nexedi SARL and Contributors. All Rights Reserved.
+# Yoshinori Okuji <yo@nexedi.com>
+#
+# WARNING: This program as such is intended to be used by professional
+# programmers who take the whole responsability of assessing all potential
+# consequences resulting from its eventual inadequacies and bugs
+# End users who are looking for a ready-to-use solution with commercial
+# garantees and support are strongly adviced to contract a Free Software
+# Service Company
+#
+# This program is Free Software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+##############################################################################
+
+"""
+Multi-Queue Cache Algorithm.
+"""
+
+from math import log
+
+class Element(object):
+    """
+    This class defines an element of a FIFO buffer.
+    """
+    pass
+
+class FIFO(object):
+    """
+    This class implements a FIFO buffer.
+    """
+
+    def __init__(self):
+        self._head = None
+        self._tail = None
+        self._len = 0
+
+    def __len__(self):
+        return self._len
+
+    def append(self):
+        element = Element()
+        element.next = None
+        element.prev = self._tail
+        if self._tail is not None:
+            self._tail.next = element
+        self._tail = element
+        if self._head is None:
+            self._head = element
+        self._len += 1
+        return element
+
+    def head(self):
+        return self._head
+
+    def tail(self):
+        return self._tail
+
+    def shift(self):
+        element = self._head
+        if element is None:
+            return None
+        del self[element]
+        return element
+
+    def __delitem__(self, element):
+        if element.next is None:
+            self._tail = element.prev
+        else:
+            element.next.prev = element.prev
+
+        if element.prev is None:
+            self._head = element.next
+        else:
+            element.prev.next = element.next
+
+        self._len -= 1
+
+class Data(object):
+    """
+    Data for each element in a FIFO buffer.
+    """
+    pass
+
+class MQ(object):
+    """
+    This class manages cached data by a variant of Multi-Queue.
+
+    This class caches various sizes of objects. Here are some considerations:
+
+    - Expired objects are not really deleted immediately. But if GC is invoked too often,
+      it degrades the performance significantly.
+
+    - If large objects are cached, the number of cached objects decreases. This might affect
+      the cache hit ratio. It might be better to tweak a buffer level according to the size of
+      an object.
+
+    - Stored values must be strings.
+
+    - The size calculation is not accurate.
+    """
+
+    def __init__(self, life_time=10000, buffer_levels=9, max_history_size=100000, max_size=20*1024*1024):
+        self._history_buffer = FIFO()
+        self._cache_buffers = []
+        for level in range(buffer_levels):
+            self._cache_buffers.append(FIFO())
+        self._data = {}
+        self._time = 0
+        self._life_time = life_time
+        self._buffer_levels = buffer_levels
+        self._max_history_size = max_history_size
+        self._max_size = max_size
+        self._size = 0
+
+    def has_key(self, id):
+        if id in self._data:
+            data = self._data[id]
+            if data.level >= 0:
+                return 1
+        return 0
+
+    __contains__ = has_key
+
+    def fetch(self, id):
+        """
+        Fetch a value associated with the id.
+        """
+        data = self._data[id]
+        if data.level >= 0:
+            value = data.value
+            self._size -= len(value) # XXX inaccurate
+            self.store(id, value)
+            return value
+        raise KeyError(id)
+
+    __getitem__ = fetch
+
+    def get(self, id, d=None):
+        try:
+            return self.fetch(id)
+        except KeyError:
+            return d
+
+    def _evict(self, id):
+        """
+        Evict an element to the history buffer.
+        """
+        data = self._data[id]
+        self._size -= len(data.value) # XXX inaccurate
+        del self._cache_buffers[data.level][data.element]
+        element = self._history_buffer.append()
+        data.level = -1
+        data.element = element
+        delattr(data, 'value')
+        delattr(data, 'expire_time')
+        element.data = data
+        if len(self._history_buffer) > self._max_history_size:
+            element = self._history_buffer.shift()
+            del self._data[element.data.id]
+
+    def store(self, id, value):
+        cache_buffers = self._cache_buffers
+
+        try:
+            data = self._data[id]
+            level, element, counter = data.level, data.element, data.counter + 1
+            if level >= 0:
+                del cache_buffers[level][element]
+            else:
+                del self._history_buffer[element]
+        except KeyError:
+            counter = 1
+
+        # XXX It might be better to adjust the level according to the object size.
+        level = min(int(log(counter, 2)), self._buffer_levels - 1)
+        element = cache_buffers[level].append()
+        data = Data()
+        data.id = id
+        data.expire_time = self._time + self._life_time
+        data.level = level
+        data.element = element
+        data.value = value
+        data.counter = counter
+        element.data = data
+        self._data[id] = data
+        self._size += len(value) # XXX inaccurate
+
+        self._time += 1
+
+        # Expire old elements.
+        time = self._time
+        for level in xrange(self._buffer_levels):
+            cache_buffer = cache_buffers[level]
+            head = cache_buffer.head()
+            if head is not None and head.data.expire_time < time:
+                del cache_buffer[head]
+                data = head.data
+                if level > 0:
+                    new_level = level - 1
+                    element = cache_buffers[new_level].append()
+                    element.data = data
+                    data.expire_time = time + self._life_time
+                    data.level = new_level
+                    data.element = element
+                else:
+                    self._evict(data.id)
+
+        # Limit the size.
+        size = self._size
+        max_size = self._max_size
+        if size > max_size:
+            for cache_buffer in cache_buffers:
+                while size > max_size:
+                    element = cache_buffer.shift()
+                    if element is None:
+                        break
+                    data = element.data
+                    del self._data[data.id]
+                    size -= len(data.value) # XXX inaccurate
+                if size <= max_size:
+                    break
+            self._size = size
+
+    __setitem__ = store
+
+    def invalidate(self, id):
+        if id in self._data:
+            data = self._data[id]
+            if data.level >= 0:
+                del self._cache_buffers[data.level][data.element]
+                self._evict(id)
+                return
+        raise KeyError, "%s was not found in the cache" % id
+
+    __delitem__ = invalidate
+
+
+# Here is a test.
+if __name__ == '__main__':
+    import hotshot, hotshot.stats
+
+    def test():
+        cache = MQ(life_time=100, buffer_levels=9, max_history_size=10000,
+                   max_size=2*1024*1024)
+
+        for i in xrange(10000):
+            assert cache.get(i) is None, '%d should not be present' % i
+
+        for i in xrange(10000):
+            cache[i] = str(i)
+            assert cache.get(i) == str(i), '%d does not exist' % i
+
+        for i in xrange(10000 - 100 - 1):
+            assert cache.get(i) is None, '%d should not be present' % i
+
+        for i in xrange(10):
+            cache[i] = str(i)
+
+        for j in xrange(1000):
+            for i in xrange(10):
+                assert cache.get(i) == str(i), '%d does not exist' % i
+
+        for i in xrange(10,500):
+            cache[i] = str(i)
+
+        for i in xrange(10):
+            assert cache.get(i) == str(i), '%d does not exist' % i
+
+    prof = hotshot.Profile("mq.prof")
+    prof.runcall(test)
+    prof.close()
+    stats = hotshot.stats.load("mq.prof")
+    stats.strip_dirs()
+    stats.sort_stats('time', 'calls')
+    stats.print_stats(20)
-- 
2.30.9
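Usage note: the MQ class in this patch behaves like a bounded, dict-like cache for string values. Below is a minimal sketch (not part of the patch) of the public API, assuming the module is importable as neo.client.mq; the constructor arguments shown are illustrative values, not recommended settings.

    from neo.client.mq import MQ

    # Illustrative parameters; the defaults are life_time=10000,
    # buffer_levels=9, max_history_size=100000 and max_size=20*1024*1024.
    cache = MQ(life_time=100, buffer_levels=9, max_history_size=1000,
               max_size=1024*1024)

    cache['key'] = 'some string value'            # store / __setitem__
    assert 'key' in cache                         # has_key / __contains__
    assert cache['key'] == 'some string value'    # fetch / __getitem__, bumps the access counter
    assert cache.get('missing') is None           # get() returns a default instead of raising KeyError
    del cache['key']                              # invalidate / __delitem__, entry goes to the history buffer
    assert cache.get('key') is None

Running the module directly (python neo/client/mq.py) executes the new __main__ block, which profiles the test() function with hotshot, writes mq.prof, and prints the twenty most expensive entries sorted by time and call count.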
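The rewritten store() replaces the explicit cap on the queue level with level = min(int(log(counter, 2)), self._buffer_levels - 1): an entry's level grows with the logarithm of its access counter, so frequently fetched entries sit in higher-numbered queues, which are demoted one level per expiration before finally being evicted to the history buffer. A small standalone illustration of that mapping, assuming the default buffer_levels=9:

    from math import log

    buffer_levels = 9
    for counter in (1, 3, 7, 100, 1000, 100000):
        level = min(int(log(counter, 2)), buffer_levels - 1)
        print counter, '->', level
    # prints: 1 -> 0, 3 -> 1, 7 -> 2, 100 -> 6, 1000 -> 8, 100000 -> 8
    # (the last two are capped at buffer_levels - 1)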