- use an ordered mapping so we dump the oldest stuff from the cache first

- remove the pointless check for cache time being enabled inside _cached_result - _cached_result isn't actually called if caching isn't enabled - slight code tidy in DA.py - lots of comments in the test_caching.py and DA.py's _cached_result method

- use an ordered mapping so we dump the oldest stuff from the cache first
- remove the pointless check for cache time being enabled inside _cached_result - _cached_result isn't actually called if caching isn't enabled - slight code tidy in DA.py - lots of comments in the test_caching.py and DA.py's _cached_result method
1ed2f06e · Chris Withers · c3838d6d · 1ed2f06e · 1ed2f06e
Commit 1ed2f06e authored Nov 17, 2006 by Chris Withers
Hide whitespace changes
Inline Side-by-side

Showing with 102 additions and 49 deletions

lib/python/Shared/DC/ZRDB/DA.py lib/python/Shared/DC/ZRDB/DA.py +51 -26

lib/python/Shared/DC/ZRDB/tests/test_caching.py lib/python/Shared/DC/ZRDB/tests/test_caching.py +51 -23

No files found.
--- a/lib/python/Shared/DC/ZRDB/DA.py
+++ b/lib/python/Shared/DC/ZRDB/DA.py
@@ -39,7 +39,7 @@ from AccessControl.DTML import RestrictedDTML
 from webdav.Resource import Resource
 from webdav.Lockable import ResourceLockedError
 from zExceptions import BadRequest
-Bucket=lambda:{}
+from BTrees.OOBTree import OOBucket as Bucket
 class DatabaseError(BadRequest):
@@ -352,27 +352,52 @@ class DA(
    def _searchable_result_columns(self): return self._col
    def _cached_result(self, DB__, query, max_rows, conn_id):
+        # Try to fetch a result from the cache.
+        # Compute and cache the result otherwise.
+        # Also maintains the cache and ensures stale entries
+        # are never returned and that the cache never gets too large.
+        # NB: Correct cache behavior is predicated on Bucket.keys()
+        #     returning a sequence ordered from smalled number
+        #     (ie: the oldest cache entry) to largest number
+        #     (ie: the newest cache entry). Please be careful if you
+        #     change the class instantied below!
+        # get hold of a cache
+        caches = getattr(self,'_v_cache',None)
+        if caches is None:
+            caches = self._v_cache = {}, Bucket()
+        cache, tcache = caches
+        # the key for caching
        cache_key = query,max_rows,conn_id
+        # the maximum number of result sets to cache
-        # Try to fetch from cache
-        if hasattr(self,'_v_cache'): cache=self._v_cache
-        else: cache=self._v_cache={}, Bucket()
-        cache, tcache = cache
        max_cache=self.max_cache_
+        # the current time
        now=time()
+        # the oldest time which is not stale
        t=now-self.cache_time_
+        # if the cache is too big, we purge entries from it
        if len(cache) >= max_cache:
            keys=tcache.keys()
-            keys.reverse()
+            # We also hoover out any stale entries, as we're
-            while keys and (len(keys) >= max_cache or keys[-1] < t):
+            # already doing cache minimisation.
-                key=keys[-1]
+            # 'keys' is ordered, so we purge the oldest results
+            # until the cache is small enough and there are no
+            # stale entries in it
+            while keys and (len(keys) >= max_cache or keys[0] < t):
+                key=keys[0]
                q=tcache[key]
                del tcache[key]
                del cache[q]
-                del keys[-1]
+                del keys[0]
+        # okay, now see if we have a cached result
        if cache.has_key(cache_key):
            k, r = cache[cache_key]
+            # the result may still be stale, as we only hoover out
+            # stale results above if the cache gets too large.
            if k > t:
                # yay! a cached result returned!
                return r
@@ -383,22 +408,22 @@ class DA(
        # call the pure query
        result=DB__.query(query,max_rows)
-        if self.cache_time_ > 0:
-            # When a ZSQL method is handled by one ZPublisher thread twice in
+        # When a ZSQL method is handled by one ZPublisher thread twice in
-            # less time than it takes for time.time() to return a different
+        # less time than it takes for time.time() to return a different
-            # value, the SQL generated is different, then this code will leak
+        # value, the SQL generated is different, then this code will leak
-            # an entry in 'cache' for each time the ZSQL method generates
+        # an entry in 'cache' for each time the ZSQL method generates
-            # different SQL until time.time() returns a different value.
+        # different SQL until time.time() returns a different value.
-            #
+        #
-            # On Linux, you would need an extremely fast machine under extremely
+        # On Linux, you would need an extremely fast machine under extremely
-            # high load, making this extremely unlikely. On Windows, this is a
+        # high load, making this extremely unlikely. On Windows, this is a
-            # little more likely, but still unlikely to be a problem.
+        # little more likely, but still unlikely to be a problem.
-            #
+        #
-            # If it does become a problem, the values of the tcache mapping
+        # If it does become a problem, the values of the tcache mapping
-            # need to be turned into sets of cache keys rather than a single
+        # need to be turned into sets of cache keys rather than a single
-            # cache key.
+        # cache key.
-            tcache[now]=cache_key
+        tcache[now]=cache_key
-            cache[cache_key]= now, result
+        cache[cache_key]= now, result
        return result

--- a/lib/python/Shared/DC/ZRDB/tests/test_caching.py
+++ b/lib/python/Shared/DC/ZRDB/tests/test_caching.py
@@ -100,8 +100,18 @@ class TestCaching(TestCase):
        if result:
            self.fail('\n\n'+'\n'.join(result))
+    def test_bad_aquisition(self):
+        # checks that something called _v_cache isn't acquired from anywhere
+        from ExtensionClass import Base
+        class Dummy(Base):
+            _v_cache = 'muhahaha'
+        obj = Dummy()
+        self.da = self.da.__of__(obj)
+        del self.da._v_cache
+        self._do_query('query',1)
    def test_same_query_different_seconds(self):
-        # this tests a sequence set of requests for the same
+        # this tests a sequence of requests for the same
        # query, but where the item returned is always in the cache
        self._check_cache({},{})
        for t in range(1,6):
@@ -114,8 +124,7 @@ class TestCaching(TestCase):
    def test_same_query_same_second(self):
        # this tests a sequence set of requests for the same
        # query, but where the item returned is always in the cache
-        # and where the queries all occur in the same second, so
+        # and where the queries all occur in the same second
-        # tickling the potential cache time rounding problem
        self._check_cache({},{})
        for t in range(11,16,1):
            t = float(t)/10
@@ -128,8 +137,6 @@ class TestCaching(TestCase):
    def test_different_queries_different_second(self):
        # This tests different queries being fired into the cache
        # in sufficient volume to excercise the purging code
-        # XXX this demonstrates newer cached material being incorrectly
-        #     dumped due to the replacement of Bucket with dict
        self._check_cache({},{})
        # one
        self._do_query('query1',1.1)
@@ -147,28 +154,25 @@ class TestCaching(TestCase):
            )
        # three - now we drop our first cache entry
        self._do_query('query3',4.3)
-        # XXX oops - because dicts have an arbitary ordering, we dumped the wrong key!
        self._check_cache(
-            {('query1',1,'conn_id'): (1.1,'result for query1'),
+            {('query2',1,'conn_id'): (3.2,'result for query2'),
             ('query3',1,'conn_id'): (4.3,'result for query3'),},
-            {1.1: ('query1',1,'conn_id'),
+            {3.2: ('query2',1,'conn_id'),
             4.3: ('query3',1,'conn_id'),}
            )
        # four - now we drop our second cache entry
        self._do_query('query4',8.4)
-        # XXX oops - because dicts have an arbitary ordering, we dumped the wrong key!
        self._check_cache(
-            {('query1',1,'conn_id'): (1.1,'result for query1'),
+            {('query3',1,'conn_id'): (4.3,'result for query3'),
             ('query4',1,'conn_id'): (8.4,'result for query4'),},
-            {1.1: ('query1',1,'conn_id'),
+            {4.3: ('query3',1,'conn_id'),
             8.4: ('query4',1,'conn_id'),}
            )
    def test_different_queries_same_second(self):
        # This tests different queries being fired into the cache
-        # in the same second.
+        # in the same second in sufficient quantities to exercise
-        # XXX this demonstrates newer cached material being incorrectly
+        # the purging code
-        #     dumped due to the replacement of Bucket with dict
        self._check_cache({},{})
        # one
        self._do_query('query1',1.0)
@@ -194,15 +198,17 @@ class TestCaching(TestCase):
            )
        # four - now we drop another cache entry
        self._do_query('query4',1.3)
-        # XXX oops - because dicts have an arbitary ordering, we dumped the wrong key!
        self._check_cache(
-            {('query2',1,'conn_id'): (1.1,'result for query2'),
+            {('query3',1,'conn_id'): (1.2,'result for query3'),
             ('query4',1,'conn_id'): (1.3,'result for query4'),},
-            {1.1: ('query2',1,'conn_id'),
+            {1.2: ('query3',1,'conn_id'),
             1.3: ('query4',1,'conn_id'),}
            )
    def test_time_tcache_expires(self):
+        # This tests that once the cache purging code is triggered,
+        # it will actively hoover out all expired cache entries
        # the first query gets cached
        self._do_query('query1',1)
        self._check_cache(
@@ -218,7 +224,7 @@ class TestCaching(TestCase):
             2:('query2',1,'conn_id')}
            )
        # the 3rd trips the max_cache trigger, so both our old queries get
-        # dumped
+        # dumped because they are past their expiration time
        self._do_query('query',23)
        self._check_cache(
            {('query',1,'conn_id'): (23,'result for query')},
@@ -226,6 +232,10 @@ class TestCaching(TestCase):
            )
    def test_time_refreshed_cache(self):
+        # This tests that when a cached query is expired when it comes
+        # to check for a cached entry for that query, the stale entry is
+        # removed and replaced with a fresh entry.
        # the first query gets cached
        self._do_query('query1',1)
        self._check_cache(
@@ -258,6 +268,8 @@ class Hook:
        return conn_to_use
 class TestCacheKeys(TestCase):
+    # These tests check that the keys used for caching are unique
+    # in the right ways.
    def _cached_result(self,DB__,query,row_count,conn_id):
        self.cache_key = query,row_count,conn_id
@@ -291,20 +303,36 @@ class TestCacheKeys(TestCase):
        self.da.conn2 = DummyDA()
        self.da()
        self.assertEqual(self.cache_key,('some sql',1000,'conn2'))
 class TestFullChain(TestCase):
+    # This exercises both DA.__call__ and DA._cached_result.
-    def test_full_chain(self):
+    def setUp(self):
        from Shared.DC.ZRDB.DA import DA
        self.da = DA('da','title','conn_id','arg1 arg2','some sql')        
        self.da.conn_id = DummyDA()
+    def test_args_match(self):
+        # This checks is that DA._cached_result's call signature
+        # matches that expected by DA.__call__
        # These need to be set so DA.__call__ tries for a cached result
        self.da.cache_time_ = 1
        self.da.max_cache_ = 1
-        # run the method, exercising both DA.__call__ and DA._cached_result
+        # the actual test, will throw exceptions if things aren't right
-        # currently all this checks is that DA._cached_result's call signature
+        self.da()
-        # matches that expected by DA.__call__
+    def test_cached_result_not_called_for_no_caching(self):
+        # blow up the _cached_result method on our
+        # test instance
+        self.da._cached_result = None
+        # check we never get there with the default "no cachine"
        self.da()
+        # turn caching on
+        self.da.cache_time_ = 1
+        self.da.max_cache_ = 1
+        # check that we get an exception
+        self.assertRaises(TypeError,self.da)
 def test_suite():
    suite = TestSuite()