Commit cc333941 authored by Jim Fulton

Lots of changes while integrating with ClientStorage

- testZEO tests now pass

- async tests now pass again

  Probably need to write more async tests to reflect changes.
  (Or maybe the ZEO tests that drove them are enough.)

- dropped heartbeat tests, which were insane. Will add a simpler test
  when I add heartbeats to the async implementation.
parent 3f31236b
@@ -29,6 +29,8 @@ import time
import weakref
from binascii import hexlify
import BTrees.OOBTree
import zc.lockfile
import ZODB
import ZODB.BaseStorage
@@ -223,7 +225,8 @@ class ClientStorage(object):
self._oids = [] # List of pre-fetched oids from server
cache = self._cache = open_cache(cache, var, client, cache_size)
cache = self._cache = open_cache(
cache, var, client, storage, cache_size)
# XXX need to check for POSIX-ness here
self.blob_dir = blob_dir
@@ -257,8 +260,8 @@ class ClientStorage(object):
addr, self, cache, storage,
ZEO.asyncio.client.Fallback if read_only_fallback else read_only,
wait_timeout or 30,
wait=wait,
)
self._server.start()
self._call = self._server.call
self._async = self._server.async
self._async_iter = self._server.async_iter
@@ -341,13 +344,6 @@ class ClientStorage(object):
self._info.update(info)
# for name in self._info.get('extensionMethods', {}).keys():
# if not hasattr(self, name):
# def mklambda(mname):
# return (lambda *args, **kw:
# self._server.rpc.call(mname, *args, **kw))
# setattr(self, name, mklambda(name))
for iface in (
ZODB.interfaces.IStorageRestoreable,
ZODB.interfaces.IStorageIteration,
@@ -560,7 +556,7 @@ class ClientStorage(object):
def store():
yield ('storeBlobStart', ())
f = open(blobfilename, 'rb')
f = open(target, 'rb')
while 1:
chunk = f.read(59000)
if not chunk:
@@ -714,6 +710,12 @@ class ClientStorage(object):
try:
tbuf = txn.data(self)
except AttributeError:
# Gaaaa. This is a recovery transaction. Work around this
# until we can think of something better. XXX
tb = {}
txn.data = tb.__getitem__
txn.set_data = tb.__setitem__
except KeyError:
pass
else:
@@ -855,9 +857,6 @@ class ClientStorage(object):
assert not version
self._check_trans(transaction, 'restore')
self._async('restorea', oid, serial, data, prev_txn, id(transaction))
# Don't update the transaction buffer, because current data are
# unaffected.
return self._check_serials()
# Below are methods invoked by the StorageServer
@@ -871,6 +870,10 @@ class ClientStorage(object):
"""Server callback to update the info dictionary."""
self._info.update(dict)
def invalidateCache(self):
if self._db is not None:
self._db.invalidateCache()
def invalidateTransaction(self, tid, oids):
"""Server callback: Invalidate objects modified by tid."""
if self._db is not None:
@@ -1154,14 +1157,16 @@ def _lock_blob(path):
else:
break
def open_cache(cache, var, client, cache_size):
def open_cache(cache, var, client, storage, cache_size):
if isinstance(cache, (None.__class__, str)):
from ZEO.cache import ClientCache
if cache is None:
if client:
cache = os.path.join(var or os.getcwd(), client)
cache = os.path.join(var or os.getcwd(),
"%s-%s.zec" % (client, storage))
else:
return ClientCache(cache, cache_size)
# ephemeral cache
return ClientCache(None, cache_size)
cache = ClientCache(cache, cache_size)
......
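For reference, a minimal sketch (with illustrative values, not ones from this commit) of the cache-file naming that the revised open_cache above produces: persistent caches are now named "<client>-<storage>.zec", and an absent client name yields an ephemeral cache.

    import os

    client, storage, var = 'cache', '1', None   # illustrative values

    # Persistent cache: placed in var (or the working directory) and named
    # after both the client and the storage, e.g. ./cache-1.zec
    path = os.path.join(var or os.getcwd(), "%s-%s.zec" % (client, storage))
    print(path)

    # With no client name, open_cache instead returns ClientCache(None, cache_size),
    # i.e. an ephemeral cache with no named file on disk.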
@@ -62,6 +62,7 @@ class TransactionBuffer:
def serial(self, oid, serial):
if isinstance(serial, Exception):
self.exception = serial
self.serials[oid] = None
else:
self.serials[oid] = serial
......
@@ -7,9 +7,13 @@ import logging
import random
import threading
import traceback
import ZEO.Exceptions
import ZODB.event
import ZODB.POSException
import ZEO.Exceptions
import ZEO.interfaces
logger = logging.getLogger(__name__)
Fallback = object()
@@ -272,6 +276,16 @@ class Protocol(asyncio.Protocol):
type(args[0]) == self.exception_type_type and
issubclass(args[0], Exception)
):
if not issubclass(
args[0], (
ZODB.POSException.POSKeyError,
ZODB.POSException.ConflictError,)
):
logger.error("%s from server: %s.%s:%s",
self.name,
args[0].__module__,
args[0].__name__,
args[1])
future.set_exception(args[1])
else:
future.set_result(args)
@@ -307,7 +321,7 @@ class Protocol(asyncio.Protocol):
'receiveBlobStart', 'receiveBlobChunk', 'receiveBlobStop',
# plus: notify_connected, notify_disconnected
)
client_delegated = client_methods[1:]
client_delegated = client_methods[2:]
class Client:
"""asyncio low-level ZEO client interface
@@ -432,6 +446,8 @@ class Client:
self.client.invalidateCache()
self.finished_verify(server_tid)
elif cache_tid > server_tid:
logger.critical(
'Client has seen newer transactions than server!')
raise AssertionError("Server behind client, %r < %r, %s",
server_tid, cache_tid, protocol)
elif cache_tid == server_tid:
@@ -447,7 +463,15 @@ class Client:
return tid
else:
# cache is too old
logger.info("cache too old %s", protocol)
try:
ZODB.event.notify(
ZEO.interfaces.StaleCache(self.client))
except Exception:
logger.exception("sending StaleCache event")
logger.critical(
"%s dropping stale cache",
getattr(self.client, '__name__', ''),
)
self.cache.clear()
self.client.invalidateCache()
return server_tid
@@ -561,14 +585,24 @@ class Client:
if self.ready:
@self.protocol.promise('tpc_finish', tid)
def committed(tid):
cache = self.cache
for oid, data, resolved in updates:
cache.invalidate(oid, tid)
if data and not resolved:
cache.store(oid, tid, None, data)
cache.setLastTid(tid)
f(tid)
future.set_result(tid)
try:
cache = self.cache
for oid, data, resolved in updates:
cache.invalidate(oid, tid)
if data and not resolved:
cache.store(oid, tid, None, data)
cache.setLastTid(tid)
except Exception as exc:
future.set_exception(exc)
# At this point, our cache is in an inconsistent
# state. We need to reconnect in hopes of
# recovering to a consistent state.
self.protocol.close()
self.disconnected(self.protocol)
else:
f(tid)
future.set_result(tid)
committed.catch(future.set_exception)
else:
@@ -585,6 +619,18 @@ class Client:
self.cache.setLastTid(tid)
self.client.invalidateTransaction(tid, oids)
def serialnos(self, serials):
# Before delegating, check for errors (likely ConflictErrors)
# and invalidate the oids they're associated with. In the
# past, this was done by the client, but now we control the
# cache and this is our last chance, as the client won't call
# back into us when there's an error.
for oid, serial in serials:
if isinstance(serial, Exception):
self.cache.invalidate(oid, None)
self.client.serialnos(serials)
@property
def protocol_version(self):
return self.protocol.protocol_version
@@ -699,19 +745,15 @@ class ClientThread(ClientRunner):
def __init__(self, addrs, client, cache,
storage_key='1', read_only=False, timeout=30,
disconnect_poll=1, wait=True):
disconnect_poll=1):
self.set_options(addrs, client, cache, storage_key, read_only,
timeout, disconnect_poll)
self.thread = threading.Thread(
target=self.run,
name='zeo_client_'+storage_key,
name="%s zeo client networking thread" % client.__name__,
daemon=True,
)
self.started = threading.Event()
self.thread.start()
self.started.wait()
if wait:
self.connected.result(timeout)
exception = None
def run(self):
@@ -724,11 +766,24 @@ class ClientThread(ClientRunner):
except Exception as exc:
logger.exception("Client thread")
self.exception = exc
raise
else:
finally:
if not self.closed:
if self.client.ready:
self.closed = True
self.client.ready = False
self.client.client.notify_disconnected()
logger.critical("Client loop stopped unexpectedly")
loop.close()
logger.debug('Stopping client thread')
def start(self, wait=True):
self.thread.start()
self.started.wait()
if self.exception:
raise self.exception
if wait:
self.connected.result(self.timeout)
closed = False
def close(self):
if not self.closed:
......
@@ -96,7 +96,16 @@ class AsyncTests(setupstack.TestCase, ClientRunner):
# Actually, the client isn't connected until it initializes its cache:
self.assertFalse(client.connected.done() or transport.data)
# If we try to make calls while the client is connecting, they're queued
# If we try to make calls while the client is *initially*
# connecting, we get an error. This is because some dufus
# decided to create a client storage without waiting for it to
# connect.
f1 = self.call('foo', 1, 2)
self.assertTrue(isinstance(f1.exception(), ClientDisconnected))
# When the client is reconnecting, its ready flag is set to False and
# it queues calls:
client.ready = False
f1 = self.call('foo', 1, 2)
self.assertFalse(f1.done())
@@ -195,7 +204,7 @@ class AsyncTests(setupstack.TestCase, ClientRunner):
self.assertEqual(parse(transport.pop()),
(8, False, 'tpc_finish', (b'd'*8,)))
respond(8, b'e'*8)
self.assertEqual(committed.result(), None)
self.assertEqual(committed.result(), b'e'*8)
self.assertEqual(cache.load(b'1'*8), None)
self.assertEqual(cache.load(b'2'*8), ('committed 2', b'e'*8))
self.assertEqual(cache.load(b'4'*8), ('committed 4', b'e'*8))
......
@@ -2,17 +2,14 @@ Avoiding cache verification
=============================
For large databases it is common to also use very large ZEO cache
files. If a client has been disconnected for too long, cache verification
might be necessary, but cache verification can be very hard on the
storage server.
files. If a client has been disconnected for too long, the server
can't play back missing invalidations. In this case, the cache is
cleared. When this happens, a ZEO.interfaces.StaleCache event is
published, largely for backward compatibility.
When verification is needed, a ZEO.interfaces.StaleCache event is
published. Applications may handle this event to perform actions such
as exiting the process to avoid a cold restart.
ClientStorage provides an option to drop its cache rather than doing
verification. When this option is used, and verification would be
necessary, after publishing the event, ClientStorage:
ClientStorage used to provide an option to drop its cache rather than
doing verification. This is now the only behavior. Cache
verification is no longer supported.
- Invalidates all object caches
@@ -27,8 +24,7 @@ Start a server, create a client to it and commit some data
>>> addr, admin = start_server(keep=1)
>>> import ZEO, transaction
>>> db = ZEO.DB(addr, drop_cache_rather_verify=True, client='cache',
... name='test')
>>> db = ZEO.DB(addr, client='cache', name='test')
>>> wait_connected(db.storage)
>>> conn = db.open()
>>> conn.root()[1] = conn.root().__class__()
@@ -58,11 +54,11 @@ logging and event data:
>>> import logging, zope.testing.loggingsupport, ZODB.event
>>> handler = zope.testing.loggingsupport.InstalledHandler(
... 'ZEO.ClientStorage', level=logging.ERROR)
... 'ZEO', level=logging.ERROR)
>>> events = []
>>> def event_handler(e):
... events.append((
... len(e.storage._cache), str(handler), e.__class__.__name__))
... len(e.storage._server.client.cache), str(handler), e.__class__.__name__))
>>> old_notify = ZODB.event.notify
>>> ZODB.event.notify = event_handler
@@ -91,7 +87,7 @@ Now, let's verify our assertions above:
yet.
>>> del events[:]
- Drops or clears its client cache. (The end result is that the cache
is working but empty.)
@@ -105,8 +101,8 @@ Now, let's verify our assertions above:
- Logs a CRITICAL message.
>>> print(handler)
ZEO.ClientStorage CRITICAL
>>> print(handler) # doctest: +ELLIPSIS
ZEO... CRITICAL
test dropping stale cache
>>> handler.clear()
@@ -135,8 +131,8 @@ another client:
>>> db = ZEO.DB(addr, drop_cache_rather_verify=True, client='cache',
... name='test')
>>> wait_connected(db.storage)
- Drops or clears its client cache. (The end result is that the cache
is working but empty.)
@@ -156,8 +152,8 @@ in the database, which is why we get 1, rather than 0 objects in the cache.)
- Logs a CRITICAL message.
>>> print(handler)
ZEO.ClientStorage CRITICAL
>>> print(handler) # doctest: +ELLIPSIS
ZEO... CRITICAL
test dropping stale cache
>>> handler.clear()
@@ -168,49 +164,6 @@ If we access the root object, it'll be loaded from the server:
>>> conn.root()[1].x
11
Finally, let's look at what happens without the
drop_cache_rather_verify option:
>>> db.close()
>>> db = ZEO.DB(addr, client='cache')
>>> wait_connected(db.storage)
>>> conn = db.open()
>>> conn.root()[1].x
11
>>> conn.root()[2] = conn.root().__class__()
>>> transaction.commit()
>>> len(db.storage._cache)
4
>>> stop_server(admin)
>>> addr2, admin = start_server(keep=1)
>>> db2 = ZEO.DB(addr2)
>>> wait_connected(db2.storage)
>>> conn2 = db2.open()
>>> for i in range(5):
... conn2.root()[1].x += 1
... transaction.commit()
>>> db2.close()
>>> stop_server(admin)
>>> _, admin = start_server(zeo_conf=dict(invalidation_queue_size=1),
... addr=addr)
>>> wait_connected(db.storage)
>>> for e in events:
... print(e)
(4, '', 'StaleCache')
>>> print(handler)
<BLANKLINE>
>>> len(db.storage._cache)
3
Here we see the cache wasn't dropped, although one of the records was
invalidated during verification.
.. Cleanup
>>> db.close()
......
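The doctest above exercises the event by replacing ZODB.event.notify wholesale; an application would more likely subscribe to the StaleCache event instead. A minimal sketch, assuming zope.event is installed (ZODB.event.notify falls back to a no-op without it) and using the event's storage attribute as shown in the test:

    import logging
    import zope.event
    import ZEO.interfaces

    log = logging.getLogger(__name__)

    def on_stale_cache(event):
        # Only react to ZEO's stale-cache notification.
        if isinstance(event, ZEO.interfaces.StaleCache):
            # event.storage is the client storage whose cache is being dropped;
            # an application could also exit here to force a clean restart.
            log.warning("ZEO cache for %r is stale and will be dropped",
                        event.storage)

    zope.event.subscribers.append(on_stale_cache)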
@@ -108,9 +108,10 @@ def start_zeo_server(storage_conf=None, zeo_conf=None, port=None, keep=False,
if not storage_conf:
storage_conf = '<filestorage>\npath %s\n</filestorage>' % path
if blob_dir:
storage_conf = '<blobstorage>\nblob-dir %s\n%s\n</blobstorage>' % (
blob_dir, storage_conf)
if blob_dir:
storage_conf = '<blobstorage>\nblob-dir %s\n%s\n</blobstorage>' % (
blob_dir, storage_conf)
if port is None:
raise AssertionError("The port wasn't specified")
......
This diff is collapsed.
@@ -103,7 +103,7 @@ Now, let's see if we can break it. :)
... path = s2.fshelper.getBlobFilename(*blob_id)
... if os.path.exists(path):
... ZODB.blob.remove_committed(path)
... s2._server.sendBlob(*blob_id)
... s2._call('sendBlob', *blob_id)
... else: print('Dang')
>>> threadf.join()
......