Commit e73b5cc6 authored by Jim Fulton's avatar Jim Fulton

ClientStorage now provides blob cache management. When using

non-shared blob directories, you can set a target cache size and the
cache will periodically be reduced to the target size.

To enable blob cache management, a new IBlobStorage method,
openCommittedBlobFile has been added.
parent e270b692
...@@ -33,6 +33,13 @@ New Features ...@@ -33,6 +33,13 @@ New Features
The ordinary file may be used outside the current transaction and The ordinary file may be used outside the current transaction and
even after the blob's database connection has been closed. even after the blob's database connection has been closed.
- ClientStorage now provides blob cache management. When using
non-shared blob directories, you can set a target cache size and the
cache will periodically be reduced to the target size.
The client blob directory layout has changed. If you have existing
non-shared blob directories, you will have to remove them.
Bugs Fixed Bugs Fixed
---------- ----------
......
This diff is collapsed.
ZEO Client Configuration
========================
Here we'll describe (and test) the various ZEO Client configuration
options. To facilitate this, we'll start a server that our client can
connect to:
>>> addr, _ = start_server(blob_dir='server-blobs')
The simplest client configuration specifies a server address:
>>> import ZODB.config
>>> storage = ZODB.config.storageFromString("""
... <zeoclient>
... server %s:%s
... </zeoclient>
... """ % addr)
>>> storage.getName(), storage.__class__.__name__
... # doctest: +ELLIPSIS
("[('localhost', ...)] (connected)", 'ClientStorage')
>>> storage.blob_dir
>>> storage._storage
'1'
>>> storage._cache.maxsize
20971520
>>> storage._cache.path
>>> storage._rpc_mgr.tmin
5
>>> storage._rpc_mgr.tmax
300
>>> storage._is_read_only
False
>>> storage._read_only_fallback
False
>>> storage._drop_cache_rather_verify
False
>>> storage._blob_cache_size
>>> storage.close()
>>> storage = ZODB.config.storageFromString("""
... <zeoclient>
... server %s:%s
... blob-dir blobs
... storage 2
... cache-size 100
... name bob
... client cache
... min-disconnect-poll 1
... max-disconnect-poll 5
... read-only true
... drop-cache-rather-verify true
... blob-cache-size 1000MB
... blob-cache-size-check 10
... wait false
... </zeoclient>
... """ % addr)
>>> storage.getName(), storage.__class__.__name__
('bob (disconnected)', 'ClientStorage')
>>> storage.blob_dir
'blobs'
>>> storage._storage
'2'
>>> storage._cache.maxsize
100
>>> import os
>>> storage._cache.path == os.path.abspath('cache-2.zec')
True
>>> storage._rpc_mgr.tmin
1
>>> storage._rpc_mgr.tmax
5
>>> storage._is_read_only
True
>>> storage._read_only_fallback
False
>>> storage._drop_cache_rather_verify
True
>>> storage._blob_cache_size
1048576000
>>> print storage._blob_cache_size_check
104857600
>>> storage.close()
...@@ -285,7 +285,7 @@ def setUp(test): ...@@ -285,7 +285,7 @@ def setUp(test):
servers = {} servers = {}
def start_server(storage_conf=None, zeo_conf=None, port=None, keep=False, def start_server(storage_conf=None, zeo_conf=None, port=None, keep=False,
addr=None, path='Data.fs', protocol=None): addr=None, path='Data.fs', protocol=None, blob_dir=None):
"""Start a ZEO server. """Start a ZEO server.
Return the server and admin addresses. Return the server and admin addresses.
...@@ -298,7 +298,7 @@ def setUp(test): ...@@ -298,7 +298,7 @@ def setUp(test):
elif addr is not None: elif addr is not None:
raise TypeError("Can't specify port and addr") raise TypeError("Can't specify port and addr")
addr, adminaddr, pid, config_path = start_zeo_server( addr, adminaddr, pid, config_path = start_zeo_server(
storage_conf, zeo_conf, port, keep, path, protocol) storage_conf, zeo_conf, port, keep, path, protocol, blob_dir)
os.remove(config_path) os.remove(config_path)
servers[adminaddr] = pid servers[adminaddr] = pid
return addr, adminaddr return addr, adminaddr
......
...@@ -737,7 +737,11 @@ class BlobAdaptedFileStorageTests(FullGenericTests, CommonBlobTests): ...@@ -737,7 +737,11 @@ class BlobAdaptedFileStorageTests(FullGenericTests, CommonBlobTests):
check_data(filename) check_data(filename)
# ... and on the server # ... and on the server
server_filename = filename.replace(self.blob_cache_dir, self.blobdir) server_filename = os.path.join(
self.blobdir,
ZODB.blob.BushyLayout().getBlobFilePath(oid, revid),
)
self.assert_(server_filename.startswith(self.blobdir)) self.assert_(server_filename.startswith(self.blobdir))
check_data(server_filename) check_data(server_filename)
...@@ -1167,8 +1171,8 @@ def test_suite(): ...@@ -1167,8 +1171,8 @@ def test_suite():
zeo.addTest( zeo.addTest(
doctest.DocFileSuite( doctest.DocFileSuite(
'zeo-fan-out.test', 'zdoptions.test', 'zeo-fan-out.test', 'zdoptions.test',
'drop_cache_rather_than_verify.txt', 'drop_cache_rather_than_verify.txt', 'client-config.test',
'protocols.test', 'protocols.test', 'zeo_blob_cache.test',
setUp=forker.setUp, tearDown=zope.testing.setupstack.tearDown, setUp=forker.setUp, tearDown=zope.testing.setupstack.tearDown,
), ),
) )
......
ZEO caching of blob data
========================
ZEO supports 2 modes for providing clients access to blob data:
shared
Blob data are shared via a network file system. The client shares
a common blob directory with the server.
non-shared
Blob data are loaded from the storage server and cached locally.
A maximum size for the blob data can be set and data are removed
when the size is exceeded.
In this test, we'll demonstrate that blob data are removed from a ZEO
cache when the amount of data stored exceeds a given limit.
Let's start by setting up some data:
>>> addr, _ = start_server(blob_dir='server-blobs')
We'll also create a client.
>>> import ZEO
>>> db = ZEO.DB(addr, blob_dir='blobs',
... blob_cache_size=4000, blob_cache_size_check=10)
Here, we passed a blob_cache_size parameter, which specifies a target
blob cache size. This is not a hard limit, but rather a target. It
defaults to a very large value. We also passed a blob_cache_size_check
option. The blob_cache_size_check option specifies the number of
bytes, as a percent of the target, that can be written or downloaded
from the server before the cache size is checked. The
blob_cache_size_check option defaults to 100. We passed 10, to check
after writing 10% of the target size.
We want to check for name collisions in the blob cache dir. We'll try
to provoke name collisions by reducing the number of cache directory
subdirectories.
>>> import ZEO.ClientStorage
>>> orig_blob_cache_layout_size = ZEO.ClientStorage.BlobCacheLayout.size
>>> ZEO.ClientStorage.BlobCacheLayout.size = 11
Now, let's write some data:
>>> import ZODB.blob, transaction, time
>>> conn = db.open()
>>> for i in range(1, 101):
... conn.root()[i] = ZODB.blob.Blob()
... conn.root()[i].open('w').write(chr(i)*100)
>>> transaction.commit()
We've committed 10000 bytes of data, but our target size is 4000. We
expect to have not much more than the target size in the cache blob
directory.
>>> import os
>>> def cache_size(d):
... size = 0
... for base, dirs, files in os.walk(d):
... for f in files:
... if f.endswith('.blob'):
... size += os.stat(os.path.join(base, f)).st_size
... return size
>>> db.storage._check_blob_size_thread.join()
>>> cache_size('blobs') < 6000
True
If we read all of the blobs, data will be downloaded again, as
necessary, but the cache size will remain not much bigger than the
target:
>>> for i in range(1, 101):
... data = conn.root()[i].open().read()
... if data != chr(i)*100:
... print 'bad data', `chr(i)`, `data`
>>> db.storage._check_blob_size_thread.join()
>>> cache_size('blobs') < 6000
True
>>> for i in range(1, 101):
... data = conn.root()[i].open().read()
... if data != chr(i)*100:
... print 'bad data', `chr(i)`, `data`
>>> db.storage._check_blob_size_thread.join()
>>> for i in range(1, 101):
... data = conn.root()[i].open('c').read()
... if data != chr(i)*100:
... print 'bad data', `chr(i)`, `data`
>>> db.storage._check_blob_size_thread.join()
>>> cache_size('blobs') < 6000
True
>>> for i in range(1, 101):
... data = open(conn.root()[i].committed(), 'rb').read()
... if data != chr(i)*100:
... print 'bad data', `chr(i)`, `data`
>>> db.storage._check_blob_size_thread.join()
>>> cache_size('blobs') < 6000
True
Now let's see if we can stress things a bit. We'll create many clients
and get them to pound on the blobs all at once to see if we can
provoke problems:
>>> import threading, random
>>> def run():
... db = ZEO.DB(addr, blob_dir='blobs',
... blob_cache_size=4000, blob_cache_size_check=10)
... conn = db.open()
... for i in range(300):
... time.sleep(0)
... i = random.randint(1, 100)
... data = conn.root()[i].open().read()
... if data != chr(i)*100:
... print 'bad data', `chr(i)`, `data`
... i = random.randint(1, 100)
... data = conn.root()[i].open('c').read()
... if data != chr(i)*100:
... print 'bad data', `chr(i)`, `data`
... db._storage._check_blob_size_thread.join()
... db.close()
>>> threads = [threading.Thread(target=run) for i in range(10)]
>>> for thread in threads:
... thread.setDaemon(True)
>>> for thread in threads:
... thread.start()
>>> for thread in threads:
... thread.join()
>>> cache_size('blobs') < 6000
True
.. cleanup
>>> db.close()
>>> ZEO.ClientStorage.BlobCacheLayout.size = orig_blob_cache_layout_size
...@@ -38,6 +38,7 @@ from zope.interface import implements ...@@ -38,6 +38,7 @@ from zope.interface import implements
import transaction import transaction
import ZODB
from ZODB.blob import SAVEPOINT_SUFFIX from ZODB.blob import SAVEPOINT_SUFFIX
from ZODB.ConflictResolution import ResolvedSerial from ZODB.ConflictResolution import ResolvedSerial
from ZODB.ExportImport import ExportImport from ZODB.ExportImport import ExportImport
...@@ -1271,6 +1272,13 @@ class TmpStore: ...@@ -1271,6 +1272,13 @@ class TmpStore:
return self._storage.loadBlob(oid, serial) return self._storage.loadBlob(oid, serial)
return filename return filename
def openCommittedBlobFile(self, oid, serial, blob=None):
blob_filename = self.loadBlob(oid, serial)
if blob is None:
return open(blob_filename, 'rb')
else:
return ZODB.blob.BlobFile(blob_filename, 'r', blob)
def _getBlobPath(self): def _getBlobPath(self):
return os.path.join(self.temporaryDirectory(), 'savepoints') return os.path.join(self.temporaryDirectory(), 'savepoints')
......
...@@ -174,7 +174,21 @@ class DemoStorage(object): ...@@ -174,7 +174,21 @@ class DemoStorage(object):
if self._blobify(): if self._blobify():
return self.loadBlob(oid, serial) return self.loadBlob(oid, serial)
raise raise
def openCommittedBlobFile(self, oid, serial, blob=None):
    """Open the committed blob file for ``oid`` at revision ``serial``.

    The ``changes`` storage is asked first; if it raises POSKeyError the
    request is retried against the ``base`` storage.  ``blob`` is passed
    through unchanged to whichever storage serves the request.

    Raises POSKeyError when ``base`` has no ``openCommittedBlobFile``
    and does not provide IBlobStorage.  An AttributeError raised by a
    storage that does provide IBlobStorage is re-raised untouched.
    """
    try:
        return self.changes.openCommittedBlobFile(oid, serial, blob)
    except ZODB.POSException.POSKeyError:
        try:
            return self.base.openCommittedBlobFile(oid, serial, blob)
        except AttributeError:
            # Use a ``from`` import: a function-local ``import
            # ZODB.interfaces`` would rebind ``ZODB`` as a local name and
            # break the ``except ZODB.POSException...`` clauses above.
            from ZODB.interfaces import IBlobStorage
            # A base without blob support simply has no such blob, so
            # report a missing key.  Otherwise the AttributeError came
            # from inside a real blob storage and must not be hidden.
            if not IBlobStorage.providedBy(self.base):
                raise ZODB.POSException.POSKeyError(oid, serial)
            raise
    except AttributeError:
        # ``changes`` has no openCommittedBlobFile.  Retry once
        # ``_blobify()`` reports success -- presumably it swaps in a
        # blob-capable ``changes`` storage; TODO confirm against
        # _blobify's definition, which is outside this view.
        if self._blobify():
            return self.openCommittedBlobFile(oid, serial, blob)
        raise
def loadSerial(self, oid, serial): def loadSerial(self, oid, serial):
try: try:
......
...@@ -120,7 +120,15 @@ class Blob(persistent.Persistent): ...@@ -120,7 +120,15 @@ class Blob(persistent.Persistent):
raise ValueError("invalid mode", mode) raise ValueError("invalid mode", mode)
if mode == 'c': if mode == 'c':
return open(self.committed(), 'rb') if (self._p_blob_uncommitted
or
not self._p_blob_committed
or
self._p_blob_committed.endswith(SAVEPOINT_SUFFIX)
):
raise BlobError('Uncommitted changes')
return self._p_jar._storage.openCommittedBlobFile(
self._p_oid, self._p_serial)
if self.writers: if self.writers:
raise BlobError("Already opened for writing.") raise BlobError("Already opened for writing.")
...@@ -129,10 +137,20 @@ class Blob(persistent.Persistent): ...@@ -129,10 +137,20 @@ class Blob(persistent.Persistent):
self.readers = [] self.readers = []
if mode == 'r': if mode == 'r':
if self._current_filename() is None: result = None
self._create_uncommitted_file() to_open = self._p_blob_uncommitted
if not to_open:
to_open = self._p_blob_committed
if to_open:
result = self._p_jar._storage.openCommittedBlobFile(
self._p_oid, self._p_serial, self)
else:
self._create_uncommitted_file()
to_open = self._p_blob_uncommitted
assert to_open
result = BlobFile(self._current_filename(), mode, self) if result is None:
result = BlobFile(to_open, mode, self)
def destroyed(ref, readers=self.readers): def destroyed(ref, readers=self.readers):
try: try:
...@@ -181,7 +199,15 @@ class Blob(persistent.Persistent): ...@@ -181,7 +199,15 @@ class Blob(persistent.Persistent):
self._p_blob_committed.endswith(SAVEPOINT_SUFFIX) self._p_blob_committed.endswith(SAVEPOINT_SUFFIX)
): ):
raise BlobError('Uncommitted changes') raise BlobError('Uncommitted changes')
return self._p_blob_committed
result = self._p_blob_committed
# We do this to make sure we have the file and to let the
# storage know we're accessing the file.
n = self._p_jar._storage.loadBlob(self._p_oid, self._p_serial)
assert result == n, (result, n)
return result
def consumeFile(self, filename): def consumeFile(self, filename):
"""Will replace the current data of the blob with the file given under """Will replace the current data of the blob with the file given under
...@@ -234,11 +260,6 @@ class Blob(persistent.Persistent): ...@@ -234,11 +260,6 @@ class Blob(persistent.Persistent):
# utility methods # utility methods
def _current_filename(self):
# NOTE: _p_blob_committed and _p_blob_uncommitted appear by virtue of
# Connection._setstate
return self._p_blob_uncommitted or self._p_blob_committed
def _create_uncommitted_file(self): def _create_uncommitted_file(self):
assert self._p_blob_uncommitted is None, ( assert self._p_blob_uncommitted is None, (
"Uncommitted file already exists.") "Uncommitted file already exists.")
...@@ -391,13 +412,15 @@ class FilesystemHelper: ...@@ -391,13 +412,15 @@ class FilesystemHelper:
'committed' blob file related to that oid and tid. 'committed' blob file related to that oid and tid.
""" """
oid_path = self.getPathForOID(oid)
# TIDs are numbers and sometimes passed around as integers. For our # TIDs are numbers and sometimes passed around as integers. For our
# computations we rely on the 64-bit packed string representation # computations we rely on the 64-bit packed string representation
if isinstance(oid, int):
oid = utils.p64(oid)
if isinstance(tid, int): if isinstance(tid, int):
tid = utils.p64(tid) tid = utils.p64(tid)
filename = "%s%s" % (utils.tid_repr(tid), BLOB_SUFFIX) return os.path.join(self.base_dir,
return os.path.join(oid_path, filename) self.layout.getBlobFilePath(oid, tid),
)
def blob_mkstemp(self, oid, tid): def blob_mkstemp(self, oid, tid):
"""Given an oid and a tid, return a temporary file descriptor """Given an oid and a tid, return a temporary file descriptor
...@@ -516,10 +539,18 @@ class BushyLayout(object): ...@@ -516,10 +539,18 @@ class BushyLayout(object):
oid = ''.join(binascii.unhexlify(byte[2:]) for byte in path) oid = ''.join(binascii.unhexlify(byte[2:]) for byte in path)
return oid return oid
LAYOUTS['bushy'] = BushyLayout() def getBlobFilePath(self, oid, tid):
"""Given an oid and a tid, return the full filename of the
'committed' blob file related to that oid and tid.
"""
oid_path = self.oid_to_path(oid)
filename = "%s%s" % (utils.tid_repr(tid), BLOB_SUFFIX)
return os.path.join(oid_path, filename)
LAYOUTS['bushy'] = BushyLayout()
class LawnLayout(object): class LawnLayout(BushyLayout):
"""A shallow directory layout for blob directories. """A shallow directory layout for blob directories.
Creates a single level of directories (one for each oid). Creates a single level of directories (one for each oid).
...@@ -672,6 +703,14 @@ class BlobStorage(SpecificationDecoratorBase): ...@@ -672,6 +703,14 @@ class BlobStorage(SpecificationDecoratorBase):
raise POSKeyError("No blob file", oid, serial) raise POSKeyError("No blob file", oid, serial)
return filename return filename
@non_overridable
def openCommittedBlobFile(self, oid, serial, blob=None):
blob_filename = self.loadBlob(oid, serial)
if blob is None:
return open(blob_filename, 'rb')
else:
return BlobFile(blob_filename, 'r', blob)
@non_overridable @non_overridable
def _packUndoing(self, packtime, referencesf): def _packUndoing(self, packtime, referencesf):
# Walk over all existing revisions of all blob files and check # Walk over all existing revisions of all blob files and check
......
...@@ -93,7 +93,24 @@ ...@@ -93,7 +93,24 @@
but only the filename when committing. but only the filename when committing.
</description> </description>
</key> </key>
<key name="blob-cache-size" required="no" datatype="byte-size">
<description>
Maximum size of the ZEO blob cache, in bytes. If not set, then
the cache size isn't checked and the blob directory will
grow without bound.
This option is ignored if shared_blob_dir is true.
</description>
</key>
<key name="blob-cache-size-check" required="no" datatype="integer">
<description>
ZEO check size as percent of blob_cache_size. The ZEO
cache size will be checked when this many bytes have been
loaded into the cache. Defaults to 100% of the blob cache
size. This option is ignored if shared_blob_dir is true.
</description>
</key>
<key name="storage" default="1"> <key name="storage" default="1">
<description> <description>
The name of the storage that the client wants to use. If the The name of the storage that the client wants to use. If the
......
...@@ -164,6 +164,12 @@ class ZEOClient(BaseConfig): ...@@ -164,6 +164,12 @@ class ZEOClient(BaseConfig):
# config.server is a multikey of socket-connection-address values # config.server is a multikey of socket-connection-address values
# where the value is a socket family, address tuple. # where the value is a socket family, address tuple.
L = [server.address for server in self.config.server] L = [server.address for server in self.config.server]
options = {}
if self.config.blob_cache_size is not None:
options['blob_cache_size'] = self.config.blob_cache_size
if self.config.blob_cache_size_check is not None:
options['blob_cache_size_check'] = self.config.blob_cache_size_check
return ClientStorage( return ClientStorage(
L, L,
blob_dir=self.config.blob_dir, blob_dir=self.config.blob_dir,
...@@ -181,7 +187,8 @@ class ZEOClient(BaseConfig): ...@@ -181,7 +187,8 @@ class ZEOClient(BaseConfig):
drop_cache_rather_verify=self.config.drop_cache_rather_verify, drop_cache_rather_verify=self.config.drop_cache_rather_verify,
username=self.config.username, username=self.config.username,
password=self.config.password, password=self.config.password,
realm=self.config.realm) realm=self.config.realm,
**options)
class BDBStorage(BaseConfig): class BDBStorage(BaseConfig):
......
...@@ -1034,6 +1034,18 @@ class IBlobStorage(Interface): ...@@ -1034,6 +1034,18 @@ class IBlobStorage(Interface):
Raises POSKeyError if the blobfile cannot be found. Raises POSKeyError if the blobfile cannot be found.
""" """
def openCommittedBlobFile(oid, serial, blob=None):
"""Return a file for committed data for the given object id and serial
If a blob is provided, then a BlobFile object is returned,
otherwise, an ordinary file is returned. In either case, the
file is opened for binary reading.
This method is used to allow storages that cache blob data to
make sure that data are available at least long enough for the
file to be opened.
"""
def temporaryDirectory(): def temporaryDirectory():
"""Return a directory that should be used for uncommitted blob data. """Return a directory that should be used for uncommitted blob data.
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
# #
############################################################################## ##############################################################################
import os
import transaction import transaction
import unittest import unittest
import ZEO.ClientStorage import ZEO.ClientStorage
...@@ -115,15 +116,16 @@ class ZEOConfigTest(ConfigTestBase): ...@@ -115,15 +116,16 @@ class ZEOConfigTest(ConfigTestBase):
cfg = """ cfg = """
<zodb> <zodb>
<zeoclient> <zeoclient>
blob-dir /tmp blob-dir blobs
server localhost:56897 server localhost:56897
wait false wait false
</zeoclient> </zeoclient>
</zodb> </zodb>
""" """
config, handle = ZConfig.loadConfigFile(getDbSchema(), StringIO(cfg)) config, handle = ZConfig.loadConfigFile(getDbSchema(), StringIO(cfg))
self.assertEqual(config.database.config.storage.config.blob_dir, self.assertEqual(
'/tmp') os.path.abspath(config.database.config.storage.config.blob_dir),
os.path.abspath('blobs'))
self.assertRaises(ClientDisconnected, self._test, cfg) self.assertRaises(ClientDisconnected, self._test, cfg)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment