Commit e73b5cc6 authored by Jim Fulton's avatar Jim Fulton

ClientStorage now provides blob cache management. When using

non-shared blob directories, you can set a target cache size and the
cache will periodically be reduced to the target size.

To enable blob cache management, a new IBlobStorage method,
openCommittedBlobFile has been added.
parent e270b692
......@@ -33,6 +33,13 @@ New Features
The ordinary file may be used outside the current transaction and
even after the blob's database connection has been closed.
- ClientStorage now provides blob cache management. When using
non-shared blob directories, you can set a target cache size and the
cache will periodically be reduced to the target size.
The client blob directory layout has changed. If you have existing
non-shared blob directories, you will have to remove them.
Bugs Fixed
----------
......
This diff is collapsed.
ZEO Client Configuration
========================
Here we'll describe (and test) the various ZEO Client configuration
options. To facilitate this, we'l start a server that our client can
connect to:
>>> addr, _ = start_server(blob_dir='server-blobs')
The simplest client configuration specified a server address:
>>> import ZODB.config
>>> storage = ZODB.config.storageFromString("""
... <zeoclient>
... server %s:%s
... </zeoclient>
... """ % addr)
>>> storage.getName(), storage.__class__.__name__
... # doctest: +ELLIPSIS
("[('localhost', ...)] (connected)", 'ClientStorage')
>>> storage.blob_dir
>>> storage._storage
'1'
>>> storage._cache.maxsize
20971520
>>> storage._cache.path
>>> storage._rpc_mgr.tmin
5
>>> storage._rpc_mgr.tmax
300
>>> storage._is_read_only
False
>>> storage._read_only_fallback
False
>>> storage._drop_cache_rather_verify
False
>>> storage._blob_cache_size
>>> storage.close()
>>> storage = ZODB.config.storageFromString("""
... <zeoclient>
... server %s:%s
... blob-dir blobs
... storage 2
... cache-size 100
... name bob
... client cache
... min-disconnect-poll 1
... max-disconnect-poll 5
... read-only true
... drop-cache-rather-verify true
... blob-cache-size 1000MB
... blob-cache-size-check 10
... wait false
... </zeoclient>
... """ % addr)
>>> storage.getName(), storage.__class__.__name__
('bob (disconnected)', 'ClientStorage')
>>> storage.blob_dir
'blobs'
>>> storage._storage
'2'
>>> storage._cache.maxsize
100
>>> import os
>>> storage._cache.path == os.path.abspath('cache-2.zec')
True
>>> storage._rpc_mgr.tmin
1
>>> storage._rpc_mgr.tmax
5
>>> storage._is_read_only
True
>>> storage._read_only_fallback
False
>>> storage._drop_cache_rather_verify
True
>>> storage._blob_cache_size
1048576000
>>> print storage._blob_cache_size_check
104857600
>>> storage.close()
......@@ -285,7 +285,7 @@ def setUp(test):
servers = {}
def start_server(storage_conf=None, zeo_conf=None, port=None, keep=False,
addr=None, path='Data.fs', protocol=None):
addr=None, path='Data.fs', protocol=None, blob_dir=None):
"""Start a ZEO server.
Return the server and admin addresses.
......@@ -298,7 +298,7 @@ def setUp(test):
elif addr is not None:
raise TypeError("Can't specify port and addr")
addr, adminaddr, pid, config_path = start_zeo_server(
storage_conf, zeo_conf, port, keep, path, protocol)
storage_conf, zeo_conf, port, keep, path, protocol, blob_dir)
os.remove(config_path)
servers[adminaddr] = pid
return addr, adminaddr
......
......@@ -737,7 +737,11 @@ class BlobAdaptedFileStorageTests(FullGenericTests, CommonBlobTests):
check_data(filename)
# ... and on the server
server_filename = filename.replace(self.blob_cache_dir, self.blobdir)
server_filename = os.path.join(
self.blobdir,
ZODB.blob.BushyLayout().getBlobFilePath(oid, revid),
)
self.assert_(server_filename.startswith(self.blobdir))
check_data(server_filename)
......@@ -1167,8 +1171,8 @@ def test_suite():
zeo.addTest(
doctest.DocFileSuite(
'zeo-fan-out.test', 'zdoptions.test',
'drop_cache_rather_than_verify.txt',
'protocols.test',
'drop_cache_rather_than_verify.txt', 'client-config.test',
'protocols.test', 'zeo_blob_cache.test',
setUp=forker.setUp, tearDown=zope.testing.setupstack.tearDown,
),
)
......
ZEO caching of blob data
========================
ZEO supports 2 modes for providing clients access to blob data:
shared
Blob data are shared via a network file system. The client shares
a common blob directory with the server.
non-shared
Blob data are loaded from the storage server and cached locally.
A maximum size for the blob data can be set and data are removed
when the size is exceeded.
In this test, we'll demonstrate that blobs data are removed from a ZEO
cache when the amount of data stored exceeds a given limit.
Let's start by setting up some data:
>>> addr, _ = start_server(blob_dir='server-blobs')
We'll also create a client.
>>> import ZEO
>>> db = ZEO.DB(addr, blob_dir='blobs',
... blob_cache_size=4000, blob_cache_size_check=10)
Here, we passed a blob_cache_size parameter, which specifies a target
blob cache size. This is not a hard limit, but rather a target. It
defaults to a very large value. We also passed a blob_cache_size_check
option. The blob_cache_size_check option specifies the number of
bytes, as a percent of the target that can be written or downloaded
from the server before the cache size is checked. The
blob_cache_size_check option defaults to 100. We passed 10, to check
after writing 10% of the target size.
We want to check for name collections in the blob cache dir. We'll try
to provoke name collections by reducing the number of cache directory
subdirectories.
>>> import ZEO.ClientStorage
>>> orig_blob_cache_layout_size = ZEO.ClientStorage.BlobCacheLayout.size
>>> ZEO.ClientStorage.BlobCacheLayout.size = 11
Now, let's write some data:
>>> import ZODB.blob, transaction, time
>>> conn = db.open()
>>> for i in range(1, 101):
... conn.root()[i] = ZODB.blob.Blob()
... conn.root()[i].open('w').write(chr(i)*100)
>>> transaction.commit()
We've committed 10000 bytes of data, but our target size is 4000. We
expect to have not much more than the target size in the cache blob
directory.
>>> import os
>>> def cache_size(d):
... size = 0
... for base, dirs, files in os.walk(d):
... for f in files:
... if f.endswith('.blob'):
... size += os.stat(os.path.join(base, f)).st_size
... return size
>>> db.storage._check_blob_size_thread.join()
>>> cache_size('blobs') < 6000
True
If we read all of the blobs, data will be downloaded again, as
necessary, but the cache size will remain not much bigger than the
target:
>>> for i in range(1, 101):
... data = conn.root()[i].open().read()
... if data != chr(i)*100:
... print 'bad data', `chr(i)`, `data`
>>> db.storage._check_blob_size_thread.join()
>>> cache_size('blobs') < 6000
True
>>> for i in range(1, 101):
... data = conn.root()[i].open().read()
... if data != chr(i)*100:
... print 'bad data', `chr(i)`, `data`
>>> db.storage._check_blob_size_thread.join()
>>> for i in range(1, 101):
... data = conn.root()[i].open('c').read()
... if data != chr(i)*100:
... print 'bad data', `chr(i)`, `data`
>>> db.storage._check_blob_size_thread.join()
>>> cache_size('blobs') < 6000
True
>>> for i in range(1, 101):
... data = open(conn.root()[i].committed(), 'rb').read()
... if data != chr(i)*100:
... print 'bad data', `chr(i)`, `data`
>>> db.storage._check_blob_size_thread.join()
>>> cache_size('blobs') < 6000
True
Now let see if we can stress things a bit. We'll create many clients
and get them to pound on the blobs all at once to see if we can
provoke problems:
>>> import threading, random
>>> def run():
... db = ZEO.DB(addr, blob_dir='blobs',
... blob_cache_size=4000, blob_cache_size_check=10)
... conn = db.open()
... for i in range(300):
... time.sleep(0)
... i = random.randint(1, 100)
... data = conn.root()[i].open().read()
... if data != chr(i)*100:
... print 'bad data', `chr(i)`, `data`
... i = random.randint(1, 100)
... data = conn.root()[i].open('c').read()
... if data != chr(i)*100:
... print 'bad data', `chr(i)`, `data`
... db._storage._check_blob_size_thread.join()
... db.close()
>>> threads = [threading.Thread(target=run) for i in range(10)]
>>> for thread in threads:
... thread.setDaemon(True)
>>> for thread in threads:
... thread.start()
>>> for thread in threads:
... thread.join()
>>> cache_size('blobs') < 6000
True
.. cleanup
>>> db.close()
>>> ZEO.ClientStorage.BlobCacheLayout.size = orig_blob_cache_layout_size
......@@ -38,6 +38,7 @@ from zope.interface import implements
import transaction
import ZODB
from ZODB.blob import SAVEPOINT_SUFFIX
from ZODB.ConflictResolution import ResolvedSerial
from ZODB.ExportImport import ExportImport
......@@ -1271,6 +1272,13 @@ class TmpStore:
return self._storage.loadBlob(oid, serial)
return filename
def openCommittedBlobFile(self, oid, serial, blob=None):
blob_filename = self.loadBlob(oid, serial)
if blob is None:
return open(blob_filename, 'rb')
else:
return ZODB.blob.BlobFile(blob_filename, 'r', blob)
def _getBlobPath(self):
return os.path.join(self.temporaryDirectory(), 'savepoints')
......
......@@ -175,6 +175,20 @@ class DemoStorage(object):
return self.loadBlob(oid, serial)
raise
def openCommittedBlobFile(self, oid, serial, blob=None):
try:
return self.changes.openCommittedBlobFile(oid, serial, blob)
except ZODB.POSException.POSKeyError:
try:
return self.base.openCommittedBlobFile(oid, serial, blob)
except AttributeError:
if not zope.interface.IBlobStorage.providBy(self.base):
raise ZODB.POSException.POSKeyError(oid, serial)
raise
except AttributeError:
if self._blobify():
return self.openCommittedBlobFile(oid, serial, blob)
raise
def loadSerial(self, oid, serial):
try:
......
......@@ -120,7 +120,15 @@ class Blob(persistent.Persistent):
raise ValueError("invalid mode", mode)
if mode == 'c':
return open(self.committed(), 'rb')
if (self._p_blob_uncommitted
or
not self._p_blob_committed
or
self._p_blob_committed.endswith(SAVEPOINT_SUFFIX)
):
raise BlobError('Uncommitted changes')
return self._p_jar._storage.openCommittedBlobFile(
self._p_oid, self._p_serial)
if self.writers:
raise BlobError("Already opened for writing.")
......@@ -129,10 +137,20 @@ class Blob(persistent.Persistent):
self.readers = []
if mode == 'r':
if self._current_filename() is None:
result = None
to_open = self._p_blob_uncommitted
if not to_open:
to_open = self._p_blob_committed
if to_open:
result = self._p_jar._storage.openCommittedBlobFile(
self._p_oid, self._p_serial, self)
else:
self._create_uncommitted_file()
to_open = self._p_blob_uncommitted
assert to_open
result = BlobFile(self._current_filename(), mode, self)
if result is None:
result = BlobFile(to_open, mode, self)
def destroyed(ref, readers=self.readers):
try:
......@@ -181,7 +199,15 @@ class Blob(persistent.Persistent):
self._p_blob_committed.endswith(SAVEPOINT_SUFFIX)
):
raise BlobError('Uncommitted changes')
return self._p_blob_committed
result = self._p_blob_committed
# We do this to make sure we have the file and to let the
# storage know we're accessing the file.
n = self._p_jar._storage.loadBlob(self._p_oid, self._p_serial)
assert result == n, (result, n)
return result
def consumeFile(self, filename):
"""Will replace the current data of the blob with the file given under
......@@ -234,11 +260,6 @@ class Blob(persistent.Persistent):
# utility methods
def _current_filename(self):
# NOTE: _p_blob_committed and _p_blob_uncommitted appear by virtue of
# Connection._setstate
return self._p_blob_uncommitted or self._p_blob_committed
def _create_uncommitted_file(self):
assert self._p_blob_uncommitted is None, (
"Uncommitted file already exists.")
......@@ -391,13 +412,15 @@ class FilesystemHelper:
'committed' blob file related to that oid and tid.
"""
oid_path = self.getPathForOID(oid)
# TIDs are numbers and sometimes passed around as integers. For our
# computations we rely on the 64-bit packed string representation
if isinstance(oid, int):
oid = utils.p64(oid)
if isinstance(tid, int):
tid = utils.p64(tid)
filename = "%s%s" % (utils.tid_repr(tid), BLOB_SUFFIX)
return os.path.join(oid_path, filename)
return os.path.join(self.base_dir,
self.layout.getBlobFilePath(oid, tid),
)
def blob_mkstemp(self, oid, tid):
"""Given an oid and a tid, return a temporary file descriptor
......@@ -516,10 +539,18 @@ class BushyLayout(object):
oid = ''.join(binascii.unhexlify(byte[2:]) for byte in path)
return oid
LAYOUTS['bushy'] = BushyLayout()
def getBlobFilePath(self, oid, tid):
"""Given an oid and a tid, return the full filename of the
'committed' blob file related to that oid and tid.
"""
oid_path = self.oid_to_path(oid)
filename = "%s%s" % (utils.tid_repr(tid), BLOB_SUFFIX)
return os.path.join(oid_path, filename)
LAYOUTS['bushy'] = BushyLayout()
class LawnLayout(object):
class LawnLayout(BushyLayout):
"""A shallow directory layout for blob directories.
Creates a single level of directories (one for each oid).
......@@ -672,6 +703,14 @@ class BlobStorage(SpecificationDecoratorBase):
raise POSKeyError("No blob file", oid, serial)
return filename
@non_overridable
def openCommittedBlobFile(self, oid, serial, blob=None):
blob_filename = self.loadBlob(oid, serial)
if blob is None:
return open(blob_filename, 'rb')
else:
return BlobFile(blob_filename, 'r', blob)
@non_overridable
def _packUndoing(self, packtime, referencesf):
# Walk over all existing revisions of all blob files and check
......
......@@ -93,6 +93,23 @@
but only the filename when committing.
</description>
</key>
<key name="blob-cache-size" required="no" datatype="byte-size">
<description>
Maximum size of the ZEO blob cache, in bytes. If not set, then
the cache size isn't checked and the blob directory will
grow without bound.
This option is ignored if shared_blob_dir is true.
</description>
</key>
<key name="blob-cache-size-check" required="no" datatype="integer">
<description>
ZEO check size as percent of blob_cache_size. The ZEO
cache size will be checked when this many bytes have been
loaded into the cache. Defaults to 100% of the blob cache
size. This option is ignored if shared_blob_dir is true.
</description>
</key>
<key name="storage" default="1">
<description>
......
......@@ -164,6 +164,12 @@ class ZEOClient(BaseConfig):
# config.server is a multikey of socket-connection-address values
# where the value is a socket family, address tuple.
L = [server.address for server in self.config.server]
options = {}
if self.config.blob_cache_size is not None:
options['blob_cache_size'] = self.config.blob_cache_size
if self.config.blob_cache_size_check is not None:
options['blob_cache_size_check'] = self.config.blob_cache_size_check
return ClientStorage(
L,
blob_dir=self.config.blob_dir,
......@@ -181,7 +187,8 @@ class ZEOClient(BaseConfig):
drop_cache_rather_verify=self.config.drop_cache_rather_verify,
username=self.config.username,
password=self.config.password,
realm=self.config.realm)
realm=self.config.realm,
**options)
class BDBStorage(BaseConfig):
......
......@@ -1034,6 +1034,18 @@ class IBlobStorage(Interface):
Raises POSKeyError if the blobfile cannot be found.
"""
def openCommittedBlobFile(oid, serial, blob=None):
"""Return a file for committed data for the given object id and serial
If a blob is provided, then a BlobFile object is returned,
otherwise, an ordinary file is returned. In either case, the
file is opened for binary reading.
This method is used to allow storages that cache blob data to
make sure that data are available at least long enough for the
file to be opened.
"""
def temporaryDirectory():
"""Return a directory that should be used for uncommitted blob data.
......
......@@ -12,6 +12,7 @@
#
##############################################################################
import os
import transaction
import unittest
import ZEO.ClientStorage
......@@ -115,15 +116,16 @@ class ZEOConfigTest(ConfigTestBase):
cfg = """
<zodb>
<zeoclient>
blob-dir /tmp
blob-dir blobs
server localhost:56897
wait false
</zeoclient>
</zodb>
"""
config, handle = ZConfig.loadConfigFile(getDbSchema(), StringIO(cfg))
self.assertEqual(config.database.config.storage.config.blob_dir,
'/tmp')
self.assertEqual(
os.path.abspath(config.database.config.storage.config.blob_dir),
os.path.abspath('blobs'))
self.assertRaises(ClientDisconnected, self._test, cfg)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment