Commit 5d71ba3f authored by Jim Fulton's avatar Jim Fulton

- FileStorage now provides optional garbage collection. A 'gc'

  keyword option can be passed to the pack method.  A false value
  prevents garbage collection.

- The FileStorage constructor now provides a boolean pack_gc option,
  which defaults to True, to control whether garbage collection is
  performed when packing by default. This can be overridden with the
  gc option to the pack method.

The ZConfig configuration for FileStorage now includes a pack-gc
  option, corresponding to the pack_gc constructor argument.

- The FileStorage constructor now has a packer keyword argument that
  allows an alternative packer to be supplied.

The ZConfig configuration for FileStorage now includes a packer
  option, corresponding to the packer constructor argument.
parent 2eda1395
......@@ -22,6 +22,27 @@ New Features
XXX There are known issues with this implementation that need to be
sorted out before it is "released".
New Features
------------
- FileStorage now provides optional garbage collection. A 'gc'
keyword option can be passed to the pack method. A false value
prevents garbage collection.
- The FileStorage constructor now provides a boolean pack_gc option,
which defaults to True, to control whether garbage collection is
performed when packing by default. This can be overridden with the
gc option to the pack method.
The ZConfig configuration for FileStorage now includes a pack-gc
option, corresponding to the pack_gc constructor argument.
- The FileStorage constructor now has a packer keyword argument that
allows an alternative packer to be supplied.
The ZConfig configuration for FileStorage now includes a packer
option, corresponding to the packer constructor argument.
3.9.0a2 (2008-10-31)
====================
......
......@@ -102,7 +102,7 @@ class FileStorage(BaseStorage.BaseStorage,
_pack_is_in_progress = False
def __init__(self, file_name, create=False, read_only=False, stop=None,
quota=None):
quota=None, pack_gc=True, packer=None):
if read_only:
self._is_read_only = True
......@@ -125,6 +125,10 @@ class FileStorage(BaseStorage.BaseStorage,
self._file_name = file_name
self._pack_gc = pack_gc
if packer is not None:
self.packer = packer
BaseStorage.BaseStorage.__init__(self, file_name)
index, tindex = self._newIndexes()
......@@ -979,7 +983,26 @@ class FileStorage(BaseStorage.BaseStorage,
file.seek(pos - p + 8)
return file.read(1) not in ' u'
def pack(self, t, referencesf):
@staticmethod
def packer(storage, referencesf, stop, gc):
    """Default IFileStoragePacker implementation.

    Adapts the legacy FileStoragePacker interface to the pluggable
    packer protocol: returns None when packing would not change the
    file, otherwise a (new file size, packed index) pair.
    """
    # We wrap the original packer rather than rewriting it; we don't
    # really want to invest much in the old packer, at least for now.
    legacy = FileStoragePacker(
        storage._file.name,
        stop,
        storage._lock_acquire,
        storage._lock_release,
        storage._commit_lock_acquire,
        storage._commit_lock_release,
        storage.getSize(),
        gc)
    new_pos = legacy.pack()
    if new_pos is None:
        return None
    return new_pos, legacy.index
def pack(self, t, referencesf, gc=None):
"""Copy data from the current database file to a packed file
Non-current records from transactions with time-stamp strings less
......@@ -1003,23 +1026,23 @@ class FileStorage(BaseStorage.BaseStorage,
if self._pack_is_in_progress:
raise FileStorageError('Already packing')
self._pack_is_in_progress = True
current_size = self.getSize()
finally:
self._lock_release()
p = FileStoragePacker(self._file_name, stop,
self._lock_acquire, self._lock_release,
self._commit_lock_acquire,
self._commit_lock_release,
current_size)
if gc is None:
gc = self._pack_gc
have_commit_lock = False
try:
opos = None
pack_result = None
try:
opos = p.pack()
pack_result = self.packer(self, referencesf, stop, gc)
except RedundantPackWarning, detail:
logger.info(str(detail))
if opos is None:
if pack_result is None:
return
have_commit_lock = True
opos, index = pack_result
oldpath = self._file_name + ".old"
self._lock_acquire()
try:
......@@ -1035,13 +1058,13 @@ class FileStorage(BaseStorage.BaseStorage,
# OK, we're beyond the point of no return
os.rename(self._file_name + '.pack', self._file_name)
self._file = open(self._file_name, 'r+b')
self._initIndex(p.index, p.tindex)
self._initIndex(index, self._tindex)
self._pos = opos
self._save_index()
finally:
self._lock_release()
finally:
if p.locked:
if have_commit_lock:
self._commit_lock_release()
self._lock_acquire()
self._pack_is_in_progress = False
......
......@@ -139,11 +139,12 @@ class PackCopier(FileStorageFormatter):
class GC(FileStorageFormatter):
def __init__(self, file, eof, packtime):
def __init__(self, file, eof, packtime, gc):
self._file = file
self._name = file.name
self.eof = eof
self.packtime = packtime
self.gc = gc
# packpos: position of first txn header after pack time
self.packpos = None
self.oid2curpos = fsIndex() # maps oid to current data record position
......@@ -157,7 +158,6 @@ class GC(FileStorageFormatter):
# second is a dictionary mapping objects to lists of
# positions; it is used to handle the same number of objects
# for which we must keep multiple revisions.
self.reachable = fsIndex()
self.reach_ex = {}
......@@ -176,11 +176,14 @@ class GC(FileStorageFormatter):
def findReachable(self):
self.buildPackIndex()
if self.gc:
self.findReachableAtPacktime([z64])
self.findReachableFromFuture()
# These mappings are no longer needed and may consume a lot
# of space.
# These mappings are no longer needed and may consume a lot of
# space.
del self.oid2curpos
else:
self.reachable = self.oid2curpos
def buildPackIndex(self):
pos = 4L
......@@ -320,7 +323,7 @@ class FileStoragePacker(FileStorageFormatter):
# current_size is the storage's _pos. All valid data at the start
# lives before that offset (there may be a checkpoint transaction in
# progress after it).
def __init__(self, path, stop, la, lr, cla, clr, current_size):
def __init__(self, path, stop, la, lr, cla, clr, current_size, gc=True):
self._name = path
# We open our own handle on the storage so that much of pack can
# proceed in parallel. It's important to close this file at every
......@@ -329,10 +332,10 @@ class FileStoragePacker(FileStorageFormatter):
self._file = open(path, "rb")
self._path = path
self._stop = stop
self.locked = 0
self.locked = False
self.file_end = current_size
self.gc = GC(self._file, self.file_end, self._stop)
self.gc = GC(self._file, self.file_end, self._stop, gc)
# The packer needs to acquire the parent's commit lock
# during the copying stage, so the two sets of lock acquire
......@@ -386,22 +389,25 @@ class FileStoragePacker(FileStorageFormatter):
os.remove(self._name + ".pack")
return None
self._commit_lock_acquire()
self.locked = 1
self.locked = True
try:
self._lock_acquire()
try:
# Re-open the file in unbuffered mode.
# The main thread may write new transactions to the file,
# which creates the possibility that we will read a status
# 'c' transaction into the pack thread's stdio buffer even
# though we're acquiring the commit lock. Transactions
# can still be in progress throughout much of packing, and
# are written to the same physical file but via a distinct
# Python file object. The code used to leave off the
# trailing 0 argument, and then on every platform except
# native Windows it was observed that we could read stale
# The main thread may write new transactions to the
# file, which creates the possibility that we will
# read a status 'c' transaction into the pack thread's
# stdio buffer even though we're acquiring the commit
# lock. Transactions can still be in progress
# throughout much of packing, and are written to the
# same physical file but via a distinct Python file
# object. The code used to leave off the trailing 0
# argument, and then on every platform except native
# Windows it was observed that we could read stale
# data from the tail end of the file.
self._file.close() # else self.gc keeps the original alive & open
self._file.close() # else self.gc keeps the original
# alive & open
self._file = open(self._path, "rb", 0)
self._file.seek(0, 2)
self.file_end = self._file.tell()
......@@ -417,6 +423,10 @@ class FileStoragePacker(FileStorageFormatter):
self._file.close()
return pos
except:
if self.locked:
self._commit_lock_release()
raise
def copyToPacktime(self):
offset = 0L # the amount of space freed by packing
......@@ -524,9 +534,6 @@ class FileStoragePacker(FileStorageFormatter):
# After the pack time, all data records are copied.
# Copy one txn at a time, using copy() for data.
# Release the commit lock every 20 copies
self._lock_counter = 0
try:
while 1:
ipos = self.copyOne(ipos)
......@@ -543,9 +550,9 @@ class FileStoragePacker(FileStorageFormatter):
def copyOne(self, ipos):
# The call below will raise CorruptedDataError at EOF.
th = self._read_txn_header(ipos)
self._lock_counter += 1
if self._lock_counter % 20 == 0:
# Release commit lock while writing to pack file
self._commit_lock_release()
self.locked = False
pos = self._tfile.tell()
self._copier.setTxnPos(pos)
self._tfile.write(th.asString())
......@@ -573,6 +580,6 @@ class FileStoragePacker(FileStorageFormatter):
self.index.update(self.tindex)
self.tindex.clear()
if self._lock_counter % 20 == 0:
self._commit_lock_acquire()
self.locked = True
return ipos
##############################################################################
#
# Copyright (c) Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
import zope.interface
class IFileStoragePacker(zope.interface.Interface):
    # Protocol for pluggable packers: any callable with this signature
    # may be supplied via the FileStorage 'packer' constructor argument.

    def __call__(storage, referencesf, stop, gc):
        """Pack the file storage into a new file

        The new file will have the same name as the old file with
        '.pack' appended. (The packer can get the old file name via
        storage._file.name.)

        If packing is unnecessary, or would not change the file, then
        None is returned, otherwise a tuple is returned with:

        - the size of the packed file, and

        - the packed index

        If and only if packing was necessary (non-None) and there was
        no error, then the commit lock must be acquired.
        """
class IFileStorage(zope.interface.Interface):
    # Minimal storage interface a pluggable packer may rely on:
    # the packer attribute, the underlying file, and the two lock pairs.

    packer = zope.interface.Attribute(
        "The IFileStoragePacker to be used for packing."
        )

    _file = zope.interface.Attribute(
        "The file object used to access the underlying data."
        )

    def _lock_acquire():
        "Acquire the storage lock"

    def _lock_release():
        "Release the storage lock"

    def _commit_lock_acquire():
        "Acquire the storage commit lock"

    def _commit_lock_release():
        "Release the storage commit lock"
##############################################################################
#
# Copyright (c) Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
import unittest
from zope.testing import doctest
def test_suite():
    """Build the test suite: run zconfig.txt as a doctest file."""
    suite = unittest.TestSuite()
    suite.addTest(doctest.DocFileSuite('zconfig.txt'))
    return suite
Defining FileStorages using ZConfig
===================================
ZODB provides support for defining many storages, including
FileStorages, using ZConfig. To do this, you use a filestorage
section, and define a path:
>>> import ZODB.config
>>> fs = ZODB.config.storageFromString("""
... <filestorage>
... path my.fs
... </filestorage>
... """)
>>> fs._file.name
'my.fs'
There are a number of options we can provide:
create
Flag that indicates whether the storage should be truncated if
it already exists.
To demonstrate this, we'll first write some data:
>>> db = ZODB.DB(fs) # writes object 0
>>> db.close()
Then reopen with the create option:
>>> fs = ZODB.config.storageFromString("""
... <filestorage>
... path my.fs
... create true
... </filestorage>
... """)
Because the file was truncated, we no longer have object 0:
>>> fs.load('\0'*8)
Traceback (most recent call last):
...
POSKeyError: 0x00
>>> fs.close()
read-only
If true, only reads may be executed against the storage. Note
that the "pack" operation is not considered a write operation
and is still allowed on a read-only filestorage.
>>> fs = ZODB.config.storageFromString("""
... <filestorage>
... path my.fs
... read-only true
... </filestorage>
... """)
>>> fs.isReadOnly()
True
>>> fs.close()
quota
Maximum allowed size of the storage file. Operations which
would cause the size of the storage to exceed the quota will
result in a ZODB.FileStorage.FileStorageQuotaError being
raised.
>>> fs = ZODB.config.storageFromString("""
... <filestorage>
... path my.fs
... quota 10
... </filestorage>
... """)
>>> db = ZODB.DB(fs) # writes object 0
Traceback (most recent call last):
...
FileStorageQuotaError: The storage quota has been exceeded.
>>> fs.close()
packer
The dotted name (dotted module name and object name) of a
packer object. This is used to provide an alternative pack
implementation.
To demonstrate this, we'll create a null packer that just prints
some information about its arguments:
>>> def packer(storage, referencesf, stop, gc):
... print referencesf, storage is fs, gc
>>> ZODB.FileStorage.config_demo_printing_packer = packer
>>> fs = ZODB.config.storageFromString("""
... <filestorage>
... path my.fs
... packer ZODB.FileStorage.config_demo_printing_packer
... </filestorage>
... """)
>>> import time
>>> db = ZODB.DB(fs) # writes object 0
>>> fs.pack(time.time(), 42)
42 True True
>>> fs.close()
pack-gc
If false, then no garbage collection will be performed when
packing. This can make packing go much faster and can avoid
problems when objects are referenced only from other
databases.
>>> fs = ZODB.config.storageFromString("""
... <filestorage>
... path my.fs
... packer ZODB.FileStorage.config_demo_printing_packer
... pack-gc false
... </filestorage>
... """)
>>> fs.pack(time.time(), 42)
42 True False
Note that if we pass the gc option to pack, then this will
override the value set in the configuration:
>>> fs.pack(time.time(), 42, gc=True)
42 True True
......@@ -35,6 +35,21 @@
raised.
</description>
</key>
<key name="packer" datatype="dotted-name">
<description>
The dotted name (dotted module name and object name) of a
packer object. This is used to provide an alternative pack
implementation.
</description>
</key>
<key name="pack-gc" datatype="boolean" default="true">
<description>
If false, then no garbage collection will be performed when
packing. This can make packing go much faster and can avoid
problems when objects are referenced only from other
databases.
</description>
</key>
</sectiontype>
<sectiontype name="mappingstorage" datatype=".MappingStorage"
......
......@@ -137,10 +137,17 @@ class FileStorage(BaseConfig):
def open(self):
    """Create a FileStorage from this ZConfig section's settings.

    Maps the section's path/create/read-only/quota/pack-gc keys to
    FileStorage constructor arguments.  If a 'packer' dotted name is
    configured, the named object is imported and passed through as
    the 'packer' keyword argument.
    """
    from ZODB.FileStorage import FileStorage
    options = {}
    if self.config.packer:
        # Resolve the dotted name: split off the final attribute,
        # import the containing module, then fetch the packer object.
        # The ['*'] fromlist makes __import__ return the leaf module
        # rather than the top-level package.
        m, name = self.config.packer.rsplit('.', 1)
        options['packer'] = getattr(__import__(m, {}, {}, ['*']), name)
    return FileStorage(self.config.path,
                       create=self.config.create,
                       read_only=self.config.read_only,
                       quota=self.config.quota,
                       pack_gc=self.config.pack_gc,
                       **options)
class BlobStorage(BaseConfig):
......
......@@ -41,7 +41,7 @@ class FileStorageTests(
BasicStorage.BasicStorage,
TransactionalUndoStorage.TransactionalUndoStorage,
RevisionStorage.RevisionStorage,
PackableStorage.PackableStorage,
PackableStorage.PackableStorageWithOptionalGC,
PackableStorage.PackableUndoStorage,
Synchronization.SynchronizedStorage,
ConflictResolution.ConflictResolvingStorage,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment