Commit 5d71ba3f authored by Jim Fulton's avatar Jim Fulton

- FileStorage now provides optional garbage collection. A 'gc'
  keyword option can be passed to the pack method.  A false value
  prevents garbage collection.

- The FileStorage constructor now provides a boolean pack_gc option,
  which defaults to True, to control whether garbage collection is
  performed when packing by default. This can be overridden with the
  gc option to the pack method.

  The ZConfig configuration for FileStorage now includes a pack-gc
  option, corresponding to the pack_gc constructor argument.

- The FileStorage constructor now has a packer keyword argument that
  allows an alternative packer to be supplied.

  The ZConfig configuration for FileStorage now includes a packer
  option, corresponding to the packer constructor argument.
parent 2eda1395
...@@ -22,6 +22,27 @@ New Features ...@@ -22,6 +22,27 @@ New Features
XXX There are known issues with this implementation that need to be XXX There are known issues with this implementation that need to be
sorted out before it is "released". sorted out before it is "released".
New Features
------------
- FileStorage now provides optional garbage collection. A 'gc'
keyword option can be passed to the pack method. A false value
prevents garbage collection.
- The FileStorage constructor now provides a boolean pack_gc option,
which defaults to True, to control whether garbage collection is
performed when packing by default. This can be overridden with the
gc option to the pack method.
The ZConfig configuration for FileStorage now includes a pack-gc
option, corresponding to the pack_gc constructor argument.
- The FileStorage constructor now has a packer keyword argument that
allows an alternative packer to be supplied.
The ZConfig configuration for FileStorage now includes a packer
option, corresponding to the packer constructor argument.
3.9.0a2 (2008-10-31) 3.9.0a2 (2008-10-31)
==================== ====================
......
...@@ -102,7 +102,7 @@ class FileStorage(BaseStorage.BaseStorage, ...@@ -102,7 +102,7 @@ class FileStorage(BaseStorage.BaseStorage,
_pack_is_in_progress = False _pack_is_in_progress = False
def __init__(self, file_name, create=False, read_only=False, stop=None, def __init__(self, file_name, create=False, read_only=False, stop=None,
quota=None): quota=None, pack_gc=True, packer=None):
if read_only: if read_only:
self._is_read_only = True self._is_read_only = True
...@@ -125,6 +125,10 @@ class FileStorage(BaseStorage.BaseStorage, ...@@ -125,6 +125,10 @@ class FileStorage(BaseStorage.BaseStorage,
self._file_name = file_name self._file_name = file_name
self._pack_gc = pack_gc
if packer is not None:
self.packer = packer
BaseStorage.BaseStorage.__init__(self, file_name) BaseStorage.BaseStorage.__init__(self, file_name)
index, tindex = self._newIndexes() index, tindex = self._newIndexes()
...@@ -979,7 +983,26 @@ class FileStorage(BaseStorage.BaseStorage, ...@@ -979,7 +983,26 @@ class FileStorage(BaseStorage.BaseStorage,
file.seek(pos - p + 8) file.seek(pos - p + 8)
return file.read(1) not in ' u' return file.read(1) not in ' u'
def pack(self, t, referencesf): @staticmethod
def packer(storage, referencesf, stop, gc):
    """Default packer: adapt the original FileStoragePacker to the
    pluggable packer calling convention.

    The old packer is deliberately only wrapped, not reworked; we
    don't want to invest much in it, at least for now.

    storage is the file storage being packed; its file name, locks
    and current size are handed to the old packer.  referencesf is
    accepted for interface compatibility but is not used here.  stop
    and gc are passed straight through to the old packer.

    Returns None when the old packer reports that there is nothing
    to do, otherwise a tuple of the position returned by the old
    packer and the packed index.
    """
    old_packer = FileStoragePacker(
        storage._file.name, stop,
        storage._lock_acquire, storage._lock_release,
        storage._commit_lock_acquire, storage._commit_lock_release,
        storage.getSize(), gc)
    end_pos = old_packer.pack()
    # Only touch the packer's index when a pack actually happened.
    return None if end_pos is None else (end_pos, old_packer.index)
def pack(self, t, referencesf, gc=None):
"""Copy data from the current database file to a packed file """Copy data from the current database file to a packed file
Non-current records from transactions with time-stamp strings less Non-current records from transactions with time-stamp strings less
...@@ -1003,23 +1026,23 @@ class FileStorage(BaseStorage.BaseStorage, ...@@ -1003,23 +1026,23 @@ class FileStorage(BaseStorage.BaseStorage,
if self._pack_is_in_progress: if self._pack_is_in_progress:
raise FileStorageError('Already packing') raise FileStorageError('Already packing')
self._pack_is_in_progress = True self._pack_is_in_progress = True
current_size = self.getSize()
finally: finally:
self._lock_release() self._lock_release()
p = FileStoragePacker(self._file_name, stop, if gc is None:
self._lock_acquire, self._lock_release, gc = self._pack_gc
self._commit_lock_acquire,
self._commit_lock_release, have_commit_lock = False
current_size)
try: try:
opos = None pack_result = None
try: try:
opos = p.pack() pack_result = self.packer(self, referencesf, stop, gc)
except RedundantPackWarning, detail: except RedundantPackWarning, detail:
logger.info(str(detail)) logger.info(str(detail))
if opos is None: if pack_result is None:
return return
have_commit_lock = True
opos, index = pack_result
oldpath = self._file_name + ".old" oldpath = self._file_name + ".old"
self._lock_acquire() self._lock_acquire()
try: try:
...@@ -1035,13 +1058,13 @@ class FileStorage(BaseStorage.BaseStorage, ...@@ -1035,13 +1058,13 @@ class FileStorage(BaseStorage.BaseStorage,
# OK, we're beyond the point of no return # OK, we're beyond the point of no return
os.rename(self._file_name + '.pack', self._file_name) os.rename(self._file_name + '.pack', self._file_name)
self._file = open(self._file_name, 'r+b') self._file = open(self._file_name, 'r+b')
self._initIndex(p.index, p.tindex) self._initIndex(index, self._tindex)
self._pos = opos self._pos = opos
self._save_index() self._save_index()
finally: finally:
self._lock_release() self._lock_release()
finally: finally:
if p.locked: if have_commit_lock:
self._commit_lock_release() self._commit_lock_release()
self._lock_acquire() self._lock_acquire()
self._pack_is_in_progress = False self._pack_is_in_progress = False
......
...@@ -139,11 +139,12 @@ class PackCopier(FileStorageFormatter): ...@@ -139,11 +139,12 @@ class PackCopier(FileStorageFormatter):
class GC(FileStorageFormatter): class GC(FileStorageFormatter):
def __init__(self, file, eof, packtime): def __init__(self, file, eof, packtime, gc):
self._file = file self._file = file
self._name = file.name self._name = file.name
self.eof = eof self.eof = eof
self.packtime = packtime self.packtime = packtime
self.gc = gc
# packpos: position of first txn header after pack time # packpos: position of first txn header after pack time
self.packpos = None self.packpos = None
self.oid2curpos = fsIndex() # maps oid to current data record position self.oid2curpos = fsIndex() # maps oid to current data record position
...@@ -157,7 +158,6 @@ class GC(FileStorageFormatter): ...@@ -157,7 +158,6 @@ class GC(FileStorageFormatter):
# second is a dictionary mapping objects to lists of # second is a dictionary mapping objects to lists of
# positions; it is used to handle the same number of objects # positions; it is used to handle the same number of objects
# for which we must keep multiple revisions. # for which we must keep multiple revisions.
self.reachable = fsIndex() self.reachable = fsIndex()
self.reach_ex = {} self.reach_ex = {}
...@@ -176,11 +176,14 @@ class GC(FileStorageFormatter): ...@@ -176,11 +176,14 @@ class GC(FileStorageFormatter):
def findReachable(self): def findReachable(self):
self.buildPackIndex() self.buildPackIndex()
if self.gc:
self.findReachableAtPacktime([z64]) self.findReachableAtPacktime([z64])
self.findReachableFromFuture() self.findReachableFromFuture()
# These mappings are no longer needed and may consume a lot # These mappings are no longer needed and may consume a lot of
# of space. # space.
del self.oid2curpos del self.oid2curpos
else:
self.reachable = self.oid2curpos
def buildPackIndex(self): def buildPackIndex(self):
pos = 4L pos = 4L
...@@ -320,7 +323,7 @@ class FileStoragePacker(FileStorageFormatter): ...@@ -320,7 +323,7 @@ class FileStoragePacker(FileStorageFormatter):
# current_size is the storage's _pos. All valid data at the start # current_size is the storage's _pos. All valid data at the start
# lives before that offset (there may be a checkpoint transaction in # lives before that offset (there may be a checkpoint transaction in
# progress after it). # progress after it).
def __init__(self, path, stop, la, lr, cla, clr, current_size): def __init__(self, path, stop, la, lr, cla, clr, current_size, gc=True):
self._name = path self._name = path
# We open our own handle on the storage so that much of pack can # We open our own handle on the storage so that much of pack can
# proceed in parallel. It's important to close this file at every # proceed in parallel. It's important to close this file at every
...@@ -329,10 +332,10 @@ class FileStoragePacker(FileStorageFormatter): ...@@ -329,10 +332,10 @@ class FileStoragePacker(FileStorageFormatter):
self._file = open(path, "rb") self._file = open(path, "rb")
self._path = path self._path = path
self._stop = stop self._stop = stop
self.locked = 0 self.locked = False
self.file_end = current_size self.file_end = current_size
self.gc = GC(self._file, self.file_end, self._stop) self.gc = GC(self._file, self.file_end, self._stop, gc)
# The packer needs to acquire the parent's commit lock # The packer needs to acquire the parent's commit lock
# during the copying stage, so the two sets of lock acquire # during the copying stage, so the two sets of lock acquire
...@@ -386,22 +389,25 @@ class FileStoragePacker(FileStorageFormatter): ...@@ -386,22 +389,25 @@ class FileStoragePacker(FileStorageFormatter):
os.remove(self._name + ".pack") os.remove(self._name + ".pack")
return None return None
self._commit_lock_acquire() self._commit_lock_acquire()
self.locked = 1 self.locked = True
try:
self._lock_acquire() self._lock_acquire()
try: try:
# Re-open the file in unbuffered mode. # Re-open the file in unbuffered mode.
# The main thread may write new transactions to the file, # The main thread may write new transactions to the
# which creates the possibility that we will read a status # file, which creates the possibility that we will
# 'c' transaction into the pack thread's stdio buffer even # read a status 'c' transaction into the pack thread's
# though we're acquiring the commit lock. Transactions # stdio buffer even though we're acquiring the commit
# can still be in progress throughout much of packing, and # lock. Transactions can still be in progress
# are written to the same physical file but via a distinct # throughout much of packing, and are written to the
# Python file object. The code used to leave off the # same physical file but via a distinct Python file
# trailing 0 argument, and then on every platform except # object. The code used to leave off the trailing 0
# native Windows it was observed that we could read stale # argument, and then on every platform except native
# Windows it was observed that we could read stale
# data from the tail end of the file. # data from the tail end of the file.
self._file.close() # else self.gc keeps the original alive & open self._file.close() # else self.gc keeps the original
# alive & open
self._file = open(self._path, "rb", 0) self._file = open(self._path, "rb", 0)
self._file.seek(0, 2) self._file.seek(0, 2)
self.file_end = self._file.tell() self.file_end = self._file.tell()
...@@ -417,6 +423,10 @@ class FileStoragePacker(FileStorageFormatter): ...@@ -417,6 +423,10 @@ class FileStoragePacker(FileStorageFormatter):
self._file.close() self._file.close()
return pos return pos
except:
if self.locked:
self._commit_lock_release()
raise
def copyToPacktime(self): def copyToPacktime(self):
offset = 0L # the amount of space freed by packing offset = 0L # the amount of space freed by packing
...@@ -524,9 +534,6 @@ class FileStoragePacker(FileStorageFormatter): ...@@ -524,9 +534,6 @@ class FileStoragePacker(FileStorageFormatter):
# After the pack time, all data records are copied. # After the pack time, all data records are copied.
# Copy one txn at a time, using copy() for data. # Copy one txn at a time, using copy() for data.
# Release the commit lock every 20 copies
self._lock_counter = 0
try: try:
while 1: while 1:
ipos = self.copyOne(ipos) ipos = self.copyOne(ipos)
...@@ -543,9 +550,9 @@ class FileStoragePacker(FileStorageFormatter): ...@@ -543,9 +550,9 @@ class FileStoragePacker(FileStorageFormatter):
def copyOne(self, ipos): def copyOne(self, ipos):
# The call below will raise CorruptedDataError at EOF. # The call below will raise CorruptedDataError at EOF.
th = self._read_txn_header(ipos) th = self._read_txn_header(ipos)
self._lock_counter += 1 # Release commit lock while writing to pack file
if self._lock_counter % 20 == 0:
self._commit_lock_release() self._commit_lock_release()
self.locked = False
pos = self._tfile.tell() pos = self._tfile.tell()
self._copier.setTxnPos(pos) self._copier.setTxnPos(pos)
self._tfile.write(th.asString()) self._tfile.write(th.asString())
...@@ -573,6 +580,6 @@ class FileStoragePacker(FileStorageFormatter): ...@@ -573,6 +580,6 @@ class FileStoragePacker(FileStorageFormatter):
self.index.update(self.tindex) self.index.update(self.tindex)
self.tindex.clear() self.tindex.clear()
if self._lock_counter % 20 == 0:
self._commit_lock_acquire() self._commit_lock_acquire()
self.locked = True
return ipos return ipos
##############################################################################
#
# Copyright (c) Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
import zope.interface
class IFileStoragePacker(zope.interface.Interface):
    """A callable that implements packing for a file storage."""

    def __call__(storage, referencesf, stop, gc):
        """Pack the file storage into a new file

        Arguments:

        storage -- the file storage being packed.

        referencesf -- the references function that was passed to the
            storage's pack method; it is handed on unchanged.

        stop -- the pack time.

        gc -- a boolean; if false, no garbage collection is performed.

        The new file will have the same name as the old file with
        '.pack' appended. (The packer can get the old file name via
        storage._file.name.)

        If packing is unnecessary, or would not change the file, then
        None is returned, otherwise a tuple is returned with:

        - the size of the packed file, and

        - the packed index

        If and only if packing was necessary (the result is not None)
        and there was no error, the commit lock must be held when the
        packer returns; the caller is responsible for releasing it.
        In every other case (a None result or an exception) the
        commit lock must not be left held.
        """
class IFileStorage(zope.interface.Interface):
    """The parts of a file storage that a packer may rely on."""

    packer = zope.interface.Attribute(
        "The IFileStoragePacker to be used for packing.")

    _file = zope.interface.Attribute(
        "The file object used to access the underlying data.")

    def _lock_acquire():
        """Acquire the storage lock."""

    def _lock_release():
        """Release the storage lock."""

    def _commit_lock_acquire():
        """Acquire the storage commit lock."""

    def _commit_lock_release():
        """Release the storage commit lock."""
##############################################################################
#
# Copyright (c) Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
import unittest
from zope.testing import doctest
def test_suite():
    """Return the test suite that runs the 'zconfig.txt' doctest file."""
    suite = unittest.TestSuite()
    suite.addTest(doctest.DocFileSuite('zconfig.txt'))
    return suite
Defining FileStorages using ZConfig
===================================
ZODB provides support for defining many storages, including
FileStorages, using ZConfig. To do this, you use a filestorage
section, and define a path:
>>> import ZODB.config
>>> fs = ZODB.config.storageFromString("""
... <filestorage>
... path my.fs
... </filestorage>
... """)
>>> fs._file.name
'my.fs'
There are a number of options we can provide:
create
Flag that indicates whether the storage should be truncated if
it already exists.
To demonstrate this, we'll first write some data:
>>> db = ZODB.DB(fs) # writes object 0
>>> db.close()
Then reopen with the create option:
>>> fs = ZODB.config.storageFromString("""
... <filestorage>
... path my.fs
... create true
... </filestorage>
... """)
Because the file was truncated, we no longer have object 0:
>>> fs.load('\0'*8)
Traceback (most recent call last):
...
POSKeyError: 0x00
>>> fs.close()
read-only
If true, only reads may be executed against the storage. Note
that the "pack" operation is not considered a write operation
and is still allowed on a read-only filestorage.
>>> fs = ZODB.config.storageFromString("""
... <filestorage>
... path my.fs
... read-only true
... </filestorage>
... """)
>>> fs.isReadOnly()
True
>>> fs.close()
quota
Maximum allowed size of the storage file. Operations which
would cause the size of the storage to exceed the quota will
result in a ZODB.FileStorage.FileStorageQuotaError being
raised.
>>> fs = ZODB.config.storageFromString("""
... <filestorage>
... path my.fs
... quota 10
... </filestorage>
... """)
>>> db = ZODB.DB(fs) # writes object 0
Traceback (most recent call last):
...
FileStorageQuotaError: The storage quota has been exceeded.
>>> fs.close()
packer
The dotted name (dotted module name and object name) of a
packer object. This is used to provide an alternative pack
implementation.
To demonstrate this, we'll create a null packer that just prints
some information about its arguments:
>>> def packer(storage, referencesf, stop, gc):
... print referencesf, storage is fs, gc
>>> ZODB.FileStorage.config_demo_printing_packer = packer
>>> fs = ZODB.config.storageFromString("""
... <filestorage>
... path my.fs
... packer ZODB.FileStorage.config_demo_printing_packer
... </filestorage>
... """)
>>> import time
>>> db = ZODB.DB(fs) # writes object 0
>>> fs.pack(time.time(), 42)
42 True True
>>> fs.close()
pack-gc
If false, then no garbage collection will be performed when
packing. This can make packing go much faster and can avoid
problems when objects are referenced only from other
databases.
>>> fs = ZODB.config.storageFromString("""
... <filestorage>
... path my.fs
... packer ZODB.FileStorage.config_demo_printing_packer
... pack-gc false
... </filestorage>
... """)
>>> fs.pack(time.time(), 42)
42 True False
Note that if we pass the gc option to pack, then this will
override the value set in the configuration:
>>> fs.pack(time.time(), 42, gc=True)
42 True True
...@@ -35,6 +35,21 @@ ...@@ -35,6 +35,21 @@
raised. raised.
</description> </description>
</key> </key>
<key name="packer" datatype="dotted-name">
<description>
The dotted name (dotted module name and object name) of a
packer object. This is used to provide an alternative pack
implementation.
</description>
</key>
<key name="pack-gc" datatype="boolean" default="true">
<description>
If false, then no garbage collection will be performed when
packing. This can make packing go much faster and can avoid
problems when objects are referenced only from other
databases.
</description>
</key>
</sectiontype> </sectiontype>
<sectiontype name="mappingstorage" datatype=".MappingStorage" <sectiontype name="mappingstorage" datatype=".MappingStorage"
......
...@@ -137,10 +137,17 @@ class FileStorage(BaseConfig): ...@@ -137,10 +137,17 @@ class FileStorage(BaseConfig):
def open(self): def open(self):
from ZODB.FileStorage import FileStorage from ZODB.FileStorage import FileStorage
options = {}
if self.config.packer:
m, name = self.config.packer.rsplit('.', 1)
options['packer'] = getattr(__import__(m, {}, {}, ['*']), name)
return FileStorage(self.config.path, return FileStorage(self.config.path,
create=self.config.create, create=self.config.create,
read_only=self.config.read_only, read_only=self.config.read_only,
quota=self.config.quota) quota=self.config.quota,
pack_gc=self.config.pack_gc,
**options)
class BlobStorage(BaseConfig): class BlobStorage(BaseConfig):
......
...@@ -41,7 +41,7 @@ class FileStorageTests( ...@@ -41,7 +41,7 @@ class FileStorageTests(
BasicStorage.BasicStorage, BasicStorage.BasicStorage,
TransactionalUndoStorage.TransactionalUndoStorage, TransactionalUndoStorage.TransactionalUndoStorage,
RevisionStorage.RevisionStorage, RevisionStorage.RevisionStorage,
PackableStorage.PackableStorage, PackableStorage.PackableStorageWithOptionalGC,
PackableStorage.PackableUndoStorage, PackableStorage.PackableUndoStorage,
Synchronization.SynchronizedStorage, Synchronization.SynchronizedStorage,
ConflictResolution.ConflictResolvingStorage, ConflictResolution.ConflictResolvingStorage,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment