Commit 1bb14faf authored by Jim Fulton

Added fsIndex save method and fsIndex load class method for saving and
loading index data.  This leverages the new fsBucket toString and
fromString methods and provides much faster FileStorage index saving and loading
and smaller index files.  On my machine, saves are 5 times faster and
loads are 20 times faster (after a save, when data are in disk
cache).  Indexes are roughly 30% smaller.

The index format has changed.  Old indexes can be read just fine, but
new indexes won't be readable by older versions of ZODB.
parent ef3e0f10
...@@ -2,12 +2,16 @@ ...@@ -2,12 +2,16 @@
Change History Change History
================ ================
3.10.0a1 (2009-12-??) 3.10.0a1 (2010-02-??)
===================== =====================
New Features New Features
------------ ------------
- FileStorage indexes use a new format. They are saved and loaded much
faster and take less space. Old indexes can still be read, but new
indexes won't be readable by older versions of ZODB.
- The API for undoing multiple transactions has changed. To undo - The API for undoing multiple transactions has changed. To undo
multiple transactions in a single transaction, pass a list of multiple transactions in a single transaction, pass a list of
transaction identifiers to a database's undoMultiple method. Calling a transaction identifiers to a database's undoMultiple method. Calling a
......
...@@ -246,23 +246,7 @@ class FileStorage( ...@@ -246,23 +246,7 @@ class FileStorage(
index_name = self.__name__ + '.index' index_name = self.__name__ + '.index'
tmp_name = index_name + '.index_tmp' tmp_name = index_name + '.index_tmp'
f=open(tmp_name,'wb') self._index.save(self._pos, tmp_name)
p=Pickler(f,1)
# Pickle the index buckets first to avoid deep recursion:
buckets = []
bucket = self._index._data._firstbucket
while bucket is not None:
buckets.append(bucket)
bucket = bucket._next
buckets.reverse()
info=BTrees.OOBTree.Bucket(dict(
_buckets=buckets, index=self._index, pos=self._pos))
p.dump(info)
f.flush()
f.close()
try: try:
try: try:
...@@ -357,19 +341,15 @@ class FileStorage( ...@@ -357,19 +341,15 @@ class FileStorage(
file_name=self.__name__ file_name=self.__name__
index_name=file_name+'.index' index_name=file_name+'.index'
try: if os.path.exists(index_name):
f = open(index_name, 'rb') try:
except: info = fsIndex.load(index_name)
except:
logger.exception('loading index')
return None
else:
return None return None
p=Unpickler(f)
try:
info=p.load()
except:
exc, err = sys.exc_info()[:2]
logger.warning("Failed to load database index: %s: %s", exc, err)
return None
index = info.get('index') index = info.get('index')
pos = info.get('pos') pos = info.get('pos')
if index is None or pos is None: if index is None or pos is None:
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
# bytes back before using u64 to convert the data back to (long) # bytes back before using u64 to convert the data back to (long)
# integers. # integers.
import cPickle
import struct import struct
from BTrees._fsBTree import fsBucket from BTrees._fsBTree import fsBucket
...@@ -62,12 +63,62 @@ def prefix_minus_one(s): ...@@ -62,12 +63,62 @@ def prefix_minus_one(s):
class fsIndex(object): class fsIndex(object):
def __init__(self): def __init__(self, data=None):
self._data = OOBTree() self._data = OOBTree()
if data:
self.update(data)
def __getstate__(self):
return dict(
state_version = 1,
_data = [(k, v.toString())
for (k, v) in self._data.iteritems()
]
)
def __setstate__(self, state):
version = state.pop('state_version', 0)
getattr(self, '_setstate_%s' % version)(state)
def _setstate_0(self, state):
self.__dict__.clear()
self.__dict__.update(state)
def _setstate_1(self, state):
    """Restore version-1 state.

    ``state['_data']`` is a sequence of ``(key, string)`` pairs.  Each
    string is passed to ``fsBucket().fromString`` and the results are
    loaded, under their keys, into a new ``OOBTree`` that becomes
    ``self._data``.
    """
    pairs = []
    for prefix, packed in state['_data']:
        pairs.append((prefix, fsBucket().fromString(packed)))
    self._data = OOBTree(pairs)
def __getitem__(self, key): def __getitem__(self, key):
return str2num(self._data[key[:6]][key[6:]]) return str2num(self._data[key[:6]][key[6:]])
def save(self, pos, fname):
    """Write the index and the file position *pos* to the file *fname*.

    The file holds a sequence of pickles: first *pos*, then one
    ``(key, string)`` pair per entry of ``self._data`` (the string is
    the entry's ``toString()`` result), and finally ``None`` to mark
    the end of the data.
    """
    with open(fname, 'wb') as out:
        writer = cPickle.Pickler(out, 1)
        writer.fast = True
        dump = writer.dump
        dump(pos)
        for prefix, bucket in self._data.iteritems():
            dump((prefix, bucket.toString()))
        # Trailing None is the end-of-data marker that load() stops on.
        dump(None)
@classmethod
def load(class_, fname):
    """Read index data from the file *fname*.

    When the first pickle in the file is an integer, it is taken as the
    saved file position; the following ``(key, string)`` pickles are
    read until a false value is found, each string is converted with
    ``fsBucket().fromString``, and a dict with ``pos`` and ``index``
    keys is returned.  Otherwise the first pickle is returned unchanged
    (old format).
    """
    # NOTE(review): this unpickles whatever fname contains; it should
    # only be pointed at trusted index files.
    with open(fname, 'rb') as source:
        read_next = cPickle.Unpickler(source).load
        first = read_next()
        if not isinstance(first, (int, long)):
            return first  # Old format
        loaded = class_()
        buckets = loaded._data
        record = read_next()
        while record:
            prefix, packed = record
            buckets[prefix] = fsBucket().fromString(packed)
            record = read_next()
        return dict(pos=first, index=loaded)
def get(self, key, default=None): def get(self, key, default=None):
tree = self._data.get(key[:6], default) tree = self._data.get(key[:6], default)
if tree is default: if tree is default:
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
# FOR A PARTICULAR PURPOSE. # FOR A PARTICULAR PURPOSE.
# #
############################################################################## ##############################################################################
import cPickle
import os, unittest import os, unittest
import transaction import transaction
import ZODB.FileStorage import ZODB.FileStorage
...@@ -19,6 +20,7 @@ import ZODB.tests.util ...@@ -19,6 +20,7 @@ import ZODB.tests.util
import zope.testing.setupstack import zope.testing.setupstack
from ZODB import POSException from ZODB import POSException
from ZODB import DB from ZODB import DB
from ZODB.fsIndex import fsIndex
from ZODB.tests import StorageTestBase, BasicStorage, TransactionalUndoStorage from ZODB.tests import StorageTestBase, BasicStorage, TransactionalUndoStorage
from ZODB.tests import PackableStorage, Synchronization, ConflictResolution from ZODB.tests import PackableStorage, Synchronization, ConflictResolution
...@@ -69,7 +71,6 @@ class FileStorageTests( ...@@ -69,7 +71,6 @@ class FileStorageTests(
self.fail("expect long user field to raise error") self.fail("expect long user field to raise error")
def check_use_fsIndex(self): def check_use_fsIndex(self):
from ZODB.fsIndex import fsIndex
self.assertEqual(self._storage._index.__class__, fsIndex) self.assertEqual(self._storage._index.__class__, fsIndex)
...@@ -78,21 +79,13 @@ class FileStorageTests( ...@@ -78,21 +79,13 @@ class FileStorageTests(
def convert_index_to_dict(self): def convert_index_to_dict(self):
# Convert the index in the current .index file to a Python dict. # Convert the index in the current .index file to a Python dict.
# Return the index originally found. # Return the index originally found.
import cPickle as pickle data = fsIndex.load('FileStorageTests.fs.index')
f = open('FileStorageTests.fs.index', 'r+b')
p = pickle.Unpickler(f)
data = p.load()
index = data['index'] index = data['index']
newindex = dict(index) newindex = dict(index)
data['index'] = newindex data['index'] = newindex
f.seek(0) cPickle.dump(data, open('FileStorageTests.fs.index', 'wb'), 1)
f.truncate()
p = pickle.Pickler(f, 1)
p.dump(data)
f.close()
return index return index
def check_conversion_to_fsIndex(self, read_only=False): def check_conversion_to_fsIndex(self, read_only=False):
......
...@@ -11,11 +11,13 @@ ...@@ -11,11 +11,13 @@
# FOR A PARTICULAR PURPOSE. # FOR A PARTICULAR PURPOSE.
# #
############################################################################## ##############################################################################
import unittest import doctest
import random import random
import unittest
from ZODB.fsIndex import fsIndex from ZODB.fsIndex import fsIndex
from ZODB.utils import p64, z64 from ZODB.utils import p64, z64
from ZODB.tests.util import setUp, tearDown
class Test(unittest.TestCase): class Test(unittest.TestCase):
...@@ -30,7 +32,7 @@ class Test(unittest.TestCase): ...@@ -30,7 +32,7 @@ class Test(unittest.TestCase):
index = self.index index = self.index
self.assert_(p64(1000) in index) self.assert_(p64(1000) in index)
self.assert_(p64(100*1000) in index) self.assert_(p64(100*1000) in index)
del self.index[p64(1000)] del self.index[p64(1000)]
del self.index[p64(100*1000)] del self.index[p64(100*1000)]
...@@ -186,9 +188,44 @@ class Test(unittest.TestCase): ...@@ -186,9 +188,44 @@ class Test(unittest.TestCase):
self.assertEqual(index.minKey(b), c) self.assertEqual(index.minKey(b), c)
self.assertRaises(ValueError, index.minKey, d) self.assertRaises(ValueError, index.minKey, d)
def test_suite(): def fsIndex_save_and_load():
loader=unittest.TestLoader() """
return loader.loadTestsFromTestCase(Test) fsIndex objects now have save methods for saving them to disk in a new
format. The fsIndex class has a load class method that can load data.
Let's start by creating an fsIndex. We'll bother to allocate the
object ids to get multiple buckets:
>>> index = fsIndex(dict((p64(i), i) for i in xrange(0, 1<<28, 1<<15)))
>>> len(index._data)
4096
Now, we'll save the data to disk and then load it:
if __name__=='__main__': >>> index.save(42, 'index')
unittest.TextTestRunner().run(test_suite())
Note that we pass a file position, which gets saved with the index data.
>>> info = fsIndex.load('index')
>>> info['pos']
42
>>> info['index'].__getstate__() == index.__getstate__()
True
If we save the data in the old format, we can still read it:
>>> import cPickle
>>> cPickle.dump(dict(pos=42, index=index), open('old', 'wb'), 1)
>>> info = fsIndex.load('old')
>>> info['pos']
42
>>> info['index'].__getstate__() == index.__getstate__()
True
"""
def test_suite():
    """Return a suite with the ``Test`` unit tests and this module's doctests."""
    return unittest.TestSuite((
        unittest.makeSuite(Test),
        doctest.DocTestSuite(setUp=setUp, tearDown=tearDown),
    ))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment