Commit 7f220b80 authored by Chris McDonough's avatar Chris McDonough

Add a record iteration protocol to FileStorage. You can use the record...

Add a record iteration protocol to FileStorage.  You can use the record iterator to iterate over all current revisions
of data pickles in the storage.

In order to support calling via ZEO, we don't implement this as an actual iterator.  An example of using the record iterator
protocol is as follows:

storage = FileStorage('anexisting.fs')
next_oid = None
while 1:
    oid, tid, data, next_oid = storage.record_iternext(next_oid)
    # do something with oid, tid and data
    if next_oid is None:
        break

The behavior of the iteration protocol is now to iterate over all current records in the database in ascending oid order, 
although this is not a promise to do so in the future.

parent ac5116e4
...@@ -371,8 +371,13 @@ class FileStorage(BaseStorage.BaseStorage, ...@@ -371,8 +371,13 @@ class FileStorage(BaseStorage.BaseStorage,
return None return None
pos = long(pos) pos = long(pos)
if isinstance(index, DictType): if (
# Convert to fsIndex. isinstance(index, DictType) or
(isinstance(index, fsIndex) and isinstance(index._data, DictType))
):
# Convert dictionary indexes to fsIndexes *or* convert fsIndexes
# which have a DictType `_data` attribute to a new fsIndex (newer
# fsIndexes have an OOBTree as `_data`)
newindex = fsIndex() newindex = fsIndex()
newindex.update(index) newindex.update(index)
index = newindex index = newindex
...@@ -1397,6 +1402,19 @@ class FileStorage(BaseStorage.BaseStorage, ...@@ -1397,6 +1402,19 @@ class FileStorage(BaseStorage.BaseStorage,
if e.errno != errno.ENOENT: if e.errno != errno.ENOENT:
raise raise
def record_iternext(self, next=None):
index = self._index
oid = index.minKey(next)
try:
next_oid = index.minKey(self.new_oid(oid))
except ValueError: # "empty tree" error
next_oid = None
data, tid = self.load(oid, None) # ignore versions
return oid, tid, data, next_oid
def shift_transactions_forward(index, vindex, tindex, file, pos, opos): def shift_transactions_forward(index, vindex, tindex, file, pos, opos):
"""Copy transactions forward in the data file """Copy transactions forward in the data file
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
import struct import struct
from BTrees._fsBTree import fsBucket from BTrees._fsBTree import fsBucket
from BTrees.OOBTree import OOBTree
# convert between numbers and six-byte strings # convert between numbers and six-byte strings
...@@ -48,10 +49,18 @@ def num2str(n): ...@@ -48,10 +49,18 @@ def num2str(n):
def str2num(s): def str2num(s):
return struct.unpack(">Q", "\000\000" + s)[0] return struct.unpack(">Q", "\000\000" + s)[0]
def prefix_plus_one(s):
num = str2num(s)
return num2str(num + 1)
def prefix_minus_one(s):
num = str2num(s)
return num2str(num - 1)
class fsIndex(object): class fsIndex(object):
def __init__(self): def __init__(self):
self._data = {} self._data = OOBTree()
def __getitem__(self, key): def __getitem__(self, key):
return str2num(self._data[key[:6]][key[6:]]) return str2num(self._data[key[:6]][key[6:]])
...@@ -126,31 +135,61 @@ class fsIndex(object): ...@@ -126,31 +135,61 @@ class fsIndex(object):
def values(self): def values(self):
return list(self.itervalues()) return list(self.itervalues())
def maxKey(self): # Comment below applies for the following minKey and maxKey methods
# This is less general than the BTree method of the same name: we #
# only care about the largest key in the entire tree. By # Obscure: what if `tree` is actually empty? We're relying here on
# construction, that's the largest oid in use in the associated # that this class doesn't implement __delitem__: once a key gets
# FileStorage. # into an fsIndex, the only way it can go away is by invoking
# clear(). Therefore nothing in _data.values() is ever empty.
keys = self._data.keys() #
if not keys: # Note that because `tree` is an fsBTree, its minKey()/maxKey() methods are
# This is the same exception a BTree maxKey() raises when the # very efficient.
# tree is empty.
raise ValueError("empty tree") def minKey(self, key=None):
if key is None:
# We expect that keys is small, since each fsBTree in _data.values() smallest_prefix = self._data.minKey()
# can hold as many as 2**16 = 64K entries. So this max() should go else:
# fast too. Regardless, there's no faster way to find the largest smallest_prefix = self._data.minKey(key[:6])
# prefix.
biggest_prefix = max(keys) tree = self._data[smallest_prefix]
assert tree
if key is None:
smallest_suffix = tree.minKey()
else:
try:
smallest_suffix = tree.minKey(key[6:])
except ValueError: # 'empty tree' (no suffix >= arg)
next_prefix = prefix_plus_one(smallest_prefix)
smallest_prefix = self._data.minKey(next_prefix)
tree = self._data[smallest_prefix]
assert tree
smallest_suffix = tree.minKey()
return smallest_prefix + smallest_suffix
def maxKey(self, key=None):
if key is None:
biggest_prefix = self._data.maxKey()
else:
biggest_prefix = self._data.maxKey(key[:6])
tree = self._data[biggest_prefix] tree = self._data[biggest_prefix]
# Obscure: what if tree is actually empty? We're relying here on
# that this class doesn't implement __delitem__: once a key gets
# into an fsIndex, the only way it can go away is by invoking
# clear(). Therefore nothing in _data.values() is ever empty.
#
# Note that because `tree` is an fsBTree, its maxKey() method is very
# efficient.
assert tree assert tree
return biggest_prefix + tree.maxKey()
if key is None:
biggest_suffix = tree.maxKey()
else:
try:
biggest_suffix = tree.maxKey(key[6:])
except ValueError: # 'empty tree' (no suffix <= arg)
next_prefix = prefix_minus_one(biggest_prefix)
biggest_prefix = self._data.maxKey(next_prefix)
tree = self._data[biggest_prefix]
assert tree
biggest_suffix = tree.maxKey()
return biggest_prefix + biggest_suffix
...@@ -137,6 +137,41 @@ class FileStorageTests( ...@@ -137,6 +137,41 @@ class FileStorageTests(
# Python dict. # Python dict.
self.check_conversion_to_fsIndex(read_only=True) self.check_conversion_to_fsIndex(read_only=True)
def check_conversion_from_dict_to_btree_data_in_fsIndex(self):
# To support efficient range searches on its keys as part of
# implementing a record iteration protocol in FileStorage, we
# converted the fsIndex class from using a dictionary as its
# self._data attribute to using an OOBTree in its stead.
from ZODB.fsIndex import fsIndex
from BTrees.OOBTree import OOBTree
# Create some data, and remember the index.
for i in range(10):
self._dostore()
data_dict = dict(self._storage._index._data)
# Replace the OOBTree with a dictionary and commit it.
self._storage._index._data = data_dict
get_transaction().commit()
# Save the index.
self._storage.close()
# Verify it's converted to fsIndex in memory upon open.
self.open()
self.assert_(isinstance(self._storage._index, fsIndex))
self.assert_(isinstance(self._storage._index._data, OOBTree))
# Verify it has the right content.
new_data_dict = dict(self._storage._index._data)
self.assertEqual(len(data_dict), len(new_data_dict))
for k in data_dict:
old_tree = data_dict[k]
new_tree = new_data_dict[k]
self.assertEqual(list(old_tree.items()), list(new_tree.items()))
def check_save_after_load_with_no_index(self): def check_save_after_load_with_no_index(self):
for i in range(10): for i in range(10):
self._dostore() self._dostore()
...@@ -288,6 +323,35 @@ class FileStorageTests( ...@@ -288,6 +323,35 @@ class FileStorageTests(
else: else:
self.fail("expected CorruptedError") self.fail("expected CorruptedError")
def check_record_iternext(self):
from ZODB.DB import DB
db = DB(self._storage)
conn = db.open()
conn.root()['abc'] = MinPO('abc')
conn.root()['xyz'] = MinPO('xyz')
get_transaction().commit()
# Ensure it's all on disk.
db.close()
self._storage.close()
self.open()
key = None
for x in ('\000', '\001', '\002'):
oid, tid, data, next_oid = self._storage.record_iternext(key)
self.assertEqual(oid, ('\000' * 7) + x)
key = next_oid
expected_data, expected_tid = self._storage.load(oid, '')
self.assertEqual(expected_data, data)
self.assertEqual(expected_tid, tid)
if x == '\002':
self.assertEqual(next_oid, None)
else:
self.assertNotEqual(next_oid, None)
class FileStorageRecoveryTest( class FileStorageRecoveryTest(
StorageTestBase.StorageTestBase, StorageTestBase.StorageTestBase,
RecoveryStorage.RecoveryStorage, RecoveryStorage.RecoveryStorage,
......
...@@ -15,7 +15,7 @@ import unittest ...@@ -15,7 +15,7 @@ import unittest
import random import random
from ZODB.fsIndex import fsIndex from ZODB.fsIndex import fsIndex
from ZODB.utils import p64 from ZODB.utils import p64, z64
class Test(unittest.TestCase): class Test(unittest.TestCase):
...@@ -130,6 +130,44 @@ class Test(unittest.TestCase): ...@@ -130,6 +130,44 @@ class Test(unittest.TestCase):
index_max = index.maxKey() index_max = index.maxKey()
self.assertEqual(index_max, correct_max) self.assertEqual(index_max, correct_max)
index.clear()
a = '\000\000\000\000\000\001\000\000'
b = '\000\000\000\000\000\002\000\000'
c = '\000\000\000\000\000\003\000\000'
d = '\000\000\000\000\000\004\000\000'
index[a] = 1
index[c] = 2
self.assertEqual(index.maxKey(b), a)
self.assertEqual(index.maxKey(d), c)
self.assertRaises(ValueError, index.maxKey, z64)
def testMinKey(self):
index = self.index
index.clear()
# An empty index should complain.
self.assertRaises(ValueError, index.minKey)
# Now build up a tree with random values, and check maxKey at each
# step.
correct_min = "\xff" * 8 # bigger than anything we'll add
for i in range(1000):
key = p64(random.randrange(100000000))
index[key] = i
correct_min = min(correct_min, key)
index_min = index.minKey()
self.assertEqual(index_min, correct_min)
index.clear()
a = '\000\000\000\000\000\001\000\000'
b = '\000\000\000\000\000\002\000\000'
c = '\000\000\000\000\000\003\000\000'
d = '\000\000\000\000\000\004\000\000'
index[a] = 1
index[c] = 2
self.assertEqual(index.minKey(b), c)
self.assertRaises(ValueError, index.minKey, d)
def test_suite(): def test_suite():
loader=unittest.TestLoader() loader=unittest.TestLoader()
return loader.loadTestsFromTestCase(Test) return loader.loadTestsFromTestCase(Test)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment