Commit 2f8cc67a authored by Julien Muchembled's avatar Julien Muchembled

Make TransactionMetaData in charge of (de)serializing extension data

IStorage implementations used to do this task themselves which leads to code
duplication and sometimes bugs (one was fixed recently in NEO). Like for object
serialization, this should be done by the upper layer (Connection).

This commit also provides a way to get raw extensions data while iterating
over transactions (this is actually the original purpose[2]). So far, extension
data could only be retrieved unpickled, which caused several problems:

- tools like `zodb dump` [1] cannot dump data exactly as stored on a
  storage. This makes database potentially not bit-to-bit identical to
  its original after restoring from such dump.

- `zodb dump` output could be changing from run to run on the same
  database. This comes from the fact that e.g. python dictionaries are
  unordered and so when pickling a dict back to bytes the result could
  be not the same as original.

  ( this problem can be worked-around partly to work reliably for e.g.
    dict with str keys - by always emitting items in key sorted order,
    but it is hard to make it work reliably for arbitrary types )

Both issues make it hard to verify integrity of database at the lowest
possible level after restoration, and make it hard to verify bit-to-bit
compatibility with non-python ZODB implementations.

For this, TransactionMetaData gets a new 'extension_bytes' attribute and
and common usage becomes:

* Application committing a transaction:

  - 'extension' is set with a dictionary
  - the storage gets the bytes via 'extension_bytes'

* Iteration:

  - the storage passes bytes as 'extension' parameter of TransactionMetaData
  - the application can get extension data either as bytes ('extension_bytes')
    or deserialized ('extension'): in the former case, no deserialization
    happens and the returned value is exactly what was passed by the storage

[1] https://lab.nexedi.com/nexedi/zodbtools
[2] https://github.com/zopefoundation/ZODB/pull/183Co-Authored-By: Kirill Smelkov's avatarKirill Smelkov <kirr@nexedi.com>
parent 98dc5f1e
......@@ -18,6 +18,12 @@
rolled back. See `issue 268
<https://github.com/zopefoundation/ZODB/issues/268>`_.
- Make TransactionMetaData in charge of (de)serializing extension data.
A new ``extension_bytes`` attribute converts automatically from
``extension``, or vice-versa. During storage iteration, ``extension_bytes``
holds bytes as they are stored (i.e. no deserialization happens).
See `issue 207 <https://github.com/zopefoundation/ZODB/pull/207>`_.
5.5.1 (2018-10-25)
==================
......
......@@ -31,7 +31,7 @@ from . import POSException, utils
from .Connection import TransactionMetaData
from .utils import z64, oid_repr, byte_ord, byte_chr, load_current
from .UndoLogCompatible import UndoLogCompatible
from ._compat import dumps, _protocol, py2_hasattr
from ._compat import py2_hasattr
log = logging.getLogger("ZODB.BaseStorage")
......@@ -190,11 +190,7 @@ class BaseStorage(UndoLogCompatible):
user = transaction.user
desc = transaction.description
ext = transaction.extension
if ext:
ext = dumps(ext, _protocol)
else:
ext = ""
ext = transaction.extension_bytes
self._ude = user, desc, ext
......
......@@ -49,10 +49,10 @@ from ZODB.utils import p64, u64, z64, oid_repr, positive_id
from ZODB import utils
import six
from ._compat import dumps, loads, _protocol
from .mvccadapter import HistoricalStorageAdapter
from . import valuedoc
from . import _compat
global_reset_counter = 0
......@@ -1303,10 +1303,24 @@ large-record-size option in a configuration file) to specify a larger
size.
"""
class overridable_property(object):
"""
Same as property() with only a getter, except that setting a
value overrides the property rather than raising AttributeError.
"""
def __init__(self, func):
self.__doc__ = func.__doc__
self.func = func
def __get__(self, obj, cls):
return self if obj is None else self.func(obj)
@implementer(IStorageTransactionMetaData)
class TransactionMetaData(object):
def __init__(self, user=u'', description=u'', extension=b''):
def __init__(self, user=u'', description=u'', extension=None):
if not isinstance(user, bytes):
user = user.encode('utf-8')
self.user = user
......@@ -1315,9 +1329,20 @@ class TransactionMetaData(object):
description = description.encode('utf-8')
self.description = description
if not isinstance(extension, dict):
extension = _compat.loads(extension) if extension else {}
self.extension = extension
if isinstance(extension, bytes):
self.extension_bytes = extension
else:
self.extension = {} if extension is None else extension
@overridable_property
def extension(self):
extension_bytes = self.extension_bytes
return loads(extension_bytes) if extension_bytes else {}
@overridable_property
def extension_bytes(self):
extension = self.extension
return dumps(extension, _protocol) if extension else b''
def note(self, text): # for tests
text = text.strip()
......
......@@ -1988,15 +1988,8 @@ class FileIterator(FileStorageFormatter):
if h.status != "u":
pos = tpos + h.headerlen()
e = {}
if h.elen:
try:
e = loads(h.ext)
except:
pass
result = TransactionRecord(h.tid, h.status, h.user, h.descr,
e, pos, tend, self._file, tpos)
h.ext, pos, tend, self._file, tpos)
# Read the (intentionally redundant) transaction length
self._file.seek(tend)
......
......@@ -83,7 +83,6 @@ except ImportError:
import ZODB.FileStorage
from ZODB.utils import u64, as_text
from ZODB.FileStorage import TransactionRecord
from ZODB._compat import loads
from persistent.TimeStamp import TimeStamp
......@@ -143,12 +142,9 @@ def read_txn_header(f, pos, file_size, outp, ltid):
pos = tpos+(23+ul+dl+el)
user = f.read(ul)
description = f.read(dl)
if el:
try: e = loads(f.read(el))
except: e = {}
else: e = {}
ext = f.read(el)
result = TransactionRecord(tid, status, user, description, e, pos, tend,
result = TransactionRecord(tid, status, user, description, ext, pos, tend,
f, tpos)
pos = tend
......
......@@ -540,12 +540,19 @@ class IStorageTransactionMetaData(Interface):
Note that unlike transaction.interfaces.ITransaction, the ``user``
and ``description`` attributes are bytes, not text.
If either 'extension' or 'extension_bytes' is unset, it is automatically
converted for the other, which is set during __init__ depending of the
passed extension is of types bytes or not (the default value None sets {}).
The 2 attributes should not be set at the same time.
"""
user = Attribute("Bytes transaction user")
description = Attribute("Bytes transaction Description")
extension = Attribute(
"A dictionary carrying a transaction's extended_info data")
extension_bytes = Attribute(
"Serialization of 'extension' attribute"
" (verbatim bytes in case of storage iteration)")
def set_data(ob, data):
"""Hold data on behalf of an object
......
......@@ -49,6 +49,10 @@ class IteratorCompare(object):
class IteratorStorage(IteratorCompare):
# Override this in tests for storages that delegate to TransactionMetaData
# the task to (de)serialize extension data.
use_extension_bytes = False
def checkSimpleIteration(self):
# Store a bunch of revisions of a single object
self._oid = oid = self._storage.new_oid()
......@@ -85,14 +89,28 @@ class IteratorStorage(IteratorCompare):
self.assertEqual(rec.data, None)
def checkTransactionExtensionFromIterator(self):
# It will be deserialized into a simple dict, which will be serialized
# differently. This simulates that 'dumps(loads(x), ...)' does not
# always return x.
class ext(dict):
def __reduce__(self):
return dict,(tuple(self.items()),)
ext = ext(foo=1)
oid = self._storage.new_oid()
revid = self._dostore(oid, data=MinPO(1))
iter = self._storage.iterator()
count = 0
for txn in iter:
self.assertEqual(txn.extension, {})
count +=1
self.assertEqual(count, 1)
revid = self._dostore(oid, data=MinPO(1), extension=ext)
txn, = self._storage.iterator()
self.assertEqual(txn.extension, ext)
try:
extension_bytes = txn.extension_bytes
except AttributeError:
# Volatile storages often don't serialize extension because it
# would be useless.
return
txn.extension = ext
if self.use_extension_bytes:
self.assertEqual(extension_bytes, txn.extension_bytes)
else:
self.assertNotEqual(extension_bytes, txn.extension_bytes)
def checkIterationIntraTransaction(self):
# TODO: Try this test with logging enabled. If you see something
......@@ -215,6 +233,7 @@ class IteratorDeepCompare(object):
eq(txn1.user, txn2.user)
eq(txn1.description, txn2.description)
eq(txn1.extension, txn2.extension)
eq(txn1.extension_bytes, txn2.extension_bytes)
itxn1 = iter(txn1)
itxn2 = iter(txn2)
for rec1, rec2 in zip(itxn1, itxn2):
......
......@@ -124,7 +124,7 @@ class StorageTestBase(ZODB.tests.util.TestCase):
ZODB.tests.util.TestCase.tearDown(self)
def _dostore(self, oid=None, revid=None, data=None,
already_pickled=0, user=None, description=None):
already_pickled=0, user=None, description=None, extension=None):
"""Do a complete storage transaction. The defaults are:
- oid=None, ask the storage for a new oid
......@@ -144,7 +144,7 @@ class StorageTestBase(ZODB.tests.util.TestCase):
if not already_pickled:
data = zodb_pickle(data)
# Begin the transaction
t = TransactionMetaData()
t = TransactionMetaData(extension=extension)
if user is not None:
t.user = user
if description is not None:
......
......@@ -56,6 +56,8 @@ class FileStorageTests(
ReadOnlyStorage.ReadOnlyStorage
):
use_extension_bytes = True
def open(self, **kwargs):
self._storage = ZODB.FileStorage.FileStorage('FileStorageTests.fs',
**kwargs)
......@@ -77,7 +79,13 @@ class FileStorageTests(
except POSException.StorageError:
pass
else:
self.fail("expect long user field to raise error")
self.fail("expect long description field to raise error")
try:
self._dostore(extension={s: 1})
except POSException.StorageError:
pass
else:
self.fail("expect long extension field to raise error")
def check_use_fsIndex(self):
......
......@@ -14,17 +14,18 @@
import unittest
import warnings
from .._compat import dumps, loads, _protocol
from .._compat import dumps, loads
from ..Connection import TransactionMetaData
class TransactionMetaDataTests(unittest.TestCase):
def test_basic(self):
t = TransactionMetaData(
u'user\x80', u'description\x80', dict(foo='FOO'))
extension = dict(foo='FOO')
t = TransactionMetaData(u'user\x80', u'description\x80', extension)
self.assertEqual(t.user, b'user\xc2\x80')
self.assertEqual(t.description, b'description\xc2\x80')
self.assertEqual(t.extension, dict(foo='FOO'))
self.assertEqual(t.extension, extension)
self.assertEqual(loads(t.extension_bytes), extension)
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
self.assertEqual(t._extension, t.extension)
......@@ -33,14 +34,16 @@ class TransactionMetaDataTests(unittest.TestCase):
self.assertTrue("_extension is deprecated" in str(w[-1].message))
def test_basic_no_encoding(self):
t = TransactionMetaData(
b'user', b'description', dumps(dict(foo='FOO'), _protocol))
extension = dict(foo='FOO')
extension_bytes = dumps(extension)
t = TransactionMetaData(b'user', b'description', extension_bytes)
self.assertEqual(t.user, b'user')
self.assertEqual(t.description, b'description')
self.assertEqual(t.extension, dict(foo='FOO'))
self.assertEqual(t.extension, extension)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
self.assertEqual(t._extension, t.extension)
self.assertIs(t.extension_bytes, extension_bytes)
def test_constructor_default_args(self):
t = TransactionMetaData()
......@@ -52,23 +55,28 @@ class TransactionMetaDataTests(unittest.TestCase):
self.assertEqual(t._extension, t.extension)
def test_set_extension(self):
t = TransactionMetaData(u'', u'', b'')
data = {}
t = TransactionMetaData(u'', u'', data)
self.assertEqual(t.user, b'')
self.assertEqual(t.description, b'')
self.assertEqual(t.extension, {})
self.assertIs(t.extension, data)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
self.assertEqual(t._extension, t.extension)
self.assertEqual(t.extension_bytes, b'')
for name in 'extension', '_extension':
data = {name: name + 'foo'}
setattr(t, name, data)
self.assertEqual(t.extension, data)
self.assertEqual(t._extension, t.extension)
data = {}
setattr(t, name, data)
self.assertEqual(t.extension, data)
self.assertEqual(t._extension, t.extension)
self.assertIs(t.extension, data)
self.assertIs(t._extension, t.extension)
extension_bytes = t.extension_bytes
self.assertEqual(loads(extension_bytes), data)
empty = {}
setattr(t, name, empty)
self.assertIs(t.extension, empty)
self.assertIs(t._extension, t.extension)
self.assertEqual(t.extension_bytes, b'')
def test_used_by_connection(self):
import ZODB
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment