Commit 198b8df4 authored by Kirill Smelkov's avatar Kirill Smelkov

zodbrestore - Tool to restore content of a ZODB database from zodbdump output

Zodbrestore is long-coming counterpart to zodbdump.
Implementation is internally based on reworked zodbcommit.

For FileStorage restored database is verified via test to be bit-to-bit
identical to the original.

For NEO it won't be exactly the case, as NEO does not implement
IStorageRestoreable: there is only tpc_begin(tid=...) but no restore().

/helped-by @jerome
parent e5fb6e7c
...@@ -12,4 +12,5 @@ __ https://github.com/zopefoundation/ZODB/pull/128#issuecomment-260970932 ...@@ -12,4 +12,5 @@ __ https://github.com/zopefoundation/ZODB/pull/128#issuecomment-260970932
- `zodb cmp` - compare content of two ZODB databases bit-to-bit. - `zodb cmp` - compare content of two ZODB databases bit-to-bit.
- `zodb commit` - commit new transaction into a ZODB database. - `zodb commit` - commit new transaction into a ZODB database.
- `zodb dump` - dump content of a ZODB database. - `zodb dump` - dump content of a ZODB database.
- `zodb restore` - restore content of a ZODB database.
- `zodb info` - print general information about a ZODB database. - `zodb info` - print general information about a ZODB database.
# -*- coding: utf-8 -*-
# Copyright (C) 2021 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
from __future__ import print_function
from zodbtools.zodbrestore import zodbrestore
from zodbtools.util import storageFromURL
from os.path import dirname
from tempfile import mkdtemp
from shutil import rmtree
from golang import func, defer
# verify zodbrestore.
@func
def test_zodbrestore():
tmpd = mkdtemp('', 'zodbrestore.')
defer(lambda: rmtree(tmpd))
# restore from testdata/1.zdump.ok and verify it gives result that is
# bit-to-bit identical to testdata/1.fs
tdata = dirname(__file__) + "/testdata"
@func
def _():
zdump = open("%s/1.zdump.ok" % tdata, 'rb')
defer(zdump.close)
stor = storageFromURL('%s/2.fs' % tmpd)
defer(stor.close)
zodbrestore(stor, zdump)
_()
zfs1 = _readfile("%s/1.fs" % tdata)
zfs2 = _readfile("%s/2.fs" % tmpd)
assert zfs1 == zfs2
# _readfile reads file at path.
def _readfile(path): # -> data(bytes)
with open(path, 'rb') as _:
return _.read()
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2017-2019 Nexedi SA and Contributors. # Copyright (C) 2017-2021 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# Jérome Perrin <jerome@nexedi.com> # Jérome Perrin <jerome@nexedi.com>
# #
...@@ -37,7 +37,7 @@ def register_command(cmdname): ...@@ -37,7 +37,7 @@ def register_command(cmdname):
command_module = importlib.import_module('zodbtools.zodb' + cmdname) command_module = importlib.import_module('zodbtools.zodb' + cmdname)
command_dict[cmdname] = command_module command_dict[cmdname] = command_module
for _ in ('analyze', 'cmp', 'commit', 'dump', 'info'): for _ in ('analyze', 'cmp', 'commit', 'dump', 'info', 'restore'):
register_command(_) register_command(_)
......
...@@ -41,20 +41,43 @@ can query current database head (last_tid) with `zodb info <stor> last_tid`. ...@@ -41,20 +41,43 @@ can query current database head (last_tid) with `zodb info <stor> last_tid`.
from __future__ import print_function from __future__ import print_function
from zodbtools import zodbdump from zodbtools import zodbdump
from zodbtools.util import ashex, fromhex, storageFromURL from zodbtools.util import ashex, fromhex, storageFromURL
from ZODB.interfaces import IStorageRestoreable
from ZODB.utils import p64, u64, z64 from ZODB.utils import p64, u64, z64
from ZODB.POSException import POSKeyError from ZODB.POSException import POSKeyError
from ZODB._compat import BytesIO from ZODB._compat import BytesIO
from golang import func, defer, panic from golang import func, defer, panic
import warnings
# zodbcommit commits new transaction into ZODB storage with data specified by # zodbcommit commits new transaction into ZODB storage with data specified by
# zodbdump transaction. # zodbdump transaction.
# #
# txn.tid is ignored. # txn.tid acts as a flag:
# tid of committed transaction is returned. # - with tid=0 the transaction is committed regularly.
# - with tid=!0 the transaction is recreated with exactly that tid via IStorageRestoreable.
#
# tid of created transaction is returned.
_norestoreWarned = set() # of storage class
def zodbcommit(stor, at, txn): def zodbcommit(stor, at, txn):
assert isinstance(txn, zodbdump.Transaction) assert isinstance(txn, zodbdump.Transaction)
want_restore = (txn.tid != z64)
have_restore = IStorageRestoreable.providedBy(stor)
# warn once if stor does not implement IStorageRestoreable
if want_restore and (not have_restore):
if type(stor) not in _norestoreWarned:
warnings.warn("restore: %s does not provide IStorageRestoreable ...\n"
"\t... -> will try to emulate it on best-effort basis." %
type(stor), RuntimeWarning)
_norestoreWarned.add(type(stor))
if want_restore:
# even if stor might be not providing IStorageRestoreable and not
# supporting .restore, it can still support .tpc_begin(tid=...). An example
# of this is NEO. We anyway need to be able to specify which transaction ID
# we need to restore transaction with.
stor.tpc_begin(txn, tid=txn.tid)
else:
stor.tpc_begin(txn) stor.tpc_begin(txn)
def _(): def _():
...@@ -62,11 +85,9 @@ def zodbcommit(stor, at, txn): ...@@ -62,11 +85,9 @@ def zodbcommit(stor, at, txn):
return _serial_at(stor, oid, at) return _serial_at(stor, oid, at)
for obj in txn.objv: for obj in txn.objv:
data = None # data do be committed - setup vvv data = None # data do be committed - setup vvv
copy_from = None
if isinstance(obj, zodbdump.ObjectCopy): if isinstance(obj, zodbdump.ObjectCopy):
# NEO does not support restore, and even if stor supports restore, copy_from = obj.copy_from
# going that way requires to already know tid for transaction we are
# committing. -> we just imitate copy by actually copying data and
# letting the storage deduplicate it.
data, _, _ = stor.loadBefore(obj.oid, p64(u64(obj.copy_from)+1)) data, _, _ = stor.loadBefore(obj.oid, p64(u64(obj.copy_from)+1))
elif isinstance(obj, zodbdump.ObjectDelete): elif isinstance(obj, zodbdump.ObjectDelete):
...@@ -82,11 +103,19 @@ def zodbcommit(stor, at, txn): ...@@ -82,11 +103,19 @@ def zodbcommit(stor, at, txn):
else: else:
panic('invalid object record: %r' % (obj,)) panic('invalid object record: %r' % (obj,))
# we have the data -> store the object. # we have the data -> restore/store the object.
# if it will be ConflictError - we just fail and let the caller retry. # if it will be ConflictError - we just fail and let the caller retry.
if data is None: if data is None:
stor.deleteObject(obj.oid, current_serial(obj.oid), txn) stor.deleteObject(obj.oid, current_serial(obj.oid), txn)
else: else:
if want_restore and have_restore:
stor.restore(obj.oid, txn.tid, data, '', copy_from, txn)
else:
# FIXME we don't handle copy_from on commit
# NEO does not support restore, and even if stor supports restore,
# going that way requires to already know tid for transaction we are
# committing. -> we just imitate copy by actually copying data and
# letting the storage deduplicate it.
stor.store(obj.oid, current_serial(obj.oid), data, '', txn) stor.store(obj.oid, current_serial(obj.oid), data, '', txn)
try: try:
...@@ -103,6 +132,9 @@ def zodbcommit(stor, at, txn): ...@@ -103,6 +132,9 @@ def zodbcommit(stor, at, txn):
stor.tpc_finish(txn, lambda tid: _.append(tid)) stor.tpc_finish(txn, lambda tid: _.append(tid))
assert len(_) == 1 assert len(_) == 1
tid = _[0] tid = _[0]
if want_restore and (tid != txn.tid):
panic('restore: restored transaction has tid=%s, but requested was tid=%s' %
(ashex(tid), ashex(txn.tid)))
return tid return tid
# _serial_at returns oid's serial as of @at database state. # _serial_at returns oid's serial as of @at database state.
...@@ -167,7 +199,9 @@ def main(argv): ...@@ -167,7 +199,9 @@ def main(argv):
stor = storageFromURL(storurl) stor = storageFromURL(storurl)
defer(stor.close) defer(stor.close)
zin = b'txn 0000000000000000 " "\n' # artificial transaction header # artificial transaction header with tid=0 to request regular commit
zin = b'txn 0000000000000000 " "\n'
zin += sys.stdin.read() zin += sys.stdin.read()
zin = BytesIO(zin) zin = BytesIO(zin)
zr = zodbdump.DumpReader(zin) zr = zodbdump.DumpReader(zin)
......
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (C) 2016-2020 Nexedi SA and Contributors. # Copyright (C) 2016-2021 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com> # Kirill Smelkov <kirr@nexedi.com>
# Jérome Perrin <jerome@nexedi.com> # Jérome Perrin <jerome@nexedi.com>
# #
...@@ -26,8 +26,8 @@ transaction prints transaction's header and information about changed objects. ...@@ -26,8 +26,8 @@ transaction prints transaction's header and information about changed objects.
The information dumped is complete raw information as stored in ZODB storage The information dumped is complete raw information as stored in ZODB storage
and should be suitable for restoring the database from the dump file bit-to-bit and should be suitable for restoring the database from the dump file bit-to-bit
identical to its original(*). It is dumped in semi text-binary format where identical to its original(*) via Zodbrestore. It is dumped in semi text-binary
object data is output as raw binary and everything else is text. format where object data is output as raw binary and everything else is text.
There is also shortened mode activated via --hashonly where only hash of object There is also shortened mode activated via --hashonly where only hash of object
data is printed without content. data is printed without content.
......
# Copyright (C) 2021 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
"""Zodbrestore - Restore content of a ZODB database.
Zodbrestore reads transactions from zodbdump output and recreates them in a
ZODB storage. See Zodbdump documentation for details.
"""
from __future__ import print_function
from zodbtools.zodbdump import DumpReader
from zodbtools.zodbcommit import zodbcommit
from zodbtools.util import asbinstream, ashex, storageFromURL
from golang import func, defer
# zodbrestore restores transactions read from reader r in zodbdump format.
#
# restoredf, if !None, is called for every restored transaction.
def zodbrestore(stor, r, restoredf=None):
zr = DumpReader(r)
at = stor.lastTransaction()
while 1:
txn = zr.readtxn()
if txn is None:
break
zodbcommit(stor, at, txn)
if restoredf != None:
restoredf(txn)
at = txn.tid
# ----------------------------------------
import sys, getopt
summary = "restore content of a ZODB database"
def usage(out):
print("""\
Usage: zodb restore [OPTIONS] <storage> < input
Restore content of a ZODB database.
The transactions to restore are read from stdin in zodbdump format.
On success the ID of every restored transaction is printed to stdout.
<storage> is an URL (see 'zodb help zurl') of a ZODB-storage.
Options:
-h --help show this help
""", file=out)
@func
def main(argv):
try:
optv, argv = getopt.getopt(argv[1:], "h", ["help"])
except getopt.GetoptError as e:
print(e, file=sys.stderr)
usage(sys.stderr)
sys.exit(2)
for opt, _ in optv:
if opt in ("-h", "--help"):
usage(sys.stdout)
sys.exit(0)
if len(argv) != 1:
usage(sys.stderr)
sys.exit(2)
storurl = argv[0]
stor = storageFromURL(storurl)
defer(stor.close)
def _(txn):
print(ashex(txn.tid))
zodbrestore(stor, asbinstream(sys.stdin), _)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment