Commit 1f394d5a authored by Nicolas Wavrant

repozo: recover action can verify data on the fly

parent d6bcc5a4
......@@ -5,7 +5,8 @@
5.5.2 (unreleased)
==================
- TBD
- Add a new option to repozo in recover mode which allows verifying
backup integrity on the fly.
5.5.1 (2018-10-25)
==================
......
......@@ -73,6 +73,12 @@ Options for -R/--recover:
Note: for the stdout case, the index file will **not** be restored
automatically.
-w
--with-verification
Verify the backup files on the fly while recovering. This option runs
the same checks as when repozo is run in -V/--verify mode, and
allows verifying and recovering a backup in a single step.
Options for -V/--verify:
-Q / --quick
Verify file sizes only (skip md5 checksums).
......@@ -108,6 +114,8 @@ class WouldOverwriteFiles(Exception):
class NoFiles(Exception):
    # NOTE(review): the code raising this exception is outside this excerpt;
    # presumably it signals that no usable backup files were found -- confirm
    # against the raise sites before relying on this description.
    pass
class VerificationFail(Exception):
    """Raised when a backup file fails verification during recovery.

    ``do_recover`` raises it (when run with verification enabled) if a
    recovered chunk's byte count or md5 checksum differs from the value
    recorded for that file in the repository's ``.dat`` file.  The message
    names the offending file and the mismatching values.
    """
class _GzipCloser(object):
......@@ -146,7 +154,7 @@ def error(msg, *args):
def parseargs(argv):
global VERBOSE
try:
opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:',
opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:w',
['backup',
'recover',
'verify',
......@@ -160,6 +168,7 @@ def parseargs(argv):
'kill-old-on-full',
'date=',
'output=',
'with-verification',
])
except getopt.error as msg:
usage(1, msg)
......@@ -174,6 +183,7 @@ def parseargs(argv):
quick = False # -Q flag state
gzip = False # -z flag state
killold = False # -k flag state
withverify = False # -w flag state
options = Options()
......@@ -210,6 +220,8 @@ def parseargs(argv):
options.gzip = True
elif opt in ('-k', '--kill-old-on-full'):
options.killold = True
elif opt in ('-w', '--with-verify'):
options.withverify = True
else:
assert False, (opt, arg)
......@@ -229,6 +241,9 @@ def parseargs(argv):
if options.output is not None:
log('--output option is ignored in backup mode')
options.output = None
if options.withverify is not None:
log('--with-verify option is ignored in backup mode')
options.withverify = None
elif options.mode == RECOVER:
if options.file is not None:
log('--file option is ignored in recover mode')
......@@ -256,6 +271,9 @@ def parseargs(argv):
if options.killold:
log('--kill-old-on-full option is ignored in verify mode')
options.killold = False
if options.withverify is not None:
log('--with-verify option is ignored in verify mode')
options.withverify = None
return options
......@@ -649,10 +667,40 @@ def do_recover(options):
else:
log('Recovering file to %s', options.output)
outfp = open(options.output, 'wb')
reposz, reposum = concat(repofiles, outfp)
if options.withverify:
datfile = os.path.splitext(repofiles[0])[0] + '.dat'
with open(datfile) as fp:
truth_dict = {}
for line in fp:
fn, startpos, endpos, sum = line.split()
startpos = int(startpos)
endpos = int(endpos)
filename = os.path.join(options.repository,
os.path.basename(fn))
truth_dict[filename] = {
'size': endpos - startpos,
'sum': sum,
}
totalsz = 0
for repofile in repofiles:
reposz, reposum = concat([repofile], outfp)
expected_truth = truth_dict[repofile]
if reposz != expected_truth['size']:
raise VerificationFail(
"%s is %d bytes, should be %d bytes" % (
repofile, reposz, expected_truth['size']))
if reposum != expected_truth['sum']:
raise VerificationFail(
"%s has checksum %s instead of %s" % (
repofile, reposum, expected_truth['sum']))
totalsz += reposz
log("Recovered chunk %s : %s bytes, md5: %s", repofile, reposz, reposum)
log("Recovered a total of %s bytes", totalsz)
else:
reposz, reposum = concat(repofiles, outfp)
log('Recovered %s bytes, md5: %s', reposz, reposum)
if outfp != sys.stdout:
outfp.close()
log('Recovered %s bytes, md5: %s', reposz, reposum)
if options.output is not None:
last_base = os.path.splitext(repofiles[-1])[0]
......@@ -728,6 +776,8 @@ def main(argv=None):
do_backup(options)
except WouldOverwriteFiles as e:
sys.exit(str(e))
except VerificationFail as e:
sys.exit(str(e))
elif options.mode == RECOVER:
try:
do_recover(options)
......
......@@ -872,7 +872,8 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output)
output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.fs', 'BBB')
self._callFUT(options)
......@@ -884,7 +885,8 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output)
output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.fs', 'BBB')
self._makeFile(4, 5, 6, '.index', 'CCC')
......@@ -898,7 +900,8 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output)
output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBB')
self._callFUT(options)
......@@ -910,7 +913,8 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output)
output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBB')
self._makeFile(4, 5, 6, '.index', 'CCC')
......@@ -918,6 +922,54 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
self.assertEqual(_read_file(output), b'AAABBB')
self.assertEqual(_read_file(index), b'CCC')
def test_w_incr_backup_with_verify_all_is_fine(self):
    # Recover a full + incremental backup with verification enabled.  The
    # .dat index lists the 3-byte full backup and the 4-byte delta, so the
    # recovery is expected to succeed and yield both chunks concatenated.
    import tempfile
    dd = self._data_directory = tempfile.mkdtemp()
    output = os.path.join(dd, 'Data.fs')
    options = self._makeOptions(date='2010-05-15-13-30-57',
                                output=output,
                                withverify=True)
    self._makeFile(2, 3, 4, '.fs', 'AAA')
    self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
    self._makeFile(2, 3, 4, '.dat',
                   '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
                   '/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
    self._callFUT(options)
    self.assertEqual(_read_file(output), b'AAABBBB')
def test_w_incr_backup_with_verify_sum_inconsistent(self):
    # Same layout as the "all is fine" case, except that the checksum
    # recorded for the delta file ends in ...61 instead of ...60, so
    # recovery with verification must raise VerificationFail.
    import tempfile
    from ZODB.scripts.repozo import VerificationFail
    dd = self._data_directory = tempfile.mkdtemp()
    output = os.path.join(dd, 'Data.fs')
    options = self._makeOptions(date='2010-05-15-13-30-57',
                                output=output,
                                withverify=True)
    self._makeFile(2, 3, 4, '.fs', 'AAA')
    self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
    self._makeFile(2, 3, 4, '.dat',
                   '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
                   '/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec61\n')
    self.assertRaises(VerificationFail, self._callFUT, options)
def test_w_incr_backup_with_verify_size_inconsistent(self):
    # The .dat index records offsets 3..8 (5 bytes) for the delta file,
    # but the file written below holds only 4 bytes ('BBBB'), so recovery
    # with verification must raise VerificationFail.
    import tempfile
    from ZODB.scripts.repozo import VerificationFail
    dd = self._data_directory = tempfile.mkdtemp()
    output = os.path.join(dd, 'Data.fs')
    options = self._makeOptions(date='2010-05-15-13-30-57',
                                output=output,
                                withverify=True)
    self._makeFile(2, 3, 4, '.fs', 'AAA')
    self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
    self._makeFile(2, 3, 4, '.dat',
                   '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
                   '/backup/2010-05-14-04-05-06.deltafs 3 8 f50881ced34c7d9e6bce100bf33dec60\n')
    self.assertRaises(VerificationFail, self._callFUT, options)
class Test_do_verify(OptionsTestBase, unittest.TestCase):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment