Commit 1f394d5a authored by Nicolas Wavrant

repozo: recover action can verify data on the fly

parent d6bcc5a4
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
5.5.2 (unreleased) 5.5.2 (unreleased)
================== ==================
- TBD - Add a new option to repozo's recover mode that verifies backup
integrity on the fly.
5.5.1 (2018-10-25) 5.5.1 (2018-10-25)
================== ==================
......
...@@ -73,6 +73,12 @@ Options for -R/--recover: ...@@ -73,6 +73,12 @@ Options for -R/--recover:
Note: for the stdout case, the index file will **not** be restored Note: for the stdout case, the index file will **not** be restored
automatically. automatically.
-w
--with-verification
Verify the backup files on the fly while recovering. This option runs
the same checks as -V/--verify mode, so that a backup can be verified
and recovered in a single step.
Options for -V/--verify: Options for -V/--verify:
-Q / --quick -Q / --quick
Verify file sizes only (skip md5 checksums). Verify file sizes only (skip md5 checksums).
...@@ -108,6 +114,8 @@ class WouldOverwriteFiles(Exception): ...@@ -108,6 +114,8 @@ class WouldOverwriteFiles(Exception):
class NoFiles(Exception): class NoFiles(Exception):
pass pass
class VerificationFail(Exception):
    """Signal that a backup file failed verification during recovery.

    Raised when a recovered chunk's byte count or checksum differs from
    the value recorded for it in the backup's .dat file.
    """
class _GzipCloser(object): class _GzipCloser(object):
...@@ -146,7 +154,7 @@ def error(msg, *args): ...@@ -146,7 +154,7 @@ def error(msg, *args):
def parseargs(argv): def parseargs(argv):
global VERBOSE global VERBOSE
try: try:
opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:', opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:w',
['backup', ['backup',
'recover', 'recover',
'verify', 'verify',
...@@ -160,6 +168,7 @@ def parseargs(argv): ...@@ -160,6 +168,7 @@ def parseargs(argv):
'kill-old-on-full', 'kill-old-on-full',
'date=', 'date=',
'output=', 'output=',
'with-verification',
]) ])
except getopt.error as msg: except getopt.error as msg:
usage(1, msg) usage(1, msg)
...@@ -174,6 +183,7 @@ def parseargs(argv): ...@@ -174,6 +183,7 @@ def parseargs(argv):
quick = False # -Q flag state quick = False # -Q flag state
gzip = False # -z flag state gzip = False # -z flag state
killold = False # -k flag state killold = False # -k flag state
withverify = False # -w flag state
options = Options() options = Options()
...@@ -210,6 +220,8 @@ def parseargs(argv): ...@@ -210,6 +220,8 @@ def parseargs(argv):
options.gzip = True options.gzip = True
elif opt in ('-k', '--kill-old-on-full'): elif opt in ('-k', '--kill-old-on-full'):
options.killold = True options.killold = True
elif opt in ('-w', '--with-verify'):
options.withverify = True
else: else:
assert False, (opt, arg) assert False, (opt, arg)
...@@ -229,6 +241,9 @@ def parseargs(argv): ...@@ -229,6 +241,9 @@ def parseargs(argv):
if options.output is not None: if options.output is not None:
log('--output option is ignored in backup mode') log('--output option is ignored in backup mode')
options.output = None options.output = None
if options.withverify is not None:
log('--with-verify option is ignored in backup mode')
options.withverify = None
elif options.mode == RECOVER: elif options.mode == RECOVER:
if options.file is not None: if options.file is not None:
log('--file option is ignored in recover mode') log('--file option is ignored in recover mode')
...@@ -256,6 +271,9 @@ def parseargs(argv): ...@@ -256,6 +271,9 @@ def parseargs(argv):
if options.killold: if options.killold:
log('--kill-old-on-full option is ignored in verify mode') log('--kill-old-on-full option is ignored in verify mode')
options.killold = False options.killold = False
if options.withverify is not None:
log('--with-verify option is ignored in verify mode')
options.withverify = None
return options return options
...@@ -649,10 +667,40 @@ def do_recover(options): ...@@ -649,10 +667,40 @@ def do_recover(options):
else: else:
log('Recovering file to %s', options.output) log('Recovering file to %s', options.output)
outfp = open(options.output, 'wb') outfp = open(options.output, 'wb')
reposz, reposum = concat(repofiles, outfp) if options.withverify:
datfile = os.path.splitext(repofiles[0])[0] + '.dat'
with open(datfile) as fp:
truth_dict = {}
for line in fp:
fn, startpos, endpos, sum = line.split()
startpos = int(startpos)
endpos = int(endpos)
filename = os.path.join(options.repository,
os.path.basename(fn))
truth_dict[filename] = {
'size': endpos - startpos,
'sum': sum,
}
totalsz = 0
for repofile in repofiles:
reposz, reposum = concat([repofile], outfp)
expected_truth = truth_dict[repofile]
if reposz != expected_truth['size']:
raise VerificationFail(
"%s is %d bytes, should be %d bytes" % (
repofile, reposz, expected_truth['size']))
if reposum != expected_truth['sum']:
raise VerificationFail(
"%s has checksum %s instead of %s" % (
repofile, reposum, expected_truth['sum']))
totalsz += reposz
log("Recovered chunk %s : %s bytes, md5: %s", repofile, reposz, reposum)
log("Recovered a total of %s bytes", totalsz)
else:
reposz, reposum = concat(repofiles, outfp)
log('Recovered %s bytes, md5: %s', reposz, reposum)
if outfp != sys.stdout: if outfp != sys.stdout:
outfp.close() outfp.close()
log('Recovered %s bytes, md5: %s', reposz, reposum)
if options.output is not None: if options.output is not None:
last_base = os.path.splitext(repofiles[-1])[0] last_base = os.path.splitext(repofiles[-1])[0]
...@@ -728,6 +776,8 @@ def main(argv=None): ...@@ -728,6 +776,8 @@ def main(argv=None):
do_backup(options) do_backup(options)
except WouldOverwriteFiles as e: except WouldOverwriteFiles as e:
sys.exit(str(e)) sys.exit(str(e))
except VerificationFail as e:
sys.exit(str(e))
elif options.mode == RECOVER: elif options.mode == RECOVER:
try: try:
do_recover(options) do_recover(options)
......
...@@ -872,7 +872,8 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase): ...@@ -872,7 +872,8 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
output = os.path.join(dd, 'Data.fs') output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index') index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57', options = self._makeOptions(date='2010-05-15-13-30-57',
output=output) output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA') self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.fs', 'BBB') self._makeFile(4, 5, 6, '.fs', 'BBB')
self._callFUT(options) self._callFUT(options)
...@@ -884,7 +885,8 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase): ...@@ -884,7 +885,8 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
output = os.path.join(dd, 'Data.fs') output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index') index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57', options = self._makeOptions(date='2010-05-15-13-30-57',
output=output) output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA') self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.fs', 'BBB') self._makeFile(4, 5, 6, '.fs', 'BBB')
self._makeFile(4, 5, 6, '.index', 'CCC') self._makeFile(4, 5, 6, '.index', 'CCC')
...@@ -898,7 +900,8 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase): ...@@ -898,7 +900,8 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
output = os.path.join(dd, 'Data.fs') output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index') index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57', options = self._makeOptions(date='2010-05-15-13-30-57',
output=output) output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA') self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBB') self._makeFile(4, 5, 6, '.deltafs', 'BBB')
self._callFUT(options) self._callFUT(options)
...@@ -910,7 +913,8 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase): ...@@ -910,7 +913,8 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
output = os.path.join(dd, 'Data.fs') output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index') index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57', options = self._makeOptions(date='2010-05-15-13-30-57',
output=output) output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA') self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBB') self._makeFile(4, 5, 6, '.deltafs', 'BBB')
self._makeFile(4, 5, 6, '.index', 'CCC') self._makeFile(4, 5, 6, '.index', 'CCC')
...@@ -918,6 +922,54 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase): ...@@ -918,6 +922,54 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
self.assertEqual(_read_file(output), b'AAABBB') self.assertEqual(_read_file(output), b'AAABBB')
self.assertEqual(_read_file(index), b'CCC') self.assertEqual(_read_file(index), b'CCC')
def test_w_incr_backup_with_verify_all_is_fine(self):
    """Recovery with verification succeeds when the .dat entries match.

    The .dat file describes a 3-byte chunk (offsets 0-3) and a 4-byte
    chunk (offsets 3-7), matching the 'AAA' and 'BBBB' payloads written
    below.  The recorded checksums are presumably the md5 of each
    payload -- confirm against repozo's checksum routine.
    """
    import tempfile
    dd = self._data_directory = tempfile.mkdtemp()
    output = os.path.join(dd, 'Data.fs')
    options = self._makeOptions(date='2010-05-15-13-30-57',
                                output=output,
                                withverify=True)
    self._makeFile(2, 3, 4, '.fs', 'AAA')
    self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
    self._makeFile(2, 3, 4, '.dat',
                   '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
                   '/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
    self._callFUT(options)
    # The recovered file is the full backup followed by the increment.
    self.assertEqual(_read_file(output), b'AAABBBB')
def test_w_incr_backup_with_verify_sum_inconsistent(self):
    """Recovery with verification fails on a checksum mismatch.

    The .dat entry for the .deltafs chunk carries a checksum whose last
    hex digit differs from the one used in the passing scenario, so the
    recover call is expected to raise VerificationFail.
    """
    import tempfile
    from ZODB.scripts.repozo import VerificationFail
    dd = self._data_directory = tempfile.mkdtemp()
    output = os.path.join(dd, 'Data.fs')
    options = self._makeOptions(date='2010-05-15-13-30-57',
                                output=output,
                                withverify=True)
    self._makeFile(2, 3, 4, '.fs', 'AAA')
    self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
    self._makeFile(2, 3, 4, '.dat',
                   '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
                   '/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec61\n')
    self.assertRaises(VerificationFail, self._callFUT, options)
def test_w_incr_backup_with_verify_size_inconsistent(self):
    """Recovery with verification fails on a size mismatch.

    The .dat entry for the .deltafs chunk spans offsets 3-8 (5 bytes)
    while the chunk written below holds the 4-byte payload 'BBBB', so
    the recover call is expected to raise VerificationFail.
    """
    import tempfile
    from ZODB.scripts.repozo import VerificationFail
    dd = self._data_directory = tempfile.mkdtemp()
    output = os.path.join(dd, 'Data.fs')
    options = self._makeOptions(date='2010-05-15-13-30-57',
                                output=output,
                                withverify=True)
    self._makeFile(2, 3, 4, '.fs', 'AAA')
    self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
    self._makeFile(2, 3, 4, '.dat',
                   '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
                   '/backup/2010-05-14-04-05-06.deltafs 3 8 f50881ced34c7d9e6bce100bf33dec60\n')
    self.assertRaises(VerificationFail, self._callFUT, options)
class Test_do_verify(OptionsTestBase, unittest.TestCase): class Test_do_verify(OptionsTestBase, unittest.TestCase):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment