Commit 15b62484 authored by Marius Gedminas

repozo --verify

Could be used as a workaround for https://bugs.launchpad.net/zodb/+bug/906057
parent 70a03207
#!/usr/bin/env python2.3 #!/usr/bin/env python
# repozo.py -- incremental and full backups of a Data.fs file. # repozo.py -- incremental and full backups of a Data.fs file.
# #
...@@ -18,6 +18,9 @@ Where: ...@@ -18,6 +18,9 @@ Where:
-R / --recover -R / --recover
Restore a ZODB file from a backup. Restore a ZODB file from a backup.
-V / --verify
Verify backup integrity.
-v / --verbose -v / --verbose
Verbose mode. Verbose mode.
...@@ -69,18 +72,17 @@ Options for -R/--recover: ...@@ -69,18 +72,17 @@ Options for -R/--recover:
Note: for the stdout case, the index file will **not** be restored Note: for the stdout case, the index file will **not** be restored
automatically. automatically.
Options for -V/--verify:
-Q / --quick
Verify file sizes only (skip md5 checksums).
""" """
from __future__ import print_function from __future__ import print_function
import os import os
import shutil import shutil
import sys import sys
from six.moves import filter from six.moves import filter
try: from hashlib import md5
# the hashlib package is available from Python 2.5
from hashlib import md5
except ImportError:
# the md5 package is deprecated in Python 2.6
from md5 import new as md5
import gzip import gzip
import time import time
import errno import errno
...@@ -92,6 +94,7 @@ program = sys.argv[0] ...@@ -92,6 +94,7 @@ program = sys.argv[0]
BACKUP = 1 BACKUP = 1
RECOVER = 2 RECOVER = 2
VERIFY = 3
COMMASPACE = ', ' COMMASPACE = ', '
READCHUNK = 16 * 1024 READCHUNK = 16 * 1024
...@@ -124,12 +127,17 @@ def log(msg, *args): ...@@ -124,12 +127,17 @@ def log(msg, *args):
print(msg % args, file=sys.stderr) print(msg % args, file=sys.stderr)
def error(msg, *args):
    """Report a problem on stderr.

    *msg* is a %-style format string and *args* are its values.
    """
    formatted = msg % args
    print(formatted, file=sys.stderr)
def parseargs(argv): def parseargs(argv):
global VERBOSE global VERBOSE
try: try:
opts, args = getopt.getopt(argv, 'BRvhr:f:FQzkD:o:', opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:',
['backup', ['backup',
'recover', 'recover',
'verify'
'verbose', 'verbose',
'help', 'help',
'repository=', 'repository=',
...@@ -145,7 +153,7 @@ def parseargs(argv): ...@@ -145,7 +153,7 @@ def parseargs(argv):
usage(1, msg) usage(1, msg)
class Options: class Options:
mode = None # BACKUP or RECOVER mode = None # BACKUP, RECOVER or VERIFY
file = None # name of input Data.fs file file = None # name of input Data.fs file
repository = None # name of directory holding backups repository = None # name of directory holding backups
full = False # True forces full backup full = False # True forces full backup
...@@ -164,12 +172,16 @@ def parseargs(argv): ...@@ -164,12 +172,16 @@ def parseargs(argv):
VERBOSE = True VERBOSE = True
elif opt in ('-R', '--recover'): elif opt in ('-R', '--recover'):
if options.mode is not None: if options.mode is not None:
usage(1, '-B and -R are mutually exclusive') usage(1, '-B, -R, and -V are mutually exclusive')
options.mode = RECOVER options.mode = RECOVER
elif opt in ('-B', '--backup'): elif opt in ('-B', '--backup'):
if options.mode is not None: if options.mode is not None:
usage(1, '-B and -R are mutually exclusive') usage(1, '-B, -R, and -V are mutually exclusive')
options.mode = BACKUP options.mode = BACKUP
elif opt in ('-V', '--verify'):
if options.mode is not None:
usage(1, '-B, -R, and -V are mutually exclusive')
options.mode = VERIFY
elif opt in ('-Q', '--quick'): elif opt in ('-Q', '--quick'):
options.quick = True options.quick = True
elif opt in ('-f', '--file'): elif opt in ('-f', '--file'):
...@@ -195,7 +207,7 @@ def parseargs(argv): ...@@ -195,7 +207,7 @@ def parseargs(argv):
# Sanity checks # Sanity checks
if options.mode is None: if options.mode is None:
usage(1, 'Either --backup or --recover is required') usage(1, 'Either --backup, --recover or --verify is required')
if options.repository is None: if options.repository is None:
usage(1, '--repository is required') usage(1, '--repository is required')
if options.mode == BACKUP: if options.mode == BACKUP:
...@@ -205,14 +217,33 @@ def parseargs(argv): ...@@ -205,14 +217,33 @@ def parseargs(argv):
if options.output is not None: if options.output is not None:
log('--output option is ignored in backup mode') log('--output option is ignored in backup mode')
options.output = None options.output = None
else: elif options.mode == RECOVER:
assert options.mode == RECOVER
if options.file is not None: if options.file is not None:
log('--file option is ignored in recover mode') log('--file option is ignored in recover mode')
options.file = None options.file = None
if options.killold is not None: if options.killold:
log('--kill-old-on-full option is ignored in recover mode') log('--kill-old-on-full option is ignored in recover mode')
options.killold = None options.killold = False
else:
assert options.mode == VERIFY
if options.date is not None:
log("--date option is ignored in verify mode")
options.date = None
if options.output is not None:
log('--output option is ignored in verify mode')
options.output = None
if options.full:
log('--full option is ignored in verify mode')
options.full = False
if options.gzip:
log('--gzip option is ignored in verify mode')
options.gzip = False
if options.file is not None:
log('--file option is ignored in verify mode')
options.file = None
if options.killold:
log('--kill-old-on-full option is ignored in verify mode')
options.killold = False
return options return options
...@@ -608,6 +639,39 @@ def do_recover(options): ...@@ -608,6 +639,39 @@ def do_recover(options):
log('No index file to restore: %s', source_index) log('No index file to restore: %s', source_index)
def do_verify(options):
    """Verify the sizes and checksums of all files listed in the .dat file.

    The .dat file that sits next to the first file returned by
    find_files() is read line by line.  Each line holds a file name, a
    start offset, an end offset and the expected checksum.  Every problem
    found is reported through error(), and verification then carries on
    with the next entry.

    When options.quick is true only the file sizes are compared; the
    checksums are not computed.

    Raises NoFiles if find_files() returns nothing.
    """
    repofiles = find_files(options)
    if not repofiles:
        raise NoFiles('No files in repository')
    datfile = os.path.splitext(repofiles[0])[0] + '.dat'
    with open(datfile) as manifest:
        for line in manifest:
            fn, startpos, endpos, expected_sum = line.split()
            expected_size = int(endpos) - int(startpos)
            # The directory recorded in the .dat file is dropped; the file
            # is looked up by base name inside options.repository.
            filename = os.path.join(options.repository,
                                    os.path.basename(fn))
            log("Verifying %s", filename)
            # XXX: if the file is gzipped, we need to unzip it
            try:
                backup = open(filename, 'rb')
            except IOError:
                error("%s is missing", filename)
                continue
            # The with block closes the backup file even if checksum()
            # raises, and keeps its handle separate from the manifest's.
            with backup:
                size = os.fstat(backup.fileno()).st_size
                if size != expected_size:
                    error("%s is %d bytes, should be %d bytes", filename,
                          size, expected_size)
                elif not options.quick:
                    actual_sum = checksum(backup, size)
                    if actual_sum != expected_sum:
                        error("%s has checksum %s instead of %s", filename,
                              actual_sum, expected_sum)
def main(argv=None): def main(argv=None):
if argv is None: if argv is None:
argv = sys.argv[1:] argv = sys.argv[1:]
...@@ -616,15 +680,18 @@ def main(argv=None): ...@@ -616,15 +680,18 @@ def main(argv=None):
try: try:
do_backup(options) do_backup(options)
except WouldOverwriteFiles as e: except WouldOverwriteFiles as e:
print(str(e), file=sys.stderr) sys.exit(str(e))
sys.exit(1) elif options.mode == RECOVER:
else:
assert options.mode == RECOVER
try: try:
do_recover(options) do_recover(options)
except NoFiles as e: except NoFiles as e:
print(str(e), file=sys.stderr) sys.exit(str(e))
sys.exit(1) else:
assert options.mode == VERIFY
try:
do_verify(options)
except NoFiles as e:
sys.exit(str(e))
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -198,9 +198,10 @@ class OptionsTestBase: ...@@ -198,9 +198,10 @@ class OptionsTestBase:
def _makeOptions(self, **kw): def _makeOptions(self, **kw):
import tempfile import tempfile
self._repository_directory = tempfile.mkdtemp() self._repository_directory = tempfile.mkdtemp(prefix='test-repozo-')
class Options(object): class Options(object):
repository = self._repository_directory repository = self._repository_directory
date = None
def __init__(self, **kw): def __init__(self, **kw):
self.__dict__.update(kw) self.__dict__.update(kw)
return Options(**kw) return Options(**kw)
...@@ -789,6 +790,90 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase): ...@@ -789,6 +790,90 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
self.assertEqual(_read_file(output), b'AAABBB') self.assertEqual(_read_file(output), b'AAABBB')
self.assertEqual(_read_file(index), b'CCC') self.assertEqual(_read_file(index), b'CCC')
class Test_do_verify(OptionsTestBase, unittest.TestCase):
    # Tests for repozo.do_verify().  Each test builds a small repository
    # (a full backup, optionally an incremental one, and a .dat manifest)
    # and checks the list of messages do_verify() passes to repozo.error().
    #
    # Comments are used instead of docstrings on purpose: unittest shows a
    # test's docstring in place of its name in verbose output.

    # Checksums recorded in the manifest for the b'AAA' full backup and the
    # b'BBBB' incremental backup (test_all_is_fine relies on them matching).
    _FULL_SUM = 'e1faffb3e614e6c2fba74296962386b7'
    _DELTA_SUM = 'f50881ced34c7d9e6bce100bf33dec60'

    _DELTA_NAME = '2010-05-14-04-05-06.deltafs'

    def _callFUT(self, options):
        # Run do_verify() with repozo.error temporarily replaced, and return
        # the formatted messages it reported.
        from ZODB.scripts import repozo
        errors = []
        orig_error = repozo.error
        def _error(msg, *args):
            errors.append(msg % args)
        repozo.error = _error
        try:
            repozo.do_verify(options)
            return errors
        finally:
            repozo.error = orig_error

    def _makeFile(self, hour, min, sec, ext, text=None):
        # Create a repository file named after the given time and extension;
        # its content defaults to its own name.  Returns the full path.
        assert self._repository_directory, 'call _makeOptions first!'
        name = '2010-05-14-%02d-%02d-%02d%s' % (hour, min, sec, ext)
        if text is None:
            text = name
        fqn = os.path.join(self._repository_directory, name)
        _write_file(fqn, text.encode())
        return fqn

    def _makeDat(self, full_sum=_FULL_SUM, delta_sum=_DELTA_SUM):
        # Write the manifest describing a 3-byte full backup followed by a
        # 4-byte incremental backup, with the given checksums.
        return self._makeFile(
            2, 3, 4, '.dat',
            '/backup/2010-05-14-02-03-04.fs 0 3 %s\n'
            '/backup/2010-05-14-04-05-06.deltafs 3 7 %s\n'
            % (full_sum, delta_sum))

    def _deltaPath(self, options):
        # Path of the incremental backup as do_verify() reports it.
        return options.repository + os.path.sep + self._DELTA_NAME

    def test_no_files(self):
        from ZODB.scripts.repozo import NoFiles
        options = self._makeOptions()
        self.assertRaises(NoFiles, self._callFUT, options)

    def test_all_is_fine(self):
        options = self._makeOptions(quick=False)
        self._makeFile(2, 3, 4, '.fs', 'AAA')
        self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
        self._makeDat()
        self.assertEqual(self._callFUT(options), [])

    def test_missing_file(self):
        # The manifest mentions an incremental backup that was never written.
        options = self._makeOptions(quick=True)
        self._makeFile(2, 3, 4, '.fs', 'AAA')
        self._makeDat()
        self.assertEqual(self._callFUT(options),
                         [self._deltaPath(options) + ' is missing'])

    def test_bad_size(self):
        # The incremental backup is one byte shorter than the manifest says.
        options = self._makeOptions(quick=False)
        self._makeFile(2, 3, 4, '.fs', 'AAA')
        self._makeFile(4, 5, 6, '.deltafs', 'BBB')
        self._makeDat()
        self.assertEqual(self._callFUT(options),
                         [self._deltaPath(options) +
                          ' is 3 bytes, should be 4 bytes'])

    def test_bad_checksum(self):
        # Right size, different content.
        options = self._makeOptions(quick=False)
        self._makeFile(2, 3, 4, '.fs', 'AAA')
        self._makeFile(4, 5, 6, '.deltafs', 'BbBB')
        self._makeDat()
        self.assertEqual(self._callFUT(options),
                         [self._deltaPath(options) +
                          ' has checksum'
                          ' 36486440db255f0ee6ab109d5d231406 instead of'
                          ' f50881ced34c7d9e6bce100bf33dec60'])

    def test_quick_ignores_checksums(self):
        # In quick mode the bogus checksums in the manifest go unnoticed.
        options = self._makeOptions(quick=True)
        self._makeFile(2, 3, 4, '.fs', 'AAA')
        self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
        self._makeDat(full_sum='a' * 32, delta_sum='b' * 32)
        self.assertEqual(self._callFUT(options), [])
class MonteCarloTests(unittest.TestCase): class MonteCarloTests(unittest.TestCase):
layer = ZODB.tests.util.MininalTestLayer('repozo') layer = ZODB.tests.util.MininalTestLayer('repozo')
...@@ -902,6 +987,7 @@ def test_suite(): ...@@ -902,6 +987,7 @@ def test_suite():
unittest.makeSuite(Test_do_incremental_backup), unittest.makeSuite(Test_do_incremental_backup),
#unittest.makeSuite(Test_do_backup), #TODO #unittest.makeSuite(Test_do_backup), #TODO
unittest.makeSuite(Test_do_recover), unittest.makeSuite(Test_do_recover),
unittest.makeSuite(Test_do_verify),
# N.B.: this test take forever to run (~40sec on a fast laptop), # N.B.: this test take forever to run (~40sec on a fast laptop),
# *and* it is non-deterministic. # *and* it is non-deterministic.
unittest.makeSuite(MonteCarloTests), unittest.makeSuite(MonteCarloTests),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment