Commit 15b62484 authored by Marius Gedminas

repozo --verify

Could be used as a workaround for https://bugs.launchpad.net/zodb/+bug/906057
parent 70a03207
#!/usr/bin/env python2.3
#!/usr/bin/env python
# repozo.py -- incremental and full backups of a Data.fs file.
#
......@@ -18,6 +18,9 @@ Where:
-R / --recover
Restore a ZODB file from a backup.
-V / --verify
Verify backup integrity.
-v / --verbose
Verbose mode.
......@@ -69,18 +72,17 @@ Options for -R/--recover:
Note: for the stdout case, the index file will **not** be restored
automatically.
Options for -V/--verify:
-Q / --quick
Verify file sizes only (skip md5 checksums).
"""
from __future__ import print_function
import os
import shutil
import sys
from six.moves import filter
try:
# the hashlib package is available from Python 2.5
from hashlib import md5
except ImportError:
# the md5 package is deprecated in Python 2.6
from md5 import new as md5
from hashlib import md5
import gzip
import time
import errno
......@@ -92,6 +94,7 @@ program = sys.argv[0]
# Operating modes; parseargs() stores one of these in options.mode.
BACKUP = 1
RECOVER = 2
VERIFY = 3
# Separator string: a comma followed by a space.
COMMASPACE = ', '
# 16 KiB.  NOTE(review): presumably the chunk size for file reads -- confirm
# at the use sites, which are not visible here.
READCHUNK = 16 * 1024
......@@ -124,12 +127,17 @@ def log(msg, *args):
print(msg % args, file=sys.stderr)
def error(msg, *args):
    """Write ``msg % args`` to standard error, followed by a newline."""
    text = msg % args
    print(text, file=sys.stderr)
def parseargs(argv):
global VERBOSE
try:
opts, args = getopt.getopt(argv, 'BRvhr:f:FQzkD:o:',
opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:',
['backup',
'recover',
'verify',  # the comma matters: adjacent literals would fuse into 'verifyverbose'
'verbose',
'help',
'repository=',
......@@ -145,7 +153,7 @@ def parseargs(argv):
usage(1, msg)
class Options:
mode = None # BACKUP or RECOVER
mode = None # BACKUP, RECOVER or VERIFY
file = None # name of input Data.fs file
repository = None # name of directory holding backups
full = False # True forces full backup
......@@ -164,12 +172,16 @@ def parseargs(argv):
VERBOSE = True
elif opt in ('-R', '--recover'):
if options.mode is not None:
usage(1, '-B and -R are mutually exclusive')
usage(1, '-B, -R, and -V are mutually exclusive')
options.mode = RECOVER
elif opt in ('-B', '--backup'):
if options.mode is not None:
usage(1, '-B and -R are mutually exclusive')
usage(1, '-B, -R, and -V are mutually exclusive')
options.mode = BACKUP
elif opt in ('-V', '--verify'):
if options.mode is not None:
usage(1, '-B, -R, and -V are mutually exclusive')
options.mode = VERIFY
elif opt in ('-Q', '--quick'):
options.quick = True
elif opt in ('-f', '--file'):
......@@ -195,7 +207,7 @@ def parseargs(argv):
# Sanity checks
if options.mode is None:
usage(1, 'Either --backup or --recover is required')
usage(1, 'Either --backup, --recover or --verify is required')
if options.repository is None:
usage(1, '--repository is required')
if options.mode == BACKUP:
......@@ -205,14 +217,33 @@ def parseargs(argv):
if options.output is not None:
log('--output option is ignored in backup mode')
options.output = None
else:
assert options.mode == RECOVER
elif options.mode == RECOVER:
if options.file is not None:
log('--file option is ignored in recover mode')
options.file = None
if options.killold is not None:
if options.killold:
log('--kill-old-on-full option is ignored in recover mode')
options.killold = None
options.killold = False
else:
assert options.mode == VERIFY
if options.date is not None:
log("--date option is ignored in verify mode")
options.date = None
if options.output is not None:
log('--output option is ignored in verify mode')
options.output = None
if options.full:
log('--full option is ignored in verify mode')
options.full = False
if options.gzip:
log('--gzip option is ignored in verify mode')
options.gzip = False
if options.file is not None:
log('--file option is ignored in verify mode')
options.file = None
if options.killold:
log('--kill-old-on-full option is ignored in verify mode')
options.killold = False
return options
......@@ -608,6 +639,39 @@ def do_recover(options):
log('No index file to restore: %s', source_index)
def do_verify(options):
    """Verify the sizes and checksums of all files mentioned in the .dat file.

    The .dat file is the one sitting next to the first file returned by
    ``find_files(options)``.  Each of its lines holds four
    whitespace-separated fields: file name, start position, end position
    and checksum.  Only the base name of the recorded file name is used;
    the file is looked up inside ``options.repository``.

    Problems are reported through ``error()`` and do not stop the run:

    * the file cannot be opened;
    * its size differs from ``endpos - startpos``;
    * unless ``options.quick`` is set, its checksum differs from the
      recorded one (only checked when the size matches).

    Raises ``NoFiles`` when ``find_files(options)`` returns nothing.
    """
    repofiles = find_files(options)
    if not repofiles:
        raise NoFiles('No files in repository')
    datfile = os.path.splitext(repofiles[0])[0] + '.dat'
    with open(datfile) as dat_fp:
        for line in dat_fp:
            fn, startpos, endpos, expected_sum = line.split()
            expected_size = int(endpos) - int(startpos)
            filename = os.path.join(options.repository,
                                    os.path.basename(fn))
            log("Verifying %s", filename)
            # XXX: if the file is gzipped, we need to unzip it
            try:
                backup_fp = open(filename, 'rb')
            except IOError:
                error("%s is missing", filename)
                continue
            # A separate name keeps the .dat handle intact, and the context
            # manager closes the backup file even if fstat() or checksum()
            # raises.
            with backup_fp:
                size = os.fstat(backup_fp.fileno()).st_size
                if size != expected_size:
                    error("%s is %d bytes, should be %d bytes", filename,
                          size, expected_size)
                elif not options.quick:
                    actual_sum = checksum(backup_fp, size)
                    if actual_sum != expected_sum:
                        error("%s has checksum %s instead of %s", filename,
                              actual_sum, expected_sum)
def main(argv=None):
if argv is None:
argv = sys.argv[1:]
......@@ -616,15 +680,18 @@ def main(argv=None):
try:
do_backup(options)
except WouldOverwriteFiles as e:
print(str(e), file=sys.stderr)
sys.exit(1)
else:
assert options.mode == RECOVER
sys.exit(str(e))
elif options.mode == RECOVER:
try:
do_recover(options)
except NoFiles as e:
print(str(e), file=sys.stderr)
sys.exit(1)
sys.exit(str(e))
else:
assert options.mode == VERIFY
try:
do_verify(options)
except NoFiles as e:
sys.exit(str(e))
if __name__ == '__main__':
......
......@@ -198,9 +198,10 @@ class OptionsTestBase:
def _makeOptions(self, **kw):
    """Create a scratch repository directory and return an options object.

    The directory path is stored on ``self._repository_directory`` and is
    exposed as the ``repository`` attribute of the returned object.
    ``date`` defaults to ``None``; every keyword argument becomes an
    instance attribute and may override the class-level defaults.
    """
    import tempfile
    # Create exactly one directory; the recognisable prefix makes leftovers
    # easy to spot in the system temp dir.
    self._repository_directory = tempfile.mkdtemp(prefix='test-repozo-')

    class Options(object):
        repository = self._repository_directory
        date = None

        def __init__(self, **kw):
            self.__dict__.update(kw)

    return Options(**kw)
......@@ -789,6 +790,90 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
self.assertEqual(_read_file(output), b'AAABBB')
self.assertEqual(_read_file(index), b'CCC')
class Test_do_verify(OptionsTestBase, unittest.TestCase):
    """Tests for ``repozo.do_verify``."""

    def _callFUT(self, options):
        """Run ``do_verify`` and return the error messages it reported.

        ``repozo.error`` is temporarily replaced by a collector so the
        formatted messages can be asserted on; the original function is
        restored afterwards, even when ``do_verify`` raises.
        """
        from ZODB.scripts import repozo
        errors = []
        orig_error = repozo.error

        def _error(msg, *args):
            errors.append(msg % args)

        repozo.error = _error
        try:
            repozo.do_verify(options)
            return errors
        finally:
            repozo.error = orig_error

    def _makeFile(self, hour, min, sec, ext, text=None):
        """Write a file named after a 2010-05-14 timestamp; return its path.

        The file is created in the repository directory.  When ``text`` is
        not given, the file's own name is used as its content.
        """
        assert self._repository_directory, 'call _makeOptions first!'
        name = '2010-05-14-%02d-%02d-%02d%s' % (hour, min, sec, ext)
        if text is None:
            text = name
        fqn = os.path.join(self._repository_directory, name)
        _write_file(fqn, text.encode())
        return fqn

    def test_no_files(self):
        from ZODB.scripts.repozo import NoFiles
        options = self._makeOptions()
        self.assertRaises(NoFiles, self._callFUT, options)

    def test_all_is_fine(self):
        options = self._makeOptions(quick=False)
        self._makeFile(2, 3, 4, '.fs', 'AAA')
        self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
        self._makeFile(2, 3, 4, '.dat',
            '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
            '/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
        self.assertEqual(self._callFUT(options), [])

    def test_missing_file(self):
        options = self._makeOptions(quick=True)
        self._makeFile(2, 3, 4, '.fs', 'AAA')
        self._makeFile(2, 3, 4, '.dat',
            '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
            '/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
        self.assertEqual(self._callFUT(options),
                         [options.repository + os.path.sep +
                          '2010-05-14-04-05-06.deltafs is missing'])

    def test_bad_size(self):
        options = self._makeOptions(quick=False)
        self._makeFile(2, 3, 4, '.fs', 'AAA')
        self._makeFile(4, 5, 6, '.deltafs', 'BBB')
        self._makeFile(2, 3, 4, '.dat',
            '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
            '/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
        self.assertEqual(self._callFUT(options),
                         [options.repository + os.path.sep +
                          '2010-05-14-04-05-06.deltafs is 3 bytes,'
                          ' should be 4 bytes'])

    def test_bad_checksum(self):
        options = self._makeOptions(quick=False)
        self._makeFile(2, 3, 4, '.fs', 'AAA')
        self._makeFile(4, 5, 6, '.deltafs', 'BbBB')
        self._makeFile(2, 3, 4, '.dat',
            '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
            '/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
        self.assertEqual(self._callFUT(options),
                         [options.repository + os.path.sep +
                          '2010-05-14-04-05-06.deltafs has checksum'
                          ' 36486440db255f0ee6ab109d5d231406 instead of'
                          ' f50881ced34c7d9e6bce100bf33dec60'])

    def test_quick_ignores_checksums(self):
        # The recorded checksums are deliberately wrong; quick mode must
        # not look at them.
        options = self._makeOptions(quick=True)
        self._makeFile(2, 3, 4, '.fs', 'AAA')
        self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
        self._makeFile(2, 3, 4, '.dat',
            '/backup/2010-05-14-02-03-04.fs 0 3 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n'
            '/backup/2010-05-14-04-05-06.deltafs 3 7 bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n')
        self.assertEqual(self._callFUT(options), [])
class MonteCarloTests(unittest.TestCase):
layer = ZODB.tests.util.MininalTestLayer('repozo')
......@@ -902,6 +987,7 @@ def test_suite():
unittest.makeSuite(Test_do_incremental_backup),
#unittest.makeSuite(Test_do_backup), #TODO
unittest.makeSuite(Test_do_recover),
unittest.makeSuite(Test_do_verify),
# N.B.: this test take forever to run (~40sec on a fast laptop),
# *and* it is non-deterministic.
unittest.makeSuite(MonteCarloTests),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment