Commit a187da03 authored by Tres Seaver

Merge pull request #14 from zopefoundation/repozo-verify

This adds 'repozo --verify' to check your backup integrity
parents 70a03207 54b06570
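
For orientation, here is a minimal usage sketch of the new mode (the backup directory path is hypothetical): -V/--verify selects verification, -Q/--quick limits it to size-only checks, and the same options can also be handed to repozo's main() as an argument list.

from __future__ import print_function
from ZODB.scripts import repozo

# Sketch only: equivalent to running
#   repozo.py --verify --repository /path/to/backups
# from a shell; verification problems are reported on stderr via error().
try:
    repozo.main(['--verify', '--repository', '/path/to/backups'])
except SystemExit as exc:
    # usage errors and an empty repository (NoFiles) end in sys.exit()
    print('repozo exited with', exc.code)
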
#!/usr/bin/env python2.3
#!/usr/bin/env python
# repozo.py -- incremental and full backups of a Data.fs file.
#
......@@ -18,6 +18,9 @@ Where:
-R / --recover
Restore a ZODB file from a backup.
-V / --verify
Verify backup integrity.
-v / --verbose
Verbose mode.
......@@ -69,18 +72,17 @@ Options for -R/--recover:
Note: for the stdout case, the index file will **not** be restored
automatically.
Options for -V/--verify:
-Q / --quick
Verify file sizes only (skip md5 checksums).
"""
from __future__ import print_function
import os
import shutil
import sys
from six.moves import filter
try:
# the hashlib package is available from Python 2.5
from hashlib import md5
except ImportError:
# the md5 package is deprecated in Python 2.6
from md5 import new as md5
from hashlib import md5
import gzip
import time
import errno
......@@ -92,6 +94,7 @@ program = sys.argv[0]
BACKUP = 1
RECOVER = 2
VERIFY = 3
COMMASPACE = ', '
READCHUNK = 16 * 1024
......@@ -106,6 +109,18 @@ class NoFiles(Exception):
pass
class _GzipCloser(object):
def __init__(self, fqn, mode):
self._opened = gzip.open(fqn, mode)
def __enter__(self):
return self._opened
def __exit__(self, exc_type, exc_value, traceback):
self._opened.close()
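
_GzipCloser exists only so that gzip-backed files can be used in a with statement on Pythons where GzipFile is not a context manager; __enter__ hands back the underlying gzip file object and __exit__ closes it. A small usage sketch (the path is hypothetical):

with _GzipCloser('/tmp/example.fsz', 'wb') as fp:   # write a gzipped file
    fp.write(b'some bytes')
with _GzipCloser('/tmp/example.fsz', 'rb') as fp:   # read it back, decompressed
    assert fp.read() == b'some bytes'
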
def usage(code, msg=''):
outfp = sys.stderr
if code == 0:
......@@ -124,12 +139,17 @@ def log(msg, *args):
print(msg % args, file=sys.stderr)
def error(msg, *args):
print(msg % args, file=sys.stderr)
def parseargs(argv):
global VERBOSE
try:
opts, args = getopt.getopt(argv, 'BRvhr:f:FQzkD:o:',
opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:',
['backup',
'recover',
'verify',
'verbose',
'help',
'repository=',
......@@ -145,7 +165,7 @@ def parseargs(argv):
usage(1, msg)
class Options:
mode = None # BACKUP or RECOVER
mode = None # BACKUP, RECOVER or VERIFY
file = None # name of input Data.fs file
repository = None # name of directory holding backups
full = False # True forces full backup
......@@ -164,12 +184,16 @@ def parseargs(argv):
VERBOSE = True
elif opt in ('-R', '--recover'):
if options.mode is not None:
usage(1, '-B and -R are mutually exclusive')
usage(1, '-B, -R, and -V are mutually exclusive')
options.mode = RECOVER
elif opt in ('-B', '--backup'):
if options.mode is not None:
usage(1, '-B and -R are mutually exclusive')
usage(1, '-B, -R, and -V are mutually exclusive')
options.mode = BACKUP
elif opt in ('-V', '--verify'):
if options.mode is not None:
usage(1, '-B, -R, and -V are mutually exclusive')
options.mode = VERIFY
elif opt in ('-Q', '--quick'):
options.quick = True
elif opt in ('-f', '--file'):
......@@ -195,7 +219,7 @@ def parseargs(argv):
# Sanity checks
if options.mode is None:
usage(1, 'Either --backup or --recover is required')
usage(1, 'Either --backup, --recover or --verify is required')
if options.repository is None:
usage(1, '--repository is required')
if options.mode == BACKUP:
......@@ -205,14 +229,33 @@ def parseargs(argv):
if options.output is not None:
log('--output option is ignored in backup mode')
options.output = None
else:
assert options.mode == RECOVER
elif options.mode == RECOVER:
if options.file is not None:
log('--file option is ignored in recover mode')
options.file = None
if options.killold is not None:
if options.killold:
log('--kill-old-on-full option is ignored in recover mode')
options.killold = None
options.killold = False
else:
assert options.mode == VERIFY
if options.date is not None:
log("--date option is ignored in verify mode")
options.date = None
if options.output is not None:
log('--output option is ignored in verify mode')
options.output = None
if options.full:
log('--full option is ignored in verify mode')
options.full = False
if options.gzip:
log('--gzip option is ignored in verify mode')
options.gzip = False
if options.file is not None:
log('--file option is ignored in verify mode')
options.file = None
if options.killold:
log('--kill-old-on-full option is ignored in verify mode')
options.killold = False
return options
......@@ -256,6 +299,22 @@ def checksum(fp, n):
return sum.hexdigest()
def file_size(fp):
# Compute number of bytes that can be read from fp
def func(data):
pass
return dofile(func, fp, None)
def checksum_and_size(fp):
# Compute the checksum of fp and return it along with the file size
sum = md5()
def func(data):
sum.update(data)
size = dofile(func, fp, None)
return sum.hexdigest(), size
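
Both helpers push the open file through dofile() in READCHUNK-sized pieces, so even large backup files are sized and digested without being read into memory at once. A short sketch of how do_verify() below applies checksum_and_size() to an uncompressed backup file (the path is hypothetical):

with open('/path/to/backups/2010-05-14-02-03-04.fs', 'rb') as fp:
    # digest is an md5 hex string, size the number of bytes read
    digest, size = checksum_and_size(fp)
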
def copyfile(options, dst, start, n):
# Copy n bytes from file src to file dst, starting at offset start.
# For robustness, we first write, flush and fsync
......@@ -608,6 +667,60 @@ def do_recover(options):
log('No index file to restore: %s', source_index)
def do_verify(options):
# Verify the sizes and checksums of all files mentioned in the .dat file
repofiles = find_files(options)
if not repofiles:
raise NoFiles('No files in repository')
datfile = os.path.splitext(repofiles[0])[0] + '.dat'
with open(datfile) as fp:
for line in fp:
fn, startpos, endpos, sum = line.split()
startpos = int(startpos)
endpos = int(endpos)
filename = os.path.join(options.repository,
os.path.basename(fn))
expected_size = endpos - startpos
log("Verifying %s", filename)
try:
if filename.endswith('fsz'):
actual_sum, size = get_checksum_and_size_of_gzipped_file(filename, options.quick)
when_uncompressed = ' (when uncompressed)'
else:
actual_sum, size = get_checksum_and_size_of_file(filename, options.quick)
when_uncompressed = ''
except IOError:
error("%s is missing", filename)
continue
if size != expected_size:
error("%s is %d bytes%s, should be %d bytes", filename,
size, when_uncompressed, expected_size)
elif not options.quick:
if actual_sum != sum:
error("%s has checksum %s%s instead of %s", filename,
actual_sum, when_uncompressed, sum)
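
do_verify() drives everything from the repository's .dat file: each line records a backup file name, the start and end offsets of the bytes it contributes to the recovered Data.fs, and the md5 of those bytes. A minimal sketch of that bookkeeping, using an illustrative entry borrowed from the test fixtures below:

line = '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7'
fn, startpos, endpos, expected_sum = line.split()
expected_size = int(endpos) - int(startpos)   # 3 bytes expected on disk
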
def get_checksum_and_size_of_gzipped_file(filename, quick):
with _GzipCloser(filename, 'rb') as fp:
if quick:
return None, file_size(fp)
else:
return checksum_and_size(fp)
def get_checksum_and_size_of_file(filename, quick):
with open(filename, 'rb') as fp:
fp.seek(0, 2)
actual_size = fp.tell()
if quick:
actual_sum = None
else:
fp.seek(0)
actual_sum = checksum(fp, actual_size)
return actual_sum, actual_size
def main(argv=None):
if argv is None:
argv = sys.argv[1:]
......@@ -616,15 +729,18 @@ def main(argv=None):
try:
do_backup(options)
except WouldOverwriteFiles as e:
print(str(e), file=sys.stderr)
sys.exit(1)
else:
assert options.mode == RECOVER
sys.exit(str(e))
elif options.mode == RECOVER:
try:
do_recover(options)
except NoFiles as e:
print(str(e), file=sys.stderr)
sys.exit(1)
sys.exit(str(e))
else:
assert options.mode == VERIFY
try:
do_verify(options)
except NoFiles as e:
sys.exit(str(e))
if __name__ == '__main__':
......
......@@ -14,12 +14,7 @@
from __future__ import print_function
import unittest
import os
try:
# the hashlib package is available from Python 2.5
from hashlib import md5
except ImportError:
# the md5 package is deprecated in Python 2.6
from md5 import new as md5
from hashlib import md5
import ZODB.tests.util # layer used at class scope
......@@ -31,19 +26,6 @@ except ImportError:
_NOISY = os.environ.get('NOISY_REPOZO_TEST_OUTPUT')
class _GzipCloser(object):
def __init__(self, fqn, mode):
import gzip
self._opened = gzip.open(fqn, mode)
def __enter__(self):
return self._opened
def __exit__(self, exc_type, exc_value, traceback):
self._opened.close()
def _write_file(name, bits, mode='wb'):
with open(name, mode) as f:
f.write(bits)
......@@ -198,9 +180,10 @@ class OptionsTestBase:
def _makeOptions(self, **kw):
import tempfile
self._repository_directory = tempfile.mkdtemp()
self._repository_directory = tempfile.mkdtemp(prefix='test-repozo-')
class Options(object):
repository = self._repository_directory
date = None
def __init__(self, **kw):
self.__dict__.update(kw)
return Options(**kw)
......@@ -222,6 +205,7 @@ class Test_copyfile(OptionsTestBase, unittest.TestCase):
self.assertEqual(_read_file(target), b'x' * 100)
def test_w_gzip(self):
from ZODB.scripts.repozo import _GzipCloser
options = self._makeOptions(gzip=True)
source = options.file = os.path.join(self._repository_directory,
'source.txt')
......@@ -240,6 +224,7 @@ class Test_concat(OptionsTestBase, unittest.TestCase):
return concat(files, ofp)
def _makeFile(self, name, text, gzip_file=False):
from ZODB.scripts.repozo import _GzipCloser
import tempfile
if self._repository_directory is None:
self._repository_directory = tempfile.mkdtemp()
......@@ -789,6 +774,150 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
self.assertEqual(_read_file(output), b'AAABBB')
self.assertEqual(_read_file(index), b'CCC')
class Test_do_verify(OptionsTestBase, unittest.TestCase):
def _callFUT(self, options):
from ZODB.scripts import repozo
errors = []
orig_error = repozo.error
def _error(msg, *args):
errors.append(msg % args)
repozo.error = _error
try:
repozo.do_verify(options)
return errors
finally:
repozo.error = orig_error
def _makeFile(self, hour, min, sec, ext, text=None):
from ZODB.scripts.repozo import _GzipCloser
assert self._repository_directory, 'call _makeOptions first!'
name = '2010-05-14-%02d-%02d-%02d%s' % (hour, min, sec, ext)
if text is None:
text = name
fqn = os.path.join(self._repository_directory, name)
if ext.endswith('fsz'):
_opener = _GzipCloser
else:
_opener = open
with _opener(fqn, 'wb') as f:
f.write(text.encode())
f.flush()
return fqn
def test_no_files(self):
from ZODB.scripts.repozo import NoFiles
options = self._makeOptions()
self.assertRaises(NoFiles, self._callFUT, options)
def test_all_is_fine(self):
options = self._makeOptions(quick=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
'/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
self.assertEqual(self._callFUT(options), [])
def test_all_is_fine_gzip(self):
options = self._makeOptions(quick=False)
self._makeFile(2, 3, 4, '.fsz', 'AAA')
self._makeFile(4, 5, 6, '.deltafsz', 'BBBB')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fsz 0 3 e1faffb3e614e6c2fba74296962386b7\n'
'/backup/2010-05-14-04-05-06.deltafsz 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
self.assertEqual(self._callFUT(options), [])
def test_missing_file(self):
options = self._makeOptions(quick=True)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
'/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
self.assertEqual(self._callFUT(options),
[options.repository + os.path.sep +
'2010-05-14-04-05-06.deltafs is missing'])
def test_missing_file_gzip(self):
options = self._makeOptions(quick=True)
self._makeFile(2, 3, 4, '.fsz', 'AAA')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fsz 0 3 e1faffb3e614e6c2fba74296962386b7\n'
'/backup/2010-05-14-04-05-06.deltafsz 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
self.assertEqual(self._callFUT(options),
[options.repository + os.path.sep +
'2010-05-14-04-05-06.deltafsz is missing'])
def test_bad_size(self):
options = self._makeOptions(quick=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBB')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
'/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
self.assertEqual(self._callFUT(options),
[options.repository + os.path.sep +
'2010-05-14-04-05-06.deltafs is 3 bytes,'
' should be 4 bytes'])
def test_bad_size_gzip(self):
options = self._makeOptions(quick=False)
self._makeFile(2, 3, 4, '.fsz', 'AAA')
self._makeFile(4, 5, 6, '.deltafsz', 'BBB')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fsz 0 3 e1faffb3e614e6c2fba74296962386b7\n'
'/backup/2010-05-14-04-05-06.deltafsz 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
self.assertEqual(self._callFUT(options),
[options.repository + os.path.sep +
'2010-05-14-04-05-06.deltafsz is 3 bytes (when uncompressed),'
' should be 4 bytes'])
def test_bad_checksum(self):
options = self._makeOptions(quick=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BbBB')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
'/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
self.assertEqual(self._callFUT(options),
[options.repository + os.path.sep +
'2010-05-14-04-05-06.deltafs has checksum'
' 36486440db255f0ee6ab109d5d231406 instead of'
' f50881ced34c7d9e6bce100bf33dec60'])
def test_bad_checksum_gzip(self):
options = self._makeOptions(quick=False)
self._makeFile(2, 3, 4, '.fsz', 'AAA')
self._makeFile(4, 5, 6, '.deltafsz', 'BbBB')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fsz 0 3 e1faffb3e614e6c2fba74296962386b7\n'
'/backup/2010-05-14-04-05-06.deltafsz 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
self.assertEqual(self._callFUT(options),
[options.repository + os.path.sep +
'2010-05-14-04-05-06.deltafsz has checksum'
' 36486440db255f0ee6ab109d5d231406 (when uncompressed) instead of'
' f50881ced34c7d9e6bce100bf33dec60'])
def test_quick_ignores_checksums(self):
options = self._makeOptions(quick=True)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fs 0 3 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n'
'/backup/2010-05-14-04-05-06.deltafs 3 7 bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n')
self.assertEqual(self._callFUT(options), [])
def test_quick_ignores_checksums_gzip(self):
options = self._makeOptions(quick=True)
self._makeFile(2, 3, 4, '.fsz', 'AAA')
self._makeFile(4, 5, 6, '.deltafsz', 'BBBB')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fsz 0 3 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n'
'/backup/2010-05-14-04-05-06.deltafsz 3 7 bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n')
self.assertEqual(self._callFUT(options), [])
class MonteCarloTests(unittest.TestCase):
layer = ZODB.tests.util.MininalTestLayer('repozo')
......@@ -902,6 +1031,7 @@ def test_suite():
unittest.makeSuite(Test_do_incremental_backup),
#unittest.makeSuite(Test_do_backup), #TODO
unittest.makeSuite(Test_do_recover),
unittest.makeSuite(Test_do_verify),
# N.B.: this test takes forever to run (~40sec on a fast laptop),
# *and* it is non-deterministic.
unittest.makeSuite(MonteCarloTests),
......