Commit 334282c3 authored by Nicolas Wavrant, committed by GitHub

Merge pull request #235 from Sebatyne/verify-on-recovery

Repozo : add an option to verify on recovery
parents b2895c09 1c6a9828
...@@ -5,7 +5,10 @@ ...@@ -5,7 +5,10 @@
5.5.2 (unreleased) 5.5.2 (unreleased)
================== ==================
- TBD - Make repozo's recover mode atomic by recovering the backup in a
temporary file which is then moved to the expected output file.
- Add a new option to repozo's recover mode that allows verifying
backup integrity on the fly.
5.5.1 (2018-10-25) 5.5.1 (2018-10-25)
================== ==================
......
...@@ -73,6 +73,13 @@ Options for -R/--recover: ...@@ -73,6 +73,13 @@ Options for -R/--recover:
Note: for the stdout case, the index file will **not** be restored Note: for the stdout case, the index file will **not** be restored
automatically. automatically.
-w
--with-verification
Verify the backup files on the fly while recovering. This option runs
the same checks as when repozo is run in -V/--verify mode, and
allows verifying and recovering a backup in a single step. If a sanity
check fails, the partially recovered ZODB will be left in place.
Options for -V/--verify: Options for -V/--verify:
-Q / --quick -Q / --quick
Verify file sizes only (skip md5 checksums). Verify file sizes only (skip md5 checksums).
...@@ -101,11 +108,19 @@ READCHUNK = 16 * 1024 ...@@ -101,11 +108,19 @@ READCHUNK = 16 * 1024
VERBOSE = False VERBOSE = False
class WouldOverwriteFiles(Exception): class RepozoError(Exception):
pass
class WouldOverwriteFiles(RepozoError):
pass
class NoFiles(RepozoError):
pass pass
class NoFiles(Exception): class VerificationFail(RepozoError):
pass pass
...@@ -146,7 +161,7 @@ def error(msg, *args): ...@@ -146,7 +161,7 @@ def error(msg, *args):
def parseargs(argv): def parseargs(argv):
global VERBOSE global VERBOSE
try: try:
opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:', opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:w',
['backup', ['backup',
'recover', 'recover',
'verify', 'verify',
...@@ -160,6 +175,7 @@ def parseargs(argv): ...@@ -160,6 +175,7 @@ def parseargs(argv):
'kill-old-on-full', 'kill-old-on-full',
'date=', 'date=',
'output=', 'output=',
'with-verification',
]) ])
except getopt.error as msg: except getopt.error as msg:
usage(1, msg) usage(1, msg)
...@@ -174,6 +190,7 @@ def parseargs(argv): ...@@ -174,6 +190,7 @@ def parseargs(argv):
quick = False # -Q flag state quick = False # -Q flag state
gzip = False # -z flag state gzip = False # -z flag state
killold = False # -k flag state killold = False # -k flag state
withverify = False # -w flag state
options = Options() options = Options()
...@@ -210,6 +227,8 @@ def parseargs(argv): ...@@ -210,6 +227,8 @@ def parseargs(argv):
options.gzip = True options.gzip = True
elif opt in ('-k', '--kill-old-on-full'): elif opt in ('-k', '--kill-old-on-full'):
options.killold = True options.killold = True
elif opt in ('-w', '--with-verify'):
options.withverify = True
else: else:
assert False, (opt, arg) assert False, (opt, arg)
...@@ -229,6 +248,9 @@ def parseargs(argv): ...@@ -229,6 +248,9 @@ def parseargs(argv):
if options.output is not None: if options.output is not None:
log('--output option is ignored in backup mode') log('--output option is ignored in backup mode')
options.output = None options.output = None
if options.withverify is not None:
log('--with-verify option is ignored in backup mode')
options.withverify = None
elif options.mode == RECOVER: elif options.mode == RECOVER:
if options.file is not None: if options.file is not None:
log('--file option is ignored in recover mode') log('--file option is ignored in recover mode')
...@@ -256,6 +278,9 @@ def parseargs(argv): ...@@ -256,6 +278,9 @@ def parseargs(argv):
if options.killold: if options.killold:
log('--kill-old-on-full option is ignored in verify mode') log('--kill-old-on-full option is ignored in verify mode')
options.killold = False options.killold = False
if options.withverify is not None:
log('--with-verify option is ignored in verify mode')
options.withverify = None
return options return options
...@@ -360,8 +385,6 @@ def concat(files, ofp=None): ...@@ -360,8 +385,6 @@ def concat(files, ofp=None):
ifp = open(f, 'rb') ifp = open(f, 'rb')
bytesread += dofile(func, ifp) bytesread += dofile(func, ifp)
ifp.close() ifp.close()
if ofp:
ofp.close()
return bytesread, sum.hexdigest() return bytesread, sum.hexdigest()
...@@ -649,12 +672,46 @@ def do_recover(options): ...@@ -649,12 +672,46 @@ def do_recover(options):
log('Recovering file to stdout') log('Recovering file to stdout')
outfp = sys.stdout outfp = sys.stdout
else: else:
# Delete the old ZODB before recovering the backup, as the size of the
# old ZODB plus the partially recovered file may exceed the free disk space
if os.path.exists(options.output):
log('Deleting old %s', options.output)
os.unlink(options.output)
log('Recovering file to %s', options.output) log('Recovering file to %s', options.output)
outfp = open(options.output, 'wb') temporary_output_file = options.output + '.part'
reposz, reposum = concat(repofiles, outfp) outfp = open(temporary_output_file, 'wb')
if outfp != sys.stdout: if options.withverify:
outfp.close() datfile = os.path.splitext(repofiles[0])[0] + '.dat'
log('Recovered %s bytes, md5: %s', reposz, reposum) with open(datfile) as fp:
truth_dict = {}
for line in fp:
fn, startpos, endpos, sum = line.split()
startpos = int(startpos)
endpos = int(endpos)
filename = os.path.join(options.repository,
os.path.basename(fn))
truth_dict[filename] = {
'size': endpos - startpos,
'sum': sum,
}
totalsz = 0
for repofile in repofiles:
reposz, reposum = concat([repofile], outfp)
expected_truth = truth_dict[repofile]
if reposz != expected_truth['size']:
raise VerificationFail(
"%s is %d bytes, should be %d bytes" % (
repofile, reposz, expected_truth['size']))
if reposum != expected_truth['sum']:
raise VerificationFail(
"%s has checksum %s instead of %s" % (
repofile, reposum, expected_truth['sum']))
totalsz += reposz
log("Recovered chunk %s : %s bytes, md5: %s", repofile, reposz, reposum)
log("Recovered a total of %s bytes", totalsz)
else:
reposz, reposum = concat(repofiles, outfp)
log('Recovered %s bytes, md5: %s', reposz, reposum)
if options.output is not None: if options.output is not None:
last_base = os.path.splitext(repofiles[-1])[0] last_base = os.path.splitext(repofiles[-1])[0]
...@@ -666,6 +723,15 @@ def do_recover(options): ...@@ -666,6 +723,15 @@ def do_recover(options):
else: else:
log('No index file to restore: %s', source_index) log('No index file to restore: %s', source_index)
if outfp != sys.stdout:
outfp.close()
try:
os.rename(temporary_output_file, options.output)
except OSError:
log("ZODB has been fully recovered as %s, but it cannot be renamed into : %s",
temporary_output_file, options.output)
raise
def do_verify(options): def do_verify(options):
# Verify the sizes and checksums of all files mentioned in the .dat file # Verify the sizes and checksums of all files mentioned in the .dat file
...@@ -725,22 +791,16 @@ def main(argv=None): ...@@ -725,22 +791,16 @@ def main(argv=None):
if argv is None: if argv is None:
argv = sys.argv[1:] argv = sys.argv[1:]
options = parseargs(argv) options = parseargs(argv)
if options.mode == BACKUP: try:
try: if options.mode == BACKUP:
do_backup(options) do_backup(options)
except WouldOverwriteFiles as e: elif options.mode == RECOVER:
sys.exit(str(e))
elif options.mode == RECOVER:
try:
do_recover(options) do_recover(options)
except NoFiles as e: else:
sys.exit(str(e)) assert options.mode == VERIFY
else:
assert options.mode == VERIFY
try:
do_verify(options) do_verify(options)
except NoFiles as e: except (RepozoError, OSError) as e:
sys.exit(str(e)) sys.exit(str(e))
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -371,7 +371,7 @@ class Test_concat(OptionsTestBase, unittest.TestCase): ...@@ -371,7 +371,7 @@ class Test_concat(OptionsTestBase, unittest.TestCase):
from ZODB.scripts.repozo import _GzipCloser from ZODB.scripts.repozo import _GzipCloser
import tempfile import tempfile
if self._repository_directory is None: if self._repository_directory is None:
self._repository_directory = tempfile.mkdtemp() self._repository_directory = tempfile.mkdtemp(prefix='zodb-test-')
fqn = os.path.join(self._repository_directory, name) fqn = os.path.join(self._repository_directory, name)
if gzip_file: if gzip_file:
_opener = _GzipCloser _opener = _GzipCloser
...@@ -414,7 +414,7 @@ class Test_concat(OptionsTestBase, unittest.TestCase): ...@@ -414,7 +414,7 @@ class Test_concat(OptionsTestBase, unittest.TestCase):
ofp = Faux() ofp = Faux()
bytes, sum = self._callFUT(files, ofp) bytes, sum = self._callFUT(files, ofp)
self.assertEqual(ofp._written, [x.encode() for x in 'ABC']) self.assertEqual(ofp._written, [x.encode() for x in 'ABC'])
self.assertTrue(ofp._closed) self.assertFalse(ofp._closed)
_marker = object() _marker = object()
class Test_gen_filename(OptionsTestBase, unittest.TestCase): class Test_gen_filename(OptionsTestBase, unittest.TestCase):
...@@ -674,7 +674,7 @@ class Test_do_full_backup(OptionsTestBase, unittest.TestCase): ...@@ -674,7 +674,7 @@ class Test_do_full_backup(OptionsTestBase, unittest.TestCase):
def _makeDB(self): def _makeDB(self):
import tempfile import tempfile
datadir = self._data_directory = tempfile.mkdtemp() datadir = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
return OurDB(self._data_directory) return OurDB(self._data_directory)
def test_dont_overwrite_existing_file(self): def test_dont_overwrite_existing_file(self):
...@@ -729,7 +729,7 @@ class Test_do_incremental_backup(OptionsTestBase, unittest.TestCase): ...@@ -729,7 +729,7 @@ class Test_do_incremental_backup(OptionsTestBase, unittest.TestCase):
def _makeDB(self): def _makeDB(self):
import tempfile import tempfile
datadir = self._data_directory = tempfile.mkdtemp() datadir = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
return OurDB(self._data_directory) return OurDB(self._data_directory)
def test_dont_overwrite_existing_file(self): def test_dont_overwrite_existing_file(self):
...@@ -868,11 +868,12 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase): ...@@ -868,11 +868,12 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
def test_w_full_backup_latest_no_index(self): def test_w_full_backup_latest_no_index(self):
import tempfile import tempfile
dd = self._data_directory = tempfile.mkdtemp() dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs') output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index') index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57', options = self._makeOptions(date='2010-05-15-13-30-57',
output=output) output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA') self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.fs', 'BBB') self._makeFile(4, 5, 6, '.fs', 'BBB')
self._callFUT(options) self._callFUT(options)
...@@ -880,11 +881,12 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase): ...@@ -880,11 +881,12 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
def test_w_full_backup_latest_index(self): def test_w_full_backup_latest_index(self):
import tempfile import tempfile
dd = self._data_directory = tempfile.mkdtemp() dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs') output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index') index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57', options = self._makeOptions(date='2010-05-15-13-30-57',
output=output) output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA') self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.fs', 'BBB') self._makeFile(4, 5, 6, '.fs', 'BBB')
self._makeFile(4, 5, 6, '.index', 'CCC') self._makeFile(4, 5, 6, '.index', 'CCC')
...@@ -894,11 +896,12 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase): ...@@ -894,11 +896,12 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
def test_w_incr_backup_latest_no_index(self): def test_w_incr_backup_latest_no_index(self):
import tempfile import tempfile
dd = self._data_directory = tempfile.mkdtemp() dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs') output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index') index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57', options = self._makeOptions(date='2010-05-15-13-30-57',
output=output) output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA') self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBB') self._makeFile(4, 5, 6, '.deltafs', 'BBB')
self._callFUT(options) self._callFUT(options)
...@@ -906,11 +909,12 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase): ...@@ -906,11 +909,12 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
def test_w_incr_backup_latest_index(self): def test_w_incr_backup_latest_index(self):
import tempfile import tempfile
dd = self._data_directory = tempfile.mkdtemp() dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs') output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index') index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57', options = self._makeOptions(date='2010-05-15-13-30-57',
output=output) output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA') self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBB') self._makeFile(4, 5, 6, '.deltafs', 'BBB')
self._makeFile(4, 5, 6, '.index', 'CCC') self._makeFile(4, 5, 6, '.index', 'CCC')
...@@ -918,6 +922,57 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase): ...@@ -918,6 +922,57 @@ class Test_do_recover(OptionsTestBase, unittest.TestCase):
self.assertEqual(_read_file(output), b'AAABBB') self.assertEqual(_read_file(output), b'AAABBB')
self.assertEqual(_read_file(index), b'CCC') self.assertEqual(_read_file(index), b'CCC')
def test_w_incr_backup_with_verify_all_is_fine(self):
import tempfile
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output,
withverify=True)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
'/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
self._callFUT(options)
self.assertFalse(os.path.exists(output + '.part'))
self.assertEqual(_read_file(output), b'AAABBBB')
def test_w_incr_backup_with_verify_sum_inconsistent(self):
import tempfile
from ZODB.scripts.repozo import VerificationFail
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output,
withverify=True)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
'/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec61\n')
self.assertRaises(VerificationFail, self._callFUT, options)
self.assertTrue(os.path.exists(output + '.part'))
def test_w_incr_backup_with_verify_size_inconsistent(self):
import tempfile
from ZODB.scripts.repozo import VerificationFail
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output,
withverify=True)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
self._makeFile(2, 3, 4, '.dat',
'/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
'/backup/2010-05-14-04-05-06.deltafs 3 8 f50881ced34c7d9e6bce100bf33dec60\n')
self.assertRaises(VerificationFail, self._callFUT, options)
self.assertTrue(os.path.exists(output + '.part'))
class Test_do_verify(OptionsTestBase, unittest.TestCase): class Test_do_verify(OptionsTestBase, unittest.TestCase):
...@@ -1069,7 +1124,7 @@ class MonteCarloTests(unittest.TestCase): ...@@ -1069,7 +1124,7 @@ class MonteCarloTests(unittest.TestCase):
def setUp(self): def setUp(self):
# compute directory names # compute directory names
import tempfile import tempfile
self.basedir = tempfile.mkdtemp() self.basedir = tempfile.mkdtemp(prefix='zodb-test-')
self.backupdir = os.path.join(self.basedir, 'backup') self.backupdir = os.path.join(self.basedir, 'backup')
self.datadir = os.path.join(self.basedir, 'data') self.datadir = os.path.join(self.basedir, 'data')
self.restoredir = os.path.join(self.basedir, 'restore') self.restoredir = os.path.join(self.basedir, 'restore')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment