Commit af51e3ae authored by Tim Peters

find_files(): When trying to do recovery to a time earlier than that
of the most recent full backup, repozo.py failed to find the appropriate
files, erroneously claiming

    No files in repository before <specified time>

Repaired that.  Also made it much more robust against "junk files" the
user may create, or leave behind, in the backup directory.  Added test.
parent fa76112e
......@@ -5,6 +5,11 @@ Release date: DD-MMM-2004
ZODB
----
When trying to do recovery to a time earlier than that of the most recent
full backup, repozo.py failed to find the appropriate files, erroneously
claiming "No files in repository before <specified time>". This has
been repaired.
Collector #1330: repozo.py -R can create corrupt .fs.
When looking for the backup files needed to recreate a Data.fs file,
repozo could (unintentionally) include its meta .dat files in the list,
......
......@@ -53,7 +53,7 @@ Options for -R/--recover:
-D str
--date=str
Recover state as of this date. str is in the format
yyyy-mm-dd[-hh[-mm]]
yyyy-mm-dd[-hh[-mm[-ss]]]
By default, current time is used.
-o filename
......@@ -262,30 +262,32 @@ def gen_filename(options, ext=None):
t = time.gmtime()[:6] + (ext,)
return '%04d-%02d-%02d-%02d-%02d-%02d%s' % t
# Return a list of files needed to reproduce state at time options.date.
# This is a list, in chronological order, of the .fs[z] and .deltafs[z]
# files, from the time of the most recent full backup preceding
# options.date, up to options.date.
import re
is_data_file = re.compile(r'\d{4}(?:-\d\d){5}\.(?:delta)?fsz?$').match
del re
def find_files(options):
def rootcmp(x, y):
# This already compares in reverse order
return cmp(os.path.splitext(y)[0], os.path.splitext(x)[0])
when = options.date
if not when:
when = gen_filename(options, '')
log('looking for files between last full backup and %s...', when)
all = os.listdir(options.repository)
all.sort(rootcmp)
all = filter(is_data_file, os.listdir(options.repository))
all.sort()
all.reverse() # newest file first
# Find the last full backup before date, then include all the
# incrementals between that full backup and "when".
needed = []
for fname in all:
root, ext = os.path.splitext(fname)
if root <= when and ext in ('.fs', '.fsz', '.deltafs', '.deltafsz'):
if root <= when:
needed.append(fname)
if ext in ('.fs', '.fsz'):
break
if ext in ('.fs', '.fsz'):
break
# Make the file names relative to the repository directory
needed = [os.path.join(options.repository, f) for f in needed]
# Restore back to chronological order
......
......@@ -26,6 +26,7 @@ import random
import time
import glob
import sys
import shutil
import ZODB
from ZODB import FileStorage
......@@ -68,34 +69,49 @@ class OurDB:
self.db.close()
self.db = None
# Do recovery to current time, and check that it's identical to Data.fs.
def check():
os.system(PYTHON + '../repozo.py -vRr backup -o Copy.fs')
f = file('Data.fs', 'rb')
# Do recovery to time 'when', and check that it's identical to correctpath.
def check(correctpath='Data.fs', when=None):
if when is None:
extra = ''
else:
extra = ' -D ' + when
cmd = PYTHON + '../repozo.py -vRr backup -o Copy.fs' + extra
os.system(cmd)
f = file(correctpath, 'rb')
g = file('Copy.fs', 'rb')
fguts = f.read()
gguts = g.read()
f.close()
g.close()
if fguts != gguts:
raise ValueError("guts don't match")
raise ValueError("guts don't match\n"
" correctpath=%r when=%r\n"
" cmd=%r" % (correctpath, when, cmd))
def mutatedb(db):
    """Randomly mutate the btree held by db, then commit and close db.

    Runs 100 rounds.  In each round, when random.random() < 0.6 a random
    value is stored under a random key (both drawn from range(100000));
    otherwise the first key reported by the tree is deleted, if the tree
    has any keys at all.  After the rounds, the current transaction is
    committed and db is closed.
    """
    btree = db.gettree()
    for _round in range(100):
        if random.random() >= 0.6:
            # Deletion round: nothing to do when the tree is empty.
            existing = btree.keys()
            if existing:
                del btree[existing[0]]
            continue
        # Insertion round.  The value is drawn before the key, which is
        # the order in which `tree[key()] = value()` evaluates its operands.
        value = random.randrange(100000)
        key = random.randrange(100000)
        btree[key] = value
    get_transaction().commit()
    db.close()
def main():
cleanup()
os.mkdir('backup')
d = OurDB()
for dummy in range(100):
# Every 9th time thru the loop, we save a full copy of Data.fs,
# and at the end we ensure we can reproduce those too.
saved_snapshots = [] # list of (name, time) pairs for copies.
for i in range(100):
# Make some mutations.
tree = d.gettree()
for dummy2 in range(100):
if random.random() < 0.6:
tree[random.randrange(100000)] = random.randrange(100000)
else:
keys = tree.keys()
if keys:
del tree[keys[0]]
get_transaction().commit()
d.close()
mutatedb(d)
# Pack about each tenth time.
if random.random() < 0.1:
......@@ -109,12 +125,23 @@ def main():
else:
os.system(PYTHON + '../repozo.py -zvBQr backup -f Data.fs')
if i % 9 == 0:
copytime = '%04d-%02d-%02d-%02d-%02d-%02d' % (time.gmtime()[:6])
copyname = os.path.join('backup', "Data%d" % i) + '.fs'
shutil.copyfile('Data.fs', copyname)
saved_snapshots.append((copyname, copytime))
# Make sure the clock moves at least a second.
time.sleep(1.01)
# Verify current Data.fs can be reproduced exactly.
check()
# Verify snapshots can be reproduced exactly.
for copyname, copytime in saved_snapshots:
print "Checking that", copyname, "at", copytime, "is reproducible."
check(copyname, copytime)
# Tear it all down.
cleanup()
print 'Test passed!'
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment