Commit 595d304c authored by Tim Peters

Collector #1330: repozo.py -R can create corrupt .fs.

When looking for the backup files needed to recreate a Data.fs file,
repozo could (unintentionally) include its meta .dat files in the list,
or random files of any kind created by the user in the backup directory.
These would then get copied verbatim into the reconstructed file, filling
parts with junk.  Repaired by filtering the file list to include only
files with the data extensions repozo.py creates (.fs, .fsz, .deltafs,
and .deltafsz).  Thanks to James Henderson for the diagnosis.
parent 8d3f8bd4
...@@ -5,12 +5,21 @@ Release date: DD-MMM-2004 ...@@ -5,12 +5,21 @@ Release date: DD-MMM-2004
ZODB ZODB
---- ----
Collector #1330: repozo.py -R can create corrupt .fs.
When looking for the backup files needed to recreate a Data.fs file,
repozo could (unintentionally) include its meta .dat files in the list,
or random files of any kind created by the user in the backup directory.
These would then get copied verbatim into the reconstructed file, filling
parts with junk. Repaired by filtering the file list to include only
files with the data extensions repozo.py creates (.fs, .fsz, .deltafs,
and .deltafsz). Thanks to James Henderson for the diagnosis.
fsrecover.py couldn't work, because it referenced attributes that no fsrecover.py couldn't work, because it referenced attributes that no
longer existed after the MVCC changes. Repaired that, and added new longer existed after the MVCC changes. Repaired that, and added new
tests to ensure it continues working. tests to ensure it continues working.
Collector #1309: The reference counts reported by DB.cacheExtremeDetails() Collector #1309: The reference counts reported by DB.cacheExtremeDetails()
for ghosts were one too small. for ghosts were one too small. Thanks to Dieter Maurer for the diagnosis.
Collector #1208: Infinite loop in cPickleCache. Collector #1208: Infinite loop in cPickleCache.
If a persistent object had a __del__ method (probably not a good idea If a persistent object had a __del__ method (probably not a good idea
...@@ -21,7 +30,8 @@ method, the latter would load the object into cache again to ...@@ -21,7 +30,8 @@ method, the latter would load the object into cache again to
satisfy the attribute reference, the cache would again decide that satisfy the attribute reference, the cache would again decide that
the object should be ghostified, and so on. The infinite loop no longer the object should be ghostified, and so on. The infinite loop no longer
occurs, but note that objects of this kind still aren't sensible (they're occurs, but note that objects of this kind still aren't sensible (they're
effectively immortal). effectively immortal). Thanks to Toby Dickenson for suggesting a nice
cure.
What's new in ZODB3 3.3 alpha 3 What's new in ZODB3 3.3 alpha 3
......
...@@ -27,7 +27,7 @@ Where: ...@@ -27,7 +27,7 @@ Where:
-r dir -r dir
--repository=dir --repository=dir
Repository directory containing the backup files. This argument Repository directory containing the backup files. This argument
is required. is required. The directory must already exist.
Options for -B/--backup: Options for -B/--backup:
-f file -f file
...@@ -74,12 +74,6 @@ from ZODB.FileStorage import FileStorage ...@@ -74,12 +74,6 @@ from ZODB.FileStorage import FileStorage
program = sys.argv[0] program = sys.argv[0]
try:
True, False
except NameError:
True = 1
False = 0
BACKUP = 1 BACKUP = 1
RECOVER = 2 RECOVER = 2
...@@ -88,7 +82,6 @@ READCHUNK = 16 * 1024 ...@@ -88,7 +82,6 @@ READCHUNK = 16 * 1024
VERBOSE = False VERBOSE = False
def usage(code, msg=''): def usage(code, msg=''):
outfp = sys.stderr outfp = sys.stderr
if code == 0: if code == 0:
...@@ -107,7 +100,6 @@ def log(msg, *args): ...@@ -107,7 +100,6 @@ def log(msg, *args):
print >> sys.stderr, msg % args print >> sys.stderr, msg % args
def parseargs(): def parseargs():
global VERBOSE global VERBOSE
try: try:
...@@ -184,7 +176,6 @@ def parseargs(): ...@@ -184,7 +176,6 @@ def parseargs():
return options return options
# Read bytes (no more than n, or to EOF if n is None) in chunks from the # Read bytes (no more than n, or to EOF if n is None) in chunks from the
# current position in file fp. Pass each chunk as an argument to func(). # current position in file fp. Pass each chunk as an argument to func().
# Return the total number of bytes read == the total number of bytes # Return the total number of bytes read == the total number of bytes
...@@ -272,24 +263,27 @@ def gen_filename(options, ext=None): ...@@ -272,24 +263,27 @@ def gen_filename(options, ext=None):
return '%04d-%02d-%02d-%02d-%02d-%02d%s' % t return '%04d-%02d-%02d-%02d-%02d-%02d%s' % t
# Return a list of files needed to reproduce state at time options.date.
# This is a list, in chronological order, of the .fs[z] and .deltafs[z]
# files, from the time of the most recent full backup preceding
# options.date, up to options.date.
def find_files(options): def find_files(options):
def rootcmp(x, y): def rootcmp(x, y):
# This already compares in reverse order # This already compares in reverse order
return cmp(os.path.splitext(y)[0], os.path.splitext(x)[0]) return cmp(os.path.splitext(y)[0], os.path.splitext(x)[0])
# Return a list of files needed to reproduce state at time `when'
when = options.date when = options.date
if not when: if not when:
when = gen_filename(options, '') when = gen_filename(options, '')
log('looking for files b/w last full backup and %s...', when) log('looking for files between last full backup and %s...', when)
all = os.listdir(options.repository) all = os.listdir(options.repository)
all.sort(rootcmp) all.sort(rootcmp)
# Find the last full backup before date, then include all the incrementals # Find the last full backup before date, then include all the
# between when and that full backup. # incrementals between that full backup and "when".
needed = [] needed = []
for file in all: for fname in all:
root, ext = os.path.splitext(file) root, ext = os.path.splitext(fname)
if root <= when: if root <= when and ext in ('.fs', '.fsz', '.deltafs', '.deltafsz'):
needed.append(file) needed.append(fname)
if ext in ('.fs', '.fsz'): if ext in ('.fs', '.fsz'):
break break
# Make the file names relative to the repository directory # Make the file names relative to the repository directory
...@@ -335,7 +329,6 @@ def scandat(repofiles): ...@@ -335,7 +329,6 @@ def scandat(repofiles):
return fn, startpos, endpos, sum return fn, startpos, endpos, sum
def do_full_backup(options): def do_full_backup(options):
# Find the file position of the last completed transaction. # Find the file position of the last completed transaction.
fs = FileStorage(options.file, read_only=True) fs = FileStorage(options.file, read_only=True)
...@@ -471,7 +464,6 @@ def do_backup(options): ...@@ -471,7 +464,6 @@ def do_backup(options):
do_full_backup(options) do_full_backup(options)
def do_recover(options): def do_recover(options):
# Find the first full backup at or before the specified date # Find the first full backup at or before the specified date
repofiles = find_files(options) repofiles = find_files(options)
...@@ -493,7 +485,6 @@ def do_recover(options): ...@@ -493,7 +485,6 @@ def do_recover(options):
log('Recovered %s bytes, md5: %s', reposz, reposum) log('Recovered %s bytes, md5: %s', reposz, reposum)
def main(): def main():
options = parseargs() options = parseargs()
if options.mode == BACKUP: if options.mode == BACKUP:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment