Commit bba55ef2 authored by bescoto's avatar bescoto

Added various compare options like --compare-full and --compare-hash


git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup@664 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent 4cb9b29f
Write PID of process to current_mirror file.
--verify switch for checking hashes, and hash check on restore
---------[ Medium term ]---------------------------------------
For comparing, check source filesystem
Look into sparse file support (requested by Stelios K. Kyriacou)
Clean up connection dropped message
Look into security.py code, do some sort of security audit.
Add metadata diffing
Look at Kent Borg's suggestion for restore options and digests.
Clean up compare reports
Add --list-files-changed-between or similar option, to list files that
have changed between two times
Test comparing of single files, and files/directories specified by
increment. Also test --include/--exclude with compare options.
Add --dry-run option (target for v1.1.x)
---------[ Medium term ]---------------------------------------
Add # of increments option to --remove-older-than
Look into sparse file support (requested by Stelios K. Kyriacou)
Look at Kent Borg's suggestion for restore options and digests.
---------[ Long term ]---------------------------------------
......
......@@ -82,9 +82,29 @@ This is equivalent to
.BI "--compare-at-time " time
Compare a directory with the backup set at the given time. This can
be useful to see how archived data differs from current data, or to
check that a backup is current.
check that a backup is current. This only compares metadata, in the same
way rdiff-backup decides whether a file has changed.
.TP
.BI "--create-full-path"
.B --compare-full
This is equivalent to
.BI '--compare-full-at-time " now" '
.TP
.BI "--compare-full-at-time " time
Compare a directory with the backup set at the given time. To compare
regular files, the repository data will be copied in its entirety to
the source side and compared byte by byte. This is the slowest but
most complete compare option.
.TP
.B --compare-hash
This is equivalent to
.BI '--compare-hash-at-time " now" '
.TP
.BI "--compare-hash-at-time " time
Compare a directory with the backup set at the given time. Regular
files will be compared by computing their SHA1 digest on the source
side and comparing it to the digest recorded in the metadata.
.TP
.B --create-full-path
Normally only the final directory of the destination path will be
created if it does not exist. With this option, all missing directories
on the destination path will be created. Use this option with care: if
......
# Copyright 2002, 2003, 2004 Ben Escoto
# Copyright 2002, 2003, 2004, 2005 Ben Escoto
#
# This file is part of rdiff-backup.
#
......@@ -24,7 +24,7 @@ import getopt, sys, re, os, cStringIO
from log import Log, LoggerError, ErrorLog
import Globals, Time, SetConnections, selection, robust, rpath, \
manage, backup, connection, restore, FilenameMapping, \
Security, Hardlink, regress, C, fs_abilities, statistics
Security, Hardlink, regress, C, fs_abilities, statistics, compare
action = None
......@@ -59,9 +59,10 @@ def parse_cmdlineoptions(arglist):
try: optlist, args = getopt.getopt(arglist, "blr:sv:V",
["backup-mode", "calculate-average", "check-destination-dir",
"compare", "compare-at-time=", "create-full-path",
"current-time=", "exclude=", "exclude-device-files",
"exclude-fifos", "exclude-filelist=",
"compare", "compare-at-time=", "compare-hash",
"compare-hash-at-time=", "compare-full", "compare-full-at-time=",
"create-full-path", "current-time=", "exclude=",
"exclude-device-files", "exclude-fifos", "exclude-filelist=",
"exclude-symbolic-links", "exclude-sockets",
"exclude-filelist-stdin", "exclude-globbing-filelist=",
"exclude-globbing-filelist-stdin", "exclude-mirror=",
......@@ -91,10 +92,12 @@ def parse_cmdlineoptions(arglist):
elif opt == "--calculate-average": action = "calculate-average"
elif opt == "--carbonfile": Globals.set("carbonfile_active", 1)
elif opt == "--check-destination-dir": action = "check-destination-dir"
elif opt == "--compare" or opt == "--compare-at-time":
action = "compare"
if opt == "--compare": restore_timestr = "now"
else: restore_timestr = arg
elif opt in ("--compare", "--compare-at-time",
"--compare-hash", "--compare-hash-at-time",
"--compare-full", "--compare-full-at-time"):
if opt[-8:] == "-at-time": restore_timestr, opt = arg, opt[:-8]
else: restore_timestr = "now"
action = opt[2:]
elif opt == "--create-full-path": create_full_path = 1
elif opt == "--current-time":
Globals.set_integer('current_time', arg)
......@@ -200,7 +203,8 @@ def check_action():
1: ['list-increments', 'list-increment-sizes',
'remove-older-than', 'list-at-time',
'list-changed-since', 'check-destination-dir'],
2: ['backup', 'restore', 'restore-as-of', 'compare']}
2: ['backup', 'restore', 'restore-as-of',
'compare', 'compare-hash', 'compare-full']}
l = len(args)
if l == 0 and action not in arg_action_dict[l]:
commandline_error("No arguments given")
......@@ -263,7 +267,7 @@ def take_action(rps):
elif action == "backup": Backup(rps[0], rps[1])
elif action == "calculate-average": CalculateAverage(rps)
elif action == "check-destination-dir": CheckDest(rps[0])
elif action == "compare": Compare(*rps)
elif action.startswith("compare"): Compare(action, rps[0], rps[1])
elif action == "list-at-time": ListAtTime(rps[0])
elif action == "list-changed-since": ListChangedSince(rps[0])
elif action == "list-increments": ListIncrements(rps[0])
......@@ -592,7 +596,7 @@ def restore_set_root(rpin):
def ListIncrements(rp):
"""Print out a summary of the increments and their times"""
rp = require_root_set(rp)
rp = require_root_set(rp, 1)
restore_check_backup_dir(restore_root)
mirror_rp = restore_root.new_index(restore_index)
inc_rpath = Globals.rbdir.append_path('increments', restore_index)
......@@ -602,24 +606,25 @@ def ListIncrements(rp):
print manage.describe_incs_parsable(incs, mirror_time, mirror_rp)
else: print manage.describe_incs_human(incs, mirror_time, mirror_rp)
def require_root_set(rp):
def require_root_set(rp, read_only):
"""Make sure rp is or is in a valid rdiff-backup dest directory.
Also initializes fs_abilities and quoting and return quoted rp if
necessary.
Also initializes fs_abilities (read or read/write) and quoting and
return quoted rp if necessary.
"""
if not restore_set_root(rp):
Log.FatalError(("Bad directory %s.\n" % (rp.path,)) +
"It doesn't appear to be an rdiff-backup destination dir")
Globals.rbdir.conn.fs_abilities.single_set_globals(Globals.rbdir)
Globals.rbdir.conn.fs_abilities.single_set_globals(Globals.rbdir,
read_only)
if Globals.chars_to_quote: return restore_init_quoting(rp)
else: return rp
def ListIncrementSizes(rp):
"""Print out a summary of the increments """
rp = require_root_set(rp)
rp = require_root_set(rp, 1)
print manage.ListIncrementSizes(restore_root, restore_index)
......@@ -634,7 +639,7 @@ def CalculateAverage(rps):
def RemoveOlderThan(rootrp):
"""Remove all increment files older than a certain time"""
rootrp = require_root_set(rootrp)
rootrp = require_root_set(rootrp, 0)
rot_require_rbdir_base(rootrp)
try: time = Time.genstrtotime(remove_older_than_string)
except Time.TimeException, exc: Log.FatalError(str(exc))
......@@ -670,7 +675,7 @@ def rot_require_rbdir_base(rootrp):
def ListChangedSince(rp):
"""List all the files under rp that have changed since restoretime"""
rp = require_root_set(rp)
rp = require_root_set(rp, 1)
try: rest_time = Time.genstrtotime(restore_timestr)
except Time.TimeException, exc: Log.FatalError(str(exc))
mirror_rp = restore_root.new_index(restore_index)
......@@ -682,7 +687,7 @@ def ListChangedSince(rp):
def ListAtTime(rp):
"""List files in archive under rp that are present at restoretime"""
rp = require_root_set(rp)
rp = require_root_set(rp, 1)
try: rest_time = Time.genstrtotime(restore_timestr)
except Time.TimeException, exc: Log.FatalError(str(exc))
mirror_rp = restore_root.new_index(restore_index)
......@@ -691,7 +696,7 @@ def ListAtTime(rp):
print rorp.get_indexpath()
def Compare(src_rp, dest_rp, compare_time = None):
def Compare(compare_type, src_rp, dest_rp, compare_time = None):
"""Compare metadata in src_rp with metadata of backup session
Prints to stdout whenever a file in the src_rp directory has
......@@ -702,16 +707,20 @@ def Compare(src_rp, dest_rp, compare_time = None):
"""
global return_val
dest_rp = require_root_set(dest_rp)
dest_rp = require_root_set(dest_rp, 1)
if not compare_time:
try: compare_time = Time.genstrtotime(restore_timestr)
except Time.TimeException, exc: Log.FatalError(str(exc))
mirror_rp = restore_root.new_index(restore_index)
inc_rp = mirror_rp.append_path("increments", restore_index)
inc_rp = Globals.rbdir.append_path("increments", restore_index)
backup_set_select(src_rp) # Sets source rorp iterator
src_iter = src_rp.conn.backup.SourceStruct.get_source_select()
return_val = restore.Compare(src_iter, mirror_rp, inc_rp, compare_time)
if compare_type == "compare": compare_func = compare.Compare
elif compare_type == "compare-hash": compare_func = compare.Compare_hash
else:
assert compare_type == "compare-full", compare_type
compare_func = compare.Compare_full
return_val = compare_func(src_rp, mirror_rp, inc_rp, compare_time)
def CheckDest(dest_rp):
......
......@@ -113,8 +113,9 @@ def set_security_level(action, cmdpairs):
sec_level = "all"
rdir = getpath(cp2)
elif action in ["test-server", "list-increments", 'list-increment-sizes',
"list-at-time", "list-changed-since",
"calculate-average", "remove-older-than", "compare"]:
"list-at-time", "list-changed-since",
"calculate-average", "remove-older-than", "compare",
"compare-hash", "compare-full"]:
sec_level = "minimal"
rdir = tempfile.gettempdir()
else: assert 0, "Unknown action %s" % action
......@@ -151,7 +152,14 @@ def set_allowed_requests(sec_level):
"restore.ListAtTime",
"backup.SourceStruct.get_source_select",
"backup.SourceStruct.set_source_select",
"backup.SourceStruct.get_diffs"])
"backup.SourceStruct.get_diffs",
"compare.RepoSide.init_and_get_iter",
"compare.RepoSide.close_rf_cache",
"compare.RepoSide.attach_files",
"compare.DataSide.get_source_select",
"compare.DataSide.compare_fast",
"compare.DataSide.compare_hash",
"compare.DataSide.compare_full"])
if sec_level == "update-only" or sec_level == "all":
l.extend(["log.Log.open_logfile_local", "log.Log.close_logfile_local",
"log.ErrorLog.open", "log.ErrorLog.isopen",
......
......@@ -101,8 +101,8 @@ class SourceStruct:
diff_rorp.set_attached_filetype('snapshot')
for dest_sig in dest_sigiter:
if dest_sig is iterfile.RORPIterFlushRepeat:
yield iterfile.RORPIterFlush # Flush buffer when get_sigs does
if dest_sig is iterfile.MiscIterFlushRepeat:
yield iterfile.MiscIterFlush # Flush buffer when get_sigs does
continue
src_rp = (source_rps.get(dest_sig.index) or
rpath.RORPath(dest_sig.index))
......@@ -172,7 +172,7 @@ class DestinationStruct:
if (Globals.backup_reader is not Globals.backup_writer and
num_rorps_skipped > flush_threshold):
num_rorps_skipped = 0
yield iterfile.RORPIterFlushRepeat
yield iterfile.MiscIterFlushRepeat
else:
index = src_rorp and src_rorp.index or dest_rorp.index
sig = cls.get_one_sig(dest_base_rpath, index,
......
# Copyright 2002, 2003, 2004, 2005 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Perform various kinds of comparisons.
For instance, full-file compare, compare by hash, and metadata-only
compare. This uses elements of the backup and restore modules.
"""
import Globals, restore, rorpiter, log, backup, static, rpath, hash, robust
def Compare(src_rp, mirror_rp, inc_rp, compare_time):
    """Run a metadata-only compare of src_rp against the repository

    The repository side (reached through mirror_rp's connection)
    produces the rorp iterator for compare_time, and the data side
    (reached through src_rp's connection) turns it into reports with
    compare_fast.  Returns whatever print_reports returns: 1 if any
    difference was reported, 0 otherwise.

    """
    repository = mirror_rp.conn.compare.RepoSide
    source = src_rp.conn.compare.DataSide
    mirror_rorps = repository.init_and_get_iter(mirror_rp, inc_rp,
                                                compare_time)
    reports = source.compare_fast(mirror_rorps)
    changes_found = print_reports(reports)
    # print_reports has consumed every report by now, so the
    # repository's rf cache is no longer needed
    repository.close_rf_cache()
    return changes_found
def Compare_hash(src_rp, mirror_rp, inc_rp, compare_time):
    """Compare src_rp with the repository at compare_time, using hashes

    Metadata differences are reported as in Compare, and in addition
    the data of regular files is checked: the source file is hashed
    and the result compared with the digest the repository metadata
    is expected to hold already.  Returns whatever print_reports
    returns: 1 if any difference was reported, 0 otherwise.

    """
    repository = mirror_rp.conn.compare.RepoSide
    source = src_rp.conn.compare.DataSide
    mirror_rorps = repository.init_and_get_iter(mirror_rp, inc_rp,
                                                compare_time)
    changes_found = print_reports(source.compare_hash(mirror_rorps))
    # All reports have been printed, release the repository's rf cache
    repository.close_rf_cache()
    return changes_found
def Compare_full(src_rp, mirror_rp, inc_rp, compare_time):
    """Compare src_rp with the repository at compare_time, byte by byte

    Works like Compare_hash but does not trust stored digests.  The
    source selection iterator is handed to the repository side, which
    attaches file data to the rorps that need checking; the data side
    then compares that data against the files under src_rp.  Returns
    whatever print_reports returns: 1 if any difference was reported,
    0 otherwise.

    """
    repository = mirror_rp.conn.compare.RepoSide
    source = src_rp.conn.compare.DataSide
    selected_src = source.get_source_select()
    repo_with_data = repository.attach_files(selected_src, mirror_rp,
                                             inc_rp, compare_time)
    reports = source.compare_full(src_rp, repo_with_data)
    changes_found = print_reports(reports)
    # All reports have been printed, release the repository's rf cache
    repository.close_rf_cache()
    return changes_found
def print_reports(report_iter):
    """Print each CompareReport in report_iter as "reason: path"

    The path is the report's index joined with "/", or "." when that
    would be empty.  If the iterator yields nothing, a "no changes"
    message is logged instead.  Returns 1 if at least one report was
    printed, 0 otherwise.  Must not run on a server process.

    """
    assert not Globals.server
    any_reported = 0
    for report in report_iter:
        any_reported = 1
        shown_path = ""
        if report.index: shown_path = "/".join(report.index)
        if not shown_path: shown_path = "."
        print("%s: %s" % (report.reason, shown_path))

    if any_reported: return any_reported
    log.Log("No changes found. Directory matches archive data.", 2)
    return any_reported
def get_basic_report(src_rp, repo_rorp, comp_data_func = None):
    """Return a CompareReport for src_rp vs repo_rorp, or None if equal

    Either argument may be false (missing on that side).  The report
    reason is "new" when the repository has no such file, "deleted"
    when the source has none, and "changed" when the two compare
    unequal.

    If comp_data_func is given and both are regular files, it is
    called as comp_data_func(src_rp, repo_rorp) and its result is
    treated as "data changed" when true.  The reason then spells out
    separately whether metadata and data changed.  If comp_data_func
    is false only metadata is compared.

    """
    index = (src_rp or repo_rorp).index

    # Presence checks come first: a file missing on one side
    if not (repo_rorp and repo_rorp.lstat()):
        return CompareReport(index, "new")
    if not (src_rp and src_rp.lstat()):
        return CompareReport(index, "deleted")

    if not (comp_data_func and src_rp.isreg() and repo_rorp.isreg()):
        # Metadata-only verdict
        if src_rp == repo_rorp: return None
        return CompareReport(index, "changed")

    # Both regular files and a data comparison was requested
    meta_differs = not (src_rp == repo_rorp)
    data_differs = comp_data_func(src_rp, repo_rorp)
    if not (meta_differs or data_differs): return None

    if meta_differs: reason = "metadata changed, "
    else: reason = "metadata the same, "
    if data_differs: reason += "data changed"
    else: reason += "data the same"
    return CompareReport(index, reason)
class RepoSide(restore.MirrorStruct):
    """On the repository side, comparing is like restoring

    The methods take cls and are called on the class itself; they are
    presumably converted to class methods by the static.MakeClass call
    that follows this class.

    """
    def init_and_get_iter(cls, mirror_rp, inc_rp, compare_time):
        """Return rorp iter at given compare time

        Sets the mirror and rest times and initializes the rf cache
        before building the iterator.  The mirror base index is passed
        to subtract_indicies, presumably so the yielded rorps are
        indexed relative to the base.

        """
        cls.set_mirror_and_rest_times(compare_time)
        cls.initialize_rf_cache(mirror_rp, inc_rp)
        return cls.subtract_indicies(cls.mirror_base.index,
                                     cls.get_mirror_rorp_iter())

    def attach_files(cls, src_iter, mirror_rp, inc_rp, compare_time):
        """Attach data to all the files that need checking

        Return an iterator of repo rorps that includes all the files
        that may have changed, and has the fileobj set on all rorps
        that need it.  A file missing from the repository is yielded
        as a bare RORPath carrying only its index.

        """
        repo_iter = cls.init_and_get_iter(mirror_rp, inc_rp, compare_time)
        base_index = cls.mirror_base.index
        for src_rp, mir_rorp in rorpiter.Collate2Iters(src_iter, repo_iter):
            # Use if/else instead of "a and a.index or b.index": the
            # root index is an empty (false) tuple, so the and/or form
            # would fall through to mir_rorp.index and fail when
            # mir_rorp is None.
            if src_rp: index = src_rp.index
            else: index = mir_rorp.index

            if src_rp and mir_rorp:
                if not src_rp.isreg() and src_rp == mir_rorp:
                    continue # They must be equal, nothing else to check
                # Data is only attached when both are regular files of
                # the same size
                if (src_rp.isreg() and mir_rorp.isreg() and
                    src_rp.getsize() == mir_rorp.getsize()):
                    mir_rorp.setfile(cls.rf_cache.get_fp(base_index + index))
                    mir_rorp.set_attached_filetype('snapshot')

            if mir_rorp: yield mir_rorp
            else: yield rpath.RORPath(index) # indicate deleted mir_rorp
static.MakeClass(RepoSide)
class DataSide(backup.SourceStruct):
    """On the side that has the current data, compare is like backing up

    The methods take cls and are called on the class itself; they are
    presumably converted to class methods by the static.MakeClass call
    that follows this class.

    """
    def compare_fast(cls, repo_iter):
        """Compare rorps (metadata only) quickly, return report iter"""
        src_iter = cls.get_source_select()
        for src_rorp, mir_rorp in rorpiter.Collate2Iters(src_iter, repo_iter):
            report = get_basic_report(src_rorp, mir_rorp)
            if report: yield report

    def compare_hash(cls, repo_iter):
        """Like above, but also compare sha1 sums of any regular files"""
        def hashs_changed(src_rp, mir_rorp):
            """Return 0 if their data hashes same, 1 otherwise

            A repository entry without a digest cannot be checked; a
            warning is logged and the data is treated as unchanged.

            """
            if not mir_rorp.has_sha1():
                # Name the file through mir_rorp: no variable called
                # "index" exists in this scope
                log.Log("Warning: Metadata file has no digest for %s, "
                        "unable to compare." % (mir_rorp.get_indexpath(),), 2)
                return 0
            elif (src_rp.getsize() == mir_rorp.getsize() and
                  hash.compute_sha1(src_rp) == mir_rorp.get_sha1()):
                return 0
            return 1

        src_iter = cls.get_source_select()
        for src_rp, mir_rorp in rorpiter.Collate2Iters(src_iter, repo_iter):
            report = get_basic_report(src_rp, mir_rorp, hashs_changed)
            if report: yield report

    def compare_full(cls, src_root, repo_iter):
        """Given repo iter with full data attached, return report iter

        Each repo rorp is matched to the source file at the same index
        under src_root.

        """
        # NOTE(review): data_changed below hands open file objects to
        # check_common_error.  If those same args are forwarded to this
        # handler, src_rp here would be a file object with no .path --
        # confirm against robust.check_common_error.
        def error_handler(exc, src_rp, repo_rorp):
            log.Log("Error reading file %s" % (src_rp.path,), 2)
            return 0 # They aren't the same if we get an error

        def data_changed(src_rp, repo_rorp):
            """Return 0 if full compare of data matches, 1 otherwise"""
            if src_rp.getsize() != repo_rorp.getsize(): return 1
            # cmpfileobj's result is negated: a true result means the
            # data matched, and the error handler's 0 counts as changed
            return not robust.check_common_error(error_handler,
                rpath.cmpfileobj, (src_rp.open("rb"), repo_rorp.open("rb")))

        for repo_rorp in repo_iter:
            src_rp = src_root.new_index(repo_rorp.index)
            report = get_basic_report(src_rp, repo_rorp, data_changed)
            if report: yield report
static.MakeClass(DataSide)
class CompareReport:
    """Describe how a source file and its repository entry differ

    The side doing the comparison is not necessarily the side that
    prints the result, so reports are sent back to the client
    connection as an iterator of these objects.

    index is the index of the file concerned and reason is a short
    description of the difference.

    """
    # A class-level file attribute lets a CompareReport masquerade as
    # a RORPath inside an iterator, so that it can be pipelined.
    file = None

    def __init__(self, index, reason):
        self.index, self.reason = index, reason
......@@ -153,8 +153,7 @@ class LowLevelPipeConnection(Connection):
def _putiter(self, iterator, req_num):
"""Put an iterator through the pipe"""
self._write("i",
str(VirtualFile.new(iterfile.RORPIterToFile(iterator))),
req_num)
str(VirtualFile.new(iterfile.MiscIterToFile(iterator))), req_num)
def _putrpath(self, rpath, req_num):
"""Put an rpath into the pipe
......@@ -241,7 +240,7 @@ class LowLevelPipeConnection(Connection):
elif format_string == "b": result = data
elif format_string == "f": result = VirtualFile(self, int(data))
elif format_string == "i":
result = iterfile.FileToRORPIter(VirtualFile(self, int(data)))
result = iterfile.FileToMiscIter(VirtualFile(self, int(data)))
elif format_string == "r": result = self._getrorpath(data)
elif format_string == "R": result = self._getrpath(data)
elif format_string == "Q": result = self._getqrpath(data)
......@@ -535,7 +534,7 @@ import Globals, Time, Rdiff, Hardlink, FilenameMapping, C, Security, \
Main, rorpiter, selection, increment, statistics, manage, lazy, \
iterfile, rpath, robust, restore, manage, backup, connection, \
TempFile, SetConnections, librsync, log, regress, fs_abilities, \
eas_acls, user_group
eas_acls, user_group, compare
Globals.local_connection = LocalConnection()
Globals.connections.append(Globals.local_connection)
......
......@@ -20,6 +20,7 @@
"""Contains a file wrapper that returns a hash on close"""
import sha
import Globals
class FileWrapper:
"""Wrapper around a file-like object
......@@ -51,3 +52,17 @@ class Report:
def __init__(self, close_val, sha1_digest):
assert not close_val # For now just assume inner file closes correctly
self.sha1_digest = sha1_digest
def compute_sha1(rp, compressed = 0):
    """Return the hex sha1 hash of given rpath

    The file is read to the end in Globals.blocksize chunks through a
    FileWrapper, whose close() supplies the digest.  The digest is
    also recorded on rp with set_sha1.  compressed is passed through
    to rp.open.  rp must be on the local connection.

    """
    assert rp.conn is Globals.local_connection # inefficient not to do locally
    blocksize = Globals.blocksize
    # Open in binary mode so the digest covers the file's exact bytes
    # on every platform (text mode may translate line endings)
    fp = FileWrapper(rp.open("rb", compressed))
    while 1:
        if not fp.read(blocksize): break
    digest = fp.close().sha1_digest
    rp.set_sha1(digest)
    return digest
......@@ -41,14 +41,14 @@ class UnwrapFile:
"""Return pair (type, data) next in line on the file
type is a single character which is either
"o" for object,
"o" for an object,
"f" for file,
"c" for a continution of a file,
"e" for an exception, or
None if no more data can be read.
Data is either the file's data, if type is "c" or "f", or the
actual object if the type is "o" or "e".
actual object if the type is "o", "e", or "r"
"""
header = self.file.read(8)
......@@ -57,8 +57,10 @@ class UnwrapFile:
assert None, "Header %s is only %d bytes" % (header, len(header))
type, length = header[0], C.str2long(header[1:])
buf = self.file.read(length)
if type == "o" or type == "e": return type, cPickle.loads(buf)
else: return type, buf
if type in ("o", "e"): return type, cPickle.loads(buf)
else:
assert type in ("f", "c")
return type, buf
class IterWrappingFile(UnwrapFile):
......@@ -213,7 +215,7 @@ class FileWrappingIter:
self.currently_in_file.read,
[Globals.blocksize])
if buf == "" or buf is None:
assert not self.currently_in_file.close()
self.currently_in_file.close()
self.currently_in_file = None
if buf is None: # error occurred above, encode exception
prefix_letter = "e"
......@@ -238,33 +240,37 @@ class FileWrappingIter:
def close(self): self.closed = 1
class RORPIterFlush:
"""Used to signal that a RORPIterToFile should flush buffer"""
class MiscIterFlush:
"""Used to signal that a MiscIterToFile should flush buffer"""
pass
class RORPIterFlushRepeat(RORPIterFlush):
"""Flush, but then cause RORPIter to yield this same object
class MiscIterFlushRepeat(MiscIterFlush):
"""Flush, but then cause Misc Iter to yield this same object
Thus if we put together a pipeline of these, one RORPIterContFlush
Thus if we put together a pipeline of these, one MiscIterFlushRepeat
can cause all the segments to flush in sequence.
"""
pass
class RORPIterToFile(FileWrappingIter):
"""Take a RORPIter and give it a file-ish interface
class MiscIterToFile(FileWrappingIter):
"""Take an iter and give it a file-ish interface
This expands on the FileWrappingIter by understanding how to
process RORPaths with file objects attached. It adds a new
character "r" to mark these.
This is how we send signatures and diffs across the line. As
sending each one separately via a read() call would result in a
lot of latency, the read()'s are buffered - a read() call with no
arguments will return a variable length string (possibly empty).
To flush the RORPIterToFile, have the iterator yield a
RORPIterFlush class.
To flush the MiscIterToFile, have the iterator yield a
MiscIterFlush class.
"""
def __init__(self, rpiter, max_buffer_bytes = None, max_buffer_rps = None):
"""RORPIterToFile initializer
"""MiscIterToFile initializer
max_buffer_bytes is the maximum size of the buffer in bytes.
max_buffer_rps is the maximum size of the buffer in rorps.
......@@ -313,17 +319,18 @@ class RORPIterToFile(FileWrappingIter):
if hasattr(currentobj, "read") and hasattr(currentobj, "close"):
self.currently_in_file = currentobj
self.addfromfile("f")
elif (type(currentobj) is types.ClassType and
issubclass(currentobj, iterfile.RORPIterFlush)):
if currentobj is iterfile.RORPIterFlushRepeat:
self.add_flush_repeater()
elif currentobj is iterfile.MiscIterFlush: return None
elif currentobj is iterfile.MiscIterFlushRepeat:
self.add_misc(currentobj)
return None
else: self.addrorp(currentobj)
elif isinstance(currentobj, rpath.RORPath):
self.addrorp(currentobj)
else: self.add_misc(currentobj)
return 1
def add_flush_repeater(self):
"""Add a RORPIterFlushRepeat object to the buffer"""
pickle = cPickle.dumps(iterfile.RORPIterFlushRepeat, 1)
def add_misc(self, obj):
"""Add an arbitrary pickleable object to the buffer"""
pickle = cPickle.dumps(obj, 1)
self.array_buf.fromstring("o")
self.array_buf.fromstring(C.long2str(long(len(pickle))))
self.array_buf.fromstring(pickle)
......@@ -336,7 +343,7 @@ class RORPIterToFile(FileWrappingIter):
else:
pickle = cPickle.dumps((rorp.index, rorp.data, 0), 1)
self.rorps_in_buffer += 1
self.array_buf.fromstring("o")
self.array_buf.fromstring("r")
self.array_buf.fromstring(C.long2str(long(len(pickle))))
self.array_buf.fromstring(pickle)
......@@ -348,8 +355,8 @@ class RORPIterToFile(FileWrappingIter):
def close(self): self.closed = 1
class FileToRORPIter(IterWrappingFile):
"""Take a RORPIterToFile and turn it back into a RORPIter"""
class FileToMiscIter(IterWrappingFile):
"""Take a MiscIterToFile and turn it back into a iterator"""
def __init__(self, file):
IterWrappingFile.__init__(self, file)
self.buf = ""
......@@ -363,9 +370,8 @@ class FileToRORPIter(IterWrappingFile):
type = None
while not type: type, data = self._get()
if type == "z": raise StopIteration
elif type == "o":
if data is iterfile.RORPIterFlushRepeat: return data
else: return self.get_rorp(data)
elif type == "r": return self.get_rorp(data)
elif type == "o": return data
else: raise IterFileException("Bad file type %s" % (type,))
def get_rorp(self, pickled_tuple):
......@@ -401,20 +407,21 @@ class FileToRORPIter(IterWrappingFile):
if not self.buf: self.buf += self.file.read()
if not self.buf: return None, None
assert len(self.buf) >= 8, "Unexpected end of RORPIter file"
assert len(self.buf) >= 8, "Unexpected end of MiscIter file"
type, length = self.buf[0], C.str2long(self.buf[1:8])
data = self.buf[8:8+length]
self.buf = self.buf[8+length:]
if type == "o" or type == "e": return type, cPickle.loads(data)
if type in "oer": return type, cPickle.loads(data)
else: return type, data
class ErrorFile:
"""File-like that just raises error (used by FileToRORPIter above)"""
"""File-like that just raises error (used by FileToMiscIter above)"""
def __init__(self, exc):
"""Initialize new ErrorFile. exc is the exception to raise on read"""
self.exc = exc
def read(self, l=-1): raise self.exc
def close(self): return None
import iterfile
......@@ -97,8 +97,8 @@ def set_restore_times():
backup time. _mirror_time is the unsuccessful backup time.
"""
restore._mirror_time = unsuccessful_backup_time
restore._rest_time = regress_time
restore.MirrorStruct._mirror_time = unsuccessful_backup_time
restore.MirrorStruct._rest_time = regress_time
def remove_rbdir_increments():
"""Delete the increments in the rdiff-backup-data directory
......
......@@ -25,12 +25,6 @@ import Globals, Time, Rdiff, Hardlink, rorpiter, selection, rpath, \
log, static, robust, metadata, statistics, TempFile, eas_acls
# This will be set to the time of the current mirror
_mirror_time = None
# This will be set to the exact time to restore to (not restore_to_time)
_rest_time = None
class RestoreError(Exception): pass
def Restore(mirror_rp, inc_rpath, target, restore_to_time):
......@@ -72,8 +66,8 @@ def ListChangedSince(mirror_rp, inc_rp, restore_to_time):
MirrorStruct.set_mirror_and_rest_times(restore_to_time)
MirrorStruct.initialize_rf_cache(mirror_rp, inc_rp)
old_iter = MirrorStruct.get_mirror_rorp_iter(_rest_time, 1)
cur_iter = MirrorStruct.get_mirror_rorp_iter(_mirror_time, 1)
old_iter = MirrorStruct.get_mirror_rorp_iter(MirrorStruct._rest_time, 1)
cur_iter = MirrorStruct.get_mirror_rorp_iter(MirrorStruct._mirror_time, 1)
collated = rorpiter.Collate2Iters(old_iter, cur_iter)
for old_rorp, cur_rorp in collated:
if not old_rorp: change = "new"
......@@ -94,43 +88,23 @@ def ListAtTime(mirror_rp, inc_rp, time):
assert mirror_rp.conn is Globals.local_connection, "Run locally only"
MirrorStruct.set_mirror_and_rest_times(time)
MirrorStruct.initialize_rf_cache(mirror_rp, inc_rp)
old_iter = MirrorStruct.get_mirror_rorp_iter(_rest_time, 1)
old_iter = MirrorStruct.get_mirror_rorp_iter()
for rorp in old_iter: yield rorp
def Compare(src_iter, mirror_rp, inc_rp, compare_time):
"""Compares metadata in src_rp dir with metadata in mirror_rp at time"""
MirrorStruct.set_mirror_and_rest_times(compare_time)
MirrorStruct.initialize_rf_cache(mirror_rp, inc_rp)
mir_iter = MirrorStruct.get_mirror_rorp_iter(compare_time, 1)
collated = rorpiter.Collate2Iters(src_iter, mir_iter)
changed_files_found = 0
for src_rorp, mir_rorp in collated:
if not mir_rorp: change = "new"
elif not src_rorp: change = "deleted"
elif src_rorp == mir_rorp: continue
else: change = "changed"
changed_files_found = 1
path_desc = (src_rorp and src_rorp.get_indexpath() or
mir_rorp.get_indexpath())
log.Log("%-7s %s" % (change, path_desc), 2)
if change == "changed": # Log more description of difference
assert not src_rorp.equal_verbose_auto(mir_rorp, 3)
if not changed_files_found:
log.Log("No changes found. Directory matches archive data.", 2)
MirrorStruct.close_rf_cache()
return changed_files_found
class MirrorStruct:
"""Hold functions to be run on the mirror side"""
_select = None # If selection command line arguments given, use Select here
# If selection command line arguments given, use Select here
_select = None
# This will be set to the time of the current mirror
_mirror_time = None
# This will be set to the exact time to restore to (not restore_to_time)
_rest_time = None
def set_mirror_and_rest_times(cls, restore_to_time):
"""Set global variabels _mirror_time and _rest_time on mirror conn"""
global _mirror_time, _rest_time
_mirror_time = cls.get_mirror_time()
_rest_time = cls.get_rest_time(restore_to_time)
"""Set class variabels _mirror_time and _rest_time on mirror conn"""
MirrorStruct._mirror_time = cls.get_mirror_time()
MirrorStruct._rest_time = cls.get_rest_time(restore_to_time)
def get_mirror_time(cls):
"""Return time (in seconds) of latest mirror"""
......@@ -169,8 +143,8 @@ class MirrorStruct:
"""
# use dictionary to remove dups
if not _mirror_time: d = {cls.get_mirror_time(): None}
else: d = {_mirror_time: None}
if not cls._mirror_time: d = {cls.get_mirror_time(): None}
else: d = {cls._mirror_time: None}
if not rp or not rp.index: rp = Globals.rbdir.append("increments")
for inc in get_inclist(rp): d[inc.getinctime()] = None
for inc in get_inclist(Globals.rbdir.append("mirror_metadata")):
......@@ -201,7 +175,7 @@ class MirrorStruct:
unwanted files from the metadata_iter.
"""
if rest_time is None: rest_time = _rest_time
if rest_time is None: rest_time = cls._rest_time
rorp_iter = eas_acls.GetCombinedMetadataIter(
Globals.rbdir, rest_time, restrict_index = cls.mirror_base.index,
......@@ -371,7 +345,7 @@ class CachedRF:
rf = self.get_rf(index)
if not rf:
log.Log("""Error: Unable to retrieve data for file %s!
The cause is probably data loss from the destination directory.""" %
The cause is probably data loss from the backup repository.""" %
(index and "/".join(index) or '.',), 2)
return cStringIO.StringIO('')
return self.get_rf(index).get_restore_fp()
......@@ -434,7 +408,8 @@ class RestoreFile:
"""
self.mirror_rp.inc_type = 'snapshot'
self.mirror_rp.inc_compressed = 0
if not self.inc_list or _rest_time >= _mirror_time:
if (not self.inc_list or
MirrorStruct._rest_time >= MirrorStruct._mirror_time):
self.relevant_incs = [self.mirror_rp]
return
......@@ -461,7 +436,7 @@ class RestoreFile:
incpairs = []
for inc in self.inc_list:
time = inc.getinctime()
if time >= _rest_time: incpairs.append((time, inc))
if time >= MirrorStruct._rest_time: incpairs.append((time, inc))
incpairs.sort()
return [pair[1] for pair in incpairs]
......
......@@ -1128,16 +1128,6 @@ class RPath(RORPath):
self.fsync(fp)
if Globals.fsync_directories: self.get_parent_rp().fsync()
def sync_delete(self):
    """Delete self, syncing so the deletion is known to be complete

    On some filesystems (like linux's ext2) both the file and its
    directory have to be synced.  The delete is only issued when
    lstat() reports something and self is not a symlink; the parent
    directory is fsynced whenever Globals.fsync_directories is set.

    """
    exists_and_not_symlink = self.lstat() and not self.issym()
    if exists_and_not_symlink:
        # fsync_local is handed the bound delete method as its argument
        self.fsync_local(self.delete)
    if Globals.fsync_directories:
        parent_rp = self.get_parent_rp()
        parent_rp.fsync()
def get_data(self):
"""Open file as a regular file, read data, close, return data"""
fp = self.open("rb")
......
......@@ -40,7 +40,8 @@ def MakeOutputDir():
return rp
def rdiff_backup(source_local, dest_local, src_dir, dest_dir,
current_time = None, extra_options = ""):
current_time = None, extra_options = "",
check_return_val = 1):
"""Run rdiff-backup with the given options
source_local and dest_local are boolean values. If either is
......@@ -70,7 +71,9 @@ def rdiff_backup(source_local, dest_local, src_dir, dest_dir,
cmdargs.extend([src_dir, dest_dir])
cmdline = " ".join(cmdargs)
print "Executing: ", cmdline
assert not os.system(cmdline)
ret_val = os.system(cmdline)
if check_return_val: assert not ret_val, ret_val
return ret_val
def InternalBackup(source_local, dest_local, src_dir, dest_dir,
current_time = None, eas = None, acls = None):
......
import unittest
from commontest import *
from rdiff_backup import compare
"""Test the compare.py module and overall compare functionality"""
class CompareTest(unittest.TestCase):
    """Run the --compare, --compare-hash and --compare-full modes"""

    def setUp(self):
        """Rebuild testfiles/output from increment2, then increment3"""
        Myrm("testfiles/output")
        sessions = (('testfiles/increment2', 10000),
                    ('testfiles/increment3', 20000))
        for source_dir, session_time in sessions:
            rdiff_backup(1, 1, source_dir, 'testfiles/output',
                         current_time = session_time)

    def _run_compare_cases(self, local, cases):
        """Run rdiff_backup once per (src, dest, options, differs) case

        When differs is false the default check_return_val is used, so
        rdiff_backup itself asserts a zero return value.  When differs
        is true that check is switched off and a non-zero return value
        is asserted here.

        """
        for src_dir, dest_dir, options, differs in cases:
            if not differs:
                rdiff_backup(local, local, src_dir, dest_dir,
                             extra_options = options)
                continue
            status = rdiff_backup(local, local, src_dir, dest_dir,
                                  extra_options = options,
                                  check_return_val = 0)
            assert status, status

    def generic_test(self, local, compare_option):
        """Shared body of the whole-repository tests below

        Compares increment3 and increment2 against testfiles/output
        with compare_option, then again with "-at-time 10000" appended.

        """
        repo = 'testfiles/output'
        at_time_option = compare_option + "-at-time 10000"
        self._run_compare_cases(local, (
            ('testfiles/increment3', repo, compare_option, 0),
            ('testfiles/increment2', repo, compare_option, 1),
            ('testfiles/increment2', repo, at_time_option, 0),
            ('testfiles/increment3', repo, at_time_option, 1),
        ))

    def testBasicLocal(self):
        """--compare and --compare-at-time, local"""
        self.generic_test(1, "--compare")

    def testBasicRemote(self):
        """--compare and --compare-at-time, both ends remote"""
        self.generic_test(0, "--compare")

    def testHashLocal(self):
        """--compare-hash and --compare-hash-at-time, local"""
        self.generic_test(1, "--compare-hash")

    def testHashRemote(self):
        """--compare-hash and --compare-hash-at-time, remote"""
        self.generic_test(0, "--compare-hash")

    def testFullLocal(self):
        """--compare-full and --compare-full-at-time, local"""
        self.generic_test(1, "--compare-full")

    def testFullRemote(self):
        """--compare-full and --compare-full-at-time, remote"""
        self.generic_test(0, "--compare-full")

    def generic_selective_test(self, local, compare_option):
        """Shared body of the selective tests--compare only a subdirectory"""
        at_time_option = compare_option + "-at-time 10000"
        self._run_compare_cases(local, (
            ('testfiles/increment3/various_file_types',
             'testfiles/output/various_file_types', compare_option, 0),
            ('testfiles/increment2/increment1',
             'testfiles/output/increment1', compare_option, 1),
            ('testfiles/increment2/newdir',
             'testfiles/output/newdir', at_time_option, 0),
            ('testfiles/increment3/newdir',
             'testfiles/output/newdir', at_time_option, 1),
        ))

    def testSelLocal(self):
        """--compare of a single subdirectory, local"""
        self.generic_selective_test(1, "--compare")

    def testSelRemote(self):
        """--compare of a single subdirectory, remote"""
        self.generic_selective_test(0, "--compare")

    def testSelHashLocal(self):
        """--compare-hash of a single subdirectory, local"""
        self.generic_selective_test(1, "--compare-hash")

    def testSelHashRemote(self):
        """--compare-hash of a single subdirectory, remote"""
        self.generic_selective_test(0, "--compare-hash")

    def testSelFullLocal(self):
        """--compare-full of a single subdirectory, local"""
        self.generic_selective_test(1, "--compare-full")

    def testSelFullRemote(self):
        """--compare-full of a single subdirectory, remote"""
        self.generic_selective_test(0, "--compare-full")
if __name__ == "__main__": unittest.main()
......@@ -133,10 +133,10 @@ class PipeConnectionTest(unittest.TestCase):
def testIterators(self):
"""Test transmission of iterators"""
# NOTE(review): two variants of this test are interleaved below, one
# iterating over RORPsubstitute objects and one over plain integers.
# The second assignment to `i` replaces the first -- confirm which
# variant is meant to remain.
i = iter(map(RORPsubstitute, range(10)))
i = iter([5, 10, 15]*100)
assert self.conn.hasattr(i, "next")
datastring = self.conn.reval("lambda i: i.next().data", i)
assert datastring == "Hello, there 0", datastring
ret_val = self.conn.reval("lambda i: i.next()*i.next()", i)
assert ret_val == 50, ret_val
def testRPaths(self):
"""Test transmission of rpaths"""
......@@ -212,12 +212,5 @@ class RedirectedConnectionTest(unittest.TestCase):
def tearDown(self):
"""Close connections by calling SetConnections.CloseConnections()"""
SetConnections.CloseConnections()
class RORPsubstitute:
    """Stand-in for a RORP, used by testIterators

    Carries an index, a data string built from that index, and a file
    attribute that is always None.

    """
    def __init__(self, i):
        greeting = "Hello, there %d" % i
        self.index = i
        self.data = greeting
        self.file = None
if __name__ == "__main__":
unittest.main()
if __name__ == "__main__": unittest.main()
......@@ -447,22 +447,6 @@ class FinalMisc(PathSetter):
for inc in self.get_all_increments(rbdir):
assert inc.getinctime() >= 20000
def testCompare(self):
    """Test --compare and --compare-at-time modes

    Runs exec_rb for increment1 (time argument 10000) and increment2
    (time argument 20000) into testfiles/output, then runs each
    compare mode against both source directories.

    """
    repo = 'testfiles/output'
    Myrm(repo)
    self.set_connections(None, None, None, None)
    for session_time, src_dir in ((10000, 'testfiles/increment1'),
                                  (20000, 'testfiles/increment2')):
        self.exec_rb(session_time, src_dir, repo)

    # Each entry: extra arguments, the value passed as the third
    # argument of exec_rb_extra_args_retval (presumably the expected
    # return value -- confirm against that helper), source directory.
    checks = (('--compare', 0, 'testfiles/increment2'),
              ('--compare', 1, 'testfiles/increment1'),
              ('--compare-at-time 10000', 1, 'testfiles/increment2'),
              ('--compare-at-time 10000', 0, 'testfiles/increment1'))
    for extra_args, ret_val, src_dir in checks:
        self.exec_rb_extra_args_retval(20000, extra_args, ret_val,
                                       src_dir, repo)
class FinalSelection(PathSetter):
"""Test selection options"""
......
......@@ -53,7 +53,7 @@ class testIterFile(unittest.TestCase):
self.assertRaises(StopIteration, new_iter.next)
class testRORPIters(unittest.TestCase):
class testMiscIters(unittest.TestCase):
"""Test sending rorpiter back and forth"""
def setUp(self):
"""Make testfiles/output directory and a few files"""
......@@ -83,7 +83,7 @@ class testRORPIters(unittest.TestCase):
self.regfile2.setdata()
self.regfile3.setdata()
def print_RORPIterFile(self, rpiter_file):
def print_MiscIterFile(self, rpiter_file):
"""Print the given rorpiter file"""
while 1:
buf = rpiter_file.read()
......@@ -93,7 +93,7 @@ class testRORPIters(unittest.TestCase):
def testBasic(self):
"""Test basic conversion"""
l = [self.outputrp, self.regfile1, self.regfile2, self.regfile3]
i_out = FileToRORPIter(RORPIterToFile(iter(l)))
i_out = FileToMiscIter(MiscIterToFile(iter(l)))
out1 = i_out.next()
assert out1 == self.outputrp
......@@ -113,34 +113,54 @@ class testRORPIters(unittest.TestCase):
i_out.next()
self.assertRaises(StopIteration, i_out.next)
def testMix(self):
    """Pass an int, an RP and a string through one iterator file

    The three objects must come back in order and compare equal to
    what went in, the RP's data must still read back as "goodbye",
    and the iterator must then be exhausted.

    """
    mixed = [5, self.regfile3, "hello"]
    serialized = MiscIterToFile(iter(mixed)).read()
    received = FileToMiscIter(StringIO.StringIO(serialized))

    first = received.next()
    assert first == 5, first

    second = received.next()
    assert second == self.regfile3
    data_fp = second.open("rb")
    assert data_fp.read() == "goodbye"
    assert not data_fp.close()

    third = received.next()
    assert third == "hello", third

    self.assertRaises(StopIteration, received.next)
def testFlush(self):
"""Test flushing property of RORPIterToFile"""
# NOTE(review): each RORPIter* statement below has a MiscIter* counterpart
# later on that rebinds the same name -- confirm that only the MiscIter*
# spelling is meant to remain.
l = [self.outputrp, RORPIterFlush, self.outputrp]
filelike = RORPIterToFile(iter(l))
"""Test flushing property of MiscIterToFile"""
l = [self.outputrp, MiscIterFlush, self.outputrp]
filelike = MiscIterToFile(iter(l))
new_filelike = StringIO.StringIO((filelike.read() + "z" +
C.long2str(0L)))
i_out = FileToRORPIter(new_filelike)
i_out = FileToMiscIter(new_filelike)
assert i_out.next() == self.outputrp
self.assertRaises(StopIteration, i_out.next)
i_out2 = FileToRORPIter(filelike)
i_out2 = FileToMiscIter(filelike)
assert i_out2.next() == self.outputrp
self.assertRaises(StopIteration, i_out2.next)
def testFlushRepeat(self):
"""Test flushing like above, but have Flush obj emerge from iter"""
l = [self.outputrp, RORPIterFlushRepeat, self.outputrp]
filelike = RORPIterToFile(iter(l))
l = [self.outputrp, MiscIterFlushRepeat, self.outputrp]
filelike = MiscIterToFile(iter(l))
new_filelike = StringIO.StringIO((filelike.read() + "z" +
C.long2str(0L)))
i_out = FileToRORPIter(new_filelike)
i_out = FileToMiscIter(new_filelike)
assert i_out.next() == self.outputrp
assert i_out.next() is RORPIterFlushRepeat
assert i_out.next() is MiscIterFlushRepeat
self.assertRaises(StopIteration, i_out.next)
i_out2 = FileToRORPIter(filelike)
i_out2 = FileToMiscIter(filelike)
assert i_out2.next() == self.outputrp
self.assertRaises(StopIteration, i_out2.next)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment