Commit 02f71b86 authored by bescoto

First pass at integrating regress code

At this point most of the tests work, but there are still problems
with the finaltest error tests on /proc, and with some selection
options.  The regress code is totally unchecked, and regresstest.py is
unwritten.


git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup@277 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent d02cadfe
......@@ -99,7 +99,7 @@ client_conn = None
# list.
changed_settings = []
# The RPath of the rdiff-backup-data directory.
# The RPath or QuotedRPath of the rdiff-backup-data directory.
rbdir = None
# quoting_enabled is true if we should quote certain characters in
......
......@@ -32,7 +32,7 @@ source side should only transmit inode information.
from __future__ import generators
import cPickle
import Globals, Time, TempFile, rpath, log, robust
import Globals, Time, rpath, log, robust
# In all of these, lists of indices are the values.  The keys in the
# _inode_ ones are (inode, devloc) pairs.
......
......@@ -24,7 +24,7 @@ import getopt, sys, re, os
from log import Log, LoggerError
import Globals, Time, SetConnections, selection, robust, rpath, \
manage, backup, connection, restore, FilenameMapping, \
Security, Hardlink
Security, Hardlink, regress, C
action = None
......@@ -44,12 +44,12 @@ def parse_cmdlineoptions(arglist):
try: optlist, args = getopt.getopt(arglist, "blr:sv:V",
["backup-mode", "calculate-average", "chars-to-quote=",
"current-time=", "exclude=", "exclude-device-files",
"exclude-filelist=", "exclude-filelist-stdin",
"exclude-globbing-filelist=", "exclude-mirror=",
"exclude-other-filesystems", "exclude-regexp=",
"exclude-special-files", "force", "include=",
"include-filelist=", "include-filelist-stdin",
"check-destination-dir", "current-time=", "exclude=",
"exclude-device-files", "exclude-filelist=",
"exclude-filelist-stdin", "exclude-globbing-filelist=",
"exclude-mirror=", "exclude-other-filesystems",
"exclude-regexp=", "exclude-special-files", "force",
"include=", "include-filelist=", "include-filelist-stdin",
"include-globbing-filelist=", "include-regexp=",
"list-changed-since=", "list-increments", "no-compression",
"no-compression-regexp=", "no-hard-links", "null-separator",
......@@ -66,6 +66,7 @@ def parse_cmdlineoptions(arglist):
for opt, arg in optlist:
if opt == "-b" or opt == "--backup-mode": action = "backup"
elif opt == "--calculate-average": action = "calculate-average"
elif opt == "--check-destination-dir": action = "check-destination-dir"
elif opt == "--chars-to-quote":
Globals.set('chars_to_quote', arg)
Globals.set('quoting_enabled', 1)
......@@ -176,7 +177,8 @@ def set_action():
commandline_error("Two arguments are required (source, destination).")
if l == 2 and (action == "list-increments" or
action == "remove-older-than" or
action == "list-changed-since"):
action == "list-changed-since" or
action == "check-destination-dir"):
commandline_error("Only use one argument, "
"the root of the backup directory")
if l > 2 and action != "calculate-average":
......@@ -211,6 +213,7 @@ def take_action(rps):
elif action == "list-increments": ListIncrements(rps[0])
elif action == "remove-older-than": RemoveOlderThan(rps[0])
elif action == "calculate-average": CalculateAverage(rps)
elif action == "check-destination-dir": CheckDest(rps[0])
else: raise AssertionError("Unknown action " + action)
def cleanup():
......@@ -239,10 +242,13 @@ def Backup(rpin, rpout):
backup_set_select(rpin)
backup_init_dirs(rpin, rpout)
if prevtime:
rpout.conn.Main.backup_touch_curmirror_local(rpin, rpout)
Time.setprevtime(prevtime)
backup.Mirror_and_increment(rpin, rpout, incdir)
else: backup.Mirror(rpin, rpout)
rpout.conn.Main.backup_touch_curmirror_local(rpin, rpout)
rpout.conn.Main.backup_remove_curmirror_local()
else:
backup.Mirror(rpin, rpout)
rpout.conn.Main.backup_touch_curmirror_local(rpin, rpout)
def backup_set_select(rpin):
"""Create Select objects on source connection"""
......@@ -266,6 +272,7 @@ def backup_init_dirs(rpin, rpout):
datadir = rpout.append_path("rdiff-backup-data")
SetConnections.UpdateGlobal('rbdir', datadir)
checkdest_if_necessary(rpout)
incdir = datadir.append_path("increments")
prevtime = backup_get_mirrortime()
......@@ -305,39 +312,45 @@ def backup_warn_if_infinite_regress(rpin, rpout):
source directory '%s'. This could cause an infinite regress. You
may need to use the --exclude option.""" % (rpout.path, rpin.path), 2)
def backup_get_mirrorrps():
"""Return list of current_mirror rps"""
datadir = Globals.rbdir
if not datadir.isdir(): return []
mirrorrps = [datadir.append(fn) for fn in datadir.listdir()
if fn.startswith("current_mirror.")]
return filter(lambda rp: rp.isincfile(), mirrorrps)
def backup_get_mirrortime():
"""Return time in seconds of previous mirror, or None if cannot"""
mirrorrps = backup_get_mirrorrps()
if not mirrorrps: return None
if len(mirrorrps) > 1:
Log(
"""Warning: duplicate current_mirror files found. Perhaps something
went wrong during your last backup? Using """ + mirrorrps[-1].path, 2)
return mirrorrps[-1].getinctime()
incbase = Globals.rbdir.append_path("current_mirror")
mirror_rps = restore.get_inclist(incbase)
assert len(mirror_rps) <= 1, \
"Found %s current_mirror rps, expected <=1" % (len(mirror_rps),)
if mirror_rps: return mirror_rps[0].getinctime()
else: return None
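The previous mirror time is read back out of the current_mirror marker's name, which backup_touch_curmirror_local below builds as "current_mirror.<timestring>.data".  A rough sketch of how such a name decomposes (illustrative only; the real parsing is done by getinctime()):

name = "current_mirror.2003-03-02T04:21:55-07:00.data"   # example marker name
base, timestring, ext = name.split(".")    # the timestring itself contains no dots
assert base == "current_mirror" and ext == "data"
# Time.stringtotime(timestring) would then give the session time in seconds.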
def backup_touch_curmirror_local(rpin, rpout):
"""Make a file like current_mirror.time.data to record time
Also updates rpout so mod times don't get messed up. This should
be run on the destination connection.
When doing an incremental backup, this should happen before any
other writes, and the file should be removed after all writes.
That way we can tell whether the previous session aborted if there
are two current_mirror files.
When doing the initial full backup, the file can be created after
everything else is in place.
"""
datadir = Globals.rbdir
map(rpath.RPath.delete, backup_get_mirrorrps())
mirrorrp = datadir.append("current_mirror.%s.%s" % (Time.curtimestr,
"data"))
mirrorrp = Globals.rbdir.append("current_mirror.%s.%s" % (Time.curtimestr,
"data"))
Log("Touching mirror marker %s" % mirrorrp.path, 6)
mirrorrp.touch()
rpath.copy_attribs(rpin, rpout)
mirrorrp.fsync_with_dir()
def backup_remove_curmirror_local():
"""Remove the older of the current_mirror files. Use at end of session"""
assert Globals.rbdir.conn is Globals.local_connection
curmir_incs = restore.get_inclist(Globals.rbdir.append("current_mirror"))
assert len(curmir_incs) == 2
if curmir_incs[0].getinctime() < curmir_incs[1].getinctime():
older_inc = curmir_incs[0]
else: older_inc = curmir_incs[1]
C.sync() # Make sure everything is written before curmirror is removed
older_inc.sync_delete()
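Together, backup_touch_curmirror_local and backup_remove_curmirror_local form a small marker protocol: touch the new current_mirror file before any other destination writes, sync, and remove the older marker only once the session is complete, so an interrupted run leaves two markers behind.  A stripped-down sketch of the idea with plain os calls and a placeholder for the real work (not the rdiff-backup API):

import os

def do_backup_writes():
    pass    # placeholder for the actual mirror/increment writing

def run_session(marker_dir, new_marker_name):
    old_markers = os.listdir(marker_dir)                            # zero or one old marker
    open(os.path.join(marker_dir, new_marker_name), "w").close()    # touch before writing
    do_backup_writes()                                              # all real work happens here
    for name in old_markers:                                        # two markers remain only if we died above
        os.unlink(os.path.join(marker_dir, name))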
def Restore(src_rp, dest_rp = None):
"""Main restoring function
......@@ -366,6 +379,7 @@ def restore_common(rpin, target, time):
if target.conn.os.getuid() == 0:
SetConnections.UpdateGlobal('change_ownership', 1)
mirror_root, index = restore_get_root(rpin)
restore_check_backup_dir(mirror_root)
mirror = mirror_root.new_index(index)
inc_rpath = datadir.append_path('increments', index)
restore_init_select(mirror_root, target)
......@@ -404,6 +418,17 @@ Try restoring from an increment file (the filenames look like
"specify --force to overwrite." % rpout.path)
return rpin, rpout
def restore_check_backup_dir(rpin):
"""Make sure backup dir root rpin is in consistent state"""
result = checkdest_need_check(rpin)
if result is None:
Log.FatalError("%s does not appear to be an rdiff-backup directory."
% (rpin.path,))
elif result == 1: Log.FatalError(
"Previous backup to %s seems to have failed."
"Rerun rdiff-backup with --check-destination-dir option to revert"
"directory to state before unsuccessful session." % (rpin.path,))
def restore_init_select(rpin, rpout):
"""Initialize Select
......@@ -465,6 +490,7 @@ def restore_get_root(rpin):
def ListIncrements(rp):
"""Print out a summary of the increments and their times"""
mirror_root, index = restore_get_root(rp)
restore_check_backup_dir(mirror_root)
mirror_rp = mirror_root.new_index(index)
inc_rpath = Globals.rbdir.append_path('increments', index)
incs = restore.get_inclist(inc_rpath)
......@@ -484,11 +510,7 @@ def CalculateAverage(rps):
def RemoveOlderThan(rootrp):
"""Remove all increment files older than a certain time"""
datadir = rootrp.append_path("rdiff-backup-data")
if not datadir.lstat() or not datadir.isdir():
Log.FatalError("Unable to open rdiff-backup-data dir %s" %
(datadir.path,))
rom_check_dir(rootrp)
try: time = Time.genstrtotime(remove_older_than_string)
except Time.TimeException, exc: Log.FatalError(str(exc))
timep = Time.timetopretty(time)
......@@ -512,13 +534,56 @@ def RemoveOlderThan(rootrp):
else: Log("Deleting increments at times:\n" + inc_pretty_time, 3)
manage.delete_earlier_than(datadir, time)
def rom_check_dir(rootrp):
"""Check destination dir before RemoveOlderThan"""
SetConnections.UpdateGlobal('rbdir',
rootrp.append_path("rdiff-backup-data"))
if not Globals.rbdir.isdir():
Log.FatalError("Unable to open rdiff-backup-data dir %s" %
(Globals.rbdir.path,))
checkdest_if_necessary(rootrp)
def ListChangedSince(rp):
"""List all the files under rp that have changed since restoretime"""
try: rest_time = Time.genstrtotime(restore_timestr)
except Time.TimeException, exc: Log.FatalError(str(exc))
mirror_root, index = restore_get_root(rp)
restore_check_backup_dir(mirror_root)
mirror_rp = mirror_root.new_index(index)
inc_rp = mirror_rp.append_path("increments", index)
restore.ListChangedSince(mirror_rp, inc_rp, rest_time)
def CheckDest(dest_rp):
"""Check the destination directory, """
need_check = checkdest_need_check(dest_rp)
if need_check is None:
Log.FatalError("No destination dir found at %s" % (dest_rp.path,))
elif need_check == 0:
Log.FatalError("Destination dir %s does not need checking" %
(dest_rp.path,))
regress.Regress(dest_rp)
def checkdest_need_check(dest_rp):
"""Return None if no dest dir found, 1 if dest dir needs check, 0 o/w"""
assert dest_rp.conn is Globals.rbdir.conn
if not dest_rp.isdir() or not Globals.rbdir.isdir(): return None
curmirroot = Globals.rbdir.append("current_mirror")
curmir_incs = restore.get_inclist(curmirroot)
if not curmir_incs: return None
elif len(curmir_incs) == 1: return 0
else:
assert len(curmir_incs) == 2, "Found too many current_mirror incs!"
return 1
def checkdest_if_necessary(dest_rp):
"""Check the destination dir if necessary.
This can/should be run before an incremental backup.
"""
need_check = checkdest_need_check(dest_rp)
if need_check == 1:
Log("Previous backup seems to have failed, checking now.", 2)
regress.Regress(dest_rp)
......@@ -56,7 +56,7 @@ def write_via_tempfile(fp, rp):
"""Write fileobj fp to rp by writing to tempfile and renaming"""
tf = TempFile.new(rp)
tf.write_from_fileobj(fp)
tf.rename(rp)
rpath.rename(tf, rp)
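write_via_tempfile now leans on rpath.rename for the usual write-then-rename idiom, so readers never see a partially written file.  The same idiom with plain library calls (a generic sketch; the ".tmp" naming is made up, TempFile picks genuinely unused names):

import os

def atomic_replace(path, data):
    tmp = path + ".tmp"              # hypothetical temp name
    fp = open(tmp, "wb")
    fp.write(data)
    fp.flush()
    os.fsync(fp.fileno())            # contents must reach disk before the rename
    fp.close()
    os.rename(tmp, path)             # atomic on POSIX within one filesystem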
def patch_local(rp_basis, rp_delta, outrp = None, delta_compressed = None):
"""Patch routine that must be run locally, writes to outrp
......
......@@ -17,56 +17,37 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Manage temp files"""
"""Manage temp files
Earlier this had routines for keeping track of existing tempfiles.
Now we just use normal rpaths instead of the TempFile class.
"""
import os
import Globals, rpath
# This is a connection-specific list of temp files, to be cleaned
# up before rdiff-backup exits.
_tempfiles = []
# To make collisions less likely, this gets put in the file name
# and incremented whenever a new file is requested.
_tfindex = 0
def new(rp_base, same_dir = 1):
"""Return new tempfile that isn't in use.
If same_dir, tempfile will be in same directory as rp_base.
Otherwise, use tempfile module to get filename.
"""
conn = rp_base.conn
if conn is not Globals.local_connection:
return conn.TempFile.new(rp_base, same_dir)
def find_unused(conn, dir):
"""Find an unused tempfile with connection conn in directory dir"""
global _tfindex, tempfiles
while 1:
if _tfindex > 100000000:
Log("Resetting index", 2)
_tfindex = 0
tf = TempFile(conn, os.path.join(dir,
"rdiff-backup.tmp.%d" % _tfindex))
_tfindex = _tfindex+1
if not tf.lstat(): return tf
def new(rp_base):
"""Return new tempfile that isn't in use in same dir as rp_base"""
return new_in_dir(rp_base.get_parent_rp())
if same_dir: tf = find_unused(conn, rp_base.dirsplit()[0])
else: tf = TempFile(conn, tempfile.mktemp())
_tempfiles.append(tf)
return tf
def new_in_dir(dir_rp):
"""Return new temp rpath in directory dir_rp"""
global _tfindex
assert dir_rp.conn is Globals.local_connection
while 1:
if _tfindex > 100000000:
Log("Warning: Resetting tempfile index", 2)
_tfindex = 0
tf = dir_rp.append('rdiff-backup.tmp.%d' % _tfindex)
_tfindex = _tfindex+1
if not tf.lstat(): return tf
def remove_listing(tempfile):
"""Remove listing of tempfile"""
if Globals.local_connection is not tempfile.conn:
tempfile.conn.TempFile.remove_listing(tempfile)
elif tempfile in _tempfiles: _tempfiles.remove(tempfile)
def delete_all():
"""Delete all remaining tempfiles"""
for tf in _tempfiles[:]: tf.delete()
class TempFile(rpath.RPath):
......
......@@ -213,7 +213,7 @@ class PatchITRB(rorpiter.ITRBranch):
rp = self.get_rp_from_root(index)
tf = TempFile.new(rp)
self.patch_to_temp(rp, diff_rorp, tf)
tf.rename(rp)
rpath.rename(tf, rp)
def patch_to_temp(self, basis_rp, diff_rorp, new):
"""Patch basis_rp, writing output in new, which doesn't exist yet"""
......@@ -260,7 +260,7 @@ class PatchITRB(rorpiter.ITRBranch):
else:
assert self.dir_replacement
self.base_rp.rmdir()
self.dir_replacement.rename(self.base_rp)
rpath.rename(self.dir_replacement, self.base_rp)
class IncrementITRB(PatchITRB):
......@@ -286,7 +286,7 @@ class IncrementITRB(PatchITRB):
tf = TempFile.new(rp)
self.patch_to_temp(rp, diff_rorp, tf)
increment.Increment(tf, rp, self.get_incrp(index))
tf.rename(rp)
rpath.rename(tf, rp)
def start_process(self, index, diff_rorp):
"""Start processing directory"""
......
......@@ -36,6 +36,7 @@ static PyObject *UnknownFileTypeError;
static PyObject *c_make_file_dict(PyObject *self, PyObject *args);
static PyObject *long2str(PyObject *self, PyObject *args);
static PyObject *str2long(PyObject *self, PyObject *args);
static PyObject *my_sync(PyObject *self, PyObject *args);
/* Turn a stat structure into a python dictionary. The preprocessor
......@@ -179,6 +180,15 @@ static PyObject *long2str(self, args)
}
/* Run sync() and return None */
static PyObject *my_sync(self, args)
PyObject *self;
PyObject *args;
{
sync();
return Py_BuildValue("");
}
/* Reverse of above; convert 7 byte string into python long */
static PyObject *str2long(self, args)
PyObject *self;
......@@ -201,6 +211,7 @@ static PyMethodDef CMethods[] = {
"Make dictionary from file stat"},
{"long2str", long2str, METH_VARARGS, "Convert python long to 7 byte string"},
{"str2long", str2long, METH_VARARGS, "Convert 7 byte string to python long"},
{"sync", my_sync, METH_VARARGS, "sync buffers to disk"},
{NULL, NULL, 0, NULL}
};
......
......@@ -95,7 +95,6 @@ class LowLevelPipeConnection(Connection):
f - file object
b - string
q - quit signal
t - TempFile
R - RPath
r - RORPath only
c - PipeConnection object
......@@ -121,8 +120,6 @@ class LowLevelPipeConnection(Connection):
Log.conn("sending", obj, req_num)
if type(obj) is types.StringType: self._putbuf(obj, req_num)
elif isinstance(obj, connection.Connection):self._putconn(obj, req_num)
elif isinstance(obj, TempFile.TempFile):
self._puttempfile(obj, req_num)
elif isinstance(obj, rpath.RPath): self._putrpath(obj, req_num)
elif isinstance(obj, rpath.RORPath): self._putrorpath(obj, req_num)
elif ((hasattr(obj, "read") or hasattr(obj, "write"))
......@@ -148,12 +145,6 @@ class LowLevelPipeConnection(Connection):
self._write("i", str(VirtualFile.new(rorpiter.ToFile(iterator))),
req_num)
def _puttempfile(self, tempfile, req_num):
"""Put a tempfile into pipe. See _putrpath"""
tf_repr = (tempfile.conn.conn_number, tempfile.base,
tempfile.index, tempfile.data)
self._write("t", cPickle.dumps(tf_repr, 1), req_num)
def _putrpath(self, rpath, req_num):
"""Put an rpath into the pipe
......@@ -235,7 +226,6 @@ class LowLevelPipeConnection(Connection):
elif format_string == "i":
result = rorpiter.FromFile(iterfile.BufferedRead(
VirtualFile(self, int(data))))
elif format_string == "t": result = self._gettempfile(data)
elif format_string == "r": result = self._getrorpath(data)
elif format_string == "R": result = self._getrpath(data)
else:
......@@ -249,12 +239,6 @@ class LowLevelPipeConnection(Connection):
index, data = cPickle.loads(raw_rorpath_buf)
return rpath.RORPath(index, data)
def _gettempfile(self, raw_tf_buf):
"""Return TempFile object indicated by raw_tf_buf"""
conn_number, base, index, data = cPickle.loads(raw_tf_buf)
return TempFile.TempFile(Globals.connection_dict[conn_number],
base, index, data)
def _getrpath(self, raw_rpath_buf):
"""Return RPath object indicated by raw_rpath_buf"""
conn_number, base, index, data = cPickle.loads(raw_rpath_buf)
......
......@@ -17,6 +17,9 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
# UPDATE: I have decided not to use journaling and use the regress
# stuff exclusively. This code is left here for posterity.
"""Application level journaling for better error recovery
This module has routines for maintaining a "journal" to keep track of
......@@ -49,12 +52,15 @@ Two caveats:
"""
import Globals, log, rpath, cPickle, TempFile
import Globals, log, rpath, cPickle, TempFile, os, restore
# Holds an rpath of the journal directory and a file object opened on it
journal_dir_rp = None
journal_dir_fp = None
# Set to time in seconds of previous aborted backup
unsuccessful_backup_time = None
def open_journal():
"""Make sure the journal dir exists (creating it if necessary)"""
global journal_dir_rp, journal_dir_fp
......@@ -74,7 +80,12 @@ def close_journal():
journal_dir_rp = journal_dir_fp = None
def sync_journal():
"""fsync the journal directory"""
"""fsync the journal directory.
Note that fsync'ing a particular entry file may also be required
to guarantee writes have been committed.
"""
journal_dir_rp.fsync(journal_dir_fp)
def recover_journal():
......@@ -94,74 +105,91 @@ def get_entries_from_journal():
else: entry_list.append(e)
return entry_list
def write_entry(test_filename, test_filename_type,
increment_filename, temp_filename):
def write_entry(index, temp_index, testfile_option, testfile_type):
"""Write new entry given variables into journal, return entry"""
e = Entry()
e.test_filename = test_filename
e.test_filename_type = test_filename_type
e.increment_filename = increment_filename
e.temp_filename = temp_filename
e.index = index
e.temp_index = temp_index
e.testfile_option = testfile_option
e.testfile_type = testfile_type
e.write()
return e
def remove_entry(entry_rp):
"""Remove the entry in entry_rp from the journal"""
entry_rp.delete()
sync_journal()
class Entry:
"""A single journal entry, describing one transaction
Although called a journal entry, this is less a description of
what is going to happen than a short recipe of what to do if
what is going to happen than a short recipe of how to recover if
something goes wrong.
Currently the recipe needs to be very simple and is determined by
the four variables test_filename, test_filename_type,
increment_filename, and temp_filename. See the recover() method
for details.
the four variables index, temp_index, testfile_option, and
testfile_type.  See the recover() method for details.
"""
test_filename = None
test_filename_type = None # None is a valid value for this variable
increment_filename = None
temp_filename = None
index = None
temp_index = None
testfile_option = None
testfile_type = None # None is a valid value for this variable
# This holds the rpath in the journal dir that holds self
# This points to the rpath in the journal dir that holds this entry
entry_rp = None
def recover(self):
"""Recover the current journal entry
See if test_filename matches test_filename_type. If so,
delete increment_filename. Delete temp_filename regardless.
self.testfile_option has 3 possibilities:
1 - testfile is mirror file
2 - testfile is increment file
3 - testfile is temp file
In each case, see if the type of the test file matches
testfile_type.  If so, delete the increment file.  Delete the
temp file regardless.
We express things in terms of indices because we need paths
relative to a fixed directory (like Globals.dest_root).
It's OK to recover the same entry multiple times.
"""
assert self.test_filename and self.temp_filename
test_rp = rpath.RPath(Globals.local_connection, self.test_filename)
temp_rp = rpath.RPath(Globals.local_connection, self.temp_filename)
inc_rp = rpath.RPath(Globals.local_connection, self.increment_filename)
if test_rp.lstat() == self.test_filename_type:
if inc_rp.lstat():
inc_rp.delete()
inc_rp.get_parent_rp().fsync()
if temp_rp.lstat():
temp_rp.delete()
temp_rp.get_parent_rp().fsync()
assert self.index is not None and self.temp_index is not None
mirror_rp = Globals.dest_root.new_index(self.index)
if self.temp_index:
temp_rp = Globals.dest_root.new_index(self.temp_index)
else: temp_rp = None
inc_rp = self.get_inc()
assert 1 <= self.testfile_option <= 3
if self.testfile_option == 1: test_rp = mirror_rp
elif self.testfile_option == 2: test_rp = inc_rp
else: test_rp = temp_rp
if test_rp and test_rp.lstat() == self.testfile_type:
if inc_rp and inc_rp.lstat(): inc_rp.sync_delete()
if temp_rp and temp_rp.lstat(): temp_rp.sync_delete()
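Entries record indices rather than absolute paths, so the same entry can be resolved against whichever root matters at recovery time (Globals.dest_root above, the increments directory in get_inc below).  Roughly, with a hypothetical resolve() helper standing in for RPath.new_index():

import os

def resolve(root, index):
    """index is a tuple of path components relative to root; () means root itself"""
    if not index: return root
    return os.path.join(root, *index)

# resolve("/backup", ("dir", "file")) == "/backup/dir/file"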
def get_inc(self):
"""Return inc_rpath, if any, corresponding to self.index"""
incroot = Globals.rbdir.append_path("increments")
incbase = incroot.new_index(self.index)
inclist = restore.get_inclist(incbase)
inclist = filter(lambda inc:
inc.getinctime() == unsuccessful_backup_time, inclist)
assert len(inclist) <= 1
if inclist: return inclist[0]
else: return None
def to_string(self):
"""Return string form of entry"""
return cPickle.dumps({'test_filename': self.test_filename,
'test_filename_type': self.test_filename_type,
'increment_filename': self.increment_filename,
'temp_filename': self.temp_filename})
return cPickle.dumps({'index': self.index,
'testfile_option': self.testfile_option,
'testfile_type': self.testfile_type,
'temp_index': self.temp_index})
def write(self):
"""Write the current entry into the journal"""
entry_rp = TempFile.new(journal_dir_rp.append("foo"))
entry_rp = TempFile.new_in_dir(journal_dir_rp)
fp = entry_rp.open("wb")
fp.write(self.to_string())
entry_rp.fsync(fp)
......@@ -174,10 +202,10 @@ class Entry:
try: val_dict = cPickle.loads(s)
except cPickle.UnpicklingError: return 0
try:
self.test_filename = val_dict['test_filename']
self.test_filename_type = val_dict['test_filename_type']
self.increment_filename = val_dict['increment_filename']
self.temp_filename = val_dict['temp_filename']
self.index = val_dict['index']
self.testfile_type = val_dict['testfile_type']
self.testfile_option = val_dict['testfile_option']
self.temp_index = val_dict['temp_index']
except (TypeError, KeyError): return 0
return 1
......@@ -191,5 +219,4 @@ class Entry:
def delete(self):
"""Remove entry from the journal. self.entry_rp must be set"""
self.entry_rp.delete()
sync_journal()
self.entry_rp.sync_delete()
......@@ -55,7 +55,7 @@ field names and values.
"""
from __future__ import generators
import re, gzip
import re, gzip, os
import log, Globals, rpath, Time, robust, increment
class ParsingError(Exception):
......@@ -280,6 +280,9 @@ def WriteMetadata(rorp):
def CloseMetadata():
"""Close the metadata file"""
global metadata_rp, metadata_fileobj
try: fileno = metadata_fileobj.fileno() # will not work if GzipFile
except AttributeError: fileno = metadata_fileobj.fileobj.fileno()
os.fsync(fileno)
result = metadata_fileobj.close()
metadata_fileobj = None
metadata_rp.setdata()
......
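The fsync added to CloseMetadata needs the descriptor of the real file on disk, which is why it falls back to metadata_fileobj.fileobj when the writer is a GzipFile.  A way to keep that descriptor in hand from the start, sketched with the standard gzip module (illustrative, not the metadata module's own code):

import gzip, os

raw = open("mirror_metadata.snapshot.gz", "wb")     # keep the real file object
gz = gzip.GzipFile(fileobj = raw, mode = "wb")      # compressed writer on top of it
gz.write("example metadata record\n")
gz.close()                     # flush the compressed stream first
os.fsync(raw.fileno())         # then sync the underlying descriptor
raw.close()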
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Code for reverting the rdiff-backup directory to prev state
This module is used after an aborted session, and the rdiff-backup
destination directory may be in-between states. In this situation we
need to bring back the directory as it was after the last successful
backup. The basic strategy is to restore all the attributes from the
metadata file (which we assume is intact) and delete the extra
increments. For regular files we examine the mirror file and use the
increment file to get the old data if the mirror file is out of date.
Currently this does not recover hard links.  This may make the regressed
directory take up more disk space, but hard links can still be
recovered.
"""
from __future__ import generators
import os
import Globals, Time, restore, metadata, rpath, log, rorpiter, journal, TempFile
# regress_time should be set to the time we want to regress back to
# (usually the time of the last successful backup)
regress_time = None
# This should be set to the latest unsuccessful backup time
unsuccessful_backup_time = None
class RegressException(Exception):
"""Raised on any exception in regress process"""
pass
def Regress(mirror_rp):
"""Bring mirror and inc directory back to regress_to_time
Also affects the rdiff-backup-data directory, so Globals.rbdir
should be set. Regress should only work one step at a time
(i.e. don't "regress" through two separate backup sets. This
function should be run locally to the rdiff-backup-data directory.
"""
inc_rpath = Globals.rbdir.append_path("increments")
assert mirror_rp.index == () and inc_rpath.index == ()
assert mirror_rp.isdir() and inc_rpath.isdir()
assert mirror_rp.conn is inc_rpath.conn is Globals.local_connection
set_regress_time()
set_restore_times()
def set_regress_time():
"""Set global regress_time to previous sucessful backup
If there are two current_mirror increments, then the last one
corresponds to a backup session that failed.
"""
global regress_time, unsuccessful_backup_time
curmir_incs = restore.get_inclist(Globals.rbdir.append("current_mirror"))
assert len(curmir_incs) == 2, \
"Found %s current_mirror flags, expected 2" % len(curmir_incs)
inctimes = [inc.getinctime() for inc in curmir_incs]
inctimes.sort()
regress_time = inctimes[0]
unsuccessful_backup_time = inctimes[-1]
log.Log("Regressing to " + Time.timetopretty(regress_time), 5)
def set_restore_times():
"""Set _rest_time and _mirror_time in the restore module
_rest_time (restore time) corresponds to the last successful
backup time. _mirror_time is the unsuccessful backup time.
"""
restore._mirror_time = unsuccessful_backup_time
restore._rest_time = regress_time
def iterate_raw_rfs(mirror_rp, inc_rp):
"""Iterate all RegressFile objects in mirror/inc directory"""
root_rf = RegressFile(mirror_rp, inc_rp, restore.get_inclist(inc_rp))
def helper(rf):
yield rf
if rf.mirror_rp.isdir() or rf.inc_rp.isdir():
for sub_rf in rf.yield_sub_rfs():
for sub_sub_rf in helper(sub_rf):
yield sub_sub_rf
return helper(root_rf)
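iterate_raw_rfs flattens a recursive directory walk by nesting generators (hence the from __future__ import generators at the top of the module).  The same pattern on a generic tree, with illustrative names that are not part of rdiff-backup:

def walk(node, children):
    """Pre-order traversal: yield the node, then everything beneath it"""
    yield node
    for child in children(node):
        for descendant in walk(child, children):
            yield descendant

tree = {1: [2, 3], 2: [4], 3: [], 4: []}
# list(walk(1, lambda n: tree[n])) == [1, 2, 4, 3]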
def yield_metadata():
"""Iterate rorps from metadata file, if any are available"""
metadata_iter = metadata.GetMetadata_at_time(Globals.rbdir, regress_time)
if metadata_iter: return metadata_iter
log.Log.FatalError("No metadata for time %s found, cannot regress"
% Time.timetopretty(regress_time))
def iterate_meta_rfs(mirror_rp, inc_rp):
"""Yield RegressFile objects with extra metadata information added
Each RegressFile will have an extra object variable .metadata_rorp
which will contain the metadata attributes of the mirror file at
regress_time.
"""
raw_rfs = iterate_raw_rfs(mirror_rp, inc_rp)
collated = rorpiter.Collate2Iters(raw_rfs, yield_metadata())
for raw_rf, metadata_rorp in collated:
raw_rf.set_metadata_rorp(metadata_rorp)
yield raw_rf
class RegressFile(restore.RestoreFile):
"""Like RestoreFile but with metadata
Hold mirror_rp and related incs, but also put metadata info for
the mirror file at regress time in self.metadata_rorp.
self.metadata_rorp is not set by __init__; it is filled in later via set_metadata_rorp().
"""
def __init__(self, mirror_rp, inc_rp, inc_list):
restore.RestoreFile.__init__(self, mirror_rp, inc_rp, inc_list)
assert len(self.relevant_incs) <= 2, "Too many incs"
if len(self.relevant_incs) == 2:
self.regress_inc = self.relevant_incs[-1]
else: self.regress_inc = None
def set_metadata_rorp(self, metadata_rorp):
"""Set self.metadata_rorp, creating empty if given None"""
if metadata_rorp: self.metadata_rorp = metadata_rorp
else: self.metadata_rorp = rpath.RORPath(self.index)
def isdir(self):
"""Return true if regress needs before/after processing"""
return ((self.metadata_rorp and self.metadata_rorp.isdir()) or
(self.mirror_rp and self.mirror_rp.isdir()))
class RegressITRB(rorpiter.ITRBranch):
"""Turn back state of dest directory (use with IterTreeReducer)
The arguments to the ITR will be RegressFiles. There are two main
assumptions this procedure makes (besides those mentioned above):
1. The mirror_rp and the metadata_rorp compare equal under cmp_attribs iff
they contain the same data. If this is the case, then the inc
file is unnecessary and we can delete it.
2. If they don't match, then applying the inc file will
successfully get us back to the previous state.
Since the metadata file is required, the two above really only
matter for regular files.
"""
def __init__(self):
"""Just initialize some variables to None"""
self.rf = None # will hold RegressFile applying to a directory
def can_fast_process(self, index, rf):
"""True if none of the rps is a directory"""
return not rf.mirror_rp.isdir() and not rf.metadata_rorp.isdir()
def fast_process(self, index, rf):
"""Process when nothing is a directory"""
if not rpath.cmp_attribs(rf.metadata_rorp, rf.mirror_rp):
if rf.metadata_rorp.isreg(): self.restore_orig_regfile(rf)
else:
if rf.mirror_rp.lstat(): rf.mirror_rp.delete()
rpath.copy_with_attribs(rf.metadata_rorp, rf.mirror_rp)
if rf.regress_inc: rf.regress_inc.delete()
def restore_orig_regfile(self, rf):
"""Restore original regular file
This is the trickiest case for avoiding information loss,
because we don't want to delete the increment before the
mirror is fully written.
"""
assert rf.metadata_rorp.isreg()
if rf.mirror_rp.isreg():
tf = TempFile.new(rf.mirror_rp)
tf.write_from_fileobj(rf.get_restore_fp())
rpath.copy_attribs(rf.metadata_rorp, tf)
tf.fsync_with_dir() # make sure tf fully written before move
rpath.rename(tf, rf.mirror_rp) # move is atomic
else:
if rf.mirror_rp.lstat(): rf.mirror_rp.delete()
rf.mirror_rp.write_from_fileobj(rf.get_restore_fp())
rpath.copy_attribs(rf.metadata_rorp, rf.mirror_rp)
rf.mirror_rp.fsync_with_dir() # require move before inc delete
def start_process(self, index, rf):
"""Start processing directory"""
if rf.metadata_rorp.isdir():
# make sure mirror is a readable dir
if not rf.mirror_rp.isdir():
if rf.mirror_rp.lstat(): rf.mirror_rp.delete()
rf.mirror_rp.mkdir()
if not rf.mirror_rp.hasfullperms(): rf.mirror_rp.chmod(0700)
self.rf = rf
def end_process(self):
"""Finish processing a directory"""
rf = self.rf
if rf.metadata_rorp.isdir():
if rf.mirror_rp.isdir():
if not rpath.cmp_attribs(rf.metadata_rorp, rf.mirror_rp):
rpath.copy_attribs(rf.metadata_rorp, rf.mirror_rp)
else:
rf.mirror_rp.delete()
rpath.copy_with_attribs(rf.metadata_rorp, rf.mirror_rp)
else: # replacing a dir with some other kind of file
assert rf.mirror_rp.isdir()
if rf.metadata_rorp.isreg(): self.restore_orig_regfile(rf)
else:
rf.mirror_rp.delete()
rpath.copy_with_attribs(rf.metadata_rorp, rf.mirror_rp)
if rf.regress_inc: rf.regress_inc.delete()
def on_error(self, exc, *args):
"""This is run on any exception, raises RegressException
RegressException should be fatal.  We don't want to tolerate
the kinds of errors here that we would when backing up.
"""
if args and args[0] and isinstance(args[0], tuple):
filename = os.path.join(*args[0])
elif self.index: filename = os.path.join(*self.index)
else: filename = "."
log.Log("Error '%s' processing %s" % (exc, filename), 2)
raise RegressException("Error during Regress")
......@@ -401,7 +401,7 @@ class RestoreFile:
else: inc_rp, inc_list = inc_pair
if not mirror_rp:
mirror_rp = self.mirror_rp.new_index(inc_rp.index)
yield RestoreFile(mirror_rp, inc_rp, inc_list)
yield self.__class__(mirror_rp, inc_rp, inc_list)
def yield_mirrorrps(self, mirrorrp):
"""Yield mirrorrps underneath given mirrorrp"""
......
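The switch to yield self.__class__(...) instead of a hard-coded RestoreFile(...) is what lets regress.RegressFile inherit the traversal and still get RegressFile children back.  The pattern in miniature, with hypothetical classes:

class Node:
    def make_child(self):
        return self.__class__()      # builds the subclass when called on a subclass

class SpecialNode(Node): pass

assert isinstance(SpecialNode().make_child(), SpecialNode)
assert isinstance(Node().make_child(), Node)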
......@@ -31,7 +31,7 @@ files), where files is the number of files attached (usually 1 or
from __future__ import generators
import os, tempfile, UserList, types
import librsync, Globals, Rdiff, Hardlink, robust, log, static, \
rpath, iterfile, TempFile
rpath, iterfile
class RORPIterException(Exception): pass
......
......@@ -208,10 +208,14 @@ def rename(rp_source, rp_dest):
(rp_source.path, rp_dest.path), 7)
if not rp_source.lstat(): rp_dest.delete()
else:
rp_source.conn.os.rename(rp_source.path, rp_dest.path)
if rp_dest.lstat() and rp_source.getinode() == rp_dest.getinode():
# You can't rename one hard linked file over another
rp_source.delete()
else: rp_source.conn.os.rename(rp_source.path, rp_dest.path)
rp_dest.data = rp_source.data
rp_source.data = {'type': None}
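The new branch in rename covers a POSIX corner case: renaming one hard link over another name for the same inode is defined to do nothing, so the source name would survive an ordinary rename.  A small demonstration with plain os calls (illustration only, writes into a throwaway temp directory):

import os, tempfile

d = tempfile.mkdtemp()
a = os.path.join(d, "a")
b = os.path.join(d, "b")
open(a, "w").write("data")
os.link(a, b)               # a and b now name the same inode
os.rename(a, b)             # POSIX: both names refer to one file, so this is a no-op
assert os.path.exists(a)    # the source name is still there
os.unlink(a)                # removing the source is what actually completes the move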
def tupled_lstat(filename):
"""Like os.lstat, but return only a tuple, or None if os.error
......@@ -872,6 +876,25 @@ class RPath(RORPath):
assert not fp.close()
else: os.fsync(fp.fileno())
def fsync_with_dir(self, fp = None):
"""fsync self and directory self is under"""
self.fsync(fp)
self.get_parent_rp().fsync()
def sync_delete(self):
"""Delete self with sync to guarantee completion
On some filesystems (like Linux's ext2), we must sync both the
file and its directory to make sure the deletion has reached the disk.
"""
if self.lstat() and not self.issym():
fp = self.open("rb")
self.delete()
os.fsync(fp.fileno())
assert not fp.close()
self.get_parent_rp().fsync()
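fsync_with_dir and sync_delete both rest on the same durability rule: flushing a file's contents is not enough if the directory entry that names it has not reached disk.  A generic sketch of syncing both on Linux, using plain os calls rather than RPath methods:

import os

def durable_write(path, data):
    fp = open(path, "wb")
    fp.write(data)
    fp.flush()
    os.fsync(fp.fileno())                                # file contents on disk
    fp.close()
    dirfd = os.open(os.path.dirname(path) or ".", os.O_RDONLY)
    try: os.fsync(dirfd)                                 # directory entry on disk too
    finally: os.close(dirfd)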
def get_data(self):
"""Open file as a regular file, read data, close, return data"""
fp = self.open("rb")
......@@ -895,4 +918,3 @@ class RPathFileHook:
self.closing_thunk()
return result
......@@ -20,7 +20,7 @@
"""Generate and process aggregated backup information"""
import re, os, time
import Globals, TempFile, robust, Time, rorpiter, increment
import Globals, robust, Time, rorpiter, increment
class StatsException(Exception): pass
......
......@@ -6,20 +6,26 @@ class JournalTest(unittest.TestCase):
def testBasic(self):
"""Test opening a journal, then reading, writing, and deleting"""
MakeOutputDir()
Globals.rbdir = rpath.RPath(Globals.local_connection,
"testfiles/output")
Globals.dest_root = rpath.RPath(Globals.local_connection,
"testfiles/output")
Globals.rbdir = Globals.dest_root.append("rdiff-backup-data")
Globals.rbdir.mkdir()
journal.open_journal()
assert len(journal.get_entries_from_journal()) == 0
# It's important that none of these files really exist
e1 = journal.write_entry("Hello48", "reg", "inc_file3917", "t39p")
e2 = journal.write_entry("2nd_euoeuo", None, "inc_file4832", "l389")
e1 = journal.write_entry(("Hello48",), ("temp_index", "foo"),
2, "reg")
e2 = journal.write_entry(("2nd", "Entry", "now"),
("temp_index",), 1, None)
assert e1.entry_rp and e2.entry_rp
l = journal.get_entries_from_journal()
assert len(l) == 2
first_filename = l[0].test_filename
assert first_filename == "Hello48" or first_filename == "2nd_euoeuo"
first_index = l[0].index
assert (first_index == ("Hello48",) or
first_index == ("2nd", "Entry", "now"))
# Now test recovering journal, and make sure everything deleted
journal.recover_journal()
......
"""regresstest - test the regress module. Not to be confused with the
regression tests."""
import unittest
from commontest import *
class RegressTest(unittest.TestCase):
XXX
if __name__ == "__main__": unittest.main()
......@@ -32,20 +32,6 @@ class TimeTest(unittest.TestCase):
assert cmp("2001-09-01T12:00:00-08:00",
"2001-09-01T12:00:00-07:00") == 1
def testCmp_separator(self):
"""Like testCmp but with new separator"""
Globals.time_separator = "_"
cmp = Time.cmp
assert cmp(1,2) == -1
assert cmp(2,2) == 0
assert cmp(5,1) == 1
assert cmp("2001-09-01T21_49_04Z", "2001-08-01T21_49_04Z") == 1
assert cmp("2001-09-01T04_49_04+03_23", "2001-09-01T21_49_04Z") == -1
assert cmp("2001-09-01T12_00_00Z", "2001-09-01T04_00_00-08_00") == 0
assert cmp("2001-09-01T12_00_00-08_00",
"2001-09-01T12_00_00-07_00") == 1
Globals.time_separator = ":"
def testStringtotime(self):
"""Test converting string to time"""
timesec = int(time.time())
......