Commit 4848f4fb authored by ben

Removed all files from src/ directory, added to rdiff_backup/


git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup@237 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent 72e15c94
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Coordinate corresponding files with different names
For instance, some source filenames may contain characters not allowed
on the mirror end. Also, if a source filename is very long (say 240
characters), the extra characters added to related increments may put
them over the usual 255 character limit.
"""
import re
from log import *
from robust import *
import Globals
max_filename_length = 255
# If set, enable character quoting; the characters to quote are given
# as a regex-style range.
chars_to_quote = None
# These compiled regular expressions are used in quoting and unquoting
chars_to_quote_regexp = None
unquoting_regexp = None
# Use given char to quote. Default is set in Globals.
quoting_char = None

def set_init_quote_vals():
    """Set quoting value from Globals on all conns"""
    for conn in Globals.connections:
        conn.FilenameMapping.set_init_quote_vals_local()

def set_init_quote_vals_local():
    """Set value on local connection, initialize regexps"""
    global chars_to_quote, quoting_char
    chars_to_quote = Globals.chars_to_quote
    if len(Globals.quoting_char) != 1:
        Log.FatalError("Expected single character for quoting char, "
                       "got '%s' instead" % (Globals.quoting_char,))
    quoting_char = Globals.quoting_char
    init_quoting_regexps()

def init_quoting_regexps():
    """Compile quoting regular expressions"""
    global chars_to_quote_regexp, unquoting_regexp
    try:
        chars_to_quote_regexp = \
            re.compile("[%s%s]" % (chars_to_quote, quoting_char), re.S)
        unquoting_regexp = re.compile("%s[0-9]{3}" % quoting_char, re.S)
    except re.error, e:
        Log.FatalError("Error '%s' when processing char quote list %s" %
                       (e, chars_to_quote))

def quote(path):
    """Return quoted version of given path

    Any characters quoted will be replaced by the quoting char and
    the ascii number of the character. For instance, "10:11:12"
    would go to "10;05811;05812" if ":" were quoted and ";" were
    the quoting character.
    """
    return chars_to_quote_regexp.sub(quote_single, path)

def quote_single(match):
    """Return replacement for a single character"""
    return "%s%03d" % (quoting_char, ord(match.group()))

def unquote(path):
    """Return original version of quoted filename"""
    return unquoting_regexp.sub(unquote_single, path)

def unquote_single(match):
    """Unquote a single quoted character"""
    assert len(match.group()) == 4
    return chr(int(match.group()[1:]))
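
# Illustrative round trip (values taken from the quote() docstring,
# assuming ":" is in chars_to_quote and ";" is the quoting char):
#
#   quote("10:11:12")         => "10;05811;05812"
#   unquote("10;05811;05812") => "10:11:12"
#
# ord(":") is 58, so each quoted ":" becomes ";058".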

def get_quoted_dir_children(rpath):
    """For rpath directory, return list of quoted children in dir"""
    if not rpath.isdir(): return []
    dir_pairs = [(unquote(filename), filename)
                 for filename in Robust.listrp(rpath)]
    dir_pairs.sort() # sort by real index, not quoted part
    child_list = []
    for unquoted, filename in dir_pairs:
        childrp = rpath.append(unquoted)
        childrp.quote_path()
        child_list.append(childrp)
    return child_list
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Hold a variety of constants usually set at initialization."""
import re, os
# The current version of rdiff-backup
version = "$version"
# If this is set, use this value in seconds as the current time
# instead of reading it from the clock.
current_time = None
# This determines how many bytes to read at a time when copying
blocksize = 32768
# This is used by the BufferedRead class to determine how many
# bytes to request from the underlying file per read(). Larger
# values may save on connection overhead and latency.
conn_bufsize = 98304
# True if script is running as a server
server = None
# uid and gid of the owner of the rdiff-backup process. This can
# vary depending on the connection.
process_uid = os.getuid()
process_gid = os.getgid()
# If true, when copying attributes, also change target's uid/gid
change_ownership = None
# If true, change the permissions of unwriteable mirror files
# (such as directories) so that they can be written, and then
# change them back. This defaults to 1 just in case the process
# is not running as root (root doesn't need to change
# permissions).
change_mirror_perms = (process_uid != 0)
# If true, temporarily change permissions of unreadable files in
# the source directory to make sure we can read all files.
change_source_perms = None
# If true, try to reset the atimes of the source partition.
preserve_atime = None
# This will be set as soon as the LocalConnection class loads
local_connection = None
# All connections should be added to the following list, so
# further global changes can be propagated to the remote systems.
# The first element should be Globals.local_connection. For a
# server, the second is the connection to the client.
connections = []
# Each process should have a connection number unique to the
# session. The client has connection number 0.
connection_number = 0
# Dictionary pairing connection numbers with connections. Set in
# SetConnections for all connections.
connection_dict = {}
# True if the script is the end that reads the source directory
# for backups. It is true for purely local sessions.
isbackup_reader = None
# Connection of the real backup reader (for which isbackup_reader
# is true)
backup_reader = None
# True if the script is the end that writes to the increment and
# mirror directories. True for purely local sessions.
isbackup_writer = None
# Connection of the backup writer
backup_writer = None
# Connection of the client
client_conn = None
# This list is used by the set function below. When a new
# connection is created with init_connection, its Globals class
# will match this one for all the variables mentioned in this
# list.
changed_settings = []
# rdiff-backup will try to checkpoint its state every
# checkpoint_interval seconds. Then when resuming, at most this
# amount of time is lost.
checkpoint_interval = 20
# The RPath of the rdiff-backup-data directory.
rbdir = None
# Indicates if a resume or a lack of resume is forced. This
# should be None for the default. 0 means don't resume, and 1
# means resume.
resume = None
# If there has been an aborted backup fewer than this many seconds
# ago, attempt to resume it where it left off instead of starting
# a new one.
resume_window = 7200
# This string is used when recognizing and creating time strings.
# If the time_separator is ":", then W3 datetime strings like
# 2001-12-07T04:22:01-07:00 are produced. It can be set to "_" to
# make filenames that don't contain colons, which aren't allowed
# under MS windows NT.
time_separator = ":"
# quoting_enabled is true if we should quote certain characters in
# filenames on the source side (see FilenameMapping for more
# info). chars_to_quote is a string whose characters should be
# quoted, and quoting_char is the character to quote with.
quoting_enabled = None
chars_to_quote = ""
quoting_char = ';'
# If true, emit output intended to be easily readable by a
# computer. False means output is intended for humans.
parsable_output = None
# If true, then hardlinks will be preserved to mirror and recorded
# in the increments directory. There is also a difference here
# between None and 0. When restoring, None or 1 means to preserve
# hardlinks iff can find a hardlink dictionary. 0 means ignore
# hardlink information regardless.
preserve_hardlinks = 1
# If this is false, then rdiff-backup will not compress any
# increments. Default is to compress based on regexp below.
compression = 1
# Increments based on files whose names match this
# case-insensitive regular expression won't be compressed (applies
# to .snapshots and .diffs). The second below will be the
# compiled version of the first.
no_compression_regexp_string = "(?i).*\\.(gz|z|bz|bz2|tgz|zip|rpm|deb|" \
    "jpg|gif|png|jp2|mp3|ogg|avi|wmv|mpeg|mpg|rm|mov)$"
no_compression_regexp = None
# If true, filelists and directory statistics will be split on
# nulls instead of newlines.
null_separator = None
# Determines whether or not ssh will be run with the -C switch
ssh_compression = 1
# If true, print statistics after successful backup
print_statistics = None
# On the reader and writer connections, the following will be
# replaced by the source and mirror Select objects respectively.
select_source, select_mirror = None, None
# On the backup writer connection, holds the root incrementing branch
# object. Access is provided to increment error counts.
ITRB = None
# Percentage of time to spend sleeping. None means never sleep.
sleep_ratio = None
# security_level has 4 values and controls which requests from remote
# systems will be honored. "all" means anything goes. "read-only"
# means that the requests must not write to disk. "update-only" means
# that requests shouldn't destructively update the disk (but normal
# incremental updates are OK). "minimal" means only listen to a few
# basic requests.
security_level = "all"
# If this is set, it indicates that the remote connection should only
# deal with paths inside of restrict_path.
restrict_path = None

def get(name):
    """Return the value of something in this module"""
    return globals()[name]

def is_not_None(name):
    """Returns true if value is not None"""
    return globals()[name] is not None

def set(name, val):
    """Set the value of something in this module

    Use this instead of writing the values directly if the setting
    matters to remote sides. This function updates the
    changed_settings list, so other connections know to copy the
    changes.
    """
    changed_settings.append(name)
    globals()[name] = val
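
# Example: the command line parser in Main sets options this way, e.g.
#   Globals.set('chars_to_quote', arg)
# which records the name in changed_settings so that connections
# created later can have their Globals matched to this one.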

def set_integer(name, val):
    """Like set, but make sure val is an integer"""
    try: intval = int(val)
    except ValueError:
        Log.FatalError("Variable %s must be set to an integer -\n"
                       "received %s instead." % (name, val))
    set(name, intval)

def set_float(name, val, min = None, max = None, inclusive = 1):
    """Like set, but make sure val is float within given bounds"""
    def error():
        s = "Variable %s must be set to a float" % (name,)
        if min is not None and max is not None:
            s += " between %s and %s " % (min, max)
            if inclusive: s += "inclusive"
            else: s += "not inclusive"
        elif min is not None or max is not None:
            if inclusive: inclusive_string = "or equal to "
            else: inclusive_string = ""
            if min is not None:
                s += " greater than %s%s" % (inclusive_string, min)
            else: s += " less than %s%s" % (inclusive_string, max)
        Log.FatalError(s)

    try: f = float(val)
    except ValueError: error()
    if min is not None:
        if inclusive and f < min: error()
        elif not inclusive and f <= min: error()
    if max is not None:
        if inclusive and f > max: error()
        elif not inclusive and f >= max: error()
    set(name, f)
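
# Example (from the option parser): --sleep-ratio must lie strictly
# between 0 and 1, which is expressed as
#   Globals.set_float("sleep_ratio", arg, 0, 1, inclusive=0)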

def get_dict_val(name, key):
    """Return val from dictionary in this module"""
    return globals()[name][key]

def set_dict_val(name, key, val):
    """Set value for dictionary in this module"""
    globals()[name][key] = val

def postset_regexp(name, re_string, flags = None):
    """Compile re_string on all existing connections, set to name"""
    for conn in connections:
        conn.Globals.postset_regexp_local(name, re_string, flags)

def postset_regexp_local(name, re_string, flags):
    """Set name to compiled re_string locally"""
    if flags: globals()[name] = re.compile(re_string, flags)
    else: globals()[name] = re.compile(re_string)
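
# Example: Main.misc_setup() distributes the compression filter with
#   Globals.postset_regexp('no_compression_regexp',
#                          Globals.no_compression_regexp_string)
# so each connection compiles the regexp locally instead of receiving
# a compiled regexp object over the wire.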

def set_select(dsrpath, tuplelist, quote_mode, *filelists):
    """Initialize select object using tuplelist

    Note that each list in filelists must be passed as a separate
    argument, so each is recognized as a file by the connection.
    Otherwise we will get an error because a list containing files
    can't be pickled.
    """
    global select_source, select_mirror
    if dsrpath.source:
        select_source = Select(dsrpath, quote_mode)
        select_source.ParseArgs(tuplelist, filelists)
    else:
        select_mirror = Select(dsrpath, quote_mode)
        select_mirror.ParseArgs(tuplelist, filelists)
from rpath import * # kludge to avoid circularity - not needed in this module
from selection import *
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Preserve and restore hard links
If the preserve_hardlinks option is selected, linked files in the
source directory will be linked in the mirror directory. Linked files
are treated like any other with respect to incrementing, but a
database of all links will be recorded at each session, so linked
files can still be restored from the increments.
All these functions are meant to be executed on the destination
side. The source side should only transmit inode information.
"""
from __future__ import generators
import cPickle
# In all of these dictionaries the values are lists of indicies. The
# keys in the _inode_ ones are (inode, devloc) pairs.
_src_inode_indicies = {}
_dest_inode_indicies = {}
# The keys for these two are just indicies. They share values
# with the earlier dictionaries.
_src_index_indicies = {}
_dest_index_indicies = {}
# When a linked file is restored, its path is added to this dict,
# so it can be found when later paths being restored are linked to
# it.
_restore_index_path = {}

def get_inode_key(rorp):
    """Return rorp's key for _inode_ dictionaries"""
    return (rorp.getinode(), rorp.getdevloc())

def get_indicies(rorp, source):
    """Return a list of similarly linked indicies, using rorp's index"""
    if source: dict = _src_index_indicies
    else: dict = _dest_index_indicies
    try: return dict[rorp.index]
    except KeyError: return []

def add_rorp(rorp, source):
    """Process new rorp and update hard link dictionaries

    First enter it into src_inode_indicies. If we have already
    seen all the hard links, then we can delete the entry.
    Everything must stay recorded in src_index_indicies though.
    """
    if not rorp.isreg() or rorp.getnumlinks() < 2: return

    if source:
        inode_dict, index_dict = _src_inode_indicies, _src_index_indicies
    else: inode_dict, index_dict = _dest_inode_indicies, _dest_index_indicies

    rp_inode_key = get_inode_key(rorp)
    if inode_dict.has_key(rp_inode_key):
        index_list = inode_dict[rp_inode_key]
        index_list.append(rorp.index)
        if len(index_list) == rorp.getnumlinks():
            del inode_dict[rp_inode_key]
    else: # make new shared index list
        index_list = [rorp.index]
        inode_dict[rp_inode_key] = index_list
    index_dict[rorp.index] = index_list
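
# Worked example (hypothetical file): suppose ('a',) and ('b',) are the
# indicies of two links to the same source file with inode key (17, 1)
# and two links total. After add_rorp sees 'a':
#   _src_inode_indicies == {(17, 1): [('a',)]}
#   _src_index_indicies == {('a',): [('a',)]}
# After it also sees 'b', the shared list grows to [('a',), ('b',)], the
# inode entry is deleted because all links have been seen, and both
# ('a',) and ('b',) in _src_index_indicies point at the same list.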

def add_rorp_iter(iter, source):
    """Return new rorp iterator like iter, calling add_rorp on each rorp"""
    for rorp in iter:
        add_rorp(rorp, source)
        yield rorp

def rorp_eq(src_rorp, dest_rorp):
    """Compare two rorps for hard link equality

    Two files may otherwise seem equal but be hardlinked in
    different ways. This function considers them equal enough if
    they have been hardlinked correctly to the previously seen
    indicies.
    """
    if not src_rorp.index == dest_rorp.index: return None
    if (not src_rorp.isreg() or not dest_rorp.isreg() or
        src_rorp.getnumlinks() == dest_rorp.getnumlinks() == 1):
        return 1 # Hard links don't apply

    src_index_list = get_indicies(src_rorp, 1)
    dest_index_list = get_indicies(dest_rorp, None)

    # If a list only has one element, then it is only hardlinked
    # to itself so far, so that is not a genuine difference yet.
    if not src_index_list or len(src_index_list) == 1:
        return not dest_index_list or len(dest_index_list) == 1
    if not dest_index_list or len(dest_index_list) == 1: return None

    # Both index lists exist and are non-empty
    return src_index_list == dest_index_list # they are always sorted

def islinked(rorp):
    """True if rorp's index is already linked to something on src side"""
    return len(get_indicies(rorp, 1)) >= 2

def restore_link(index, rpath):
    """Restore a linked file by linking it

    When restoring, all the hardlink data is already present, so we
    can only link to something that has already been restored. If no
    already-restored file is available to link to, rpath's path is
    recorded in the _restore_index_path dict so that later restored
    files can be hard linked to it.

    Returns true if it succeeded in creating rpath by linking, false
    if rpath must be restored normally.
    """
    if index not in _src_index_indicies: return None
    for linked_index in _src_index_indicies[index]:
        if linked_index in _restore_index_path:
            srcpath = _restore_index_path[linked_index]
            Log("Restoring %s by hard linking to %s" %
                (rpath.path, srcpath), 6)
            rpath.hardlink(srcpath)
            return 1
    _restore_index_path[index] = rpath.path
    return None

def link_rp(src_rorp, dest_rpath, dest_root = None):
    """Make dest_rpath into a link analogous to that of src_rorp"""
    if not dest_root: dest_root = dest_rpath # use base of dest_rpath
    dest_link_rpath = RPath(dest_root.conn, dest_root.base,
                            get_indicies(src_rorp, 1)[0])
    dest_rpath.hardlink(dest_link_rpath.path)

def write_linkdict(rpath, dict, compress = None):
    """Write link data to the rbdata dir

    It is stored as a big pickled dictionary dated to match
    the current hardlinks.
    """
    assert (Globals.isbackup_writer and
            rpath.conn is Globals.local_connection)
    tf = TempFileManager.new(rpath)
    def init():
        fp = tf.open("wb", compress)
        cPickle.dump(dict, fp)
        assert not fp.close()
        tf.setdata()
    Robust.make_tf_robustaction(init, (tf,), (rpath,)).execute()

def get_linkrp(data_rpath, time, prefix):
    """Return RPath of linkdata, or None if cannot find"""
    for rp in map(data_rpath.append, data_rpath.listdir()):
        if (rp.isincfile() and rp.getincbase_str() == prefix and
            (rp.getinctype() == 'snapshot' or rp.getinctype() == 'data')
            and Time.stringtotime(rp.getinctime()) == time):
            return rp
    return None

def get_linkdata(data_rpath, time, prefix = 'hardlink_data'):
    """Return index dictionary written by write_linkdict at time"""
    rp = get_linkrp(data_rpath, time, prefix)
    if not rp: return None
    fp = rp.open("rb", rp.isinccompressed())
    index_dict = cPickle.load(fp)
    assert not fp.close()
    return index_dict

def final_writedata():
    """Write final checkpoint data to rbdir after successful backup"""
    global final_inc
    if _src_index_indicies:
        Log("Writing hard link data", 6)
        if Globals.compression:
            final_inc = Globals.rbdir.append("hardlink_data.%s.data.gz" %
                                             Time.curtimestr)
        else: final_inc = Globals.rbdir.append("hardlink_data.%s.data" %
                                               Time.curtimestr)
        write_linkdict(final_inc, _src_index_indicies, Globals.compression)
    else: # no hardlinks, so writing unnecessary
        final_inc = None

def retrieve_final(time):
    """Set source index dictionary from hardlink_data file if avail"""
    global _src_index_indicies
    hd = get_linkdata(Globals.rbdir, time)
    if hd is None: return None
    _src_index_indicies = hd
    return 1

def final_checkpoint(data_rpath):
    """Write contents of the four dictionaries to the data dir

    If rdiff-backup receives a fatal error, it may still be able
    to save the contents of the four hard link dictionaries.
    Because these dictionaries may be big, they are not saved
    after every 20 seconds or whatever, but just at the end.
    """
    Log("Writing intermediate hard link data to disk", 2)
    src_inode_rp = data_rpath.append("hardlink_source_inode_checkpoint."
                                     "%s.data" % Time.curtimestr)
    src_index_rp = data_rpath.append("hardlink_source_index_checkpoint."
                                     "%s.data" % Time.curtimestr)
    dest_inode_rp = data_rpath.append("hardlink_dest_inode_checkpoint."
                                      "%s.data" % Time.curtimestr)
    dest_index_rp = data_rpath.append("hardlink_dest_index_checkpoint."
                                      "%s.data" % Time.curtimestr)
    for (rp, dict) in ((src_inode_rp, _src_inode_indicies),
                       (src_index_rp, _src_index_indicies),
                       (dest_inode_rp, _dest_inode_indicies),
                       (dest_index_rp, _dest_index_indicies)):
        write_linkdict(rp, dict)

def retrieve_checkpoint(data_rpath, time):
    """Retrieve hardlink data from final checkpoint

    Return true if the retrieval worked, false otherwise.
    """
    global _src_inode_indicies, _src_index_indicies
    global _dest_inode_indicies, _dest_index_indicies
    try:
        src_inode = get_linkdata(data_rpath, time,
                                 "hardlink_source_inode_checkpoint")
        src_index = get_linkdata(data_rpath, time,
                                 "hardlink_source_index_checkpoint")
        dest_inode = get_linkdata(data_rpath, time,
                                  "hardlink_dest_inode_checkpoint")
        dest_index = get_linkdata(data_rpath, time,
                                  "hardlink_dest_index_checkpoint")
    except cPickle.UnpicklingError:
        Log("Unpickling Error", 2)
        return None
    if (src_inode is None or src_index is None or
        dest_inode is None or dest_index is None): return None
    _src_inode_indicies, _src_index_indicies = src_inode, src_index
    _dest_inode_indicies, _dest_index_indicies = dest_inode, dest_index
    return 1

def remove_all_checkpoints():
    """Remove all hardlink checkpoint information from directory"""
    prefix_list = ["hardlink_source_inode_checkpoint",
                   "hardlink_source_index_checkpoint",
                   "hardlink_dest_inode_checkpoint",
                   "hardlink_dest_index_checkpoint"]
    for rp in map(Globals.rbdir.append, Globals.rbdir.listdir()):
        if (rp.isincfile() and rp.getincbase_str() in prefix_list and
            (rp.getinctype() == 'snapshot' or rp.getinctype() == 'data')):
            rp.delete()
from log import *
from robust import *
from rpath import *
import Globals, Time
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Start (and end) here - read arguments, set global settings, etc."""
from __future__ import generators
import getopt, sys, re
from log import *
from lazy import *
from connection import *
from rpath import *
from destructive_stepping import *
from robust import *
from restore import *
from highlevel import *
from manage import *
import Globals, Time, SetConnections
action = None
remote_cmd, remote_schema = None, None
force = None
select_opts, select_mirror_opts = [], []
select_files = []

def parse_cmdlineoptions(arglist):
    """Parse argument list and set global preferences"""
    global args, action, force, restore_timestr, remote_cmd, remote_schema
    global remove_older_than_string
    def sel_fl(filename):
        """Helper function for including/excluding filelists below"""
        try: return open(filename, "r")
        except IOError: Log.FatalError("Error opening file %s" % filename)

    try: optlist, args = getopt.getopt(arglist, "blmr:sv:V",
             ["backup-mode", "calculate-average",
              "change-source-perms", "chars-to-quote=",
              "checkpoint-interval=", "current-time=", "exclude=",
              "exclude-device-files", "exclude-filelist=",
              "exclude-filelist-stdin", "exclude-globbing-filelist=",
              "exclude-mirror=", "exclude-other-filesystems",
              "exclude-regexp=", "exclude-special-files", "force",
              "include=", "include-filelist=", "include-filelist-stdin",
              "include-globbing-filelist=", "include-regexp=",
              "list-changed-since=", "list-increments", "mirror-only",
              "no-compression", "no-compression-regexp=", "no-hard-links",
              "no-resume", "null-separator", "parsable-output",
              "print-statistics", "quoting-char=", "remote-cmd=",
              "remote-schema=", "remove-older-than=", "restore-as-of=",
              "restrict=", "restrict-read-only=", "restrict-update-only=",
              "resume", "resume-window=", "server", "sleep-ratio=",
              "ssh-no-compression", "terminal-verbosity=", "test-server",
              "verbosity=", "version", "windows-mode",
              "windows-time-format"])
    except getopt.error, e:
        commandline_error("Bad commandline options: %s" % str(e))

    for opt, arg in optlist:
        if opt == "-b" or opt == "--backup-mode": action = "backup"
        elif opt == "--calculate-average": action = "calculate-average"
        elif opt == "--change-source-perms":
            Globals.set('change_source_perms', 1)
        elif opt == "--chars-to-quote":
            Globals.set('chars_to_quote', arg)
            Globals.set('quoting_enabled', 1)
        elif opt == "--checkpoint-interval":
            Globals.set_integer('checkpoint_interval', arg)
        elif opt == "--current-time":
            Globals.set_integer('current_time', arg)
        elif opt == "--exclude": select_opts.append((opt, arg))
        elif opt == "--exclude-device-files": select_opts.append((opt, arg))
        elif opt == "--exclude-filelist":
            select_opts.append((opt, arg))
            select_files.append(sel_fl(arg))
        elif opt == "--exclude-filelist-stdin":
            select_opts.append(("--exclude-filelist", "standard input"))
            select_files.append(sys.stdin)
        elif opt == "--exclude-globbing-filelist":
            select_opts.append((opt, arg))
            select_files.append(sel_fl(arg))
        elif opt == "--exclude-mirror":
            select_mirror_opts.append(("--exclude", arg))
        elif (opt == "--exclude-other-filesystems" or
              opt == "--exclude-regexp" or
              opt == "--exclude-special-files"): select_opts.append((opt, arg))
        elif opt == "--force": force = 1
        elif opt == "--include": select_opts.append((opt, arg))
        elif opt == "--include-filelist":
            select_opts.append((opt, arg))
            select_files.append(sel_fl(arg))
        elif opt == "--include-filelist-stdin":
            select_opts.append(("--include-filelist", "standard input"))
            select_files.append(sys.stdin)
        elif opt == "--include-globbing-filelist":
            select_opts.append((opt, arg))
            select_files.append(sel_fl(arg))
        elif opt == "--include-regexp": select_opts.append((opt, arg))
        elif opt == "--list-changed-since":
            restore_timestr, action = arg, "list-changed-since"
        elif opt == "-l" or opt == "--list-increments":
            action = "list-increments"
        elif opt == "-m" or opt == "--mirror-only": action = "mirror"
        elif opt == "--no-compression": Globals.set("compression", None)
        elif opt == "--no-compression-regexp":
            Globals.set("no_compression_regexp_string", arg)
        elif opt == "--no-hard-links": Globals.set('preserve_hardlinks', 0)
        elif opt == '--no-resume': Globals.resume = 0
        elif opt == "--null-separator": Globals.set("null_separator", 1)
        elif opt == "--parsable-output": Globals.set('parsable_output', 1)
        elif opt == "--print-statistics":
            Globals.set('print_statistics', 1)
        elif opt == "--quoting-char":
            Globals.set('quoting_char', arg)
            Globals.set('quoting_enabled', 1)
        elif opt == "-r" or opt == "--restore-as-of":
            restore_timestr, action = arg, "restore-as-of"
        elif opt == "--remote-cmd": remote_cmd = arg
        elif opt == "--remote-schema": remote_schema = arg
        elif opt == "--remove-older-than":
            remove_older_than_string = arg
            action = "remove-older-than"
        elif opt == "--restrict": Globals.restrict_path = arg
        elif opt == "--restrict-read-only":
            Globals.security_level = "read-only"
            Globals.restrict_path = arg
        elif opt == "--restrict-update-only":
            Globals.security_level = "update-only"
            Globals.restrict_path = arg
        elif opt == '--resume': Globals.resume = 1
        elif opt == '--resume-window':
            Globals.set_integer('resume_window', arg)
        elif opt == "-s" or opt == "--server":
            action = "server"
            Globals.server = 1
        elif opt == "--sleep-ratio":
            Globals.set_float("sleep_ratio", arg, 0, 1, inclusive=0)
        elif opt == "--ssh-no-compression":
            Globals.set('ssh_compression', None)
        elif opt == "--terminal-verbosity": Log.setterm_verbosity(arg)
        elif opt == "--test-server": action = "test-server"
        elif opt == "-V" or opt == "--version":
            print "rdiff-backup " + Globals.version
            sys.exit(0)
        elif opt == "-v" or opt == "--verbosity": Log.setverbosity(arg)
        elif opt == "--windows-mode":
            Globals.set('time_separator', "_")
            Globals.set('chars_to_quote', "A-Z:")
            Globals.set('quoting_enabled', 1)
            Globals.set('preserve_hardlinks', 0)
            select_opts.append(("--exclude-special-files", None))
        elif opt == '--windows-time-format':
            Globals.set('time_separator', "_")
        else: Log.FatalError("Unknown option %s" % opt)

def set_action():
    """Check arguments and try to set action"""
    global action
    l = len(args)
    if not action:
        if l == 0: commandline_error("No arguments given")
        elif l == 1: action = "restore"
        elif l == 2:
            if RPath(Globals.local_connection, args[0]).isincfile():
                action = "restore"
            else: action = "backup"
        else: commandline_error("Too many arguments given")

    if l == 0 and action != "server":
        commandline_error("No arguments given")
    if l > 0 and action == "server":
        commandline_error("Too many arguments given")
    if l < 2 and (action == "backup" or action == "mirror" or
                  action == "restore-as-of"):
        commandline_error("Two arguments are required (source, destination).")
    if l == 2 and (action == "list-increments" or
                   action == "remove-older-than" or
                   action == "list-changed-since"):
        commandline_error("Only use one argument, "
                          "the root of the backup directory")
    if l > 2 and action != "calculate-average":
        commandline_error("Too many arguments given")

def commandline_error(message):
    sys.stderr.write("Error: %s\n" % message)
    sys.stderr.write("See the rdiff-backup manual page for instructions\n")
    sys.exit(1)

def misc_setup(rps):
    """Set default change ownership flag, umask, relay regexps"""
    if ((len(rps) == 2 and rps[1].conn.os.getuid() == 0) or
        (len(rps) < 2 and os.getuid() == 0)):
        # Allow change_ownership if destination connection is root
        for conn in Globals.connections:
            conn.Globals.set('change_ownership', 1)
        for rp in rps: rp.setdata() # Update with userinfo

    os.umask(077)
    Time.setcurtime(Globals.current_time)
    FilenameMapping.set_init_quote_vals()
    SetConnections.UpdateGlobal("client_conn", Globals.local_connection)

    # This is because I originally didn't think compiled regexps
    # could be pickled, and so must be compiled on remote side.
    Globals.postset_regexp('no_compression_regexp',
                           Globals.no_compression_regexp_string)

    for conn in Globals.connections: Robust.install_signal_handlers()

def take_action(rps):
    """Do whatever action says"""
    if action == "server": PipeConnection(sys.stdin, sys.stdout).Server()
    elif action == "backup": Backup(rps[0], rps[1])
    elif action == "restore": restore(*rps)
    elif action == "restore-as-of": RestoreAsOf(rps[0], rps[1])
    elif action == "mirror": Mirror(rps[0], rps[1])
    elif action == "test-server": SetConnections.TestConnections()
    elif action == "list-changed-since": ListChangedSince(rps[0])
    elif action == "list-increments": ListIncrements(rps[0])
    elif action == "remove-older-than": RemoveOlderThan(rps[0])
    elif action == "calculate-average": CalculateAverage(rps)
    else: raise AssertionError("Unknown action " + action)

def cleanup():
    """Do any last minute cleaning before exiting"""
    Log("Cleaning up", 6)
    Log.close_logfile()
    if not Globals.server: SetConnections.CloseConnections()

def Main(arglist):
    """Start everything up!"""
    parse_cmdlineoptions(arglist)
    set_action()
    cmdpairs = SetConnections.get_cmd_pairs(args, remote_schema, remote_cmd)
    Security.initialize(action, cmdpairs)
    rps = map(SetConnections.cmdpair2rp, cmdpairs)
    misc_setup(rps)
    take_action(rps)
    cleanup()

def Mirror(src_rp, dest_rp):
    """Turn dest_rp into a copy of src_rp"""
    Log("Mirroring %s to %s" % (src_rp.path, dest_rp.path), 5)
    mirror_check_paths(src_rp, dest_rp)
    # Since no "rdiff-backup-data" dir, use root of destination.
    SetConnections.UpdateGlobal('rbdir', dest_rp)
    SetConnections.BackupInitConnections(src_rp.conn, dest_rp.conn)
    backup_init_select(src_rp, dest_rp)
    HighLevel.Mirror(src_rp, dest_rp)

def mirror_check_paths(rpin, rpout):
    """Check paths and return rpin, rpout"""
    if not rpin.lstat():
        Log.FatalError("Source directory %s does not exist" % rpin.path)
    if rpout.lstat() and not force: Log.FatalError(
"""Destination %s exists so continuing could mess it up. Run
rdiff-backup with the --force option if you want to mirror anyway.""" %
        rpout.path)

def Backup(rpin, rpout):
    """Backup, possibly incrementally, rpin to rpout."""
    SetConnections.BackupInitConnections(rpin.conn, rpout.conn)
    backup_init_select(rpin, rpout)
    backup_init_dirs(rpin, rpout)
    RSI = Globals.backup_writer.Resume.ResumeCheck()
    SaveState.init_filenames()
    if prevtime:
        Time.setprevtime(prevtime)
        HighLevel.Mirror_and_increment(rpin, rpout, incdir, RSI)
    else: HighLevel.Mirror(rpin, rpout, incdir, RSI)
    rpout.conn.Main.backup_touch_curmirror_local(rpin, rpout)

def backup_init_select(rpin, rpout):
    """Create Select objects on source and dest connections"""
    rpin.conn.Globals.set_select(DSRPath(1, rpin), select_opts,
                                 None, *select_files)
    rpout.conn.Globals.set_select(DSRPath(None, rpout), select_mirror_opts, 1)

def backup_init_dirs(rpin, rpout):
    """Make sure rpin and rpout are valid, init data dir and logging"""
    global datadir, incdir, prevtime
    if rpout.lstat() and not rpout.isdir():
        if not force: Log.FatalError("Destination %s exists and is not a "
                                     "directory" % rpout.path)
        else:
            Log("Deleting %s" % rpout.path, 3)
            rpout.delete()

    if not rpin.lstat():
        Log.FatalError("Source directory %s does not exist" % rpin.path)
    elif not rpin.isdir():
        Log.FatalError("Source %s is not a directory" % rpin.path)

    datadir = rpout.append("rdiff-backup-data")
    SetConnections.UpdateGlobal('rbdir', datadir)
    incdir = RPath(rpout.conn, os.path.join(datadir.path, "increments"))
    prevtime = backup_get_mirrortime()

    if rpout.lstat():
        if rpout.isdir() and not rpout.listdir(): # rpout is empty dir
            rpout.chmod(0700) # just make sure permissions aren't too lax
        elif not datadir.lstat() and not force: Log.FatalError(
"""Destination directory %s exists, but does not look like an
rdiff-backup directory. Running rdiff-backup like this could mess up
what is currently in it. If you want to update or overwrite it, run
rdiff-backup with the --force option.""" % rpout.path)

    if not rpout.lstat():
        try: rpout.mkdir()
        except os.error:
            Log.FatalError("Unable to create directory %s" % rpout.path)
    if not datadir.lstat(): datadir.mkdir()
    if Log.verbosity > 0:
        Log.open_logfile(datadir.append("backup.log"))
    backup_warn_if_infinite_regress(rpin, rpout)

def backup_warn_if_infinite_regress(rpin, rpout):
    """Warn user if destination area contained in source area"""
    if rpout.conn is rpin.conn: # it's meaningful to compare paths
        if ((len(rpout.path) > len(rpin.path)+1 and
             rpout.path[:len(rpin.path)] == rpin.path and
             rpout.path[len(rpin.path)] == '/') or
            (rpin.path == "." and rpout.path[0] != '/' and
             rpout.path[:2] != '..')):
            # Just a few heuristics, we don't have to get every case
            if Globals.backup_reader.Globals.select_source.Select(rpout): Log(
"""Warning: The destination directory '%s' may be contained in the
source directory '%s'. This could cause an infinite regress. You
may need to use the --exclude option.""" % (rpout.path, rpin.path), 2)

def backup_get_mirrorrps():
    """Return list of current_mirror rps"""
    datadir = Globals.rbdir
    if not datadir.isdir(): return []
    mirrorrps = [datadir.append(fn) for fn in datadir.listdir()
                 if fn.startswith("current_mirror.")]
    return filter(lambda rp: rp.isincfile(), mirrorrps)

def backup_get_mirrortime():
    """Return time in seconds of previous mirror, or None if cannot"""
    mirrorrps = backup_get_mirrorrps()
    if not mirrorrps: return None
    if len(mirrorrps) > 1:
        Log(
"""Warning: duplicate current_mirror files found. Perhaps something
went wrong during your last backup? Using """ + mirrorrps[-1].path, 2)
    timestr = mirrorrps[-1].getinctime()
    return Time.stringtotime(timestr)

def backup_touch_curmirror_local(rpin, rpout):
    """Make a file like current_mirror.time.data to record time

    Also updates rpout so mod times don't get messed up. This should
    be run on the destination connection.
    """
    datadir = Globals.rbdir
    map(RPath.delete, backup_get_mirrorrps())
    mirrorrp = datadir.append("current_mirror.%s.%s" % (Time.curtimestr,
                                                        "data"))
    Log("Touching mirror marker %s" % mirrorrp.path, 6)
    mirrorrp.touch()
    RPath.copy_attribs(rpin, rpout)

def restore(src_rp, dest_rp = None):
    """Main restoring function

    Here src_rp should be an increment file, and if dest_rp is
    missing it defaults to the base of the increment.
    """
    rpin, rpout = restore_check_paths(src_rp, dest_rp)
    time = Time.stringtotime(rpin.getinctime())
    restore_common(rpin, rpout, time)

def RestoreAsOf(rpin, target):
    """Secondary syntax for restore operation

    rpin - RPath of mirror file to restore (not nec. with correct index)
    target - RPath of place to put restored file
    """
    restore_check_paths(rpin, target, 1)
    try: time = Time.genstrtotime(restore_timestr)
    except Time.TimeException, exc: Log.FatalError(str(exc))
    restore_common(rpin, target, time)

def restore_common(rpin, target, time):
    """Restore operation common to Restore and RestoreAsOf"""
    mirror_root, index = restore_get_root(rpin)
    mirror = mirror_root.new_index(index)
    inc_rpath = datadir.append_path('increments', index)
    restore_init_select(mirror_root, target)
    restore_start_log(rpin, target, time)
    Restore.Restore(inc_rpath, mirror, target, time)
    Log("Restore ended", 4)

def restore_start_log(rpin, target, time):
    """Open restore log file, log initial message"""
    try: Log.open_logfile(datadir.append("restore.log"))
    except LoggerError, e: Log("Warning, " + str(e), 2)

    # Log following message at file verbosity 3, but term verbosity 4
    log_message = ("Starting restore of %s to %s as it was as of %s." %
                   (rpin.path, target.path, Time.timetopretty(time)))
    if Log.term_verbosity >= 4: Log.log_to_term(log_message, 4)
    if Log.verbosity >= 3: Log.log_to_file(log_message)

def restore_check_paths(rpin, rpout, restoreasof = None):
    """Check paths and return pair of corresponding rps"""
    if not restoreasof:
        if not rpin.lstat():
            Log.FatalError("Source file %s does not exist" % rpin.path)
        elif not rpin.isincfile():
            Log.FatalError("""File %s does not look like an increment file.
Try restoring from an increment file (the filenames look like
"foobar.2001-09-01T04:49:04-07:00.diff").""" % rpin.path)

    if not rpout: rpout = RPath(Globals.local_connection,
                                rpin.getincbase_str())
    if rpout.lstat():
        Log.FatalError("Restore target %s already exists, "
                       "and will not be overwritten." % rpout.path)
    return rpin, rpout

def restore_init_select(rpin, rpout):
    """Initialize Select

    Unlike the backup selections, here they are on the local
    connection, because the backup operation is pipelined in a way
    the restore operation isn't.
    """
    Globals.set_select(DSRPath(1, rpin), select_mirror_opts, None)
    Globals.set_select(DSRPath(None, rpout), select_opts, None, *select_files)

def restore_get_root(rpin):
    """Return (mirror root, index) and set the data dir

    The idea here is to keep backing up on the path until we find
    a directory that contains "rdiff-backup-data". That is the
    mirror root. If the path from there starts
    "rdiff-backup-data/increments*", then the index is the
    remainder minus that. Otherwise the index is just the path
    minus the root.

    All this could fail if the increment file is pointed to in a
    funny way, using symlinks or somesuch.
    """
    global datadir
    if rpin.isincfile(): relpath = rpin.getincbase().path
    else: relpath = rpin.path
    pathcomps = os.path.join(rpin.conn.os.getcwd(), relpath).split("/")
    assert len(pathcomps) >= 2 # path should be relative to /

    i = len(pathcomps)
    while i >= 2:
        parent_dir = RPath(rpin.conn, "/".join(pathcomps[:i]))
        if (parent_dir.isdir() and
            "rdiff-backup-data" in parent_dir.listdir()): break
        i = i-1
    else: Log.FatalError("Unable to find rdiff-backup-data directory")

    rootrp = parent_dir
    Log("Using mirror root directory %s" % rootrp.path, 6)

    datadir = rootrp.append_path("rdiff-backup-data")
    SetConnections.UpdateGlobal('rbdir', datadir)
    if not datadir.isdir():
        Log.FatalError("Unable to read rdiff-backup-data directory %s" %
                       datadir.path)

    from_datadir = tuple(pathcomps[i:])
    if not from_datadir or from_datadir[0] != "rdiff-backup-data":
        return (rootrp, from_datadir) # in mirror, not increments
    assert from_datadir[1] == "increments"
    return (rootrp, from_datadir[2:])
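
# Illustrative example (hypothetical paths): given the increment file
# /backup/rdiff-backup-data/increments/foo/bar.2001-09-01T04:49:04-07:00.diff
# the walk stops at /backup, the deepest parent containing
# "rdiff-backup-data", so the mirror root is /backup and the returned
# index is ('foo', 'bar').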

def ListIncrements(rp):
    """Print out a summary of the increments and their times"""
    mirror_root, index = restore_get_root(rp)
    Globals.rbdir = datadir = \
        mirror_root.append_path("rdiff-backup-data")
    mirrorrp = mirror_root.new_index(index)
    inc_rpath = datadir.append_path('increments', index)
    incs = Restore.get_inclist(inc_rpath)
    mirror_time = Restore.get_mirror_time()
    if Globals.parsable_output:
        print Manage.describe_incs_parsable(incs, mirror_time, mirrorrp)
    else: print Manage.describe_incs_human(incs, mirror_time, mirrorrp)

def CalculateAverage(rps):
    """Print out the average of the given statistics files"""
    statobjs = map(lambda rp: StatsObj().read_stats_from_rp(rp), rps)
    average_stats = StatsObj().set_to_average(statobjs)
    print average_stats.get_stats_logstring(
        "Average of %d stat files" % len(rps))

def RemoveOlderThan(rootrp):
    """Remove all increment files older than a certain time"""
    datadir = rootrp.append("rdiff-backup-data")
    if not datadir.lstat() or not datadir.isdir():
        Log.FatalError("Unable to open rdiff-backup-data dir %s" %
                       (datadir.path,))

    try: time = Time.genstrtotime(remove_older_than_string)
    except Time.TimeException, exc: Log.FatalError(str(exc))
    timep = Time.timetopretty(time)
    Log("Deleting increment(s) before %s" % timep, 4)

    times_in_secs = map(lambda inc: Time.stringtotime(inc.getinctime()),
                        Restore.get_inclist(datadir.append("increments")))
    times_in_secs = filter(lambda t: t < time, times_in_secs)
    if not times_in_secs:
        Log.FatalError("No increments older than %s found" % timep)

    times_in_secs.sort()
    inc_pretty_time = "\n".join(map(Time.timetopretty, times_in_secs))
    if len(times_in_secs) > 1 and not force:
        Log.FatalError("Found %d relevant increments, dated:\n%s"
                       "\nIf you want to delete multiple increments in "
                       "this way, use the --force option." %
                       (len(times_in_secs), inc_pretty_time))

    if len(times_in_secs) == 1:
        Log("Deleting increment at time:\n" + inc_pretty_time, 3)
    else: Log("Deleting increments at times:\n" + inc_pretty_time, 3)
    Manage.delete_earlier_than(datadir, time)

def ListChangedSince(rp):
    """List all the files under rp that have changed since restoretime"""
    try: rest_time = Time.genstrtotime(restore_timestr)
    except Time.TimeException, exc: Log.FatalError(str(exc))
    mirror_root, index = restore_get_root(rp)
    Globals.rbdir = datadir = mirror_root.append_path("rdiff-backup-data")
    mirror_time = Restore.get_mirror_time()

    def get_rids_recursive(rid):
        """Yield all the rids under rid that have inc newer than rest_time"""
        yield rid
        for sub_rid in Restore.yield_rids(rid, rest_time, mirror_time):
            for sub_sub_rid in get_rids_recursive(sub_rid): yield sub_sub_rid

    inc_rpath = datadir.append_path('increments', index)
    inc_list = Restore.get_inclist(inc_rpath)
    root_rid = RestoreIncrementData(index, inc_rpath, inc_list)
    for rid in get_rids_recursive(root_rid):
        if rid.inc_list: print "/".join(rid.index)
#!/usr/bin/env python
"""Read component files of rdiff-backup, and glue them together after
removing unnecessary bits."""
import os

def mystrip(filename):
    """Open filename, read input, strip appropriately, and return contents"""
    fp = open(filename, "r")
    lines = fp.readlines()
    fp.close()
    i = 0
    while (lines[i][:60] !=
           "############################################################"):
        i = i+1
    return "".join(lines[i:]).strip() + "\n\n\n"
files = ["globals.py", "static.py", "lazy.py", "log.py", "ttime.py",
"iterfile.py", "rdiff.py", "connection.py", "rpath.py",
"hardlink.py", "robust.py", "rorpiter.py",
"destructive_stepping.py", "selection.py",
"filename_mapping.py", "statistics.py", "increment.py",
"restore.py", "manage.py", "highlevel.py",
"setconnections.py", "main.py"]
os.system("cp header.py rdiff-backup")
outfp = open("rdiff-backup", "a")
for file in files:
outfp.write(mystrip(file))
outfp.close()
os.system("chmod 755 rdiff-backup")
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Misc statistics methods, pertaining to dir and session stat files"""
from statistics import *
# This is the RPath of the directory statistics file, and the
# associated open file. It will hold a line of statistics for
# each directory that is backed up.
_dir_stats_rp = None
_dir_stats_fp = None
# This goes at the beginning of the directory statistics file and
# explains the format.
_dir_stats_header = """# rdiff-backup directory statistics file
#
# Each line is in the following format:
# RelativeDirName %s
""" % " ".join(StatsObj.stat_file_attrs)

def open_dir_stats_file():
    """Open directory statistics file, write header"""
    global _dir_stats_fp, _dir_stats_rp
    assert not _dir_stats_fp, "Directory file already open"

    if Globals.compression: suffix = "data.gz"
    else: suffix = "data"
    _dir_stats_rp = Inc.get_inc(Globals.rbdir.append("directory_statistics"),
                                Time.curtime, suffix)

    if _dir_stats_rp.lstat():
        Log("Warning, statistics file %s already exists, appending" %
            _dir_stats_rp.path, 2)
        _dir_stats_fp = _dir_stats_rp.open("ab", Globals.compression)
    else: _dir_stats_fp = _dir_stats_rp.open("wb", Globals.compression)
    _dir_stats_fp.write(_dir_stats_header)

def write_dir_stats_line(statobj, index):
    """Write info from statobj about rpath to statistics file"""
    if Globals.null_separator:
        _dir_stats_fp.write(statobj.get_stats_line(index, None) + "\0")
    else: _dir_stats_fp.write(statobj.get_stats_line(index) + "\n")

def close_dir_stats_file():
    """Close directory statistics file if it's open"""
    global _dir_stats_fp
    if _dir_stats_fp:
        _dir_stats_fp.close()
        _dir_stats_fp = None

def write_session_statistics(statobj):
    """Write session statistics into file, log"""
    stat_inc = Inc.get_inc(Globals.rbdir.append("session_statistics"),
                           Time.curtime, "data")
    statobj.StartTime = Time.curtime
    statobj.EndTime = time.time()

    # include hardlink data and dir stats in size of increments
    if Globals.preserve_hardlinks and Hardlink.final_inc:
        statobj.IncrementFiles += 1
        statobj.IncrementFileSize += Hardlink.final_inc.getsize()
    if _dir_stats_rp and _dir_stats_rp.lstat():
        statobj.IncrementFiles += 1
        statobj.IncrementFileSize += _dir_stats_rp.getsize()

    statobj.write_stats_to_rp(stat_inc)
    if Globals.print_statistics:
        message = statobj.get_stats_logstring("Session statistics")
        Log.log_to_file(message)
        Globals.client_conn.sys.stdout.write(message)
from increment import *
import Hardlink
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Invoke rdiff utility to make signatures, deltas, or patch
All these operations should be done in a relatively safe manner using
RobustAction and the like.
"""
import os, librsync
class RdiffException(Exception): pass

def get_signature(rp):
    """Take signature of rp file and return in file object"""
    Log("Getting signature of %s" % rp.path, 7)
    return librsync.SigFile(rp.open("rb"))

def get_delta_sigfileobj(sig_fileobj, rp_new):
    """Like get_delta_sigrp but signature is in a file object"""
    Log("Getting delta of %s with signature stream" % (rp_new.path,), 7)
    return librsync.DeltaFile(sig_fileobj, rp_new.open("rb"))

def get_delta_sigrp(rp_signature, rp_new):
    """Take signature rp and new rp, return delta file object"""
    Log("Getting delta of %s with signature %s" %
        (rp_new.path, rp_signature.path), 7)
    return librsync.DeltaFile(rp_signature.open("rb"), rp_new.open("rb"))

def write_delta_action(basis, new, delta, compress = None):
    """Return action writing delta which brings basis to new

    If compress is true, the output of rdiff will be gzipped
    before being written to delta.
    """
    delta_tf = TempFileManager.new(delta)
    def init(): write_delta(basis, new, delta_tf, compress)
    return Robust.make_tf_robustaction(init, delta_tf, delta)

def write_delta(basis, new, delta, compress = None):
    """Write rdiff delta which brings basis to new"""
    Log("Writing delta %s from %s -> %s" %
        (basis.path, new.path, delta.path), 7)
    sigfile = librsync.SigFile(basis.open("rb"))
    deltafile = librsync.DeltaFile(sigfile, new.open("rb"))
    delta.write_from_fileobj(deltafile, compress)

def patch_action(rp_basis, rp_delta, rp_out = None, out_tf = None,
                 delta_compressed = None):
    """Return RobustAction which patches rp_basis with rp_delta

    If rp_out is None, put output in rp_basis. Will use TempFile
    out_tf if it is specified. If delta_compressed is true, the
    delta file will be decompressed before processing with rdiff.
    """
    if not rp_out: rp_out = rp_basis
    if not out_tf: out_tf = TempFileManager.new(rp_out)
    def init():
        rp_basis.conn.Rdiff.patch_local(rp_basis, rp_delta,
                                        out_tf, delta_compressed)
        out_tf.setdata()
    return Robust.make_tf_robustaction(init, out_tf, rp_out)

def patch_local(rp_basis, rp_delta, outrp, delta_compressed = None):
    """Patch routine that must be run on rp_basis.conn

    This is because librsync may need to seek() around in rp_basis,
    and so needs a real file. Other rpaths can be remote.
    """
    assert rp_basis.conn is Globals.local_connection
    if delta_compressed: deltafile = rp_delta.open("rb", 1)
    else: deltafile = rp_delta.open("rb")
    patchfile = librsync.PatchedFile(rp_basis.open("rb"), deltafile)
    outrp.write_from_fileobj(patchfile)
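
# Sketch of the librsync pipeline these functions wrap (plain file
# objects shown for illustration; rdiff-backup normally goes through
# RPaths):
#
#   sig = librsync.SigFile(open("basis", "rb"))         # signature of basis
#   delta = librsync.DeltaFile(sig, open("new", "rb"))  # delta: basis -> new
#   out = librsync.PatchedFile(open("basis", "rb"),
#                              open("delta", "rb"))     # basis + delta -> new
#
# Each wrapper is itself a file-like object, so the stages stream and
# no file needs to be held fully in memory.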

def patch_with_attribs_action(rp_basis, rp_delta, rp_out = None):
    """Like patch_action, but also transfers attributes from rp_delta"""
    if not rp_out: rp_out = rp_basis
    tf = TempFileManager.new(rp_out)
    return Robust.chain_nested(patch_action(rp_basis, rp_delta, rp_out, tf),
                               Robust.copy_attribs_action(rp_delta, tf))

def copy_action(rpin, rpout):
    """Use rdiff to copy rpin to rpout, conserving bandwidth"""
    if not rpin.isreg() or not rpout.isreg() or rpin.conn is rpout.conn:
        # rdiff not applicable, fallback to regular copying
        return Robust.copy_action(rpin, rpout)

    Log("Rdiff copying %s to %s" % (rpin.path, rpout.path), 6)
    out_tf = TempFileManager.new(rpout)
    def init(): rpout.conn.Rdiff.copy_local(rpin, rpout, out_tf)
    return Robust.make_tf_robustaction(init, out_tf, rpout)

def copy_local(rpin, rpout, rpnew):
    """Write rpnew == rpin using rpout as basis. rpout and rpnew local"""
    assert rpnew.conn is rpout.conn is Globals.local_connection
    sigfile = librsync.SigFile(rpout.open("rb"))
    deltafile = rpin.conn.librsync.DeltaFile(sigfile, rpin.open("rb"))
    rpnew.write_from_fileobj(librsync.PatchedFile(rpout.open("rb"), deltafile))
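
# Design note: the signature is computed locally from rpout (the old
# copy), while the DeltaFile is constructed on rpin's connection, so
# only the signature and the resulting delta cross the wire instead of
# the whole file -- the same idea rsync uses.
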
from log import *
from robust import *
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Functions to make sure remote requests are kosher"""
import sys, tempfile
import Globals, Main
from rpath import *

class Violation(Exception):
    """Exception that indicates an improper request has been received"""
    pass

# This will store the list of functions that will be honored from
# remote connections.
allowed_requests = None
# This stores the list of global variables that the client can not
# set on the server.
disallowed_server_globals = ["server", "security_level", "restrict_path"]

def initialize(action, cmdpairs):
    """Initialize allowable request list and chroot"""
    global allowed_requests
    set_security_level(action, cmdpairs)
    set_allowed_requests(Globals.security_level)

def set_security_level(action, cmdpairs):
    """If running client, set security level and restrict_path

    To find these settings, we must look at the action to see what is
    supposed to happen, and then look at the cmdpairs to see what end
    the client is on.
    """
    def islocal(cmdpair): return not cmdpair[0]
    def bothlocal(cp1, cp2): return islocal(cp1) and islocal(cp2)
    def bothremote(cp1, cp2): return not islocal(cp1) and not islocal(cp2)
    def getpath(cmdpair): return cmdpair[1]

    if Globals.server: return
    cp1 = cmdpairs[0]
    if len(cmdpairs) > 1: cp2 = cmdpairs[1]

    if action == "backup":
        if bothlocal(cp1, cp2) or bothremote(cp1, cp2):
            sec_level = "minimal"
            rdir = tempfile.gettempdir()
        elif islocal(cp1):
            sec_level = "read-only"
            rdir = getpath(cp1)
        else:
            assert islocal(cp2)
            sec_level = "update-only"
            rdir = getpath(cp2)
    elif action == "restore" or action == "restore-as-of":
        if len(cmdpairs) == 1 or bothlocal(cp1, cp2) or bothremote(cp1, cp2):
            sec_level = "minimal"
            rdir = tempfile.gettempdir()
        elif islocal(cp1):
            sec_level = "read-only"
            rdir = Main.restore_get_root(RPath(Globals.local_connection,
                                               getpath(cp1)))[0].path
        else:
            assert islocal(cp2)
            sec_level = "all"
            rdir = getpath(cp2)
    elif action == "mirror":
        if bothlocal(cp1, cp2) or bothremote(cp1, cp2):
            sec_level = "minimal"
            rdir = tempfile.gettempdir()
        elif islocal(cp1):
            sec_level = "read-only"
            rdir = getpath(cp1)
        else:
            assert islocal(cp2)
            sec_level = "all"
            rdir = getpath(cp2)
    elif (action == "test-server" or action == "list-increments" or
          action == "list-changed-since" or action == "calculate-average" or
          action == "remove-older-than"):
        sec_level = "minimal"
        rdir = tempfile.gettempdir()
    else: assert 0, "Unknown action %s" % action

    Globals.security_level = sec_level
    Globals.restrict_path = RPath(Globals.local_connection,
                                  rdir).normalize().path

def set_allowed_requests(sec_level):
    """Set the allowed requests list using the security level"""
    global allowed_requests
    if sec_level == "all": return
    allowed_requests = ["VirtualFile.readfromid", "VirtualFile.closebyid",
                        "Globals.get", "Globals.is_not_None",
                        "Globals.get_dict_val",
                        "Log.open_logfile_allconn",
                        "Log.close_logfile_allconn",
                        "SetConnections.add_redirected_conn",
                        "RedirectedRun",
                        "sys.stdout.write"]
    if sec_level == "minimal": pass
    elif sec_level == "read-only" or sec_level == "update-only":
        allowed_requests.extend(["C.make_file_dict",
                                 "os.getuid",
                                 "os.listdir",
                                 "Time.setcurtime_local",
                                 "Resume.ResumeCheck",
                                 "HLSourceStruct.split_initial_dsiter",
                                 "HLSourceStruct.get_diffs_and_finalize",
                                 "RPathStatic.gzip_open_local_read",
                                 "RPathStatic.open_local_read"])
        if sec_level == "update-only":
            allowed_requests. \
                extend(["Log.open_logfile_local", "Log.close_logfile_local",
                        "Log.close_logfile_allconn", "Log.log_to_file",
                        "SaveState.init_filenames",
                        "SaveState.touch_last_file",
                        "HLDestinationStruct.get_sigs",
                        "HLDestinationStruct.patch_w_datadir_writes",
                        "HLDestinationStruct.patch_and_finalize",
                        "HLDestinationStruct.patch_increment_and_finalize",
                        "Main.backup_touch_curmirror_local",
                        "Globals.ITRB.increment_stat"])
    if Globals.server:
        allowed_requests.extend(["SetConnections.init_connection_remote",
                                 "Log.setverbosity",
                                 "Log.setterm_verbosity",
                                 "Time.setprevtime_local",
                                 "FilenameMapping.set_init_quote_vals_local",
                                 "Globals.postset_regexp_local",
                                 "Globals.set_select",
                                 "HLSourceStruct.set_session_info",
                                 "HLDestinationStruct.set_session_info"])
def vet_request(request, arglist):
"""Examine request for security violations"""
#if Globals.server: sys.stderr.write(str(request) + "\n")
security_level = Globals.security_level
if Globals.restrict_path:
for arg in arglist:
if isinstance(arg, RPath): vet_rpath(arg)
if security_level == "all": return
if request.function_string in allowed_requests: return
if request.function_string == "Globals.set":
if Globals.server and arglist[0] not in disallowed_server_globals:
return
raise Violation("\nWarning Security Violation!\n"
"Bad request for function: %s\n"
"with arguments: %s\n" % (request.function_string,
arglist))
def vet_rpath(rpath):
"""Require rpath not to step outside retricted directory"""
if Globals.restrict_path and rpath.conn is Globals.local_connection:
normalized, restrict = rpath.normalize().path, Globals.restrict_path
components = normalized.split("/")
# 3 cases for restricted dir /usr/foo: /var, /usr/foobar, /usr/foo/..
if (not normalized.startswith(restrict) or
(len(normalized) > len(restrict) and
normalized[len(restrict)] != "/") or
".." in components):
raise Violation("\nWarning Security Violation!\n"
"Request to handle path %s\n"
"which doesn't appear to be within "
"restrict path %s.\n" % (normalized, restrict))
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Parse args and setup connections
The functions in this module are used once by Main to parse file
descriptions like bescoto@folly.stanford.edu::/usr/bin/ls and to set up
the related connections.
"""
# This is the schema that determines how rdiff-backup will open a
# pipe to the remote system. If the file is given as A::B, %s will
# be substituted with A in the schema.
__cmd_schema = 'ssh -C %s rdiff-backup --server'
__cmd_schema_no_compress = 'ssh %s rdiff-backup --server'
# This is a list of remote commands used to start the connections.
# The first is None because it is the local connection.
__conn_remote_cmds = [None]
class SetConnectionsException(Exception): pass
def get_cmd_pairs(arglist, remote_schema = None, remote_cmd = None):
"""Map the given file descriptions into command pairs
Command pairs are length-2 tuples. cmdpair[0] is
None iff it describes a local path, and cmdpair[1] is the path.
"""
global __cmd_schema
if remote_schema: __cmd_schema = remote_schema
elif not Globals.ssh_compression: __cmd_schema = __cmd_schema_no_compress
if not arglist: return []
desc_pairs = map(parse_file_desc, arglist)
if filter(lambda x: x[0], desc_pairs): # True if any host_info found
if remote_cmd:
Log.FatalError("The --remote-cmd flag is not compatible "
"with remote file descriptions.")
elif remote_schema:
Log("Remote schema option ignored - no remote file "
"descriptions.", 2)
cmdpairs = map(desc2cmd_pairs, desc_pairs)
if remote_cmd: # last file description gets remote_cmd
cmdpairs[-1] = (remote_cmd, cmdpairs[-1][1])
return cmdpairs
def cmdpair2rp(cmd_pair):
"""Return normalized RPath from cmd_pair (remote_cmd, filename)"""
cmd, filename = cmd_pair
if cmd: conn = init_connection(cmd)
else: conn = Globals.local_connection
return RPath(conn, filename).normalize()
def desc2cmd_pairs(desc_pair):
"""Return pair (remote_cmd, filename) from desc_pair"""
host_info, filename = desc_pair
if not host_info: return (None, filename)
else: return (fill_schema(host_info), filename)
def parse_file_desc(file_desc):
"""Parse file description returning pair (host_info, filename)
In other words, bescoto@folly.stanford.edu::/usr/bin/ls =>
("bescoto@folly.stanford.edu", "/usr/bin/ls"). The
complication is to allow for quoting of : by a \. If the
string is not separated by an unquoted ::, then the host_info is None.
"""
def check_len(i):
if i >= len(file_desc):
raise SetConnectionsException(
"Unexpected end to file description %s" % file_desc)
host_info_list, i, last_was_quoted = [], 0, None
while 1:
if i == len(file_desc):
return (None, file_desc)
if file_desc[i] == '\\':
i = i+1
check_len(i)
last_was_quoted = 1
elif (file_desc[i] == ":" and i > 0 and file_desc[i-1] == ":"
and not last_was_quoted):
host_info_list.pop() # Remove last colon from name
break
else: last_was_quoted = None
host_info_list.append(file_desc[i])
i = i+1
check_len(i+1)
return ("".join(host_info_list), file_desc[i+1:])
def fill_schema(host_info):
"""Fills host_info into the schema and returns remote command"""
return __cmd_schema % host_info
def init_connection(remote_cmd):
"""Run remote_cmd, register connection, and then return it
If remote_cmd is None, then the local connection will be
returned. This also updates some settings on the remote side,
like global settings, its connection number, and verbosity.
"""
if not remote_cmd: return Globals.local_connection
Log("Executing " + remote_cmd, 4)
stdin, stdout = os.popen2(remote_cmd)
conn_number = len(Globals.connections)
conn = PipeConnection(stdout, stdin, conn_number)
check_connection_version(conn, remote_cmd)
Log("Registering connection %d" % conn_number, 7)
init_connection_routing(conn, conn_number, remote_cmd)
init_connection_settings(conn)
return conn
def check_connection_version(conn, remote_cmd):
"""Log warning if connection has different version"""
try: remote_version = conn.Globals.get('version')
except ConnectionReadError, exception:
Log.FatalError("""%s
Couldn't start up the remote connection by executing
%s
Remember that, under the default settings, rdiff-backup must be
installed in the PATH on the remote system. See the man page for more
information.""" % (exception, remote_cmd))
if remote_version != Globals.version:
Log("Warning: Local version %s does not match remote version %s."
% (Globals.version, remote_version), 2)
def init_connection_routing(conn, conn_number, remote_cmd):
"""Called by init_connection, establish routing, conn dict"""
Globals.connection_dict[conn_number] = conn
conn.SetConnections.init_connection_remote(conn_number)
for other_remote_conn in Globals.connections[1:]:
conn.SetConnections.add_redirected_conn(
other_remote_conn.conn_number)
other_remote_conn.SetConnections.add_redirected_conn(conn_number)
Globals.connections.append(conn)
__conn_remote_cmds.append(remote_cmd)
def init_connection_settings(conn):
"""Tell new conn about log settings and updated globals"""
conn.Log.setverbosity(Log.verbosity)
conn.Log.setterm_verbosity(Log.term_verbosity)
for setting_name in Globals.changed_settings:
conn.Globals.set(setting_name, Globals.get(setting_name))
FilenameMapping.set_init_quote_vals()
def init_connection_remote(conn_number):
"""Run on server side to tell self that have given conn_number"""
Globals.connection_number = conn_number
Globals.local_connection.conn_number = conn_number
Globals.connection_dict[0] = Globals.connections[1]
Globals.connection_dict[conn_number] = Globals.local_connection
def add_redirected_conn(conn_number):
"""Run on server side - tell about redirected connection"""
Globals.connection_dict[conn_number] = \
RedirectedConnection(conn_number)
def UpdateGlobal(setting_name, val):
"""Update value of global variable across all connections"""
for conn in Globals.connections:
conn.Globals.set(setting_name, val)
def BackupInitConnections(reading_conn, writing_conn):
"""Backup specific connection initialization"""
reading_conn.Globals.set("isbackup_reader", 1)
writing_conn.Globals.set("isbackup_writer", 1)
UpdateGlobal("backup_reader", reading_conn)
UpdateGlobal("backup_writer", writing_conn)
if (Globals.change_source_perms and
reading_conn.Globals.get("process_uid") == 0):
Log("Warning: --change_source_perms should usually not be used when\n"
"the reading connection is running as root, because root can\n"
"read all files regardless of their permissions.", 2)
def CloseConnections():
"""Close all connections. Run by client"""
assert not Globals.server
for conn in Globals.connections: conn.quit()
del Globals.connections[1:] # Only leave local connection
Globals.connection_dict = {0: Globals.local_connection}
Globals.backup_reader = Globals.isbackup_reader = \
Globals.backup_writer = Globals.isbackup_writer = None
def TestConnections():
"""Test connections, printing results"""
if len(Globals.connections) == 1: print "No remote connections specified"
else:
for i in range(1, len(Globals.connections)): test_connection(i)
def test_connection(conn_number):
"""Test connection. conn_number 0 is the local connection"""
print "Testing server started by: ", __conn_remote_cmds[conn_number]
conn = Globals.connections[conn_number]
try:
assert conn.pow(2,3) == 8
assert conn.os.path.join("a", "b") == "a/b"
version = conn.reval("lambda: Globals.version")
except:
sys.stderr.write("Server tests failed\n")
raise
if not version == Globals.version:
print """Server may work, but there is a version mismatch:
Local version: %s
Remote version: %s""" % (Globals.version, version)
else: print "Server OK"
from log import *
from rpath import *
from connection import *
import Globals, FilenameMapping
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Provide time related exceptions and functions"""
import time, types, re
import Globals
class TimeException(Exception): pass
_interval_conv_dict = {"s": 1, "m": 60, "h": 3600, "D": 86400,
"W": 7*86400, "M": 30*86400, "Y": 365*86400}
_integer_regexp = re.compile("^[0-9]+$")
_interval_regexp = re.compile("^([0-9]+)([smhDWMY])")
_genstr_date_regexp1 = re.compile("^(?P<year>[0-9]{4})[-/]"
"(?P<month>[0-9]{1,2})[-/](?P<day>[0-9]{1,2})$")
_genstr_date_regexp2 = re.compile("^(?P<month>[0-9]{1,2})[-/]"
"(?P<day>[0-9]{1,2})[-/](?P<year>[0-9]{4})$")
curtime = curtimestr = None
been_awake_since = None # stores last time sleep() was run
def setcurtime(curtime = None):
"""Sets the current time in curtime and curtimestr on all systems"""
t = curtime or time.time()
for conn in Globals.connections:
conn.Time.setcurtime_local(t)
def setcurtime_local(timeinseconds):
"""Only set the current time locally"""
global curtime, curtimestr
curtime, curtimestr = timeinseconds, timetostring(timeinseconds)
def setprevtime(timeinseconds):
"""Sets the previous inc time in prevtime and prevtimestr"""
assert timeinseconds > 0, timeinseconds
timestr = timetostring(timeinseconds)
for conn in Globals.connections:
conn.Time.setprevtime_local(timeinseconds, timestr)
def setprevtime_local(timeinseconds, timestr):
"""Like setprevtime but only set the local version"""
global prevtime, prevtimestr
prevtime, prevtimestr = timeinseconds, timestr
def timetostring(timeinseconds):
"""Return w3 datetime compliant listing of timeinseconds"""
return time.strftime("%Y-%m-%dT%H" + Globals.time_separator +
"%M" + Globals.time_separator + "%S",
time.localtime(timeinseconds)) + gettzd()
def stringtotime(timestring):
"""Return time in seconds from w3 timestring
If there is an error parsing the string, or it doesn't look
like a w3 datetime string, return None.
"""
try:
date, daytime = timestring[:19].split("T")
year, month, day = map(int, date.split("-"))
hour, minute, second = map(int,
daytime.split(Globals.time_separator))
assert 1900 < year < 2100, year
assert 1 <= month <= 12
assert 1 <= day <= 31
assert 0 <= hour <= 23
assert 0 <= minute <= 59
assert 0 <= second <= 61 # leap seconds
timetuple = (year, month, day, hour, minute, second, -1, -1, -1)
if time.daylight:
utc_in_secs = time.mktime(timetuple) - time.altzone
else: utc_in_secs = time.mktime(timetuple) - time.timezone
return long(utc_in_secs) + tzdtoseconds(timestring[19:])
except (TypeError, ValueError, AssertionError): return None
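# For instance (assuming the default ":" time separator),
# stringtotime(timetostring(t)) == long(t) should hold for any epoch
# second t, since timetostring emits the same w3 format parsed here.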
def timetopretty(timeinseconds):
"""Return pretty version of time"""
return time.asctime(time.localtime(timeinseconds))
def stringtopretty(timestring):
"""Return pretty version of time given w3 time string"""
return timetopretty(stringtotime(timestring))
def inttopretty(seconds):
"""Convert num of seconds to readable string like "2 hours"."""
partlist = []
hours, seconds = divmod(seconds, 3600)
if hours > 1: partlist.append("%d hours" % hours)
elif hours == 1: partlist.append("1 hour")
minutes, seconds = divmod(seconds, 60)
if minutes > 1: partlist.append("%d minutes" % minutes)
elif minutes == 1: partlist.append("1 minute")
if seconds == 1: partlist.append("1 second")
elif not partlist or seconds > 1:
if isinstance(seconds, int) or isinstance(seconds, long):
partlist.append("%s seconds" % seconds)
else: partlist.append("%.2f seconds" % seconds)
return " ".join(partlist)
def intstringtoseconds(interval_string):
"""Convert a string expressing an interval (e.g. "4D2s") to seconds"""
def error():
raise TimeException("""Bad interval string "%s"
Intervals are specified like 2Y (2 years) or 2h30m (2.5 hours). The
allowed special characters are s, m, h, D, W, M, and Y. See the man
page for more information.
""" % interval_string)
if len(interval_string) < 2: error()
total = 0
while interval_string:
match = _interval_regexp.match(interval_string)
if not match: error()
num, ext = int(match.group(1)), match.group(2)
if not ext in _interval_conv_dict or num < 0: error()
total += num*_interval_conv_dict[ext]
interval_string = interval_string[match.end(0):]
return total
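# Worked examples: intstringtoseconds("2h30m") == 2*3600 + 30*60 == 9000,
# and intstringtoseconds("4D2s") == 4*86400 + 2 == 345602.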
def gettzd():
"""Return w3's timezone identification string.
Expressed as [+/-]hh:mm. For instance, PST is -08:00. The zone
coincides with what localtime(), etc., use.
"""
if time.daylight: offset = -1 * time.altzone/60
else: offset = -1 * time.timezone/60
if offset > 0: prefix = "+"
elif offset < 0: prefix = "-"
else: return "Z" # time is already in UTC
hours, minutes = divmod(abs(offset), 60)
assert 0 <= hours <= 23
assert 0 <= minutes <= 59
return "%s%02d%s%02d" % (prefix, hours,
Globals.time_separator, minutes)
def tzdtoseconds(tzd):
"""Given w3 compliant TZD, return how far ahead UTC is"""
if tzd == "Z": return 0
assert len(tzd) == 6 # only accept forms like +08:00 for now
assert (tzd[0] == "-" or tzd[0] == "+") and \
tzd[3] == Globals.time_separator
return -60 * (60 * int(tzd[:3]) + int(tzd[4:]))
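# Example values (with the default ":" separator): tzdtoseconds("Z") == 0,
# and tzdtoseconds("-08:00") == -60 * (60*(-8) + 0) == 28800, i.e. UTC is
# 8 hours ahead of PST.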
def cmp(time1, time2):
"""Compare time1 and time2 and return -1, 0, or 1"""
if type(time1) is types.StringType:
time1 = stringtotime(time1)
assert time1 is not None
if type(time2) is types.StringType:
time2 = stringtotime(time2)
assert time2 is not None
if time1 < time2: return -1
elif time1 == time2: return 0
else: return 1
def sleep(sleep_ratio):
"""Sleep for period to maintain given sleep_ratio
On my system sleeping for periods less than 1/20th of a second
doesn't seem to work very accurately, so accumulate at least that
much time before sleeping.
"""
global been_awake_since
if been_awake_since is None: # first running
been_awake_since = time.time()
else:
elapsed_time = time.time() - been_awake_since
sleep_time = elapsed_time * (sleep_ratio/(1-sleep_ratio))
if sleep_time >= 0.05:
time.sleep(sleep_time)
been_awake_since = time.time()
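# E.g. with sleep_ratio 0.25 the process sleeps elapsed * (0.25/0.75),
# i.e. one second for every three spent awake, once the computed sleep
# time reaches the 0.05s threshold.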
def genstrtotime(timestr, curtime = None):
"""Convert a generic time string to a time in seconds"""
if curtime is None: curtime = globals()['curtime']
if timestr == "now": return curtime
def error():
raise TimeException("""Bad time string "%s"
The acceptable time strings are intervals (like "3D64s"), w3-datetime
strings, like "2002-04-26T04:22:01-07:00" (strings like
"2002-04-26T04:22:01" are also acceptable - rdiff-backup will use the
current time zone), or ordinary dates like 2/4/1997 or 2001-04-23
(various combinations are acceptable, but the month always precedes
the day).""" % timestr)
# Test for straight integer
if _integer_regexp.search(timestr): return int(timestr)
# Test for w3-datetime format, possibly missing tzd
t = stringtotime(timestr) or stringtotime(timestr+gettzd())
if t: return t
try: # test for an interval, like "2 days ago"
return curtime - intstringtoseconds(timestr)
except TimeException: pass
# Now check for dates like 2001/3/23
match = _genstr_date_regexp1.search(timestr) or \
_genstr_date_regexp2.search(timestr)
if not match: error()
timestr = "%s-%02d-%02dT00:00:00%s" % (match.group('year'),
int(match.group('month')), int(match.group('day')), gettzd())
t = stringtotime(timestr)
if t: return t
else: error()
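# A couple of worked cases (curtime hypothetical):
# genstrtotime("10D", curtime=1000000) returns 1000000 - 864000 == 136000,
# and genstrtotime("2002-04-26") is expanded to "2002-04-26T00:00:00" plus
# the local tzd before being handed to stringtotime.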
/* ----------------------------------------------------------------------- *
*
* Copyright 2002 Ben Escoto
*
* This file is part of rdiff-backup.
*
* rdiff-backup is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, Inc., 675 Mass Ave,
* Cambridge MA 02139, USA; either version 2 of the License, or (at
* your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
#include <Python.h>
#include <rsync.h>
#define RS_JOB_BLOCKSIZE 65536
static PyObject *librsyncError;
/* Sets python error string from result */
static void
_librsync_seterror(rs_result result, char *location)
{
char error_string[200];
sprintf(error_string, "librsync error %d while in %s", result, location);
PyErr_SetString(librsyncError, error_string);
}
/* --------------- SigMaker Object for incremental signatures */
staticforward PyTypeObject _librsync_SigMakerType;
typedef struct {
PyObject_HEAD
PyObject *x_attr;
rs_job_t *sig_job;
} _librsync_SigMakerObject;
static PyObject*
_librsync_new_sigmaker(PyObject* self, PyObject* args)
{
_librsync_SigMakerObject* sm;
if (!PyArg_ParseTuple(args,":new_sigmaker"))
return NULL;
sm = PyObject_New(_librsync_SigMakerObject, &_librsync_SigMakerType);
if (sm == NULL) return NULL;
sm->x_attr = NULL;
sm->sig_job = rs_sig_begin((size_t)RS_DEFAULT_BLOCK_LEN,
(size_t)RS_DEFAULT_STRONG_LEN);
return (PyObject*)sm;
}
static void
_librsync_sigmaker_dealloc(PyObject* self)
{
rs_job_free(((_librsync_SigMakerObject *)self)->sig_job);
PyObject_Del(self);
}
/* Take an input string, and generate a signature from it. The output
will be a triple (done, bytes_used, signature_string), where done
is true iff there is no more data coming and bytes_used is the
number of bytes of the input string processed.
*/
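/* A sketch of the intended driving loop from Python (variable names
   hypothetical; infile is any readable file object):
       sm = _librsync.new_sigmaker()
       sig_parts = []
       while 1:
           data = infile.read(_librsync.RS_JOB_BLOCKSIZE)
           done, bytes_used, sig_part = sm.cycle(data)
           sig_parts.append(sig_part)
           if done: break
   Any unconsumed input (len(data) - bytes_used) would need re-feeding. */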
static PyObject *
_librsync_sigmaker_cycle(_librsync_SigMakerObject *self, PyObject *args)
{
char *inbuf, outbuf[RS_JOB_BLOCKSIZE];
long inbuf_length;
rs_buffers_t buf;
rs_result result;
if (!PyArg_ParseTuple(args, "s#:cycle", &inbuf, &inbuf_length))
return NULL;
buf.next_in = inbuf;
buf.avail_in = (size_t)inbuf_length;
buf.next_out = outbuf;
buf.avail_out = (size_t)RS_JOB_BLOCKSIZE;
buf.eof_in = (inbuf_length == 0);
result = rs_job_iter(self->sig_job, &buf);
if (result != RS_DONE && result != RS_BLOCKED) {
_librsync_seterror(result, "signature cycle");
return NULL;
}
return Py_BuildValue("(ils#)", (result == RS_DONE),
inbuf_length - (long)buf.avail_in,
outbuf, RS_JOB_BLOCKSIZE - (long)buf.avail_out);
}
static PyMethodDef _librsync_sigmaker_methods[] = {
{"cycle", (PyCFunction)_librsync_sigmaker_cycle, METH_VARARGS},
{NULL, NULL, 0, NULL} /* sentinel */
};
static PyObject *
_librsync_sigmaker_getattr(_librsync_SigMakerObject *sm,
char *name)
{
if (sm->x_attr != NULL) {
PyObject *v = PyDict_GetItemString(sm->x_attr, name);
if (v != NULL) {
Py_INCREF(v);
return v;
}
}
return Py_FindMethod(_librsync_sigmaker_methods, (PyObject *)sm, name);
}
static int
_librsync_sigmaker_setattr(_librsync_SigMakerObject *sm,
char *name, PyObject *v)
{
if (sm->x_attr == NULL) {
sm->x_attr = PyDict_New();
if (sm->x_attr == NULL) return -1;
}
if (v == NULL) {
int rv = PyDict_DelItemString(sm->x_attr, name);
if (rv < 0)
PyErr_SetString(PyExc_AttributeError,
"delete non-existing sigmaker attribute");
return rv;
}
else return PyDict_SetItemString(sm->x_attr, name, v);
}
static PyTypeObject _librsync_SigMakerType = {
PyObject_HEAD_INIT(NULL)
0,
"sigmaker",
sizeof(_librsync_SigMakerObject),
0,
_librsync_sigmaker_dealloc, /*tp_dealloc*/
0, /*tp_print*/
(getattrfunc)_librsync_sigmaker_getattr, /*tp_getattr*/
(setattrfunc)_librsync_sigmaker_setattr, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
};
/* --------------- DeltaMaker Object for incremental deltas */
staticforward PyTypeObject _librsync_DeltaMakerType;
typedef struct {
PyObject_HEAD
PyObject *x_attr;
rs_job_t *delta_job;
rs_signature_t *sig_ptr;
} _librsync_DeltaMakerObject;
/* Call with the entire signature loaded into one big string */
static PyObject*
_librsync_new_deltamaker(PyObject* self, PyObject* args)
{
_librsync_DeltaMakerObject* dm;
char *sig_string, outbuf[RS_JOB_BLOCKSIZE];
long sig_length;
rs_job_t *sig_loader;
rs_signature_t *sig_ptr;
rs_buffers_t buf;
rs_result result;
if (!PyArg_ParseTuple(args,"s#:new_deltamaker", &sig_string, &sig_length))
return NULL;
dm = PyObject_New(_librsync_DeltaMakerObject, &_librsync_DeltaMakerType);
if (dm == NULL) return NULL;
dm->x_attr = NULL;
/* Put signature at sig_ptr and build hash */
sig_loader = rs_loadsig_begin(&sig_ptr);
buf.next_in = sig_string;
buf.avail_in = (size_t)sig_length;
buf.next_out = outbuf;
buf.avail_out = (size_t)RS_JOB_BLOCKSIZE;
buf.eof_in = 1;
result = rs_job_iter(sig_loader, &buf);
rs_job_free(sig_loader);
if (result != RS_DONE) {
_librsync_seterror(result, "delta rs_signature_t builder");
return NULL;
}
if ((result = rs_build_hash_table(sig_ptr)) != RS_DONE) {
_librsync_seterror(result, "delta rs_build_hash_table");
return NULL;
}
dm->sig_ptr = sig_ptr;
dm->delta_job = rs_delta_begin(sig_ptr);
return (PyObject*)dm;
}
static void
_librsync_deltamaker_dealloc(PyObject* self)
{
_librsync_DeltaMakerObject *dm = (_librsync_DeltaMakerObject *)self;
rs_signature_t *sig_ptr = dm->sig_ptr;
rs_free_sumset(sig_ptr);
rs_job_free(dm->delta_job);
PyObject_Del(self);
}
/* Take a chunk of the new file in an input string, and return a
triple (done, bytes_used, delta_string), where done is true iff no
more data is coming and bytes_used is the number of bytes of the
input string processed.
*/
static PyObject *
_librsync_deltamaker_cycle(_librsync_DeltaMakerObject *self, PyObject *args)
{
char *inbuf, outbuf[RS_JOB_BLOCKSIZE];
long inbuf_length;
rs_buffers_t buf;
rs_result result;
if (!PyArg_ParseTuple(args, "s#:cycle", &inbuf, &inbuf_length))
return NULL;
buf.next_in = inbuf;
buf.avail_in = (size_t)inbuf_length;
buf.next_out = outbuf;
buf.avail_out = (size_t)RS_JOB_BLOCKSIZE;
buf.eof_in = (inbuf_length == 0);
result = rs_job_iter(self->delta_job, &buf);
if (result != RS_DONE && result != RS_BLOCKED) {
_librsync_seterror(result, "delta cycle");
return NULL;
}
return Py_BuildValue("(ils#)", (result == RS_DONE),
inbuf_length - (long)buf.avail_in,
outbuf, RS_JOB_BLOCKSIZE - (long)buf.avail_out);
}
static PyMethodDef _librsync_deltamaker_methods[] = {
{"cycle", (PyCFunction)_librsync_deltamaker_cycle, METH_VARARGS},
{NULL, NULL, 0, NULL} /* sentinel */
};
static PyObject *
_librsync_deltamaker_getattr(_librsync_DeltaMakerObject *dm, char *name)
{
if (dm->x_attr != NULL) {
PyObject *v = PyDict_GetItemString(dm->x_attr, name);
if (v != NULL) {
Py_INCREF(v);
return v;
}
}
return Py_FindMethod(_librsync_deltamaker_methods, (PyObject *)dm, name);
}
static int
_librsync_deltamaker_setattr(_librsync_DeltaMakerObject *dm,
char *name, PyObject *v)
{
if (dm->x_attr == NULL) {
dm->x_attr = PyDict_New();
if (dm->x_attr == NULL) return -1;
}
if (v == NULL) {
int rv = PyDict_DelItemString(dm->x_attr, name);
if (rv < 0)
PyErr_SetString(PyExc_AttributeError,
"delete non-existing deltamaker attribute");
return rv;
}
else return PyDict_SetItemString(dm->x_attr, name, v);
}
static PyTypeObject _librsync_DeltaMakerType = {
PyObject_HEAD_INIT(NULL)
0,
"deltamaker",
sizeof(_librsync_DeltaMakerObject),
0,
_librsync_deltamaker_dealloc, /*tp_dealloc*/
0, /*tp_print*/
(getattrfunc)_librsync_deltamaker_getattr, /*tp_getattr*/
(setattrfunc)_librsync_deltamaker_setattr, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
};
/* --------------- PatchMaker Object for incremental patching */
staticforward PyTypeObject _librsync_PatchMakerType;
typedef struct {
PyObject_HEAD
PyObject *x_attr;
rs_job_t *patch_job;
PyObject *basis_file;
} _librsync_PatchMakerObject;
/* Call with the basis file */
static PyObject*
_librsync_new_patchmaker(PyObject* self, PyObject* args)
{
_librsync_PatchMakerObject* pm;
PyObject *python_file;
FILE *cfile;
if (!PyArg_ParseTuple(args, "O:new_patchmaker", &python_file))
return NULL;
if (!PyFile_Check(python_file)) {
PyErr_SetString(PyExc_TypeError, "Need true file object");
return NULL;
}
Py_INCREF(python_file);
pm = PyObject_New(_librsync_PatchMakerObject, &_librsync_PatchMakerType);
if (pm == NULL) return NULL;
pm->x_attr = NULL;
pm->basis_file = python_file;
cfile = PyFile_AsFile(python_file);
pm->patch_job = rs_patch_begin(rs_file_copy_cb, cfile);
return (PyObject*)pm;
}
static void
_librsync_patchmaker_dealloc(PyObject* self)
{
_librsync_PatchMakerObject *pm = (_librsync_PatchMakerObject *)self;
Py_DECREF(pm->basis_file);
rs_job_free(pm->patch_job);
PyObject_Del(self);
}
/* Take a chunk of the delta file in an input string, and return a
triple (done, bytes_used, patched_string), where done is true iff
there is no more data coming out and bytes_used is the number of
bytes of the input string processed.
*/
static PyObject *
_librsync_patchmaker_cycle(_librsync_PatchMakerObject *self, PyObject *args)
{
char *inbuf, outbuf[RS_JOB_BLOCKSIZE];
long inbuf_length;
rs_buffers_t buf;
rs_result result;
if (!PyArg_ParseTuple(args, "s#:cycle", &inbuf, &inbuf_length))
return NULL;
buf.next_in = inbuf;
buf.avail_in = (size_t)inbuf_length;
buf.next_out = outbuf;
buf.avail_out = (size_t)RS_JOB_BLOCKSIZE;
buf.eof_in = (inbuf_length == 0);
result = rs_job_iter(self->patch_job, &buf);
if (result != RS_DONE && result != RS_BLOCKED) {
_librsync_seterror(result, "patch cycle");
return NULL;
}
return Py_BuildValue("(ils#)", (result == RS_DONE),
inbuf_length - (long)buf.avail_in,
outbuf, RS_JOB_BLOCKSIZE - (long)buf.avail_out);
}
static PyMethodDef _librsync_patchmaker_methods[] = {
{"cycle", (PyCFunction)_librsync_patchmaker_cycle, METH_VARARGS},
{NULL, NULL, 0, NULL} /* sentinel */
};
static PyObject *
_librsync_patchmaker_getattr(_librsync_PatchMakerObject *pm, char *name)
{
if (pm->x_attr != NULL) {
PyObject *v = PyDict_GetItemString(pm->x_attr, name);
if (v != NULL) {
Py_INCREF(v);
return v;
}
}
return Py_FindMethod(_librsync_patchmaker_methods, (PyObject *)pm, name);
}
static int
_librsync_patchmaker_setattr(_librsync_PatchMakerObject *pm,
char *name, PyObject *v)
{
if (pm->x_attr == NULL) {
pm->x_attr = PyDict_New();
if (pm->x_attr == NULL) return -1;
}
if (v == NULL) {
int rv = PyDict_DelItemString(pm->x_attr, name);
if (rv < 0)
PyErr_SetString(PyExc_AttributeError,
"delete non-existing patchmaker attribute");
return rv;
}
else return PyDict_SetItemString(pm->x_attr, name, v);
}
static PyTypeObject _librsync_PatchMakerType = {
PyObject_HEAD_INIT(NULL)
0,
"patchmaker",
sizeof(_librsync_PatchMakerObject),
0,
_librsync_patchmaker_dealloc, /*tp_dealloc*/
0, /*tp_print*/
(getattrfunc)_librsync_patchmaker_getattr, /*tp_getattr*/
(setattrfunc)_librsync_patchmaker_setattr, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
};
/* --------------- _librsync module definition */
static PyMethodDef _librsyncMethods[] = {
{"new_sigmaker", _librsync_new_sigmaker, METH_VARARGS,
"Return a sigmaker object, for finding the signature of an object"},
{"new_deltamaker", _librsync_new_deltamaker, METH_VARARGS,
"Return a deltamaker object, for computing deltas"},
{"new_patchmaker", _librsync_new_patchmaker, METH_VARARGS,
"Return a patchmaker object, for patching basis files"},
{NULL, NULL, 0, NULL}
};
void init_librsync(void)
{
PyObject *m, *d;
_librsync_SigMakerType.ob_type = &PyType_Type;
_librsync_DeltaMakerType.ob_type = &PyType_Type;
_librsync_PatchMakerType.ob_type = &PyType_Type;
m = Py_InitModule("_librsync", _librsyncMethods);
d = PyModule_GetDict(m);
librsyncError = PyErr_NewException("_librsync.librsyncError", NULL, NULL);
PyDict_SetItemString(d, "librsyncError", librsyncError);
PyDict_SetItemString(d, "RS_JOB_BLOCKSIZE",
Py_BuildValue("l", (long)RS_JOB_BLOCKSIZE));
PyDict_SetItemString(d, "RS_DEFAULT_BLOCK_LEN",
Py_BuildValue("l", (long)RS_DEFAULT_BLOCK_LEN));
}
/* ----------------------------------------------------------------------- *
*
* Copyright 2002 Ben Escoto
*
* This file is part of rdiff-backup.
*
* rdiff-backup is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation, Inc., 675 Mass Ave,
* Cambridge MA 02139, USA; either version 2 of the License, or (at
* your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
#include <Python.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>
/* choose the appropriate stat and fstat functions and return structs */
/* This code taken from Python's posixmodule.c */
#undef STAT
#if defined(MS_WIN64) || defined(MS_WIN32)
# define STAT _stati64
# define FSTAT _fstati64
# define STRUCT_STAT struct _stati64
#else
# define STAT stat
# define FSTAT fstat
# define STRUCT_STAT struct stat
#endif
static PyObject *UnknownFileTypeError;
static PyObject *c_make_file_dict(PyObject *self, PyObject *args);
static PyObject *long2str(PyObject *self, PyObject *args);
static PyObject *str2long(PyObject *self, PyObject *args);
/* Turn a stat structure into a python dictionary. The preprocessor
stuff taken from Python's posixmodule.c */
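/* For a regular file the returned dictionary looks roughly like
   {"type": "reg", "size": ..., "perms": 0644, "uid": ..., "gid": ...,
    "inode": ..., "devloc": ..., "nlink": 1, "mtime": ..., "atime": ...}
   (values illustrative; see the Py_BuildValue calls below). */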
static PyObject *c_make_file_dict(self, args)
PyObject *self;
PyObject *args;
{
PyObject *size, *inode, *mtime, *atime, *devloc, *return_val;
char *filename, filetype[5];
STRUCT_STAT sbuf;
long int mode, perms;
int res;
if (!PyArg_ParseTuple(args, "s", &filename)) return NULL;
Py_BEGIN_ALLOW_THREADS
res = lstat(filename, &sbuf);
Py_END_ALLOW_THREADS
if (res != 0) {
if (errno == ENOENT || errno == ENOTDIR)
return Py_BuildValue("{s:s}", "type", NULL);
else {
PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
return NULL;
}
}
#ifdef HAVE_LARGEFILE_SUPPORT
size = PyLong_FromLongLong((LONG_LONG)sbuf.st_size);
inode = PyLong_FromLongLong((LONG_LONG)sbuf.st_ino);
#else
size = PyInt_FromLong(sbuf.st_size);
inode = PyInt_FromLong((long)sbuf.st_ino);
#endif
mode = (long)sbuf.st_mode;
perms = mode & 07777;
#if defined(HAVE_LONG_LONG) && !defined(MS_WINDOWS)
devloc = PyLong_FromLongLong((LONG_LONG)sbuf.st_dev);
#else
devloc = PyInt_FromLong((long)sbuf.st_dev);
#endif
#if SIZEOF_TIME_T > SIZEOF_LONG
mtime = PyLong_FromLongLong((LONG_LONG)sbuf.st_mtime);
atime = PyLong_FromLongLong((LONG_LONG)sbuf.st_atime);
#else
mtime = PyInt_FromLong((long)sbuf.st_mtime);
atime = PyInt_FromLong((long)sbuf.st_atime);
#endif
/* Build return dictionary from stat struct */
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISSOCK(mode) || S_ISFIFO(mode)) {
/* Regular files, directories, sockets, and fifos */
if (S_ISREG(mode)) strcpy(filetype, "reg");
else if (S_ISDIR(mode)) strcpy(filetype, "dir");
else if (S_ISSOCK(mode)) strcpy(filetype, "sock");
else strcpy(filetype, "fifo");
return_val = Py_BuildValue("{s:s,s:O,s:l,s:l,s:l,s:O,s:O,s:l,s:O,s:O}",
"type", filetype,
"size", size,
"perms", perms,
"uid", (long)sbuf.st_uid,
"gid", (long)sbuf.st_gid,
"inode", inode,
"devloc", devloc,
"nlink", (long)sbuf.st_nlink,
"mtime", mtime,
"atime", atime);
} else if (S_ISLNK(mode)) {
/* Symbolic links */
char linkname[1024];
int len_link = readlink(filename, linkname, 1023);
if (len_link < 0) {
PyErr_SetFromErrno(PyExc_OSError);
return_val = NULL;
} else {
linkname[len_link] = '\0';
return_val = Py_BuildValue("{s:s,s:O,s:l,s:l,s:l,s:O,s:O,s:l,s:s}",
"type", "sym",
"size", size,
"perms", perms,
"uid", (long)sbuf.st_uid,
"gid", (long)sbuf.st_gid,
"inode", inode,
"devloc", devloc,
"nlink", (long)sbuf.st_nlink,
"linkname", linkname);
}
} else if (S_ISCHR(mode) || S_ISBLK(mode)) {
/* Device files */
char devtype[2];
#if defined(HAVE_LONG_LONG) && !defined(MS_WINDOWS)
LONG_LONG devnums = (LONG_LONG)sbuf.st_rdev;
PyObject *major_num = PyLong_FromLongLong(major(devnums));
#else
long int devnums = (long)sbuf.st_rdev;
PyObject *major_num = PyInt_FromLong(devnums >> 8);
#endif
int minor_num = (int)(minor(devnums));
if (S_ISCHR(mode)) strcpy(devtype, "c");
else strcpy(devtype, "b");
return_val = Py_BuildValue("{s:s,s:O,s:l,s:l,s:l,s:O,s:O,s:l,s:N}",
"type", "dev",
"size", size,
"perms", perms,
"uid", (long)sbuf.st_uid,
"gid", (long)sbuf.st_gid,
"inode", inode,
"devloc", devloc,
"nlink", (long)sbuf.st_nlink,
"devnums", Py_BuildValue("(s,O,i)", devtype,
major_num, minor_num));
Py_DECREF(major_num);
} else {
/* Unrecognized file type - raise exception */
PyErr_SetString(UnknownFileTypeError, filename);
return_val = NULL;
}
Py_DECREF(size);
Py_DECREF(inode);
Py_DECREF(devloc);
Py_DECREF(mtime);
Py_DECREF(atime);
return return_val;
}
/* Convert python long into 7 byte string */
static PyObject *long2str(self, args)
PyObject *self;
PyObject *args;
{
unsigned char s[7];
PyLongObject *pylong;
if (!PyArg_ParseTuple(args, "O!", &PyLong_Type, &pylong)) return NULL;
if (_PyLong_AsByteArray(pylong, s, 7, 0, 0) != 0) return NULL;
else return Py_BuildValue("s#", s, 7);
}
/* Reverse of above; convert 7 byte string into python long */
static PyObject *str2long(self, args)
PyObject *self;
PyObject *args;
{
unsigned char *s;
int ssize;
if (!PyArg_ParseTuple(args, "s#", &s, &ssize)) return NULL;
if (ssize != 7) {
PyErr_SetString(PyExc_TypeError, "Single argument must be 7 char string");
return NULL;
}
return _PyLong_FromByteArray(s, 7, 0, 0);
}
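/* Round-trip example (big-endian, unsigned): C.long2str(258L) yields
   "\x00\x00\x00\x00\x00\x01\x02", and C.str2long of that string returns
   258L again, since _PyLong_AsByteArray/_PyLong_FromByteArray are called
   with little_endian=0 and is_signed=0. */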
static PyMethodDef CMethods[] = {
{"make_file_dict", c_make_file_dict, METH_VARARGS,
"Make dictionary from file stat"},
{"long2str", long2str, METH_VARARGS, "Convert python long to 7 byte string"},
{"str2long", str2long, METH_VARARGS, "Convert 7 byte string to python long"},
{NULL, NULL, 0, NULL}
};
void initC(void)
{
PyObject *m, *d;
m = Py_InitModule("C", CMethods);
d = PyModule_GetDict(m);
UnknownFileTypeError = PyErr_NewException("C.UnknownFileTypeError",
NULL, NULL);
PyDict_SetItemString(d, "UnknownFileTypeError", UnknownFileTypeError);
}
#!/usr/bin/env python
import sys, os
from distutils.core import setup, Extension
assert len(sys.argv) == 1
sys.argv.append("build")
setup(name="CModule",
version="0.9.0",
description="rdiff-backup's C component",
ext_modules=[Extension("C", ["cmodule.c"]),
Extension("_librsync", ["_librsyncmodule.c"],
libraries=["rsync"])])
assert not os.system("mv build/lib.linux-i686-2.2/C.so .")
assert not os.system("mv build/lib.linux-i686-2.2/_librsync.so .")
assert not os.system("rm -rf build")
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Support code for remote execution and data transfer"""
from __future__ import generators
import types, os, tempfile, cPickle, shutil, traceback, pickle, socket
class ConnectionError(Exception): pass
class ConnectionReadError(ConnectionError): pass
class ConnectionQuit(Exception): pass
class Connection:
"""Connection class - represent remote execution
The idea is that, if c is an instance of this class, c.foo will
return the object on the remote side. For functions, c.foo will
return a function that, when called, executes foo on the remote
side, sending over the arguments and sending back the result.
"""
def __repr__(self): return self.__str__()
def __str__(self): return "Simple Connection" # override later
class LocalConnection(Connection):
"""Local connection
This is a dummy connection class, so that LC.foo just evaluates to
foo using global scope.
"""
def __init__(self):
"""This prevents two instances of LocalConnection"""
assert not Globals.local_connection
self.conn_number = 0 # changed by SetConnections for server
def __getattr__(self, name):
if name in globals(): return globals()[name]
elif isinstance(__builtins__, dict): return __builtins__[name]
else: return __builtins__.__dict__[name]
def __setattr__(self, name, value): globals()[name] = value
def __delattr__(self, name): del globals()[name]
def __str__(self): return "LocalConnection"
def reval(self, function_string, *args):
return apply(eval(function_string), args)
def quit(self): pass
class ConnectionRequest:
"""Simple wrapper around a PipeConnection request"""
def __init__(self, function_string, num_args):
self.function_string = function_string
self.num_args = num_args
def __str__(self):
return "ConnectionRequest: %s with %d arguments" % \
(self.function_string, self.num_args)
class LowLevelPipeConnection(Connection):
"""Routines for just sending objects from one side of pipe to another
Each thing sent down the pipe is paired with a request number,
currently limited to be between 0 and 255. The size of each thing
should be less than 2^56.
Each thing also has a type, indicated by one of the following
characters:
o - generic object
i - iterator/generator of RORPs
f - file object
b - string
q - quit signal
t - TempFile
d - DSRPath
R - RPath
r - RORPath only
c - PipeConnection object
"""
def __init__(self, inpipe, outpipe):
"""inpipe is a file-type open for reading, outpipe for writing"""
self.inpipe = inpipe
self.outpipe = outpipe
def __str__(self):
"""Return string version
This is actually an important function, because otherwise
requests to represent this object would result in "__str__"
being executed on the other side of the connection.
"""
return "LowLevelPipeConnection"
def _put(self, obj, req_num):
"""Put an object into the pipe (will send raw if string)"""
Log.conn("sending", obj, req_num)
if type(obj) is types.StringType: self._putbuf(obj, req_num)
elif isinstance(obj, Connection): self._putconn(obj, req_num)
elif isinstance(obj, TempFile): self._puttempfile(obj, req_num)
elif isinstance(obj, DSRPath): self._putdsrpath(obj, req_num)
elif isinstance(obj, RPath): self._putrpath(obj, req_num)
elif isinstance(obj, RORPath): self._putrorpath(obj, req_num)
elif ((hasattr(obj, "read") or hasattr(obj, "write"))
and hasattr(obj, "close")): self._putfile(obj, req_num)
elif hasattr(obj, "next"): self._putiter(obj, req_num)
else: self._putobj(obj, req_num)
def _putobj(self, obj, req_num):
"""Send a generic python obj down the outpipe"""
# for some reason there is an error when cPickle is used below..
self._write("o", pickle.dumps(obj, 1), req_num)
def _putbuf(self, buf, req_num):
"""Send buffer buf down the outpipe"""
self._write("b", buf, req_num)
def _putfile(self, fp, req_num):
"""Send a file to the client using virtual files"""
self._write("f", str(VirtualFile.new(fp)), req_num)
def _putiter(self, iterator, req_num):
"""Put an iterator through the pipe"""
self._write("i", str(VirtualFile.new(RORPIter.ToFile(iterator))),
req_num)
def _puttempfile(self, tempfile, req_num):
"""Put a tempfile into pipe. See _putrpath"""
tf_repr = (tempfile.conn.conn_number, tempfile.base,
tempfile.index, tempfile.data)
self._write("t", cPickle.dumps(tf_repr, 1), req_num)
def _putdsrpath(self, dsrpath, req_num):
"""Put DSRPath into pipe. See _putrpath"""
dsrpath_repr = (dsrpath.conn.conn_number, dsrpath.getstatedict())
self._write("d", cPickle.dumps(dsrpath_repr, 1), req_num)
def _putrpath(self, rpath, req_num):
"""Put an rpath into the pipe
The rpath's connection will be encoded as its conn_number. This
and the other rpath information are put in a tuple.
"""
rpath_repr = (rpath.conn.conn_number, rpath.base,
rpath.index, rpath.data)
self._write("R", cPickle.dumps(rpath_repr, 1), req_num)
def _putrorpath(self, rorpath, req_num):
"""Put an rorpath into the pipe
This is only necessary because if there is a .file attached,
it must be excluded from the pickling
"""
rorpath_repr = (rorpath.index, rorpath.data)
self._write("r", cPickle.dumps(rorpath_repr, 1), req_num)
def _putconn(self, pipeconn, req_num):
"""Put a connection into the pipe
A pipe connection is represented simply by the number (in
string form) of the connection it is *connected to*.
"""
self._write("c", str(pipeconn.conn_number), req_num)
def _putquit(self):
"""Send a string that takes down server"""
self._write("q", "", 255)
def _write(self, headerchar, data, req_num):
"""Write header and then data to the pipe"""
self.outpipe.write(headerchar + chr(req_num) +
C.long2str(long(len(data))))
self.outpipe.write(data)
self.outpipe.flush()
def _read(self, length):
"""Read length bytes from inpipe, returning result"""
return self.inpipe.read(length)
def _s2l_old(self, s):
"""Convert string to long int"""
assert len(s) == 7
l = 0L
for i in range(7): l = l*256 + ord(s[i])
return l
def _l2s_old(self, l):
"""Convert long int to string"""
s = ""
for i in range(7):
l, remainder = divmod(l, 256)
s = chr(remainder) + s
assert l == 0 # no bytes should remain; value must be < 2^56
return s
def _get(self):
"""Read an object from the pipe and return (req_num, value)"""
header_string = self.inpipe.read(9)
if not len(header_string) == 9:
raise ConnectionReadError("Truncated header string (problem "
"probably originated remotely)")
try:
format_string, req_num, length = (header_string[0],
ord(header_string[1]),
C.str2long(header_string[2:]))
except IndexError: raise ConnectionError()
if format_string == "q": raise ConnectionQuit("Received quit signal")
data = self._read(length)
if format_string == "o": result = cPickle.loads(data)
elif format_string == "b": result = data
elif format_string == "f": result = VirtualFile(self, int(data))
elif format_string == "i":
result = RORPIter.FromFile(BufferedRead(VirtualFile(self,
int(data))))
elif format_string == "t": result = self._gettempfile(data)
elif format_string == "r": result = self._getrorpath(data)
elif format_string == "R": result = self._getrpath(data)
elif format_string == "d": result = self._getdsrpath(data)
else:
assert format_string == "c", header_string
result = Globals.connection_dict[int(data)]
Log.conn("received", result, req_num)
return (req_num, result)
def _getrorpath(self, raw_rorpath_buf):
"""Reconstruct RORPath object from raw data"""
index, data = cPickle.loads(raw_rorpath_buf)
return RORPath(index, data)
def _gettempfile(self, raw_tf_buf):
"""Return TempFile object indicated by raw_tf_buf"""
conn_number, base, index, data = cPickle.loads(raw_tf_buf)
return TempFile(Globals.connection_dict[conn_number],
base, index, data)
def _getrpath(self, raw_rpath_buf):
"""Return RPath object indicated by raw_rpath_buf"""
conn_number, base, index, data = cPickle.loads(raw_rpath_buf)
return RPath(Globals.connection_dict[conn_number], base, index, data)
def _getdsrpath(self, raw_dsrpath_buf):
"""Return DSRPath object indicated by buf"""
conn_number, state_dict = cPickle.loads(raw_dsrpath_buf)
empty_dsrp = DSRPath("bypass", Globals.local_connection, None)
empty_dsrp.__setstate__(state_dict)
empty_dsrp.conn = Globals.connection_dict[conn_number]
empty_dsrp.file = None
return empty_dsrp
def _close(self):
"""Close the pipes associated with the connection"""
self.outpipe.close()
self.inpipe.close()
class PipeConnection(LowLevelPipeConnection):
"""Provide server and client functions for a Pipe Connection
Both sides act as modules that allow for remote execution. For
instance, self.conn.pow(2,8) will execute the operation on the
server side.
The only difference between the client and server is that the
client makes the first request, and the server listens first.
"""
def __init__(self, inpipe, outpipe, conn_number = 0):
"""Init PipeConnection
conn_number should be a unique (to the session) integer to
identify the connection. For instance, all connections to the
client have conn_number 0. Other connections can use this
number to route commands to the correct process.
"""
LowLevelPipeConnection.__init__(self, inpipe, outpipe)
self.conn_number = conn_number
self.unused_request_numbers = {}
for i in range(256): self.unused_request_numbers[i] = None
def __str__(self): return "PipeConnection %d" % self.conn_number
def get_response(self, desired_req_num):
"""Read from pipe, responding to requests until req_num.
Sometimes after a request is sent, the other side will make
another request before responding to the original one. In
that case, respond to the request. But return once the right
response is given.
"""
while 1:
try: req_num, object = self._get()
except ConnectionQuit:
self._put("quitting", self.get_new_req_num())
self._close()
return
if req_num == desired_req_num: return object
else:
assert isinstance(object, ConnectionRequest)
self.answer_request(object, req_num)
def answer_request(self, request, req_num):
"""Put the object requested by request down the pipe"""
del self.unused_request_numbers[req_num]
argument_list = []
for i in range(request.num_args):
arg_req_num, arg = self._get()
assert arg_req_num == req_num
argument_list.append(arg)
try:
Security.vet_request(request, argument_list)
result = apply(eval(request.function_string), argument_list)
except: result = self.extract_exception()
self._put(result, req_num)
self.unused_request_numbers[req_num] = None
def extract_exception(self):
"""Return active exception"""
if Log.verbosity >= 5 or Log.term_verbosity >= 5:
Log("Sending back exception %s of type %s: \n%s" %
(sys.exc_info()[1], sys.exc_info()[0],
"".join(traceback.format_tb(sys.exc_info()[2]))), 5)
return sys.exc_info()[1]
def Server(self):
"""Start server's read eval return loop"""
Globals.server = 1
Globals.connections.append(self)
Log("Starting server", 6)
self.get_response(-1)
def reval(self, function_string, *args):
"""Execute command on remote side
The first argument should be a string that evaluates to a
function, like "pow", and the remaining are arguments to that
function.
"""
req_num = self.get_new_req_num()
self._put(ConnectionRequest(function_string, len(args)), req_num)
for arg in args: self._put(arg, req_num)
result = self.get_response(req_num)
self.unused_request_numbers[req_num] = None
if isinstance(result, Exception): raise result
else: return result
def get_new_req_num(self):
"""Allot a new request number and return it"""
if not self.unused_request_numbers:
raise ConnectionError("Exhaused possible connection numbers")
req_num = self.unused_request_numbers.keys()[0]
del self.unused_request_numbers[req_num]
return req_num
def quit(self):
"""Close the associated pipes and tell server side to quit"""
assert not Globals.server
self._putquit()
self._get()
self._close()
def __getattr__(self, name):
"""Intercept attributes to allow for . invocation"""
return EmulateCallable(self, name)
class RedirectedConnection(Connection):
"""Represent a connection more than one move away
For instance, suppose things are connected like this: S1---C---S2.
If Server1 wants something done by Server2, it will have to go
through the Client. So on S1's side, S2 will be represented by a
RedirectedConnection.
"""
def __init__(self, conn_number, routing_number = 0):
"""RedirectedConnection initializer
Returns a RedirectedConnection object for the given
conn_number, where commands are routed through the connection
with the given routing_number. 0 is the client, so the
default shouldn't have to be changed.
"""
self.conn_number = conn_number
self.routing_number = routing_number
self.routing_conn = Globals.connection_dict[routing_number]
def reval(self, function_string, *args):
"""Evalution function_string on args on remote connection"""
return self.routing_conn.reval("RedirectedRun", self.conn_number,
function_string, *args)
def __str__(self):
return "RedirectedConnection %d,%d" % (self.conn_number,
self.routing_number)
def __getattr__(self, name):
return EmulateCallableRedirected(self.conn_number, self.routing_conn,
name)
def RedirectedRun(conn_number, func, *args):
"""Run func with args on connection with conn number conn_number
This function is meant to redirect requests from one connection to
another, so conn_number must not be the local connection (and also
for security reasons since this function is always made
available).
"""
conn = Globals.connection_dict[conn_number]
assert conn is not Globals.local_connection, conn
return conn.reval(func, *args)
class EmulateCallable:
"""This is used by PipeConnection in calls like conn.os.chmod(foo)"""
def __init__(self, connection, name):
self.connection = connection
self.name = name
def __call__(self, *args):
return apply(self.connection.reval, (self.name,) + args)
def __getattr__(self, attr_name):
return EmulateCallable(self.connection,
"%s.%s" % (self.name, attr_name))
class EmulateCallableRedirected:
"""Used by RedirectedConnection in calls like conn.os.chmod(foo)"""
def __init__(self, conn_number, routing_conn, name):
self.conn_number, self.routing_conn = conn_number, routing_conn
self.name = name
def __call__(self, *args):
return apply(self.routing_conn.reval,
("RedirectedRun", self.conn_number, self.name) + args)
def __getattr__(self, attr_name):
return EmulateCallableRedirected(self.conn_number, self.routing_conn,
"%s.%s" % (self.name, attr_name))
class VirtualFile:
"""When the client asks for a file over the connection, it gets this
The returned instance then forwards requests over the connection.
The class's vfiles dictionary is used by the server to associate each
open file object with a unique file number.
"""
#### The following are used by the server
vfiles = {}
counter = 0
def getbyid(cls, id):
return cls.vfiles[id]
getbyid = classmethod(getbyid)
def readfromid(cls, id, length):
return cls.vfiles[id].read(length)
readfromid = classmethod(readfromid)
def readlinefromid(cls, id):
return cls.vfiles[id].readline()
readlinefromid = classmethod(readlinefromid)
def writetoid(cls, id, buffer):
return cls.vfiles[id].write(buffer)
writetoid = classmethod(writetoid)
def closebyid(cls, id):
fp = cls.vfiles[id]
del cls.vfiles[id]
return fp.close()
closebyid = classmethod(closebyid)
def new(cls, fileobj):
"""Associate a new VirtualFile with a read fileobject, return id"""
count = cls.counter
cls.vfiles[count] = fileobj
cls.counter = count + 1
return count
new = classmethod(new)
#### And these are used by the client
def __init__(self, connection, id):
self.connection = connection
self.id = id
def read(self, length = -1):
return self.connection.VirtualFile.readfromid(self.id, length)
def readline(self):
return self.connection.VirtualFile.readlinefromid(self.id)
def write(self, buf):
return self.connection.VirtualFile.writetoid(self.id, buf)
def close(self):
return self.connection.VirtualFile.closebyid(self.id)
def __iter__(self):
"""Iterates lines in file, like normal iter(file) behavior"""
while 1:
line = self.readline()
if not line: break
yield line
# everything has to be available here for remote connection's use, but
# put at bottom to reduce circularities.
import Globals, Time, Rdiff, Hardlink, FilenameMapping, C, Security, Main
from static import *
from lazy import *
from log import *
from iterfile import *
from connection import *
from rpath import *
from robust import *
from rorpiter import *
from destructive_stepping import *
from selection import *
from statistics import *
from increment import *
from restore import *
from manage import *
from highlevel import *
Globals.local_connection = LocalConnection()
Globals.connections.append(Globals.local_connection)
# Following changed by server in SetConnections
Globals.connection_dict[0] = Globals.local_connection
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Deal with side effects from traversing trees"""
from __future__ import generators
import types
from rpath import *
from lazy import *
class DSRPPermError(Exception):
"""Exception used when a DSRPath can't get sufficient permissions"""
pass
class DSRPath(RPath):
"""Destructive Stepping RPath
Sometimes when we traverse the directory tree, even when we just
want to read files, we have to change things, like the permissions
of a file or directory in order to read it, or the file's access
times. This class is like an RPath, but the permission and time
modifications are delayed, so that they can be done at the very
end when they won't be disturbed later.
Here are the new class variables:
delay_perms - true iff future perm changes should be delayed
newperms - holds the perm values while they are delayed
delay_atime - true iff an atime change is being delayed
newatime - holds the new atime
delay_mtime - true iff an mtime change is being delayed
newmtime - holds the new mtime
"""
def __init__(self, source, conn_or_rp, base = 0, index = ()):
"""Initialize DSRP
Source should be true iff the DSRPath is taken from the
"source" partition and thus settings like
Globals.change_source_perms should be paid attention to.
If args is [rpath], return the dsrpath equivalent of rpath,
otherwise use the same arguments as the RPath initializer.
"""
if base == 0:
assert isinstance(conn_or_rp, RPath)
RPath.__init__(self, conn_or_rp.conn,
conn_or_rp.base, conn_or_rp.index)
self.path = conn_or_rp.path # conn_or_rp may be quoted
else: RPath.__init__(self, conn_or_rp, base, index)
if source != "bypass":
# "bypass" val is used when unpackaging over connection
assert source is None or source is 1
self.source = source
self.set_delays(source)
self.set_init_perms(source)
def set_delays(self, source):
"""Delay writing permissions and times where appropriate"""
if not source or Globals.change_source_perms:
self.delay_perms, self.newperms = 1, None
else: self.delay_perms = None
if Globals.preserve_atime:
self.delay_atime = 1
# Now get atime right away if possible
if self.data.has_key('atime'): self.newatime = self.data['atime']
else: self.newatime = None
else: self.delay_atime = None
if source:
self.delay_mtime = None # we'll never change mtime of source file
else:
self.delay_mtime = 1
# Save mtime now for a dir, because it might inadvertently change
if self.isdir(): self.newmtime = self.data['mtime']
else: self.newmtime = None
def set_init_perms(self, source):
"""If necessary, change permissions to ensure access"""
if self.isreg() and not self.readable():
if (source and Globals.change_source_perms or
not source and Globals.change_mirror_perms):
self.chmod_bypass(0400)
elif self.isdir():
if source and Globals.change_source_perms:
if not self.readable() or not self.executable():
self.chmod_bypass(0500)
elif not source and Globals.change_mirror_perms:
if not self.hasfullperms(): self.chmod_bypass(0700)
def warn(self, err):
Log("Received error '%s' when dealing with file %s, skipping..."
% (err, self.path), 1)
raise DSRPPermError(self.path)
def __getstate__(self):
"""Return picklable state. See RPath __getstate__."""
assert self.conn is Globals.local_connection # Can't pickle a conn
return self.getstatedict()
def getstatedict(self):
"""Return dictionary containing the attributes we can save"""
pickle_dict = {}
for attrib in ['index', 'data', 'delay_perms', 'newperms',
'delay_atime', 'newatime',
'delay_mtime', 'newmtime',
'path', 'base', 'source']:
if self.__dict__.has_key(attrib):
pickle_dict[attrib] = self.__dict__[attrib]
return pickle_dict
def __setstate__(self, pickle_dict):
"""Set state from object produced by getstate"""
self.conn = Globals.local_connection
for attrib in pickle_dict.keys():
self.__dict__[attrib] = pickle_dict[attrib]
def chmod(self, permissions):
"""Change permissions, delaying if self.perms_delayed is set"""
if self.delay_perms: self.newperms = self.data['perms'] = permissions
else: RPath.chmod(self, permissions)
def getperms(self):
"""Return dsrp's intended permissions"""
if self.delay_perms and self.newperms is not None:
return self.newperms
else: return self.data['perms']
def chmod_bypass(self, permissions):
"""Change permissions without updating the data dictionary"""
self.delay_perms = 1
if self.newperms is None: self.newperms = self.getperms()
Log("DSRP: Perm bypass %s to %o" % (self.path, permissions), 8)
self.conn.os.chmod(self.path, permissions)
def settime(self, accesstime, modtime):
"""Change times, delaying if self.times_delayed is set"""
if self.delay_atime: self.newatime = self.data['atime'] = accesstime
if self.delay_mtime: self.newmtime = self.data['mtime'] = modtime
if not self.delay_atime or not self.delay_mtime:
RPath.settime(self, accesstime, modtime)
def setmtime(self, modtime):
"""Change mtime, delaying if self.times_delayed is set"""
if self.delay_mtime: self.newmtime = self.data['mtime'] = modtime
else: RPath.setmtime(self, modtime)
def getmtime(self):
"""Return dsrp's intended modification time"""
if self.delay_mtime and self.newmtime is not None:
return self.newmtime
else: return self.data['mtime']
def getatime(self):
"""Return dsrp's intended access time"""
if self.delay_atime and self.newatime is not None:
return self.newatime
else: return self.data['atime']
def write_changes(self):
"""Write saved up permission/time changes"""
if not self.lstat(): return # File has been deleted in meantime
if self.delay_perms and self.newperms is not None:
Log("Finalizing permissions of dsrp %s to %s" %
(self.path, self.newperms), 8)
RPath.chmod(self, self.newperms)
do_atime = self.delay_atime and self.newatime is not None
do_mtime = self.delay_mtime and self.newmtime is not None
if do_atime and do_mtime:
RPath.settime(self, self.newatime, self.newmtime)
elif do_atime and not do_mtime:
RPath.settime(self, self.newatime, self.getmtime())
elif not do_atime and do_mtime:
RPath.setmtime(self, self.newmtime)
def newpath(self, newpath, index = ()):
"""Return similar DSRPath but with new path"""
return self.__class__(self.source, self.conn, newpath, index)
def append(self, ext):
"""Return similar DSRPath with new extension"""
return self.__class__(self.source, self.conn, self.base,
self.index + (ext,))
def new_index(self, index):
"""Return similar DSRPath with new index"""
return self.__class__(self.source, self.conn, self.base, index)
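# Hypothetical usage sketch (not part of the original module) of the
# delayed writes above.  The dsrp argument and mode are made up, and
# nothing calls this function.
def _example_delayed_writes(dsrp):
    """Show a DSRPath delaying a chmod until write_changes()

    Assumes dsrp.delay_perms is set, as for a source path with
    Globals.change_source_perms on.
    """
    dsrp.chmod(0644)  # only records dsrp.newperms; file is untouched
    assert dsrp.getperms() == 0644  # reports the intended permissions
    dsrp.write_changes()  # the real RPath.chmod finally runs here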
class DestructiveSteppingFinalizer(ITRBranch):
"""Finalizer that can work on an iterator of dsrpaths
The reason we have to use an IterTreeReducer is that some files
should be updated immediately, but for directories we sometimes
need to update all the files in the directory before finally
coming back to it.
"""
dsrpath = None
def start_process(self, index, dsrpath):
self.dsrpath = dsrpath
def end_process(self):
if self.dsrpath: self.dsrpath.write_changes()
def can_fast_process(self, index, dsrpath):
return not dsrpath.isdir()
def fast_process(self, index, dsrpath):
if dsrpath: dsrpath.write_changes()
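# Illustrative driving loop (assumed, not from the original source): the
# finalizer is fed through an IterTreeReducer so leaf files are written
# immediately while directories wait until their contents are done.
def _example_finalize(dsrp_iter):
    """dsrp_iter is an assumed iterator of DSRPaths in index order"""
    finalizer = IterTreeReducer(DestructiveSteppingFinalizer, [])
    for dsrp in dsrp_iter:
        finalizer(dsrp.index, dsrp)  # fast path for files, branch for dirs
    finalizer.Finish()  # flushes remaining directory changes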
from log import *
from robust import *
import Globals
from __future__ import generators
execfile("manage.py")
#######################################################################
#
# filelist - Some routines that help with operations over files listed
# in standard input instead of over whole directories.
#
class FilelistError(Exception): pass
class Filelist:
"""Many of these methods have analogs in highlevel.py"""
def File2Iter(fp, baserp):
"""Convert file obj with one pathname per line into rpiter
Closes fp when done. Given files are added to baserp.
"""
while 1:
line = fp.readline()
if not line: break
if line[-1] == "\n": line = line[:-1] # strip trailing newline
if not line: continue # skip blank lines
elif line[0] == "/": raise FilelistError(
"Read in absolute file name %s." % line)
yield baserp.append(line)
assert not fp.close(), "Error closing filelist fp"
def Mirror(src_rpath, dest_rpath, rpiter):
"""Copy files in fileiter from src_rpath to dest_rpath"""
sigiter = dest_rpath.conn.Filelist.get_sigs(dest_rpath, rpiter)
diffiter = Filelist.get_diffs(src_rpath, sigiter)
dest_rpath.conn.Filelist.patch(dest_rpath, diffiter)
dest_rpath.setdata()
def Mirror_and_increment(src_rpath, dest_rpath, inc_rpath, rpiter):
"""Mirror + put increment in tree based at inc_rpath"""
sigiter = dest_rpath.conn.Filelist.get_sigs(dest_rpath, rpiter)
diffiter = Filelist.get_diffs(src_rpath, sigiter)
dest_rpath.conn.Filelist.patch_and_increment(dest_rpath, diffiter,
inc_rpath)
dest_rpath.setdata()
def get_sigs(dest_rpbase, rpiter):
"""Get signatures of file analogs in rpiter
This is meant to be run on the destination side. Only the
extension part of the rps in rpiter will be used; the base is
ignored.
"""
def dest_iter(src_iter):
for src_rp in src_iter: yield dest_rpbase.new_index(src_rp.index)
return RORPIter.Signatures(dest_iter())
def get_diffs(src_rpbase, sigiter):
"""Get diffs based on sigiter and files in src_rpbase
This should be run on the local side.
"""
for sig_rorp in sigiter:
new_rp = src_rpbase.new_index(sig_rorp.index)
yield RORPIter.diffonce(sig_rorp, new_rp)
def patch(dest_rpbase, diffiter):
"""Process diffs in diffiter and update files in dest_rbpase.
Run remotely.
"""
for diff_rorp in diffiter:
basisrp = dest_rpbase.new_index(diff_rorp.index)
if not basisrp.lstat(): Filelist.make_subdirs(basisrp)
Log("Processing %s" % basisrp.path, 7)
RORPIter.patchonce(dest_rpbase, basisrp, diff_rorp)
def patch_and_increment(dest_rpbase, diffiter, inc_rpbase):
"""Apply diffs in diffiter to dest_rpbase, and increment to inc_rpbase
Also to be run remotely.
"""
for diff_rorp in diffiter:
basisrp = dest_rpbase.new_index(diff_rorp.index)
if diff_rorp.lstat(): Filelist.make_subdirs(basisrp)
Log("Processing %s" % basisrp.path, 7)
# XXX This isn't done yet...
def make_subdirs(rpath):
"""Make sure that all the directories under the rpath exist
This function doesn't try to get the permissions right on the
underlying directories; it just does the minimum to make sure the
file can be created.
"""
dirname = rpath.dirsplit()[0]
if dirname == '.' or dirname == '': return
dir_rp = RPath(rpath.conn, dirname)
Filelist.make_subdirs(dir_rp)
if not dir_rp.lstat(): dir_rp.mkdir()
MakeStatic(Filelist)
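# Example usage (hypothetical; StringIO stands in for the stdin file
# object that Filelist normally reads):
def _example_file2iter(baserp):
    import StringIO
    fp = StringIO.StringIO("foo\nbar/baz\n\n")  # blank line is skipped
    for rp in Filelist.File2Iter(fp, baserp):
        print rp.path  # each rp is baserp.append(line)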
#!/usr/bin/env python
#
# rdiff-backup -- Mirror files while keeping incremental changes
# Version 0.8.0 released June 14, 2002
# Copyright (C) 2001, 2002 Ben Escoto <bescoto@stanford.edu>
#
# This program is licensed under the GNU General Public License (GPL).
# Distributions of rdiff-backup usually include a copy of the GPL in a
# file called COPYING. The GPL is also available online at
# http://www.gnu.org/copyleft/gpl.html.
#
# See http://www.stanford.edu/~bescoto/rdiff-backup for more
# information. Please send mail to me or the mailing list if you find
# bugs or have any suggestions.
from __future__ import nested_scopes, generators
import os, stat, time, sys, getopt, re, cPickle, types, shutil, sha, marshal, traceback, popen2, tempfile, gzip, UserList, errno, signal
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""High level functions for mirroring, mirror & inc, etc."""
from __future__ import generators
from static import *
from log import *
from rpath import *
from robust import *
from increment import *
from destructive_stepping import *
from rorpiter import *
import Globals, Hardlink, MiscStats
class SkipFileException(Exception):
"""Signal that the current file should be skipped but then continue
This exception will often be raised when there is problem reading
an individual file, but it makes sense for the rest of the backup
to keep going.
"""
pass
class HighLevel:
"""High level static functions
The design of some of these functions is represented on the
accompanying diagram.
"""
def Mirror(src_rpath, dest_rpath, inc_rpath = None, session_info = None):
"""Turn dest_rpath into a copy of src_rpath
If inc_rpath is true, then this is the initial mirroring of an
incremental backup, so checkpoint and write to data_dir.
Otherwise only mirror and don't create any extra files.
"""
SourceS = src_rpath.conn.HLSourceStruct
DestS = dest_rpath.conn.HLDestinationStruct
SourceS.set_session_info(session_info)
DestS.set_session_info(session_info)
src_init_dsiter = SourceS.split_initial_dsiter()
dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter)
diffiter = SourceS.get_diffs_and_finalize(dest_sigiter)
if inc_rpath:
DestS.patch_w_datadir_writes(dest_rpath, diffiter, inc_rpath)
else: DestS.patch_and_finalize(dest_rpath, diffiter)
dest_rpath.setdata()
def Mirror_and_increment(src_rpath, dest_rpath, inc_rpath,
session_info = None):
"""Mirror + put increments in tree based at inc_rpath"""
SourceS = src_rpath.conn.HLSourceStruct
DestS = dest_rpath.conn.HLDestinationStruct
SourceS.set_session_info(session_info)
DestS.set_session_info(session_info)
if not session_info: dest_rpath.conn.SaveState.touch_last_file()
src_init_dsiter = SourceS.split_initial_dsiter()
dest_sigiter = DestS.get_sigs(dest_rpath, src_init_dsiter)
diffiter = SourceS.get_diffs_and_finalize(dest_sigiter)
DestS.patch_increment_and_finalize(dest_rpath, diffiter, inc_rpath)
dest_rpath.setdata()
inc_rpath.setdata()
MakeStatic(HighLevel)
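# Sketch of a calling sequence (assumed; the rpaths are placeholders).
# With inc_rpath true, Mirror checkpoints and writes to the data dir;
# Mirror_and_increment additionally stores reverse increments there.
def _example_backup_session(src_rpath, dest_rpath, inc_rpath):
    HighLevel.Mirror(src_rpath, dest_rpath, inc_rpath)  # initial mirror
    HighLevel.Mirror_and_increment(src_rpath, dest_rpath, inc_rpath)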
class HLSourceStruct:
"""Hold info used by HL on the source side"""
_session_info = None # set to si if resuming
def set_session_info(cls, session_info):
cls._session_info = session_info
def iterate_from(cls):
"""Supply more aruments to DestructiveStepping.Iterate_from"""
if cls._session_info is None: Globals.select_source.set_iter()
else: Globals.select_source.set_iter(cls._session_info.last_index, 1)
return Globals.select_source
def split_initial_dsiter(cls):
"""Set iterators of all dsrps from rpath, returning one"""
dsiter = cls.iterate_from()
initial_dsiter1, cls.initial_dsiter2 = Iter.multiplex(dsiter, 2)
return initial_dsiter1
def get_diffs_and_finalize(cls, sigiter):
"""Return diffs and finalize any dsrp changes remaining
Return an iterator of diffs computed against the signatures of
dissimilar files. This is the last operation run on the local
filestream, so finalize dsrp writes.
"""
collated = RORPIter.CollateIterators(cls.initial_dsiter2, sigiter)
finalizer = IterTreeReducer(DestructiveSteppingFinalizer, [])
def error_handler(exc, dest_sig, dsrp):
Log("Error %s producing a diff of %s" %
(exc, dsrp and dsrp.path), 2)
return None
def diffs():
for dsrp, dest_sig in collated:
if dest_sig:
if dest_sig.isplaceholder(): yield dest_sig
else:
diff = Robust.check_common_error(
error_handler, RORPIter.diffonce, [dest_sig, dsrp])
if diff: yield diff
if dsrp: finalizer(dsrp.index, dsrp)
finalizer.Finish()
return diffs()
MakeClass(HLSourceStruct)
class HLDestinationStruct:
"""Hold info used by HL on the destination side"""
_session_info = None # set to si if resuming
def set_session_info(cls, session_info):
cls._session_info = session_info
def iterate_from(cls):
"""Return selection iterator to iterate all the mirror files"""
if cls._session_info is None: Globals.select_mirror.set_iter()
else: Globals.select_mirror.set_iter(cls._session_info.last_index)
return Globals.select_mirror
def split_initial_dsiter(cls):
"""Set initial_dsiters (iteration of all dsrps from rpath)"""
result, cls.initial_dsiter2 = Iter.multiplex(cls.iterate_from(), 2)
return result
def get_dissimilar(cls, baserp, src_init_iter, dest_init_iter):
"""Get dissimilars
Returns an iterator which enumerates the dsrps which are
different on the source and destination ends. The dsrps do
not necessarily exist on the destination end.
Also, to prevent the system from getting backed up on the
remote end, if we don't get enough dissimilars, stick in a
placeholder every so often, like fiber. The more
placeholders, the more bandwidth used, but if there aren't
enough, lots of memory will be used because files will be
accumulating on the source side. How much will accumulate
will depend on the Globals.conn_bufsize value.
"""
collated = RORPIter.CollateIterators(src_init_iter, dest_init_iter)
def compare(src_rorp, dest_dsrp):
"""Return dest_dsrp if they are different, None if the same"""
if not dest_dsrp:
dest_dsrp = cls.get_dsrp(baserp, src_rorp.index)
if dest_dsrp.lstat():
Log("Warning: Found unexpected destination file %s, "
"not processing it." % dest_dsrp.path, 2)
return None
elif (src_rorp and src_rorp == dest_dsrp and
(not Globals.preserve_hardlinks or
Hardlink.rorp_eq(src_rorp, dest_dsrp))):
return None
if src_rorp and src_rorp.isreg() and Hardlink.islinked(src_rorp):
dest_dsrp.flaglinked()
return dest_dsrp
def generate_dissimilar():
counter = 0
for src_rorp, dest_dsrp in collated:
if Globals.preserve_hardlinks:
if src_rorp: Hardlink.add_rorp(src_rorp, 1)
if dest_dsrp: Hardlink.add_rorp(dest_dsrp, None)
dsrp = compare(src_rorp, dest_dsrp)
if dsrp:
counter = 0
yield dsrp
elif counter == 20:
placeholder = RORPath(src_rorp.index)
placeholder.make_placeholder()
counter = 0
yield placeholder
else: counter += 1
return generate_dissimilar()
def get_sigs(cls, baserp, src_init_iter):
"""Return signatures of all dissimilar files"""
dest_iters1 = cls.split_initial_dsiter()
dissimilars = cls.get_dissimilar(baserp, src_init_iter, dest_iters1)
return RORPIter.Signatures(dissimilars)
def get_dsrp(cls, dest_rpath, index):
"""Return initialized dsrp based on dest_rpath with given index"""
dsrp = DSRPath(None, dest_rpath.conn, dest_rpath.base, index)
if Globals.quoting_enabled: dsrp.quote_path()
return dsrp
def get_finalizer(cls):
"""Return finalizer, starting from session info if necessary"""
old_finalizer = cls._session_info and cls._session_info.finalizer
if old_finalizer: return old_finalizer
else: return IterTreeReducer(DestructiveSteppingFinalizer, [])
def get_ITR(cls, inc_rpath):
"""Return ITR, starting from state if necessary"""
if cls._session_info and cls._session_info.ITR:
return cls._session_info.ITR
else:
iitr = IterTreeReducer(IncrementITRB, [inc_rpath])
iitr.root_branch.override_changed()
Globals.ITRB = iitr.root_branch
iitr.root_branch.Errors = 0
return iitr
def get_MirrorITR(cls, inc_rpath):
"""Return MirrorITR, starting from state if available"""
if cls._session_info and cls._session_info.ITR:
return cls._session_info.ITR
ITR = IterTreeReducer(MirrorITRB, [inc_rpath])
Globals.ITRB = ITR.root_branch
ITR.root_branch.Errors = 0
return ITR
def patch_and_finalize(cls, dest_rpath, diffs):
"""Apply diffs and finalize"""
collated = RORPIter.CollateIterators(diffs, cls.initial_dsiter2)
finalizer = cls.get_finalizer()
diff_rorp, dsrp = None, None
def patch(diff_rorp, dsrp):
if not dsrp: dsrp = cls.get_dsrp(dest_rpath, diff_rorp.index)
if diff_rorp and not diff_rorp.isplaceholder():
RORPIter.patchonce_action(None, dsrp, diff_rorp).execute()
return dsrp
def error_handler(exc, diff_rorp, dsrp):
filename = dsrp and dsrp.path or os.path.join(*diff_rorp.index)
Log("Error: %s processing file %s" % (exc, filename), 2)
for indexed_tuple in collated:
Log(lambda: "Processing %s" % str(indexed_tuple), 7)
diff_rorp, dsrp = indexed_tuple
dsrp = Robust.check_common_error(error_handler, patch,
[diff_rorp, dsrp])
finalizer(dsrp.index, dsrp)
finalizer.Finish()
def patch_w_datadir_writes(cls, dest_rpath, diffs, inc_rpath):
"""Apply diffs and finalize, with checkpointing and statistics"""
collated = RORPIter.CollateIterators(diffs, cls.initial_dsiter2)
finalizer, ITR = cls.get_finalizer(), cls.get_MirrorITR(inc_rpath)
MiscStats.open_dir_stats_file()
dsrp, finished_dsrp = None, None
try:
for indexed_tuple in collated:
Log(lambda: "Processing %s" % str(indexed_tuple), 7)
diff_rorp, dsrp = indexed_tuple
if not dsrp: dsrp = cls.get_dsrp(dest_rpath, diff_rorp.index)
if diff_rorp and diff_rorp.isplaceholder(): diff_rorp = None
ITR(dsrp.index, diff_rorp, dsrp)
finalizer(dsrp.index, dsrp)
SaveState.checkpoint(ITR, finalizer, dsrp)
finished_dsrp = dsrp
ITR.Finish()
finalizer.Finish()
except: cls.handle_last_error(finished_dsrp, finalizer, ITR)
if Globals.preserve_hardlinks: Hardlink.final_writedata()
MiscStats.close_dir_stats_file()
MiscStats.write_session_statistics(ITR.root_branch)
SaveState.checkpoint_remove()
def patch_increment_and_finalize(cls, dest_rpath, diffs, inc_rpath):
"""Apply diffs, write increment if necessary, and finalize"""
collated = RORPIter.CollateIterators(diffs, cls.initial_dsiter2)
finalizer, ITR = cls.get_finalizer(), cls.get_ITR(inc_rpath)
MiscStats.open_dir_stats_file()
dsrp, finished_dsrp = None, None
try:
for indexed_tuple in collated:
Log(lambda: "Processing %s" % str(indexed_tuple), 7)
diff_rorp, dsrp = indexed_tuple
index = indexed_tuple.index
if not dsrp: dsrp = cls.get_dsrp(dest_rpath, index)
if diff_rorp and diff_rorp.isplaceholder(): diff_rorp = None
ITR(index, diff_rorp, dsrp)
finalizer(index, dsrp)
SaveState.checkpoint(ITR, finalizer, dsrp)
finished_dsrp = dsrp
ITR.Finish()
finalizer.Finish()
except: cls.handle_last_error(finished_dsrp, finalizer, ITR)
if Globals.preserve_hardlinks: Hardlink.final_writedata()
MiscStats.close_dir_stats_file()
MiscStats.write_session_statistics(ITR.root_branch)
SaveState.checkpoint_remove()
def handle_last_error(cls, dsrp, finalizer, ITR):
"""If catch fatal error, try to checkpoint before exiting"""
Log.exception(1, 2)
TracebackArchive.log()
SaveState.checkpoint(ITR, finalizer, dsrp, 1)
if Globals.preserve_hardlinks: Hardlink.final_checkpoint(Globals.rbdir)
SaveState.touch_last_file_definitive()
raise
MakeClass(HLDestinationStruct)
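# Simplified model (illustration only) of the placeholder cadence in
# HLDestinationStruct.get_dissimilar above: after 20 consecutive similar
# files a placeholder is emitted, so data keeps flowing back and the
# source side (buffered per Globals.conn_bufsize) doesn't accumulate.
def _example_placeholder_cadence(flags):
    """flags is an assumed iterable of true (dissimilar)/false (same)"""
    counter = 0
    for is_dissimilar in flags:
        if is_dissimilar:
            counter = 0  # here the real code yields the dsrp itself
        elif counter == 20:
            counter = 0  # here it yields a placeholder RORPath
        else: counter += 1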
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Provides Inc and *ITR classes, which relate to writing increment files"""
import traceback
from static import *
from statistics import *
from lazy import *
class Inc:
"""Class containing increment functions"""
def Increment_action(new, mirror, incpref):
"""Main file incrementing function, returns RobustAction
new is the file on the active partition,
mirror is the mirrored file from the last backup,
incpref is the prefix of the increment file.
This function basically moves the information about the mirror
file to incpref.
The returned RobustAction when executed should return the name
of the incfile, or None if none was created.
"""
if not (new and new.lstat() or mirror.lstat()):
return Robust.null_action # Files deleted in meantime, do nothing
Log("Incrementing mirror file " + mirror.path, 5)
if ((new and new.isdir()) or mirror.isdir()) and not incpref.isdir():
incpref.mkdir()
if not mirror.lstat(): return Inc.makemissing_action(incpref)
elif mirror.isdir(): return Inc.makedir_action(mirror, incpref)
elif new and new.isreg() and mirror.isreg():
return Inc.makediff_action(new, mirror, incpref)
else: return Inc.makesnapshot_action(mirror, incpref)
def Increment(new, mirror, incpref):
return Inc.Increment_action(new, mirror, incpref).execute()
def makemissing_action(incpref):
"""Signify that mirror file was missing"""
def final(init_val):
incrp = Inc.get_inc_ext(incpref, "missing")
incrp.touch()
return incrp
return RobustAction(None, final, None)
def makesnapshot_action(mirror, incpref):
"""Copy mirror to incfile, since new is quite different"""
if (mirror.isreg() and Globals.compression and
not Globals.no_compression_regexp.match(mirror.path)):
snapshotrp = Inc.get_inc_ext(incpref, "snapshot.gz")
return Robust.copy_with_attribs_action(mirror, snapshotrp, 1)
else:
snapshotrp = Inc.get_inc_ext(incpref, "snapshot")
return Robust.copy_with_attribs_action(mirror, snapshotrp, None)
def makediff_action(new, mirror, incpref):
"""Make incfile which is a diff new -> mirror"""
if (Globals.compression and
not Globals.no_compression_regexp.match(mirror.path)):
diff = Inc.get_inc_ext(incpref, "diff.gz")
compress = 1
else:
diff = Inc.get_inc_ext(incpref, "diff")
compress = None
diff_tf = TempFileManager.new(diff)
def init():
Rdiff.write_delta(new, mirror, diff_tf, compress)
RPath.copy_attribs(mirror, diff_tf)
return diff
return Robust.make_tf_robustaction(init, diff_tf, diff)
def makedir_action(mirrordir, incpref):
"""Make file indicating directory mirrordir has changed"""
dirsign = Inc.get_inc_ext(incpref, "dir")
tf = TempFileManager.new(dirsign)
def init():
tf.touch()
RPath.copy_attribs(mirrordir, tf)
return dirsign
return Robust.make_tf_robustaction(init, tf, dirsign)
def get_inc(rp, time, typestr):
"""Return increment like rp but with time and typestr suffixes"""
addtostr = lambda s: "%s.%s.%s" % (s, Time.timetostring(time), typestr)
if rp.index:
incrp = rp.__class__(rp.conn, rp.base, rp.index[:-1] +
(addtostr(rp.index[-1]),))
else: incrp = rp.__class__(rp.conn, addtostr(rp.base), rp.index)
if Globals.quoting_enabled: incrp.quote_path()
return incrp
def get_inc_ext(rp, typestr):
"""Return increment with specified type and correct time
If the file exists, then probably a previous backup has been
aborted. We then keep asking FindTime to get a time later
than the one that already has an inc file.
"""
inctime = 0
while 1:
inctime = Resume.FindTime(rp.index, inctime)
incrp = Inc.get_inc(rp, inctime, typestr)
if not incrp.lstat(): break
return incrp
MakeStatic(Inc)
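# Sketch of the increment naming scheme implemented by get_inc above.
# The timestamp is whatever Time.timetostring returns; the value in the
# docstring below is made up for illustration.
def _example_inc_name(base, timestring, typestr):
    """Mirror of get_inc's addtostr lambda, e.g.

    _example_inc_name("foo", "2002-06-14T12:30:00-07:00", "diff.gz")
    returns "foo.2002-06-14T12:30:00-07:00.diff.gz"
    """
    return "%s.%s.%s" % (base, timestring, typestr)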
class IncrementITRB(StatsITRB):
"""Patch and increment mirror directory
This has to be an ITR because directories that have files in them
changed are flagged with an increment marker. There are four
possibilities as to the order:
1. Normal file -> Normal file: right away
2. Directory -> Directory: wait until files in the directory
are processed, as we won't know whether to add a marker
until the end.
3. Normal file -> Directory: right away, so later files will
have a directory to go into.
4. Directory -> Normal file: Wait until the end, so we can
process all the files in the directory.
Remember this object needs to be picklable.
"""
# Iff true, mirror file was a directory
mirror_isdirectory = None
# If set, what the directory on the mirror side will be replaced with
directory_replacement = None
# True iff there has been some change at this level or lower (used
# for marking directories to be flagged)
changed = None
# Holds the RPath of the created increment file, if any
incrp = None
def __init__(self, inc_rpath):
"""Set inc_rpath, an rpath of the base of the tree"""
self.inc_rpath = inc_rpath
StatsITRB.__init__(self)
def start_process(self, index, diff_rorp, dsrp):
"""Initial processing of file
diff_rorp is the RORPath of the diff from the remote side, and
dsrp is the local file to be incremented
"""
self.start_stats(dsrp)
incpref = self.inc_rpath.new_index(index)
if Globals.quoting_enabled: incpref.quote_path()
if dsrp.isdir():
self.init_dir(dsrp, diff_rorp, incpref)
self.mirror_isdirectory = 1
else: self.init_non_dir(dsrp, diff_rorp, incpref)
self.setvals(diff_rorp, dsrp, incpref)
def override_changed(self):
"""Set changed flag to true
This is used only at the top level of a backup, to make sure
that a marker is created recording every backup session.
"""
self.changed = 1
def setvals(self, diff_rorp, dsrp, incpref):
"""Record given values in state dict since in directory
We don't do these earlier in case of a problem inside the
init_* functions. Index isn't given because it is done by the
superclass.
"""
self.diff_rorp = diff_rorp
self.dsrp = dsrp
self.incpref = incpref
def init_dir(self, dsrp, diff_rorp, incpref):
"""Process a directory (initial pass)
If the directory is changing into a normal file, we need to
save the normal file data in a temp file, and then create the
real file once we are done with everything inside the
directory.
"""
if not (incpref.lstat() and incpref.isdir()): incpref.mkdir()
if diff_rorp and diff_rorp.isreg() and diff_rorp.file:
tf = TempFileManager.new(dsrp)
def init():
RPathStatic.copy_with_attribs(diff_rorp, tf)
tf.set_attached_filetype(diff_rorp.get_attached_filetype())
def error(exc, ran_init, init_val): tf.delete()
RobustAction(init, None, error).execute()
self.directory_replacement = tf
def init_non_dir(self, dsrp, diff_rorp, incpref):
"""Process a non directory file (initial pass)"""
if not diff_rorp: return # no diff, so no change necessary
if diff_rorp.isreg() and (dsrp.isreg() or diff_rorp.isflaglinked()):
# Write updated mirror to temp file so we can compute
# reverse diff locally
mirror_tf = TempFileManager.new(dsrp)
old_dsrp_tf = TempFileManager.new(dsrp)
def init_thunk():
if diff_rorp.isflaglinked():
Hardlink.link_rp(diff_rorp, mirror_tf, dsrp)
else: Rdiff.patch_with_attribs_action(dsrp, diff_rorp,
mirror_tf).execute()
self.incrp = Inc.Increment_action(mirror_tf, dsrp,
incpref).execute()
if dsrp.lstat(): RPathStatic.rename(dsrp, old_dsrp_tf)
mirror_tf.rename(dsrp)
def final(init_val): old_dsrp_tf.delete()
def error(exc, ran_init, init_val):
if ran_init: old_dsrp_tf.delete() # everything is fine
else: # restore to previous state
if old_dsrp_tf.lstat(): old_dsrp_tf.rename(dsrp)
if self.incrp: self.incrp.delete()
mirror_tf.delete()
RobustAction(init_thunk, final, error).execute()
else: self.incrp = Robust.chain(
Inc.Increment_action(diff_rorp, dsrp, incpref),
RORPIter.patchonce_action(None, dsrp, diff_rorp)).execute()[0]
self.changed = 1
def end_process(self):
"""Do final work when leaving a tree (directory)"""
diff_rorp, dsrp, incpref = self.diff_rorp, self.dsrp, self.incpref
if (self.mirror_isdirectory and (diff_rorp or self.changed)
or self.directory_replacement):
if self.directory_replacement:
tf = self.directory_replacement
self.incrp = Robust.chain(
Inc.Increment_action(tf, dsrp, incpref),
RORPIter.patchonce_action(None, dsrp, tf)).execute()[0]
tf.delete()
else:
self.incrp = Inc.Increment(diff_rorp, dsrp, incpref)
if diff_rorp:
RORPIter.patchonce_action(None, dsrp, diff_rorp).execute()
self.end_stats(diff_rorp, dsrp, self.incrp)
if self.mirror_isdirectory or dsrp.isdir():
MiscStats.write_dir_stats_line(self, dsrp.index)
def can_fast_process(self, index, diff_rorp, dsrp):
"""True if there is no change in file and is just a leaf"""
return not diff_rorp and dsrp.isreg()
def fast_process(self, index, diff_rorp, dsrp):
"""Just update statistics"""
StatsITRB.fast_process(self, dsrp)
def branch_process(self, branch):
"""Update statistics, and the has_changed flag if change in branch"""
if Globals.sleep_ratio is not None: Time.sleep(Globals.sleep_ratio)
if branch.changed: self.changed = 1
self.add_file_stats(branch)
class MirrorITRB(StatsITRB):
"""Like IncrementITR, but only patch mirror directory, don't increment"""
# This is always None since no increments will be created
incrp = None
def __init__(self, inc_rpath):
"""Set inc_rpath, an rpath of the base of the inc tree"""
self.inc_rpath = inc_rpath
StatsITRB.__init__(self)
def start_process(self, index, diff_rorp, mirror_dsrp):
"""Initialize statistics and do actual writing to mirror"""
self.start_stats(mirror_dsrp)
if diff_rorp and not diff_rorp.isplaceholder():
RORPIter.patchonce_action(None, mirror_dsrp, diff_rorp).execute()
self.incpref = self.inc_rpath.new_index(index)
self.diff_rorp, self.mirror_dsrp = diff_rorp, mirror_dsrp
def end_process(self):
"""Update statistics when leaving"""
self.end_stats(self.diff_rorp, self.mirror_dsrp)
if self.mirror_dsrp.isdir():
MiscStats.write_dir_stats_line(self, self.mirror_dsrp.index)
def can_fast_process(self, index, diff_rorp, mirror_dsrp):
"""True if there is no change in file and it is just a leaf"""
return not diff_rorp and mirror_dsrp.isreg()
def fast_process(self, index, diff_rorp, mirror_dsrp):
"""Just update statistics"""
StatsITRB.fast_process(self, mirror_dsrp)
def branch_process(self, branch):
"""Update statistics with subdirectory results"""
if Globals.sleep_ratio is not None: Time.sleep(Globals.sleep_ratio)
self.add_file_stats(branch)
from log import *
from rpath import *
from robust import *
from rorpiter import *
import Globals, Time, MiscStats
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Convert an iterator to a file object and vice-versa"""
import cPickle, array
import Globals, C
class IterFileException(Exception): pass
class UnwrapFile:
"""Contains some basic methods for parsing a file containing an iter"""
def __init__(self, file):
self.file = file
def _s2l_old(self, s):
"""Convert string to long int"""
assert len(s) == 7
l = 0L
for i in range(7): l = l*256 + ord(s[i])
return l
def _get(self):
"""Return pair (type, data) next in line on the file
type is a single character which is either "o" for object, "f"
for file, "c" for a continution of a file, or None if no more
data can be read. Data is either the file's data, if type is
"c" or "f", or the actual object if the type is "o".
"""
header = self.file.read(8)
if not header: return None, None
if len(header) != 8:
assert None, "Header %s is only %d bytes" % (header, len(header))
type, length = header[0], C.str2long(header[1:])
buf = self.file.read(length)
if type == "o": return type, cPickle.loads(buf)
else: return type, buf
class IterWrappingFile(UnwrapFile):
"""An iterator generated from a file.
Initialize with a file type object, and then it will return the
elements of the file in order.
"""
def __init__(self, file):
UnwrapFile.__init__(self, file)
self.currently_in_file = None
def __iter__(self): return self
def next(self):
if self.currently_in_file:
self.currently_in_file.close() # no error checking by this point
type, data = self._get()
if not type: raise StopIteration
if type == "o": return data
elif type == "f":
file = IterVirtualFile(self, data)
if data: self.currently_in_file = file
else: self.currently_in_file = None
return file
else: raise IterFileException("Bad file type %s" % type)
class IterVirtualFile(UnwrapFile):
"""Another version of a pretend file
This is returned by IterWrappingFile when a file is embedded in
the main file that the IterWrappingFile is based around.
"""
def __init__(self, iwf, initial_data):
"""Initializer
initial_data is the data from the first block of the file.
iwf is the iter wrapping file that spawned this
IterVirtualFile.
"""
UnwrapFile.__init__(self, iwf.file)
self.iwf = iwf
self.buffer = initial_data
self.closed = None
def read(self, length = -1):
"""Read length bytes from the file, updating buffers as necessary"""
assert not self.closed
if self.iwf.currently_in_file:
if length >= 0:
while length >= len(self.buffer):
if not self.addtobuffer(): break
real_len = min(length, len(self.buffer))
else:
while 1:
if not self.addtobuffer(): break
real_len = len(self.buffer)
elif length < 0: real_len = len(self.buffer)
else: real_len = min(length, len(self.buffer))
return_val = self.buffer[:real_len]
self.buffer = self.buffer[real_len:]
return return_val
def addtobuffer(self):
"""Read a chunk from the file and add it to the buffer"""
assert self.iwf.currently_in_file
type, data = self._get()
assert type == "c", "Type is %s instead of c" % type
if data:
self.buffer += data
return 1
else:
self.iwf.currently_in_file = None
return None
def close(self):
"""Currently just reads whats left and discards it"""
while self.iwf.currently_in_file:
self.addtobuffer()
self.buffer = ""
self.closed = 1
class FileWrappingIter:
"""A file interface wrapping around an iterator
This is initialized with an iterator, and then converts it into a
stream of characters. The object will evaluate as little of the
iterator as is necessary to provide the requested bytes.
The actual file is a sequence of pickled objects, each preceded by
an 8 byte header which identifies the type of the following object
and specifies its length. File objects are not pickled; their
data is written in chunks of Globals.blocksize, and the following
blocks can identify themselves as continuations.
"""
def __init__(self, iter):
"""Initialize with iter"""
self.iter = iter
self.array_buf = array.array('c')
self.currently_in_file = None
self.closed = None
def read(self, length):
"""Return next length bytes in file"""
assert not self.closed
while len(self.array_buf) < length:
if not self.addtobuffer(): break
result = self.array_buf[:length].tostring()
del self.array_buf[:length]
return result
def addtobuffer(self):
"""Updates self.buffer, adding a chunk from the iterator.
Returns None if we have reached the end of the iterator,
otherwise return true.
"""
array_buf = self.array_buf
if self.currently_in_file:
array_buf.fromstring("c")
array_buf.fromstring(self.addfromfile())
else:
try: currentobj = self.iter.next()
except StopIteration: return None
if hasattr(currentobj, "read") and hasattr(currentobj, "close"):
self.currently_in_file = currentobj
array_buf.fromstring("f")
array_buf.fromstring(self.addfromfile())
else:
pickle = cPickle.dumps(currentobj, 1)
array_buf.fromstring("o")
array_buf.fromstring(C.long2str(long(len(pickle))))
array_buf.fromstring(pickle)
return 1
def addfromfile(self):
"""Read a chunk from the current file and return it"""
# Check the file read for errors; buf = "" if one is found
buf = Robust.check_common_error(self.read_error_handler,
self.currently_in_file.read,
[Globals.blocksize])
if not buf:
assert not self.currently_in_file.close()
self.currently_in_file = None
return C.long2str(long(len(buf))) + buf
def read_error_handler(self, exc, blocksize):
"""Log error when reading from file"""
Log("Error '%s' reading from fileobj, truncating" % (str(exc),), 2)
return ""
def _l2s_old(self, l):
"""Convert long int to string of 7 characters"""
s = ""
for i in range(7):
l, remainder = divmod(l, 256)
s = chr(remainder) + s
assert l == 0 # the value must fit in 7 bytes
return s
def close(self): self.closed = 1
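# The stream format by example (illustration only): each record is an
# 8 byte header -- one type character plus a 7 byte big-endian length --
# followed by that many payload bytes.  long2str here is a pure-Python
# stand-in for the C.long2str used above.
def _example_object_record(obj):
    import cPickle
    def long2str(l):  # 7 byte big-endian length, like _l2s_old above
        s = ""
        for i in range(7):
            l, remainder = divmod(l, 256)
            s = chr(remainder) + s
        return s
    pickle = cPickle.dumps(obj, 1)
    return "o" + long2str(long(len(pickle))) + pickle  # header + payload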
class BufferedRead:
"""Buffer the .read() calls to the given file
This is used to lessen overhead and latency when a file is sent
over a connection. Profiling said that arrays were faster than
strings here.
"""
def __init__(self, file):
self.file = file
self.array_buf = array.array('c')
self.bufsize = Globals.conn_bufsize
def read(self, l = -1):
array_buf = self.array_buf
if l < 0: # Read as much as possible
result = array_buf.tostring() + self.file.read()
del array_buf[:]
return result
if len(array_buf) < l: # Try to make buffer at least as long as l
array_buf.fromstring(self.file.read(max(self.bufsize, l)))
result = array_buf[:l].tostring()
del array_buf[:l]
return result
def close(self): return self.file.close()
from log import *
from robust import *
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Define some lazy data structures and functions acting on them"""
from __future__ import generators
import os, stat, types
from static import *
import psyco
class Iter:
"""Hold static methods for the manipulation of lazy iterators"""
def filter(predicate, iterator):
"""Like filter in a lazy functional programming language"""
for i in iterator:
if predicate(i): yield i
def map(function, iterator):
"""Like map in a lazy functional programming language"""
for i in iterator: yield function(i)
def foreach(function, iterator):
"""Run function on each element in iterator"""
for i in iterator: function(i)
def cat(*iters):
"""Lazily concatenate iterators"""
for iter in iters:
for i in iter: yield i
def cat2(iter_of_iters):
"""Lazily concatenate iterators, iterated by big iterator"""
for iter in iter_of_iters:
for i in iter: yield i
def empty(iter):
"""True if iterator has length 0"""
for i in iter: return None
return 1
def equal(iter1, iter2, verbose = None, operator = lambda x, y: x == y):
"""True if iterator 1 has same elements as iterator 2
Use equality operator, or == if it is unspecified.
"""
for i1 in iter1:
try: i2 = iter2.next()
except StopIteration:
if verbose: print "End when i1 = %s" % (i1,)
return None
if not operator(i1, i2):
if verbose: print "%s not equal to %s" % (i1, i2)
return None
try: i2 = iter2.next()
except StopIteration: return 1
if verbose: print "End when i2 = %s" % (i2,)
return None
def Or(iter):
"""True if any element in iterator is true. Short circuiting"""
i = None
for i in iter:
if i: return i
return i
def And(iter):
"""True if all elements in iterator are true. Short circuiting"""
i = 1
for i in iter:
if not i: return i
return i
def len(iter):
"""Return length of iterator"""
i = 0
while 1:
try: iter.next()
except StopIteration: return i
i = i+1
def foldr(f, default, iter):
"""foldr the "fundamental list recursion operator"?"""
try: next = iter.next()
except StopIteration: return default
return f(next, Iter.foldr(f, default, iter))
def foldl(f, default, iter):
"""the fundamental list iteration operator.."""
while 1:
try: next = iter.next()
except StopIteration: return default
default = f(default, next)
def multiplex(iter, num_of_forks, final_func = None, closing_func = None):
"""Split a single iterater into a number of streams
The return val will be a list with length num_of_forks, each
of which will be an iterator like iter. final_func is the
function that will be called on each element in iter just as
it is being removed from the buffer. closing_func is called
when all the streams are finished.
"""
if num_of_forks == 2 and not final_func and not closing_func:
im2 = IterMultiplex2(iter)
return (im2.yielda(), im2.yieldb())
if not final_func: final_func = lambda i: None
if not closing_func: closing_func = lambda: None
# buffer is a list of elements that some iterators need and others
# don't
buffer = []
# buffer[forkposition[i]] is the next element yielded by iterator
# i. If it is -1, yield from the original iter
starting_forkposition = [-1] * num_of_forks
forkposition = starting_forkposition[:]
called_closing_func = [None]
def get_next(fork_num):
"""Return the next element requested by fork_num"""
if forkposition[fork_num] == -1:
try: buffer.insert(0, iter.next())
except StopIteration:
# call closing_func if necessary
if (forkposition == starting_forkposition and
not called_closing_func[0]):
closing_func()
called_closing_func[0] = 1 # don't call closing_func again
raise StopIteration
for i in range(num_of_forks): forkposition[i] += 1
return_val = buffer[forkposition[fork_num]]
forkposition[fork_num] -= 1
blen = len(buffer)
if not (blen-1) in forkposition:
# Last position in buffer no longer needed
assert forkposition[fork_num] == blen-2
final_func(buffer[blen-1])
del buffer[blen-1]
return return_val
def make_iterator(fork_num):
while(1): yield get_next(fork_num)
return tuple(map(make_iterator, range(num_of_forks)))
MakeStatic(Iter)
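# Quick illustration (not in the original) of the helpers above,
# especially how the two forks of multiplex share one buffer.
def _example_iter_usage():
    evens = Iter.filter(lambda x: x % 2 == 0, iter(range(10)))
    a, b = Iter.multiplex(evens, 2)  # fork the stream in two
    assert a.next() == 0 and a.next() == 2  # a runs ahead, buffering
    assert b.next() == 0  # b replays from the shared buffer
    assert Iter.len(b) == 4  # 2, 4, 6, 8 are still pending on b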
class IterMultiplex2:
"""Multiplex an iterator into 2 parts
This is a special optimized case of the Iter.multiplex function,
used when there is no closing_func or final_func, and we only want
to split it into 2. By profiling, this is a time sensitive class.
"""
def __init__(self, iter):
self.a_leading_by = 0 # How many places a is ahead of b
self.buffer = []
self.iter = iter
def yielda(self):
"""Return first iterator"""
buf, iter = self.buffer, self.iter
while(1):
if self.a_leading_by >= 0: # a is in front, add new element
elem = iter.next() # exception will be passed
buf.append(elem)
else: elem = buf.pop(0) # b is in front, subtract an element
self.a_leading_by += 1
yield elem
def yieldb(self):
"""Return second iterator"""
buf, iter = self.buffer, self.iter
while(1):
if self.a_leading_by <= 0: # b is in front, add new element
elem = iter.next() # exception will be passed
buf.append(elem)
else: elem = buf.pop(0) # a is in front, subtract an element
self.a_leading_by -= 1
yield elem
class IterTreeReducer:
"""Tree style reducer object for iterator
The indices of a RORPIter form a tree type structure. This class
can be used on each element of an iter in sequence and the result
will be as if the corresponding tree was reduced. This tries to
bridge the gap between the tree nature of directories, and the
iterator nature of the connection between hosts and the temporal
order in which the files are processed.
"""
def __init__(self, branch_class, branch_args):
"""ITR initializer"""
self.branch_class = branch_class
self.branch_args = branch_args
self.index = None
self.root_branch = branch_class(*branch_args)
self.branches = [self.root_branch]
def finish_branches(self, index):
"""Run Finish() on all branches index has passed
When we pass out of a branch, delete it and process it with
the parent. The innermost branches will be the last in the
list. Return None if we are out of the entire tree, and 1
otherwise.
"""
branches = self.branches
while 1:
to_be_finished = branches[-1]
base_index = to_be_finished.base_index
if base_index != index[:len(base_index)]:
# out of the tree, finish with to_be_finished
to_be_finished.call_end_proc()
del branches[-1]
if not branches: return None
branches[-1].branch_process(to_be_finished)
else: return 1
def add_branch(self, index):
"""Return branch of type self.branch_class, add to branch list"""
branch = self.branch_class(*self.branch_args)
branch.base_index = index
self.branches.append(branch)
return branch
def process_w_branch(self, branch, args):
"""Run start_process on latest branch"""
Robust.check_common_error(branch.on_error,
branch.start_process, args)
if not branch.caught_exception: branch.start_successful = 1
def Finish(self):
"""Call at end of sequence to tie everything up"""
while 1:
to_be_finished = self.branches.pop()
to_be_finished.call_end_proc()
if not self.branches: break
self.branches[-1].branch_process(to_be_finished)
def __call__(self, *args):
"""Process args, where args[0] is current position in iterator
Returns true if args successfully processed, false if index is
not in the current tree and thus the final result is
available.
Also note below we set self.index after doing the necessary
start processing, in case there is a crash in the middle.
"""
index = args[0]
if self.index is None:
self.root_branch.base_index = index
self.process_w_branch(self.root_branch, args)
self.index = index
return 1
if index <= self.index:
Log("Warning: oldindex %s >= newindex %s" % (self.index, index), 2)
return 1
if self.finish_branches(index) is None:
return None # We are no longer in the main tree
last_branch = self.branches[-1]
if last_branch.start_successful:
if last_branch.can_fast_process(*args):
last_branch.fast_process(*args)
else:
branch = self.add_branch(index)
self.process_w_branch(branch, args)
else: last_branch.log_prev_error(index)
self.index = index
return 1
psyco.bind(IterTreeReducer)
class ITRBranch:
"""Helper class for IterTreeReducer below
There are five stub functions below: start_process, end_process,
branch_process, can_fast_process, and fast_process. A class that
subclasses this one will probably fill in these functions to do
more.
It is important that this class be picklable, so keep that in mind
when subclassing (this is used to resume failed sessions).
"""
base_index = index = None
finished = None
caught_exception = start_successful = None
def call_end_proc(self):
"""Runs the end_process on self, checking for errors"""
if self.finished or not self.start_successful:
self.caught_exception = 1
if self.caught_exception: self.log_prev_error(self.base_index)
else: Robust.check_common_error(self.on_error, self.end_process)
self.finished = 1
def start_process(self, *args):
"""Do some initial processing (stub)"""
pass
def end_process(self):
"""Do any final processing before leaving branch (stub)"""
pass
def branch_process(self, branch):
"""Process a branch right after it is finished (stub)"""
assert branch.finished
pass
def can_fast_process(self, *args):
"""True if object can be processed without new branch (stub)"""
return None
def fast_process(self, *args):
"""Process args without new child branch (stub)"""
pass
def on_error(self, exc, *args):
"""This is run on any exception in start/end-process"""
self.caught_exception = 1
if args and args[0] and isinstance(args[0], tuple):
filename = os.path.join(*args[0])
elif self.index: filename = os.path.join(*self.index)
else: filename = "."
Log("Error '%s' processing %s" % (exc, filename), 2)
def log_prev_error(self, index):
"""Call function if no pending exception"""
Log("Skipping %s because of previous error" %
(os.path.join(*index),), 2)
# Put at bottom to prevent (viciously) circular module dependencies
from robust import *
from log import *
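# Minimal ITRBranch subclass (illustrative sketch, not from the original
# source): count the entries under each directory of an index stream fed
# in depth-first order, e.g. (), ("bin",), ("bin", "sh"), ...
class _CountBranch(ITRBranch):
    def start_process(self, index, isdir): self.total = 1
    def can_fast_process(self, index, isdir):
        return not isdir  # plain files don't need their own branch
    def fast_process(self, index, isdir): self.total += 1
    def branch_process(self, branch):
        assert branch.finished
        self.total += branch.total  # fold finished child into parent

def _example_itr():
    itr = IterTreeReducer(_CountBranch, [])
    for index, isdir in [((), 1), (("bin",), 1), (("bin", "sh"), 0),
                         (("etc",), 1), (("etc", "passwd"), 0)]:
        itr(index, isdir)
    itr.Finish()
    assert itr.root_branch.total == 5  # every index counted once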
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Provides a high-level interface to some librsync functions
This is a python wrapper around the lower-level _librsync module,
which is written in C. The goal was to use C as little as possible...
"""
import _librsync, types, array
blocksize = _librsync.RS_JOB_BLOCKSIZE
class librsyncError(Exception):
"""Signifies error in internal librsync processing (bad signature, etc.)
underlying _librsync.librsyncError's are regenerated using this
class because the C-created exceptions are by default
unPickleable. There is probably a way to fix this in _librsync,
but this scheme was easier.
"""
pass
class LikeFile:
"""File-like object used by SigFile, DeltaFile, and PatchFile"""
mode = "rb"
# This will be replaced in subclasses by an object with
# appropriate cycle() method
maker = None
def __init__(self, infile, need_seek = None):
"""LikeFile initializer - zero buffers, set eofs off"""
self.check_file(infile, need_seek)
self.infile = infile
self.closed = self.infile_closed = None
self.inbuf = ""
self.outbuf = array.array('c')
self.eof = self.infile_eof = None
def check_file(self, file, need_seek = None):
"""Raise type error if file doesn't have necessary attributes"""
if not hasattr(file, "read"):
raise TypeError("Basis file must have a read() method")
if not hasattr(file, "close"):
raise TypeError("Basis file must have a close() method")
if need_seek and not hasattr(file, "seek"):
raise TypeError("Basis file must have a seek() method")
def read(self, length = -1):
"""Build up self.outbuf, return first length bytes"""
if length == -1:
while not self.eof: self._add_to_outbuf_once()
real_len = len(self.outbuf)
else:
while not self.eof and len(self.outbuf) < length:
self._add_to_outbuf_once()
real_len = min(length, len(self.outbuf))
return_val = self.outbuf[:real_len].tostring()
del self.outbuf[:real_len]
return return_val
def _add_to_outbuf_once(self):
"""Add one cycle's worth of output to self.outbuf"""
if not self.infile_eof: self._add_to_inbuf()
try: self.eof, len_inbuf_read, cycle_out = self.maker.cycle(self.inbuf)
except _librsync.librsyncError, e: raise librsyncError(str(e))
self.inbuf = self.inbuf[len_inbuf_read:]
self.outbuf.fromstring(cycle_out)
def _add_to_inbuf(self):
"""Make sure len(self.inbuf) >= blocksize"""
assert not self.infile_eof
while len(self.inbuf) < blocksize:
new_in = self.infile.read(blocksize)
if not new_in:
self.infile_eof = 1
assert not self.infile.close()
self.infile_closed = 1
break
self.inbuf += new_in
def close(self):
"""Close infile"""
if not self.infile_closed: assert not self.infile.close()
self.closed = 1
class SigFile(LikeFile):
"""File-like object which incrementally generates a librsync signature"""
def __init__(self, infile):
"""SigFile initializer - takes basis file
basis file only needs to have read() and close() methods. It
will be closed when we come to the end of the signature.
"""
LikeFile.__init__(self, infile)
try: self.maker = _librsync.new_sigmaker()
except _librsync.librsyncError, e: raise librsyncError(str(e))
class DeltaFile(LikeFile):
"""File-like object which incrementally generates a librsync delta"""
def __init__(self, signature, new_file):
"""DeltaFile initializer - call with signature and new file
Signature can either be a string or a file with read() and
close() methods. New_file also only needs to have read() and
close() methods. It will be closed when self is closed.
"""
LikeFile.__init__(self, new_file)
if type(signature) is types.StringType: sig_string = signature
else:
self.check_file(signature)
sig_string = signature.read()
assert not signature.close()
try: self.maker = _librsync.new_deltamaker(sig_string)
except _librsync.librsyncError, e: raise librsyncError(str(e))
class PatchedFile(LikeFile):
"""File-like object which applies a librsync delta incrementally"""
def __init__(self, basis_file, delta_file):
"""PatchedFile initializer - call with basis delta
Here basis_file must be a true Python file, because we may
need to seek() around in it a lot, and this is done in C.
delta_file only needs read() and close() methods.
"""
LikeFile.__init__(self, delta_file)
if type(basis_file) is not types.FileType:
raise TypeError("basis_file must be a (true) file")
try: self.maker = _librsync.new_patchmaker(basis_file)
except _librsync.librsyncError, e: raise librsyncError(str(e))
class SigGenerator:
"""Calculate signature.
Input and output is same as SigFile, but the interface is like md5
module, not filelike object
"""
def __init__(self):
"""Return new signature instance"""
try: self.sig_maker = _librsync.new_sigmaker()
except _librsync.librsyncError, e: raise librsyncError(str(e))
self.gotsig = None
self.buffer = ""
self.sig_string = ""
def update(self, buf):
"""Add buf to data that signature will be calculated over"""
if self.gotsig:
raise librsyncError("SigGenerator already provided signature")
self.buffer += buf
while len(self.buffer) >= blocksize:
if self.process_buffer():
raise librsyncError("Premature EOF received from sig_maker")
def process_buffer(self):
"""Run self.buffer through sig_maker, add to self.sig_string"""
try: eof, len_buf_read, cycle_out = self.sig_maker.cycle(self.buffer)
except _librsync.librsyncError, e: raise librsyncError(str(e))
self.buffer = self.buffer[len_buf_read:]
self.sig_string += cycle_out
return eof
def getsig(self):
"""Return signature over given data"""
while not self.process_buffer(): pass # keep running until eof
return self.sig_string
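# md5-style usage of SigGenerator (sketch; "data" is any byte string):
def _example_siggen(data):
    sg = SigGenerator()
    sg.update(data)  # may be called repeatedly, like md5.update
    return sg.getsig()  # complete librsync signature as a string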
#!/usr/bin/env python
"""Demonstrate a memory leak in pysync/librsync"""
import os, _librsync
from librsync import *
os.chdir("/tmp")
# Write two 1-byte files
afile = open("a", "wb")
afile.write("a")
afile.close()
efile = open("e", "wb")
efile.write("e")
efile.close()
def copy(infileobj, outpath):
outfile = open(outpath, "wb")
while 1:
buf = infileobj.read(32768)
if not buf: break
outfile.write(buf)
assert not outfile.close()
assert not infileobj.close()
def test_cycle():
for i in xrange(100000):
sm = _librsync.new_sigmaker()
sm.cycle("a")
def main_test():
for i in xrange(100000):
# Write signature file
afile = open("a", "rb")
copy(SigFile(afile), "sig")
# Write delta file
efile = open("e", "r")
sigfile = open("sig", "rb")
copy(DeltaFile(sigfile, efile), "delta")
# Write patched file
afile = open("e", "rb")
deltafile = open("delta", "rb")
copy(PatchedFile(afile, deltafile), "a.out")
main_test()
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Manage logging, displaying and recording messages with required verbosity"""
import time, sys, traceback, types
class LoggerError(Exception): pass
class Logger:
"""All functions which deal with logging"""
def __init__(self):
self.log_file_open = None
self.log_file_local = None
self.verbosity = self.term_verbosity = 3
# termverbset is true if the term_verbosity has been explicitly set
self.termverbset = None
def setverbosity(self, verbosity_string):
"""Set verbosity levels. Takes a number string"""
try: self.verbosity = int(verbosity_string)
except ValueError:
Log.FatalError("Verbosity must be a number, received '%s' "
"instead." % verbosity_string)
if not self.termverbset: self.term_verbosity = self.verbosity
def setterm_verbosity(self, termverb_string):
"""Set verbosity to terminal. Takes a number string"""
try: self.term_verbosity = int(termverb_string)
except ValueError:
Log.FatalError("Terminal verbosity must be a number, received "
"'%s' instead." % termverb_string)
self.termverbset = 1
def open_logfile(self, rpath):
"""Inform all connections of an open logfile.
rpath.conn will write to the file, and the others will pass
write commands off to it.
"""
assert not self.log_file_open
rpath.conn.Log.open_logfile_local(rpath)
for conn in Globals.connections:
conn.Log.open_logfile_allconn(rpath.conn)
def open_logfile_allconn(self, log_file_conn):
"""Run on all connections to signal log file is open"""
self.log_file_open = 1
self.log_file_conn = log_file_conn
def open_logfile_local(self, rpath):
"""Open logfile locally - should only be run on one connection"""
assert rpath.conn is Globals.local_connection
try: self.logfp = rpath.open("a")
except (OSError, IOError), e:
raise LoggerError("Unable to open logfile %s: %s"
% (rpath.path, e))
self.log_file_local = 1
self.logrp = rpath
def close_logfile(self):
"""Close logfile and inform all connections"""
if self.log_file_open:
for conn in Globals.connections:
conn.Log.close_logfile_allconn()
self.log_file_conn.Log.close_logfile_local()
def close_logfile_allconn(self):
"""Run on every connection"""
self.log_file_open = None
def close_logfile_local(self):
"""Run by logging connection - close logfile"""
assert self.log_file_conn is Globals.local_connection
assert not self.logfp.close()
self.log_file_local = None
def format(self, message, verbosity):
"""Format the message, possibly adding date information"""
if verbosity < 9: return message + "\n"
else: return "%s %s\n" % (time.asctime(time.localtime(time.time())),
message)
def __call__(self, message, verbosity):
"""Log message that has verbosity importance
message can be a string, which is logged as-is, or a function,
which is then called and should return the string to be
logged. We do it this way in case producing the string would
take a significant amount of CPU.
"""
if verbosity > self.verbosity and verbosity > self.term_verbosity:
return
if not type(message) is types.StringType:
assert type(message) is types.FunctionType
message = message()
if verbosity <= self.verbosity: self.log_to_file(message)
if verbosity <= self.term_verbosity:
self.log_to_term(message, verbosity)
def log_to_file(self, message):
"""Write the message to the log file, if possible"""
if self.log_file_open:
if self.log_file_local:
self.logfp.write(self.format(message, self.verbosity))
else: self.log_file_conn.Log.log_to_file(message)
def log_to_term(self, message, verbosity):
"""Write message to stdout/stderr"""
if verbosity <= 2 or Globals.server: termfp = sys.stderr
else: termfp = sys.stdout
termfp.write(self.format(message, self.term_verbosity))
def conn(self, direction, result, req_num):
"""Log some data on the connection
The main worry with this function is that something in here
will create more network traffic, which will spiral into
infinite regress. So, for instance, logging must only be done
to the terminal, because otherwise the log file may be remote.
"""
if self.term_verbosity < 9: return
if type(result) is types.StringType: result_repr = repr(result)
else: result_repr = str(result)
if Globals.server: conn_str = "Server"
else: conn_str = "Client"
self.log_to_term("%s %s (%d): %s" %
(conn_str, direction, req_num, result_repr), 9)
def FatalError(self, message):
self("Fatal Error: " + message, 1)
Main.cleanup()
sys.exit(1)
def exception_to_string(self, arglist = []):
"""Return string version of current exception plus what's in arglist"""
type, value, tb = sys.exc_info()
s = ("Exception '%s' raised of class '%s':\n%s" %
(value, type, "".join(traceback.format_tb(tb))))
if arglist:
s += "__Arguments:\n" + "\n".join(map(str, arglist))
return s
def exception(self, only_terminal = 0, verbosity = 5):
"""Log an exception and traceback
If only_terminal is 0, log normally. If it is 1, then only
log to disk if log file is local (self.log_file_open = 1). If
it is 2, don't log to disk at all.
"""
assert only_terminal in (0, 1, 2)
if (only_terminal == 0 or
(only_terminal == 1 and self.log_file_open)):
logging_func = self.__call__
else: logging_func = self.log_to_term
logging_func(self.exception_to_string(), verbosity)
Log = Logger()
import Globals, Main
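# Usage sketch (illustrative only): the module-level Log instance is
# called with a message and a verbosity level; a function may be passed
# instead of a string to defer expensive message construction.
#   Log("Processing some file", 5)
#   Log(lambda: make_expensive_report(), 9)  # make_expensive_report is hypothetical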
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""list, delete, and otherwise manage increments"""
from __future__ import generators
from static import *
from log import *
import Globals, Time
class ManageException(Exception): pass
class Manage:
def get_file_type(rp):
"""Returns one of "regular", "directory", "missing", or "special"."""
if not rp.lstat(): return "missing"
elif rp.isdir(): return "directory"
elif rp.isreg(): return "regular"
else: return "special"
def get_inc_type(inc):
"""Return file type increment represents"""
assert inc.isincfile()
type = inc.getinctype()
if type == "dir": return "directory"
elif type == "diff": return "regular"
elif type == "missing": return "missing"
elif type == "snapshot": return Manage.get_file_type(inc)
else: assert None, "Unknown type %s" % (type,)
def describe_incs_parsable(incs, mirror_time, mirrorrp):
"""Return a string parsable by computer describing the increments
Each line is a time in seconds of the increment, and then the
type of the file. It will be sorted oldest to newest. For example:
10000 regular
20000 directory
30000 special
40000 missing
50000 regular <- last will be the current mirror
"""
incpairs = [(Time.stringtotime(inc.getinctime()), inc) for inc in incs]
incpairs.sort()
result = ["%s %s" % (time, Manage.get_inc_type(inc))
for time, inc in incpairs]
result.append("%s %s" % (mirror_time, Manage.get_file_type(mirrorrp)))
return "\n".join(result)
def describe_incs_human(incs, mirror_time, mirrorrp):
"""Return a string describing all the root increments"""
incpairs = [(Time.stringtotime(inc.getinctime()), inc) for inc in incs]
incpairs.sort()
result = ["Found %d increments:" % len(incpairs)]
for time, inc in incpairs:
result.append(" %s %s" %
(inc.dirsplit()[1], Time.timetopretty(time)))
result.append("Current mirror: %s" % Time.timetopretty(mirror_time))
return "\n".join(result)
def delete_earlier_than(baserp, time):
"""Delete increments older than time in directory baserp
time is in seconds. It will then delete any empty directories
in the tree. To process the entire backup area, the
rdiff-backup-data directory should be the root of the tree.
"""
baserp.conn.Manage.delete_earlier_than_local(baserp, time)
def delete_earlier_than_local(baserp, time):
"""Like delete_earlier_than, but run on local connection for speed"""
assert baserp.conn is Globals.local_connection
def yield_files(rp):
yield rp
if rp.isdir():
for filename in rp.listdir():
for sub_rp in yield_files(rp.append(filename)):
yield sub_rp
for rp in yield_files(baserp):
if ((rp.isincfile() and
Time.stringtotime(rp.getinctime()) < time) or
(rp.isdir() and not rp.listdir())):
Log("Deleting increment file %s" % rp.path, 5)
rp.delete()
MakeStatic(Manage)
class IncObj:
"""Increment object - represent a completed increment"""
def __init__(self, incrp):
"""IncObj initializer
incrp is an RPath of a path like increments.TIMESTR.dir
standing for the root of the increment.
"""
if not incrp.isincfile():
raise ManageException("%s is not an inc file" % incrp.path)
self.incrp = incrp
self.time = Time.stringtotime(incrp.getinctime())
def getbaserp(self):
"""Return rp of the incrp without extensions"""
return self.incrp.getincbase()
def pretty_time(self):
"""Return a formatted version of inc's time"""
return Time.timetopretty(self.time)
def full_description(self):
"""Return string describing increment"""
s = ["Increment file %s" % self.incrp.path,
"Date: %s" % self.pretty_time()]
return "\n".join(s)
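# Usage sketch (illustrative only; rbdata_rp is a hypothetical RPath of
# the rdiff-backup-data directory, cutoff a time in seconds):
#   Manage.delete_earlier_than(rbdata_rp, cutoff)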
#include <stdio.h>
#include <stdlib.h>
#include <rsync.h>
int main()
{
FILE *basis_file, *sig_file;
char filename[50];
rs_stats_t stats;
rs_result result;
long i;
for(i=0; i<=100000; i++) {
basis_file = fopen("a", "r");
sig_file = fopen("sig", "w");
result = rs_sig_file(basis_file, sig_file,
RS_DEFAULT_BLOCK_LEN, RS_DEFAULT_STRONG_LEN,
&stats);
if (result != RS_DONE) exit(result);
fclose(basis_file);
fclose(sig_file);
}
return 0;
}
#!/usr/bin/env python
"""Like rdiff, but written in python and uses librsync module.
Useful for benchmarking and testing of librsync and _librsync.
"""
import librsync, sys
blocksize = 32768
def makesig(inpath, outpath):
"""Write a signature of inpath at outpath"""
sf = librsync.SigFile(open(inpath, "rb"))
fout = open(outpath, "wb")
while 1:
buf = sf.read(blocksize)
if not buf: break
fout.write(buf)
assert not sf.close()
assert not fout.close()
def makedelta(sigpath, newpath, deltapath):
"""Write delta at deltapath using signature at sigpath"""
df = librsync.DeltaFile(open(sigpath, "rb"), open(newpath, "rb"))
fout = open(deltapath, "wb")
while 1:
buf = df.read(blocksize)
if not buf: break
fout.write(buf)
assert not df.close()
assert not fout.close()
def makepatch(basis_path, delta_path, new_path):
"""Write new given basis and delta"""
pf = librsync.PatchedFile(open(basis_path, "rb"), open(delta_path, "rb"))
fout = open(new_path, "wb")
while 1:
buf = pf.read(blocksize)
if not buf: break
fout.write(buf)
assert not pf.close()
assert not fout.close()
if sys.argv[1] == "signature":
makesig(sys.argv[2], sys.argv[3])
elif sys.argv[1] == "delta":
makedelta(sys.argv[2], sys.argv[3], sys.argv[4])
elif sys.argv[1] == "patch":
makepatch(sys.argv[2], sys.argv[3], sys.argv[4])
else: assert 0, "Bad mode argument %s" % (sys.argv[1],)
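# Example invocations (shell, illustrative; file names are hypothetical):
#   python rdiff.py signature basis.file sig.file
#   python rdiff.py delta sig.file new.file delta.file
#   python rdiff.py patch basis.file delta.file recovered.file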
#!/usr/bin/env python
"""Run rdiff-backup with profiling on
Same as rdiff-backup but runs profiler, and prints profiling
statistics afterwards.
"""
__no_execute__ = 1
import sys, rdiff_backup.Main, profile, pstats
profile.run("rdiff_backup.Main.Main(%s)" % repr(sys.argv[1:]),
"profile-output")
p = pstats.Stats("profile-output")
p.sort_stats('time')
p.print_stats(40)
#p.print_callers(20)
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Read increment files and restore to original"""
from __future__ import generators
import tempfile
from static import *
class RestoreError(Exception): pass
class Restore:
def Restore(inc_rpath, mirror, target, rest_time):
"""Recursively restore inc_rpath and mirror to target at rest_time
Like restore_recursive below, but with a friendlier
interface (it converts to DSRPaths if necessary, finds the inc
files with the appropriate base, and makes rid).
rest_time is the time in seconds to restore to;
inc_rpath should not be the name of an increment file, but the
increment file shorn of its suffixes and thus should have the
same index as mirror.
"""
if not isinstance(mirror, DSRPath): mirror = DSRPath(1, mirror)
if not isinstance(target, DSRPath): target = DSRPath(None, target)
mirror_time = Restore.get_mirror_time()
rest_time = Restore.get_rest_time(rest_time, mirror_time)
inc_list = Restore.get_inclist(inc_rpath)
rid = RestoreIncrementData(inc_rpath.index, inc_rpath, inc_list)
rid.sortincseq(rest_time, mirror_time)
Restore.check_hardlinks(rest_time)
Restore.restore_recursive(inc_rpath.index, mirror, rid, target,
rest_time, mirror_time)
def get_mirror_time():
"""Return the time (in seconds) of latest mirror"""
current_mirror_incs = \
Restore.get_inclist(Globals.rbdir.append("current_mirror"))
if not current_mirror_incs:
Log.FatalError("Could not get time of current mirror")
elif len(current_mirror_incs) > 1:
Log("Warning, two different dates for current mirror found", 2)
return Time.stringtotime(current_mirror_incs[0].getinctime())
def get_rest_time(old_rest_time, mirror_time):
"""If old_rest_time is between two increments, return older time
There is a slightly tricky reason for doing this: The rest of
the code just ignores increments that are older than
rest_time. But sometimes we want to consider the very next
increment older than rest time, because rest_time will be
between two increments, and what was actually on the mirror
side will correspond to the older one.
So here we assume all rdiff-backup events were recorded in
"increments" increments, and if it's in between we pick the
older one here.
"""
base_incs = Restore.get_inclist(Globals.rbdir.append("increments"))
if not base_incs: return old_rest_time
inctimes = [Time.stringtotime(inc.getinctime()) for inc in base_incs]
inctimes.append(mirror_time)
older_times = filter(lambda time: time <= old_rest_time, inctimes)
if older_times: return max(older_times)
else: # restore time older than oldest increment, just return that
return min(inctimes)
def get_inclist(inc_rpath):
"""Returns increments with given base"""
dirname, basename = inc_rpath.dirsplit()
parent_dir = RPath(inc_rpath.conn, dirname, ())
if not parent_dir.isdir(): return [] # inc directory not created yet
index = inc_rpath.index
if index:
get_inc_ext = lambda filename: \
RPath(inc_rpath.conn, inc_rpath.base,
inc_rpath.index[:-1] + (filename,))
else: get_inc_ext = lambda filename: \
RPath(inc_rpath.conn, os.path.join(dirname, filename))
inc_list = []
for filename in parent_dir.listdir():
inc = get_inc_ext(filename)
if inc.isincfile() and inc.getincbase_str() == basename:
inc_list.append(inc)
return inc_list
def check_hardlinks(rest_time):
"""Check for hard links and enable hard link support if found"""
if (Globals.preserve_hardlinks != 0 and
Hardlink.retrieve_final(rest_time)):
Log("Hard link information found, attempting to preserve "
"hard links.", 5)
SetConnections.UpdateGlobal('preserve_hardlinks', 1)
else: SetConnections.UpdateGlobal('preserve_hardlinks', None)
def restore_recursive(index, mirror, rid, target, time, mirror_time):
"""Recursive restore function.
rid is a RestoreIncrementData object whose inclist is already
sortedincseq'd, and target is the dsrp to restore to.
Note that target may have a different index than mirror and
rid, because we may be restoring a file whose index is, say
('foo','bar') to a target whose path does not contain
"foo/bar".
"""
assert isinstance(mirror, DSRPath) and isinstance(target, DSRPath)
assert mirror.index == rid.index
mirror_finalizer = IterTreeReducer(DestructiveSteppingFinalizer, ())
target_finalizer = IterTreeReducer(DestructiveSteppingFinalizer, ())
for rcd in Restore.yield_rcds(rid.index, mirror, rid,
target, time, mirror_time):
rcd.RestoreFile()
if rcd.mirror: mirror_finalizer(rcd.index, rcd.mirror)
target_finalizer(rcd.target.index, rcd.target)
target_finalizer.Finish()
mirror_finalizer.Finish()
def yield_rcds(index, mirrorrp, rid, target, rest_time, mirror_time):
"""Iterate RestoreCombinedData objects starting with given args
rid is a RestoreCombinedData object. target is an rpath where
the created file should go.
In this case the "mirror" directory is treated as the source,
and we are actually copying stuff onto what Select considers
the source directory.
"""
select_result = Globals.select_mirror.Select(target)
if select_result == 0: return
if mirrorrp and not Globals.select_source.Select(mirrorrp):
mirrorrp = None
rcd = RestoreCombinedData(rid, mirrorrp, target)
if (mirrorrp and mirrorrp.isdir()) or \
(rid and rid.inc_rpath and rid.inc_rpath.isdir()):
sub_rcds = Restore.yield_sub_rcds(index, mirrorrp, rid,
target, rest_time, mirror_time)
else: sub_rcds = None
if select_result == 1:
yield rcd
if sub_rcds:
for sub_rcd in sub_rcds: yield sub_rcd
elif select_result == 2:
if sub_rcds:
try: first = sub_rcds.next()
except StopIteration: return # no tuples found inside, skip
yield rcd
yield first
for sub_rcd in sub_rcds: yield sub_rcd
def yield_sub_rcds(index, mirrorrp, rid, target, rest_time, mirror_time):
"""Yield collated tuples from inside given args"""
if not Restore.check_dir_exists(mirrorrp, rid): return
mirror_iter = Restore.yield_mirrorrps(mirrorrp)
rid_iter = Restore.yield_rids(rid, rest_time, mirror_time)
for indexed_tup in RORPIter.CollateIterators(mirror_iter, rid_iter):
index = indexed_tup.index
new_mirrorrp, new_rid = indexed_tup
for rcd in Restore.yield_rcds(index, new_mirrorrp,
new_rid, target.append(index[-1]), rest_time, mirror_time):
yield rcd
def check_dir_exists(mirrorrp, rid):
"""Return true if target should be a directory"""
if rid and rid.inc_list:
# Incs say dir if last (earliest) one is a dir increment
return rid.inc_list[-1].getinctype() == "dir"
elif mirrorrp: return mirrorrp.isdir() # if no incs, copy mirror
else: return None
def yield_mirrorrps(mirrorrp):
"""Yield mirrorrps underneath given mirrorrp"""
if mirrorrp and mirrorrp.isdir():
if Globals.quoting_enabled:
for rp in FilenameMapping.get_quoted_dir_children(mirrorrp):
yield rp
else:
dirlist = mirrorrp.listdir()
dirlist.sort()
for filename in dirlist: yield mirrorrp.append(filename)
def yield_rids(rid, rest_time, mirror_time):
"""Yield RestoreIncrementData objects within given rid dir
If the rid doesn't correspond to a directory, don't yield any
elements. If there are increments whose corresponding base
doesn't exist, the first element will be None. All the rpaths
involved correspond to files in the increment directory.
"""
if not rid or not rid.inc_rpath or not rid.inc_rpath.isdir(): return
rid_dict = {} # dictionary of basenames:rids
dirlist = rid.inc_rpath.listdir()
if Globals.quoting_enabled:
dirlist = [FilenameMapping.unquote(fn) for fn in dirlist]
def affirm_dict_indexed(basename):
"""Make sure the rid dictionary has given basename as key"""
if not rid_dict.has_key(basename):
rid_dict[basename] = RestoreIncrementData(
rid.index + (basename,), None, []) # init with empty rid
def add_to_dict(filename):
"""Add filename to the inc tuple dictionary"""
rp = rid.inc_rpath.append(filename)
if Globals.quoting_enabled: rp.quote_path()
if rp.isincfile() and rp.getinctype() != 'data':
basename = rp.getincbase_str()
affirm_dict_indexed(basename)
rid_dict[basename].inc_list.append(rp)
elif rp.isdir():
affirm_dict_indexed(filename)
rid_dict[filename].inc_rpath = rp
for filename in dirlist: add_to_dict(filename)
keys = rid_dict.keys()
keys.sort()
# sortincseq now to avoid descending .missing directories later
for key in keys:
rid = rid_dict[key]
if rid.inc_rpath or rid.inc_list:
rid.sortincseq(rest_time, mirror_time)
yield rid
MakeStatic(Restore)
class RestoreIncrementData:
"""Contains information about a specific index from the increments dir
This is just a container class, used because it is easier to
work with than an IndexedTuple.
"""
def __init__(self, index, inc_rpath, inc_list):
self.index = index
self.inc_rpath = inc_rpath
self.inc_list = inc_list
def sortincseq(self, rest_time, mirror_time):
"""Sort self.inc_list sequence, throwing away irrelevant increments"""
if not self.inc_list or rest_time >= mirror_time:
self.inc_list = []
return
newer_incs = self.get_newer_incs(rest_time, mirror_time)
i = 0
while i < len(newer_incs):
# Only diff type increments require later versions
if newer_incs[i].getinctype() != "diff": break
i = i+1
self.inc_list = newer_incs[:i+1]
self.inc_list.reverse() # return in reversed order (latest first)
def get_newer_incs(self, rest_time, mirror_time):
"""Return list of newer incs sorted by time (increasing)
Also discard increments older than rest_time (rest_time we are
assuming is the exact time rdiff-backup was run, so no need to
consider the next oldest increment or any of that)
"""
incpairs = []
for inc in self.inc_list:
time = Time.stringtotime(inc.getinctime())
if time >= rest_time: incpairs.append((time, inc))
incpairs.sort()
return [pair[1] for pair in incpairs]
class RestoreCombinedData:
"""Combine index information from increment and mirror directories
This is similar to RestoreIncrementData but has mirror information
also.
"""
def __init__(self, rid, mirror, target):
"""Init - set values from one or both if they exist
mirror and target are DSRPaths of the corresponding files in
the mirror and target directory respectively. rid is a
RestoreIncrementData as defined above
"""
if rid:
self.index = rid.index
self.inc_rpath = rid.inc_rpath
self.inc_list = rid.inc_list
if mirror:
self.mirror = mirror
assert mirror.index == self.index
else: self.mirror = None
elif mirror:
self.index = mirror.index
self.mirror = mirror
self.inc_list = []
self.inc_rpath = None
else: assert None, "neither rid nor mirror given"
self.target = target
def RestoreFile(self):
"""Non-recursive restore function """
if not self.inc_list and not (self.mirror and self.mirror.lstat()):
return # no increments were applicable
self.log()
if self.restore_hardlink(): return
if not self.inc_list or self.inc_list[0].getinctype() == "diff":
assert self.mirror and self.mirror.lstat(), \
"No base to go with incs for %s" % self.target.path
RPath.copy_with_attribs(self.mirror, self.target)
for inc in self.inc_list: self.applyinc(inc, self.target)
def log(self):
"""Log current restore action"""
inc_string = ','.join([inc.path for inc in self.inc_list])
Log("Restoring %s with increments %s to %s" %
(self.mirror and self.mirror.path,
inc_string, self.target.path), 5)
def restore_hardlink(self):
"""Hard link target and return true if hard linking appropriate"""
if (Globals.preserve_hardlinks and
Hardlink.restore_link(self.index, self.target)):
RPath.copy_attribs(self.inc_list and self.inc_list[-1] or
self.mirror, self.target)
return 1
return None
def applyinc(self, inc, target):
"""Apply increment rp inc to targetrp target"""
Log("Applying increment %s to %s" % (inc.path, target.path), 6)
inctype = inc.getinctype()
if inctype == "diff":
if not target.lstat():
raise RestoreError("Bad increment sequence at " + inc.path)
Rdiff.patch_action(target, inc,
delta_compressed = inc.isinccompressed()
).execute()
elif inctype == "dir":
if not target.isdir():
if target.lstat():
raise RestoreError("File %s already exists" % target.path)
target.mkdir()
elif inctype == "missing": return
elif inctype == "snapshot":
if inc.isinccompressed():
target.write_from_fileobj(inc.open("rb", compress = 1))
else: RPath.copy(inc, target)
else: raise RestoreError("Unknown inctype %s" % inctype)
RPath.copy_attribs(inc, target)
from log import *
from destructive_stepping import *
from rpath import *
from rorpiter import *
import Globals, Time, Rdiff, Hardlink, FilenameMapping, SetConnections
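# Usage sketch (illustrative only; inc_rp, mirror_rp, and target_rp are
# hypothetical RPaths, rest_time a time in seconds; see Restore.Restore
# above for the argument conventions):
#   Restore.Restore(inc_rp, mirror_rp, target_rp, rest_time)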
from __future__ import generators
import marshal, sha, types
execfile("iterfile.py")
#######################################################################
#
# rlist - Define the CachingIter, and sig/diff/patch ops on iterators
#
class CachingIter:
"""Cache parts of an iter using a list
Turn an iter into something that you can prepend elements into,
and also read from without apparently changing the state.
"""
def __init__(self, iter_or_list):
if type(iter_or_list) is types.ListType:
self.iter = iter(iter_or_list)
else: self.iter = iter_or_list
self.next = self.iter.next
self.head = []
def __iter__(self): return self
def _next(self):
"""Take elements from the head list
When there are elements waiting before the main iterator, this
is the next function. If not, iter.next returns to being next.
"""
head = self.head
a = head[0]
del head[0]
if not head: self.next = self.iter.next
return a
def nextrange(self, m):
"""Return and consume the next m elements"""
l = self.head[:m]
del self.head[:m]
for i in xrange(m - len(l)): l.append(self.iter.next())
return l
def peek(self):
"""Return next element without removing it from iterator"""
n = self.next()
self.push(n)
return n
def push(self, elem):
"""Insert an element into the iterator at the beginning"""
if not self.head: self.next = self._next
self.head.insert(0, elem)
def pushrange(self, elem_list):
"""Insert list of multiple elements at the beginning"""
if not self.head: self.next = self._next
self.head[:0] = elem_list
def cache(self, m):
"""Move next m elements from iter to internal list
If m is None, append the entire rest of the iterator.
"""
h, it = self.head, self.iter
if m is None:
for i in it: h.append(i)
else:
for i in xrange(m): h.append(it.next())
def __getitem__(self, key):
"""Support a[i:j] style notation. Non destructive"""
if type(key) is types.SliceType:
if key.stop > len(self.head): self.cache(key.stop - len(self.head))
return self.head[key.start:key.stop]
else:
if key >= len(self.head): self.cache(key + 1 - len(self.head))
return self.head[key]
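# Usage sketch (illustrative only):
#   ci = CachingIter(iter([1, 2, 3]))
#   assert ci.peek() == 1      # look ahead without consuming
#   ci.push(0)                 # prepend an element
#   assert ci.next() == 0 and ci.next() == 1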
class RListDelta:
"""Note a difference from one iterator (A) to another (B)
The min, max pairs are indices which stand for the half-open
interval (min, max], and elemlist is a list of all the elements in
A which fall within this interval.
These are produced by the function RList.Deltas(...)
"""
def __init__(self, (min, max), elemlist):
self.min, self.max = min, max
self.elemlist = elemlist
class RList:
"""Tools for signatures, diffing, and patching an iterator
This class requires that the iterators involved are yielding
objects that have .index and .data attributes. Two objects with
the same .data attribute are supposed to be equivalent. The
iterator must also yield the objects in increasing order with
respect to the .index attribute.
"""
blocksize = 100
def Signatures(iter):
"""Return iterator of signatures from stream of pairs
Each signature is an ordered pair (last index sig applies to,
SHA digest of data)
"""
i, s = 0, sha.new()
for iter_elem in iter:
s.update(marshal.dumps(iter_elem.data))
i = i+1
if i == RList.blocksize:
yield (iter_elem.index, s.digest())
i, s = 0, sha.new()
if i != 0: yield (iter_elem.index, s.digest())
def sig_one_block(iter_or_list):
"""Return the digest portion of a signature on given list"""
s = sha.new()
for iter_elem in iter_or_list: s.update(marshal.dumps(iter_elem.data))
return s.digest()
def Deltas(remote_sigs, iter):
"""Return iterator of Delta objects that bring iter to remote"""
def get_before(index, iter):
"""Return elements in iter whose index is before or equal index
iter needs to be pushable
"""
l = []
while 1:
try: iter_elem = iter.next()
except StopIteration: return l
if iter_elem.index > index: break
l.append(iter_elem)
iter.push(iter_elem)
return l
if not isinstance(iter, CachingIter): iter = CachingIter(iter)
oldindex = None
for (rs_index, rs_digest) in remote_sigs:
l = get_before(rs_index, iter)
if rs_digest != RList.sig_one_block(l):
yield RListDelta((oldindex, rs_index), l)
oldindex = rs_index
def patch_once(basis, delta):
"""Apply one delta to basis to return original iterator
This returns the original iterator up to and including the max
range of delta, then stops. basis should be pushable.
"""
# Return elements of basis until start of delta range
for basis_elem in basis:
if basis_elem.index > delta.min:
basis.push(basis_elem)
break
yield basis_elem
# Yield elements of delta...
for elem in delta.elemlist: yield elem
# Finally, discard basis until end of delta range
for basis_elem in basis:
if basis_elem.index > delta.max:
basis.push(basis_elem)
break
def Patch(basis, deltas):
"""Apply a delta stream to basis iterator, yielding original"""
if not isinstance(basis, CachingIter): basis = CachingIter(basis)
for d in deltas:
for elem in RList.patch_once(basis, d): yield elem
for elem in basis: yield elem
def get_difference_once(basis, delta):
"""From one delta, find differences from basis
Will return pairs (basis_elem, new_elem) where basis_elem is
the element from the basis iterator and new_elem is the
element from the other iterator. If either is missing, None
will take its place. Both will be present iff the two have the
same index.
"""
# Discard any elements of basis before delta starts
for basis_elem in basis:
if basis_elem.index > delta.min:
basis.push(basis_elem)
break
# In range compare each one by one
di, boverflow, doverflow = 0, None, None
while 1:
# Set indices and data, or mark if at end of range already
try:
basis_elem = basis.next()
if basis_elem.index > delta.max:
basis.push(basis_elem)
boverflow = 1
except StopIteration: boverflow = 1
if di >= len(delta.elemlist): doverflow = 1
else: delta_elem = delta.elemlist[di]
if boverflow and doverflow: break
elif boverflow:
yield (None, delta_elem)
di = di+1
elif doverflow: yield (basis_elem, None)
# Now can assume that everything is in range
elif basis_elem.index > delta_elem.index:
yield (None, delta_elem)
basis.push(basis_elem)
di = di+1
elif basis_elem.index == delta_elem.index:
if basis_elem.data != delta_elem.data:
yield (basis_elem, delta_elem)
di = di+1
else: yield (basis_elem, None)
def Dissimilar(basis, deltas):
"""Return iter of differences from delta iter and basis iter"""
if not isinstance(basis, CachingIter): basis = CachingIter(basis)
for d in deltas:
for triple in RList.get_difference_once(basis, d): yield triple
MakeStatic(RList)
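# Usage sketch (illustrative only; every iterator must yield objects
# with .index and .data attributes in increasing index order, as the
# RList docstring requires; remote_iter and local_iter are hypothetical):
#   sigs = RList.Signatures(remote_iter)     # computed on the remote side
#   deltas = RList.Deltas(sigs, local_iter)  # deltas bringing local to remote
#   combined = RList.Patch(local_iter2, deltas)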
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Prevent mirror from being corrupted; handle errors
Ideally, no matter how an instance of rdiff-backup gets aborted, no
information should get lost. The target directory should be left in a
coherent state, and later instances of rdiff-backup should clean
things up so there is no sign that anything ever got aborted or
failed.
Thus, files should be updated in as atomic a way as possible. Each file
should be updated (and the corresponding diff files written) or not,
and it should be clear which happened. In general, I don't think this
is possible, since the creation of the diff files and the changing of
updated files cannot be guaranteed to happen together. It is possible,
I think, to record various information to files which would allow a
later process to figure out what the last operation was, but this
would add several file operations to the processing of each file, and
I don't think it would be a good tradeoff.
The compromise reached here is that diff files should be created just
before the mirror files are updated, and each file update should be
done with a rename operation on a file in the same directory.
Furthermore, every once in a while, rdiff-backup will record which
file it just finished processing. If any fatal errors are caught, it
will also record the last processed file. Future instances may not
know exactly when the previous instance was aborted, but they will be
able to narrow down the possibilities.
"""
import tempfile, errno, signal, cPickle, C
from static import *
class RobustAction:
"""Represents a file operation to be accomplished later"""
def __init__(self, init_thunk, final_func, error_handler):
"""RobustAction initializer
All the thunks are functions whose return value will be
ignored. init_thunk should not make any irreversible changes
but prepare for the writing of the important data. final_func
should be as short as possible and do the real work.
error_handler is run if there is an error in init_thunk or
final_func. Errors in init_thunk should be corrected by
error_handler as if nothing had been run in the first place.
init_thunk takes no arguments.
final_func takes the return value of init_thunk as its
argument, and its return value is returned by execute().
error_handler takes three arguments: the exception, a value
which is true just in case self.init_thunk ran correctly, and
a value which will be the return value of init_thunk if it ran
correctly.
"""
self.init_thunk = init_thunk or self.default_init_thunk
self.final_func = final_func or self.default_final_func
self.error_handler = error_handler or self.default_error_handler
def execute(self):
"""Actually run the operation"""
ran_init_thunk = None
try:
init_val = self.init_thunk()
ran_init_thunk = 1
return self.final_func(init_val)
except Exception, exc: # Catch all errors
Log.exception()
TracebackArchive.add()
if ran_init_thunk: self.error_handler(exc, 1, init_val)
else: self.error_handler(exc, None, None)
raise exc
def default_init_thunk(self): return None
def default_final_func(self, init_val): return init_val
def default_error_handler(self, exc, ran_init, init_val): pass
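# Usage sketch (illustrative only): prepare work in init, commit it in
# final, and undo any partial work in the error handler.
#   def init(): return "prepared"
#   def final(init_val): return init_val + ", committed"
#   def error(exc, ran_init, init_val): pass  # clean up partial work here
#   result = RobustAction(init, final, error).execute()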
class Robust:
"""Contains various methods designed to make things safer"""
null_action = RobustAction(None, None, None)
def chain(*robust_action_list):
"""Return chain tying together a number of robust actions
The whole chain will be aborted if some error occurs in
initialization stage of any of the component actions.
"""
ras_with_started_inits, init_return_vals = [], []
def init():
for ra in robust_action_list:
ras_with_started_inits.append(ra)
init_return_vals.append(ra.init_thunk())
return init_return_vals
def final(init_return_vals):
final_vals = []
for ra, init_val in zip(robust_action_list, init_return_vals):
final_vals.append(ra.final_func(init_val))
return final_vals
def error(exc, ran_init, init_val):
for ra, init_val in zip(ras_with_started_inits, init_return_vals):
ra.error_handler(exc, 1, init_val)
for ra in ras_with_started_inits[len(init_return_vals):]:
ra.error_handler(exc, None, None)
return RobustAction(init, final, error)
def chain_nested(*robust_action_list):
"""Like chain but final actions performed in reverse order"""
ras_with_started_inits, init_vals = [], []
def init():
for ra in robust_action_list:
ras_with_started_inits.append(ra)
init_vals.append(ra.init_thunk())
return init_vals
def final(init_vals):
ras_and_inits = zip(robust_action_list, init_vals)
ras_and_inits.reverse()
final_vals = []
for ra, init_val in ras_and_inits:
final_vals.append(ra.final_func(init_val))
return final_vals
def error(exc, ran_init, init_val):
for ra, init_val in zip(ras_with_started_inits, init_vals):
ra.error_handler(exc, 1, init_val)
for ra in ras_with_started_inits[len(init_vals):]:
ra.error_handler(exc, None, None)
return RobustAction(init, final, error)
def make_tf_robustaction(init_thunk, tempfiles, final_renames = None):
"""Shortcut RobustAction creator when only tempfiles involved
Often the robust action will just consist of some initial
stage, renaming tempfiles in the final stage, and deleting
them if there is an error. This function makes it easier to
create RobustActions of that type.
"""
if isinstance(tempfiles, TempFile): tempfiles = (tempfiles,)
if isinstance(final_renames, RPath): final_renames = (final_renames,)
if final_renames is None: final_renames = [None] * len(tempfiles)
assert len(tempfiles) == len(final_renames)
def final(init_val): # rename tempfiles to final positions
for tempfile, destination in zip(tempfiles, final_renames):
if destination:
if destination.isdir(): # Cannot rename over directory
destination.delete()
tempfile.rename(destination)
return init_val
def error(exc, ran_init, init_val):
for tf in tempfiles: tf.delete()
return RobustAction(init_thunk, final, error)
def copy_action(rorpin, rpout):
"""Return robust action copying rorpin to rpout
The source can be a rorp or an rpath. Does not recurse. If
directories copied, then just exit (output directory not
overwritten).
"""
tfl = [None] # Need some mutable state to hold tf value
def init():
if not (rorpin.isdir() and rpout.isdir()): # already a dir
tfl[0] = tf = TempFileManager.new(rpout)
if rorpin.isreg(): tf.write_from_fileobj(rorpin.open("rb"))
else: RPath.copy(rorpin, tf)
return tf
else: return None
def final(tf):
if tf and tf.lstat():
if rpout.isdir(): rpout.delete()
tf.rename(rpout)
return rpout
def error(exc, ran_init, init_val):
if tfl[0]: tfl[0].delete()
return RobustAction(init, final, error)
def copy_with_attribs_action(rorpin, rpout, compress = None):
"""Like copy_action but also copy attributes"""
tfl = [None] # Need some mutable state for error handler
def init():
if not (rorpin.isdir() and rpout.isdir()): # already a dir
tfl[0] = tf = TempFileManager.new(rpout)
if rorpin.isreg():
tf.write_from_fileobj(rorpin.open("rb"), compress)
else: RPath.copy(rorpin, tf)
if tf.lstat(): # Some files, like sockets, won't be created
RPathStatic.copy_attribs(rorpin, tf)
return tf
else: return None
def final(tf):
if rorpin.isdir() and rpout.isdir():
RPath.copy_attribs(rorpin, rpout)
elif tf and tf.lstat():
if rpout.isdir(): rpout.delete() # can't rename over dir
tf.rename(rpout)
return rpout
def error(exc, ran_init, init_val):
if tfl[0]: tfl[0].delete()
return RobustAction(init, final, error)
def copy_attribs_action(rorpin, rpout):
"""Return action which just copies attributes
Copying attributes is already pretty atomic, so just run
normal sequence.
"""
def final(init_val):
RPath.copy_attribs(rorpin, rpout)
return rpout
return RobustAction(None, final, None)
def symlink_action(rpath, linktext):
"""Return symlink action by moving one file over another"""
tf = TempFileManager.new(rpath)
def init(): tf.symlink(linktext)
return Robust.make_tf_robustaction(init, tf, rpath)
def destructive_write_action(rp, s):
"""Return action writing string s to rpath rp in robust way
This will overwrite any data currently in rp.
"""
tf = TempFileManager.new(rp)
def init():
fp = tf.open("wb")
fp.write(s)
fp.close()
tf.setdata()
return Robust.make_tf_robustaction(init, tf, rp)
def check_common_error(error_handler, function, args = []):
"""Apply function to args, if error, run error_handler on exception
This uses the catch_error predicate below to only catch
certain exceptions which seem innocent enough.
"""
try: return function(*args)
except Exception, exc:
TracebackArchive.add([function] + list(args))
if Robust.catch_error(exc):
Log.exception()
conn = Globals.backup_writer
if conn is not None: # increment error count
ITRB_exists = conn.Globals.is_not_None('ITRB')
if ITRB_exists: conn.Globals.ITRB.increment_stat('Errors')
if error_handler: return error_handler(exc, *args)
else: return
Log.exception(1, 2)
raise
def catch_error(exc):
"""Return true if exception exc should be caught"""
for exception_class in (SkipFileException, DSRPPermError,
RPathException, Rdiff.RdiffException,
librsync.librsyncError,
C.UnknownFileTypeError):
if isinstance(exc, exception_class): return 1
if (isinstance(exc, EnvironmentError) and
errno.errorcode[exc[0]] in ('EPERM', 'ENOENT', 'EACCES', 'EBUSY',
'EEXIST', 'ENOTDIR', 'ENAMETOOLONG',
'EINTR', 'ENOTEMPTY', 'EIO', 'ETXTBSY',
'ESRCH', 'EINVAL')):
return 1
return 0
def listrp(rp):
"""Like rp.listdir() but return [] if error, and sort results"""
def error_handler(exc):
Log("Error listing directory %s" % rp.path, 2)
return []
dir_listing = Robust.check_common_error(error_handler, rp.listdir)
dir_listing.sort()
return dir_listing
def signal_handler(signum, frame):
"""This is called when signal signum is caught"""
raise SignalException(signum)
def install_signal_handlers():
"""Install signal handlers on current connection"""
for signum in [signal.SIGQUIT, signal.SIGHUP, signal.SIGTERM]:
signal.signal(signum, Robust.signal_handler)
MakeStatic(Robust)
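# Usage sketch (illustrative only; src_rorp and dest_rp are hypothetical
# RPath-like objects):
#   Robust.copy_with_attribs_action(src_rorp, dest_rp).execute()
#   entries = Robust.listrp(dest_rp)   # sorted listing, [] on error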
class SignalException(Exception):
"""SignalException(signum) means signal signum has been received"""
pass
class TracebackArchive:
"""Save last 10 caught exceptions, so they can be printed if fatal"""
_traceback_strings = []
def add(cls, extra_args = []):
"""Add most recent exception to archived list
If extra_args are present, convert to strings and add them as
extra information to same traceback archive.
"""
cls._traceback_strings.append(Log.exception_to_string(extra_args))
if len(cls._traceback_strings) > 10:
cls._traceback_strings = cls._traceback_strings[-10:]
def log(cls):
"""Print all exception information to log file"""
if cls._traceback_strings:
Log("------------ Old traceback info -----------\n%s\n"
"-------------------------------------------" %
("\n".join(cls._traceback_strings),), 3)
MakeClass(TracebackArchive)
class TempFileManager:
"""Manage temp files"""
# This is a connection-specific list of temp files, to be cleaned
# up before rdiff-backup exits.
_tempfiles = []
# To make collisions less likely, this gets put in the file name
# and incremented whenever a new file is requested.
_tfindex = 0
def new(cls, rp_base, same_dir = 1):
"""Return new tempfile that isn't in use.
If same_dir, tempfile will be in same directory as rp_base.
Otherwise, use tempfile module to get filename.
"""
conn = rp_base.conn
if conn is not Globals.local_connection:
return conn.TempFileManager.new(rp_base, same_dir)
def find_unused(conn, dir):
"""Find an unused tempfile with connection conn in directory dir"""
while 1:
if cls._tfindex > 100000000:
Log("Resetting index", 2)
cls._tfindex = 0
tf = TempFile(conn, os.path.join(dir,
"rdiff-backup.tmp.%d" % cls._tfindex))
cls._tfindex = cls._tfindex+1
if not tf.lstat(): return tf
if same_dir: tf = find_unused(conn, rp_base.dirsplit()[0])
else: tf = TempFile(conn, tempfile.mktemp())
cls._tempfiles.append(tf)
return tf
def remove_listing(cls, tempfile):
"""Remove listing of tempfile"""
if Globals.local_connection is not tempfile.conn:
tempfile.conn.TempFileManager.remove_listing(tempfile)
elif tempfile in cls._tempfiles: cls._tempfiles.remove(tempfile)
def delete_all(cls):
"""Delete all remaining tempfiles"""
for tf in cls._tempfiles[:]: tf.delete()
MakeClass(TempFileManager)
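# Usage sketch (illustrative only; rp is a hypothetical RPath): write new
# data to a tempfile in the same directory, then rename it into place.
#   tf = TempFileManager.new(rp)
#   tf.write_from_fileobj(some_fileobj)  # some_fileobj is hypothetical
#   tf.rename(rp)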
from rpath import *
class TempFile(RPath):
"""Like an RPath, but keep track of which ones are still here"""
def rename(self, rp_dest):
"""Rename temp file to permanent location, possibly overwriting"""
if self.isdir() and not rp_dest.isdir():
# Cannot move a directory directly over another file
rp_dest.delete()
if (isinstance(rp_dest, DSRPath) and rp_dest.delay_perms
and not self.hasfullperms()):
# If we are moving to a delayed perm directory, delay
# permission change on destination.
rp_dest.chmod(self.getperms())
self.chmod(0700)
RPathStatic.rename(self, rp_dest)
# Sometimes this just seems to fail silently, as in one
# hardlinked twin is moved over the other. So check to make
# sure below.
self.setdata()
if self.lstat():
rp_dest.delete()
RPathStatic.rename(self, rp_dest)
self.setdata()
if self.lstat(): raise OSError("Cannot rename tmp file correctly")
TempFileManager.remove_listing(self)
def delete(self):
RPath.delete(self)
TempFileManager.remove_listing(self)
class SaveState:
"""Save state in the middle of backups for resuming later"""
_last_file_sym = None # RPath of sym pointing to last file processed
_last_file_definitive_rp = None # Touch this if last file is really last
_last_checkpoint_time = 0 # time in seconds of last checkpoint
_checkpoint_rp = None # RPath of checkpoint data pickle
def init_filenames(cls):
"""Set rpaths of markers. Assume rbdir already set."""
if not Globals.isbackup_writer:
return Globals.backup_writer.SaveState.init_filenames()
assert Globals.local_connection is Globals.rbdir.conn, \
(Globals.rbdir.conn, Globals.backup_writer)
cls._last_file_sym = Globals.rbdir.append(
"last-file-incremented.%s.data" % Time.curtimestr)
cls._checkpoint_rp = Globals.rbdir.append(
"checkpoint-data.%s.data" % Time.curtimestr)
cls._last_file_definitive_rp = Globals.rbdir.append(
"last-file-definitive.%s.data" % Time.curtimestr)
def touch_last_file(cls):
"""Touch last file marker, indicating backup has begun"""
if not cls._last_file_sym.lstat(): cls._last_file_sym.touch()
def touch_last_file_definitive(cls):
"""Create last-file-definitive marker
When a backup gets aborted, there may be time to indicate the
last file successfully processed, and this should be touched.
Sometimes when the abort is hard, there may be a last file
indicated, but further files since then have been processed,
in which case this shouldn't be touched.
"""
cls._last_file_definitive_rp.touch()
def record_last_file_action(cls, last_file_rorp):
"""Action recording last file to be processed as symlink in rbdir
last_file_rorp is None means that no file is known to have
been processed.
"""
if last_file_rorp:
symtext = apply(os.path.join,
('increments',) + last_file_rorp.index)
return Robust.symlink_action(cls._last_file_sym, symtext)
else: return RobustAction(None, lambda init_val: cls.touch_last_file(),
None)
def checkpoint(cls, ITR, finalizer, last_file_rorp, override = None):
"""Save states of tree reducer and finalizer during inc backup
If override is true, checkpoint even if one isn't due.
"""
if not override and not cls.checkpoint_needed(): return
assert cls._checkpoint_rp, "_checkpoint_rp not set yet"
cls._last_checkpoint_time = time.time()
Log("Writing checkpoint time %s" % cls._last_checkpoint_time, 7)
state_string = cPickle.dumps((ITR, finalizer))
Robust.chain(Robust.destructive_write_action(cls._checkpoint_rp,
state_string),
cls.record_last_file_action(last_file_rorp)).execute()
def checkpoint_needed(cls):
"""Returns true if another checkpoint is called for"""
return (time.time() > cls._last_checkpoint_time +
Globals.checkpoint_interval)
def checkpoint_remove(cls):
"""Remove all checkpointing data after successful operation"""
for rp in Resume.get_relevant_rps(): rp.delete()
if Globals.preserve_hardlinks: Hardlink.remove_all_checkpoints()
MakeClass(SaveState)
class ResumeException(Exception):
"""Indicates some error has been encountered while trying to resume"""
pass
class Resume:
"""Check for old aborted backups and resume if necessary"""
_session_info_list = None # List of ResumeSessionInfo's, sorted by time
def FindTime(cls, index, later_than = 0):
"""For a given index, find the appropriate time to use for inc
If it is clear which time to use (because it is determined by
definitive records, or there are no aborted backups, etc.) then
just return the appropriate time. Otherwise, if an aborted
backup was last checkpointed before the index, assume that it
didn't get there, and go for the older time. If an inc file
is already present, the function will be rerun with later time
specified.
"""
assert Globals.isbackup_writer
if Time.prevtime > later_than: return Time.prevtime # usual case
for si in cls.get_sis_covering_index(index):
if si.time > later_than: return si.time
raise SkipFileException("Index %s already covered, skipping" %
str(index))
def get_sis_covering_index(cls, index):
"""Return sorted list of SessionInfos which may cover index
An aborted backup may be relevant unless its last index is lower
and we are sure that it didn't go further.
"""
return filter(lambda session_info:
not ((session_info.last_index is None or
session_info.last_index < index) and
session_info.last_definitive),
cls._session_info_list)
def SetSessionInfo(cls):
"""Read data directory and initialize _session_info"""
assert Globals.isbackup_writer
silist = []
rp_quad_dict = cls.group_rps_by_time(cls.get_relevant_rps())
times = rp_quad_dict.keys()
times.sort()
for time in times:
try: silist.append(cls.quad_to_si(time, rp_quad_dict[time]))
except ResumeException:
Log("Bad resume information found, skipping", 2)
cls._session_info_list = silist
def get_relevant_rps(cls):
"""Return list of relevant rpaths in rbdata directory"""
relevant_bases = ['last-file-incremented', 'last-file-mirrored',
'checkpoint-data', 'last-file-definitive']
rps = map(Globals.rbdir.append, Globals.rbdir.listdir())
return filter(lambda rp: rp.isincfile()
and rp.getincbase_str() in relevant_bases, rps)
def group_rps_by_time(cls, rplist):
"""Take list of rps return time dict {time: quadlist}
Times in seconds are the keys, values are quadruples
[last-file-incremented, last-file-mirrored, checkpoint-data,
last-file-definitive].
"""
result = {}
for rp in rplist:
time = Time.stringtotime(rp.getinctime())
if result.has_key(time): quadlist = result[time]
else: quadlist = [None, None, None, None]
base_string = rp.getincbase_str()
if base_string == 'last-file-incremented': quadlist[0] = rp
elif base_string == 'last-file-mirrored': quadlist[1] = rp
elif base_string == 'last-file-definitive': quadlist[3] = 1
else:
assert base_string == 'checkpoint-data'
quadlist[2] = rp
result[time] = quadlist
return result
def quad_to_si(cls, time, quad):
"""Take time, quadlist, return associated ResumeSessionInfo"""
increment_sym, mirror_sym, checkpoint_rp, last_definitive = quad
if increment_sym and mirror_sym:
raise ResumeException("both mirror and inc sym shouldn't exist")
ITR, finalizer = None, None
if increment_sym:
mirror = None
last_index = cls.sym_to_index(increment_sym)
if checkpoint_rp:
ITR, finalizer = cls.unpickle_checkpoint(checkpoint_rp)
elif mirror_sym:
mirror = 1
last_index = cls.sym_to_index(mirror_sym)
if checkpoint_rp:
finalizer = cls.unpickle_checkpoint(checkpoint_rp)
else: raise ResumeException("Missing increment or mirror sym")
return ResumeSessionInfo(mirror, time, last_index, last_definitive,
finalizer, ITR)
def sym_to_index(cls, sym_rp):
"""Read last file sym rp, return last file index
If sym_rp is not a sym at all, return None, indicating that no
file index was ever conclusively processed.
"""
if not sym_rp.issym(): return None
link_components = sym_rp.readlink().split("/")
assert link_components[0] == 'increments'
return tuple(link_components[1:])
def unpickle_checkpoint(cls, checkpoint_rp):
"""Read data from checkpoint_rp and return unpickled data
Return value is pair (patch increment ITR, finalizer state).
"""
fp = checkpoint_rp.open("rb")
data = fp.read()
fp.close()
try: result = cPickle.loads(data)
except Exception, exc:
raise ResumeException("Bad pickle at %s: %s" %
(checkpoint_rp.path, exc))
return result
def ResumeCheck(cls):
"""Return relevant ResumeSessionInfo if there's one we should resume
Also if find RSI to resume, reset current time to old resume
time.
"""
cls.SetSessionInfo()
if not cls._session_info_list:
if Globals.resume == 1:
Log.FatalError("User specified resume, but no data on "
"previous backup found.")
else: return None
else:
si = cls._session_info_list[-1]
if (Globals.resume == 1 or
(time.time() <= (si.time + Globals.resume_window) and
not Globals.resume == 0)):
Log("Resuming aborted backup dated %s" %
Time.timetopretty(si.time), 2)
Time.setcurtime(si.time)
if Globals.preserve_hardlinks:
if (not si.last_definitive or not
Hardlink.retrieve_checkpoint(Globals.rbdir, si.time)):
Log("Hardlink information not successfully "
"recovered.", 2)
return si
else:
Log("Last backup dated %s was aborted, but we aren't "
"resuming it." % Time.timetopretty(si.time), 2)
return None
assert None
MakeClass(Resume)
class ResumeSessionInfo:
"""Hold information about a previously aborted session"""
def __init__(self, mirror, time, last_index,
last_definitive, finalizer = None, ITR = None):
"""Class initializer
time - starting time in seconds of backup
mirror - true if backup was a mirror, false if increment
last_index - Last confirmed index processed by backup, or None
last_definitive - True if we know last_index is really last
finalizer - the dsrp finalizer if available
ITR - For increments, the IterTreeReducer (assume mirror if N/A)
"""
self.time = time
self.mirror = mirror
self.last_index = last_index
self.last_definitive = last_definitive
self.ITR, self.finalizer = ITR, finalizer
from log import *
from destructive_stepping import *
import Time, Rdiff, librsync
from highlevel import *
# Copyright 2002 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Operations on Iterators of Read Only Remote Paths"""
from __future__ import generators
import tempfile, UserList, types, librsync
from static import *
from log import *
from rpath import *
from robust import *
from iterfile import *
import Globals, Rdiff, Hardlink
class RORPIterException(Exception): pass
class RORPIter:
"""Functions relating to iterators of Read Only RPaths
The main structure will be an iterator that yields RORPaths.
Every RORPath has a "raw" form that makes it more amenable to
being turned into a file. The raw form of the iterator yields
each RORPath in the form of the tuple (index, data_dictionary,
files), where files is the number of files attached (usually 1 or
0). After that, if a file is attached, it yields that file.
"""
def ToRaw(rorp_iter):
"""Convert a rorp iterator to raw form"""
for rorp in rorp_iter:
if rorp.file:
yield (rorp.index, rorp.data, 1)
yield rorp.file
else: yield (rorp.index, rorp.data, 0)
def FromRaw(raw_iter):
"""Convert raw rorp iter back to standard form"""
for index, data, num_files in raw_iter:
rorp = RORPath(index, data)
if num_files:
assert num_files == 1, "Only one file accepted right now"
rorp.setfile(RORPIter.getnext(raw_iter))
yield rorp
def ToFile(rorp_iter):
"""Return file version of iterator"""
return FileWrappingIter(RORPIter.ToRaw(rorp_iter))
def FromFile(fileobj):
"""Recover rorp iterator from file interface"""
return RORPIter.FromRaw(IterWrappingFile(fileobj))
def IterateRPaths(base_rp):
"""Return an iterator yielding RPaths with given base rp"""
yield base_rp
if base_rp.isdir():
dirlisting = base_rp.listdir()
dirlisting.sort()
for filename in dirlisting:
for rp in RORPIter.IterateRPaths(base_rp.append(filename)):
yield rp
def Signatures(rp_iter):
"""Yield signatures of rpaths in given rp_iter"""
def error_handler(exc, rp):
Log("Error generating signature for %s" % rp.path, 2)
return None
for rp in rp_iter:
if rp.isplaceholder(): yield rp
else:
rorp = rp.getRORPath()
if rp.isreg():
if rp.isflaglinked(): rorp.flaglinked()
else:
fp = Robust.check_common_error(
error_handler, Rdiff.get_signature, (rp,))
if fp: rorp.setfile(fp)
else: continue
yield rorp
def GetSignatureIter(base_rp):
"""Return a signature iterator recurring over the base_rp"""
return RORPIter.Signatures(RORPIter.IterateRPaths(base_rp))
def CollateIterators(*rorp_iters):
"""Collate RORPath iterators by index
So it takes two or more iterators of rorps and returns an
iterator yielding tuples like (rorp1, rorp2) with the same
index. If one or the other lacks that index, it will be None
"""
# overflow[i] means that iter[i] has been exhausted
# rorps[i] is None means that it is time to replenish it.
iter_num = len(rorp_iters)
if iter_num == 2:
return RORPIter.Collate2Iters(rorp_iters[0], rorp_iters[1])
overflow = [None] * iter_num
rorps = overflow[:]
def setrorps(overflow, rorps):
"""Set the overflow and rorps list"""
for i in range(iter_num):
if not overflow[i] and rorps[i] is None:
try: rorps[i] = rorp_iters[i].next()
except StopIteration:
overflow[i] = 1
rorps[i] = None
def getleastindex(rorps):
"""Return the first index in rorps, assuming rorps isn't empty"""
return min(map(lambda rorp: rorp.index,
filter(lambda x: x, rorps)))
def yield_tuples(iter_num, overflow, rorps):
while 1:
setrorps(overflow, rorps)
if not None in overflow: break
index = getleastindex(rorps)
yieldval = []
for i in range(iter_num):
if rorps[i] and rorps[i].index == index:
yieldval.append(rorps[i])
rorps[i] = None
else: yieldval.append(None)
yield IndexedTuple(index, yieldval)
return yield_tuples(iter_num, overflow, rorps)
def Collate2Iters(riter1, riter2):
"""Special case of CollateIterators with 2 arguments
This does the same thing but is faster because it doesn't have
to consider the >2 iterator case. Profiler says speed is
important here.
"""
relem1, relem2 = None, None
while 1:
if not relem1:
try: relem1 = riter1.next()
except StopIteration:
if relem2: yield IndexedTuple(index2, (None, relem2))
for relem2 in riter2:
yield IndexedTuple(relem2.index, (None, relem2))
break
index1 = relem1.index
if not relem2:
try: relem2 = riter2.next()
except StopIteration:
if relem1: yield IndexedTuple(index1, (relem1, None))
for relem1 in riter1:
yield IndexedTuple(relem1.index, (relem1, None))
break
index2 = relem2.index
if index1 < index2:
yield IndexedTuple(index1, (relem1, None))
relem1 = None
elif index1 == index2:
yield IndexedTuple(index1, (relem1, relem2))
relem1, relem2 = None, None
else: # index2 is less
yield IndexedTuple(index2, (None, relem2))
relem2 = None
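# Illustrative example (hypothetical rorps, not in the original source):
# collating iterators with indices [("a",), ("b",)] and [("b",), ("c",)]
# yields
#   IndexedTuple(("a",), (a, None))
#   IndexedTuple(("b",), (b1, b2))
#   IndexedTuple(("c",), (None, c))
# i.e. a merge by index where the side missing an index gets None.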
def getnext(iter):
"""Return the next element of an iterator, raising error if none"""
try: next = iter.next()
except StopIteration: raise RORPIterException("Unexpected end to iter")
return next
def GetDiffIter(sig_iter, new_iter):
"""Return delta iterator from sig_iter to new_iter
The accompanying file for each will be a delta as produced by
rdiff, unless the destination file does not exist, in which
case it will be the file in its entirety.
sig_iter may be composed of rorps, but new_iter should have
full RPaths.
"""
collated_iter = RORPIter.CollateIterators(sig_iter, new_iter)
for rorp, rp in collated_iter: yield RORPIter.diffonce(rorp, rp)
def diffonce(sig_rorp, new_rp):
"""Return one diff rorp, based from signature rorp and orig rp"""
if sig_rorp and Globals.preserve_hardlinks and sig_rorp.isflaglinked():
if new_rp: diff_rorp = new_rp.getRORPath()
else: diff_rorp = RORPath(sig_rorp.index)
diff_rorp.flaglinked()
return diff_rorp
elif sig_rorp and sig_rorp.isreg() and new_rp and new_rp.isreg():
diff_rorp = new_rp.getRORPath()
diff_rorp.setfile(Rdiff.get_delta_sigfileobj(sig_rorp.open("rb"),
new_rp))
diff_rorp.set_attached_filetype('diff')
return diff_rorp
else:
# Just send over the original if a diff isn't appropriate
if sig_rorp: sig_rorp.close_if_necessary()
if not new_rp: return RORPath(sig_rorp.index)
elif new_rp.isreg():
diff_rorp = new_rp.getRORPath(1)
diff_rorp.set_attached_filetype('snapshot')
return diff_rorp
else: return new_rp.getRORPath()
def PatchIter(base_rp, diff_iter):
"""Patch the appropriate rps in basis_iter using diff_iter"""
basis_iter = RORPIter.IterateRPaths(base_rp)
collated_iter = RORPIter.CollateIterators(basis_iter, diff_iter)
for basisrp, diff_rorp in collated_iter:
RORPIter.patchonce_action(base_rp, basisrp, diff_rorp).execute()
def patchonce_action(base_rp, basisrp, diff_rorp):
"""Return action patching basisrp using diff_rorp"""
assert diff_rorp, "Missing diff index %s" % basisrp.index
if not diff_rorp.lstat():
return RobustAction(None, lambda init_val: basisrp.delete(), None)
if Globals.preserve_hardlinks and diff_rorp.isflaglinked():
if not basisrp: basisrp = base_rp.new_index(diff_rorp.index)
tf = TempFileManager.new(basisrp)
def init(): Hardlink.link_rp(diff_rorp, tf, basisrp)
return Robust.make_tf_robustaction(init, tf, basisrp)
elif basisrp and basisrp.isreg() and diff_rorp.isreg():
if diff_rorp.get_attached_filetype() != 'diff':
raise RPathException("File %s appears to have changed during"
" processing, skipping" % (basisrp.path,))
return Rdiff.patch_with_attribs_action(basisrp, diff_rorp)
else: # Diff contains whole file, just copy it over
if not basisrp: basisrp = base_rp.new_index(diff_rorp.index)
return Robust.copy_with_attribs_action(diff_rorp, basisrp)
MakeStatic(RORPIter)
class IndexedTuple(UserList.UserList):
"""Like a tuple, but has .index
This is used by CollateIterators above, and can be passed to the
IterTreeReducer.
"""
def __init__(self, index, sequence):
self.index = index
self.data = tuple(sequence)
def __len__(self): return len(self.data)
def __getitem__(self, key):
"""This only works for numerical keys (easier this way)"""
return self.data[key]
def __lt__(self, other): return self.__cmp__(other) == -1
def __le__(self, other): return self.__cmp__(other) != 1
def __ne__(self, other): return not self.__eq__(other)
def __gt__(self, other): return self.__cmp__(other) == 1
def __ge__(self, other): return self.__cmp__(other) != -1
def __cmp__(self, other):
assert isinstance(other, IndexedTuple)
if self.index < other.index: return -1
elif self.index == other.index: return 0
else: return 1
def __eq__(self, other):
if isinstance(other, IndexedTuple):
return self.index == other.index and self.data == other.data
elif type(other) is types.TupleType:
return self.data == other
else: return None
def __str__(self):
return "(%s).%s" % (", ".join(map(str, self.data)), self.index)
"""Wrapper class around a real path like "/usr/bin/env"
The RPath (short for Remote Path) and associated classes make some
function calls more convenient and also make working with files on
remote systems transparent.
For instance, suppose
rp = RPath(connection_object, "/usr/bin/env")
Then rp.getperms() returns the permissions of that file, and
rp.delete() deletes that file. Both of these will work the same even
if "usr/bin/env" is on a different computer. So many rdiff-backup
functions use rpaths so they don't have to know whether the files they
are dealing with are local or remote.
"""
import os, stat, re, sys, shutil, gzip, socket, time
from static import *
class RPathException(Exception): pass
class RPathStatic:
"""Contains static methods for use with RPaths"""
def copyfileobj(inputfp, outputfp):
"""Copies file inputfp to outputfp in blocksize intervals"""
blocksize = Globals.blocksize
while 1:
inbuf = inputfp.read(blocksize)
if not inbuf: break
outputfp.write(inbuf)
def cmpfileobj(fp1, fp2):
"""True if file objects fp1 and fp2 contain same data"""
blocksize = Globals.blocksize
while 1:
buf1 = fp1.read(blocksize)
buf2 = fp2.read(blocksize)
if buf1 != buf2: return None
elif not buf1: return 1
def check_for_files(*rps):
"""Make sure that all the rps exist, raise error if not"""
for rp in rps:
if not rp.lstat():
raise RPathException("File %s does not exist" % rp.path)
def move(rpin, rpout):
"""Move rpin to rpout, renaming if possible"""
try: RPath.rename(rpin, rpout)
except os.error:
RPath.copy(rpin, rpout)
rpin.delete()
def copy(rpin, rpout):
"""Copy RPath rpin to rpout. Works for symlinks, dirs, etc."""
Log("Regular copying %s to %s" % (rpin.index, rpout.path), 6)
if not rpin.lstat():
raise RPathException("File %s does not exist" % rpin.index)
if rpout.lstat():
if rpin.isreg() or not RPath.cmp(rpin, rpout):
rpout.delete() # easier to write than to compare
else: return
if rpin.isreg(): RPath.copy_reg_file(rpin, rpout)
elif rpin.isdir(): rpout.mkdir()
elif rpin.issym(): rpout.symlink(rpin.readlink())
elif rpin.ischardev():
major, minor = rpin.getdevnums()
rpout.makedev("c", major, minor)
elif rpin.isblkdev():
major, minor = rpin.getdevnums()
rpout.makedev("b", major, minor)
elif rpin.isfifo(): rpout.mkfifo()
elif rpin.issock(): rpout.mksock()
else: raise RPathException("File %s has unknown type" % rpin.path)
def copy_reg_file(rpin, rpout):
"""Copy regular file rpin to rpout, possibly avoiding connection"""
try:
if rpout.conn is rpin.conn:
rpout.conn.shutil.copyfile(rpin.path, rpout.path)
rpout.setdata()
return
except AttributeError: pass
rpout.write_from_fileobj(rpin.open("rb"))
def cmp(rpin, rpout):
"""True if rpin has the same data as rpout
cmp does not compare file ownership, permissions, or times, or
examine the contents of a directory.
"""
RPath.check_for_files(rpin, rpout)
if rpin.isreg():
if not rpout.isreg(): return None
fp1, fp2 = rpin.open("rb"), rpout.open("rb")
result = RPathStatic.cmpfileobj(fp1, fp2)
if fp1.close() or fp2.close():
raise RPathException("Error closing file")
return result
elif rpin.isdir(): return rpout.isdir()
elif rpin.issym():
return rpout.issym() and (rpin.readlink() == rpout.readlink())
elif rpin.ischardev():
return rpout.ischardev() and \
(rpin.getdevnums() == rpout.getdevnums())
elif rpin.isblkdev():
return rpout.isblkdev() and \
(rpin.getdevnums() == rpout.getdevnums())
elif rpin.isfifo(): return rpout.isfifo()
elif rpin.issock(): return rpout.issock()
else: raise RPathException("File %s has unknown type" % rpin.path)
def copy_attribs(rpin, rpout):
"""Change file attributes of rpout to match rpin
Only changes the chmoddable bits, uid/gid ownership, and
timestamps, so both must already exist.
"""
Log("Copying attributes from %s to %s" % (rpin.index, rpout.path), 7)
RPath.check_for_files(rpin, rpout)
if rpin.issym(): return # symlinks have no valid attributes
if Globals.change_ownership: apply(rpout.chown, rpin.getuidgid())
rpout.chmod(rpin.getperms())
if not rpin.isdev(): rpout.setmtime(rpin.getmtime())
def cmp_attribs(rp1, rp2):
"""True if rp1 has the same file attributes as rp2
Does not compare file access times. If not changing
ownership, do not check user/group id.
"""
RPath.check_for_files(rp1, rp2)
if Globals.change_ownership and rp1.getuidgid() != rp2.getuidgid():
result = None
elif rp1.getperms() != rp2.getperms(): result = None
elif rp1.issym() and rp2.issym(): # Don't check times for some types
result = 1
elif rp1.isblkdev() and rp2.isblkdev(): result = 1
elif rp1.ischardev() and rp2.ischardev(): result = 1
else: result = (rp1.getmtime() == rp2.getmtime())
Log("Compare attribs %s and %s: %s" % (rp1.path, rp2.path, result), 7)
return result
def copy_with_attribs(rpin, rpout):
"""Copy file and then copy over attributes"""
RPath.copy(rpin, rpout)
RPath.copy_attribs(rpin, rpout)
def quick_cmp_with_attribs(rp1, rp2):
"""Quicker version of cmp_with_attribs
Instead of reading all of each file, assume that regular files
are the same if the attributes compare.
"""
if not RPath.cmp_attribs(rp1, rp2): return None
if rp1.isreg() and rp2.isreg() and (rp1.getlen() == rp2.getlen()):
return 1
return RPath.cmp(rp1, rp2)
def cmp_with_attribs(rp1, rp2):
"""Combine cmp and cmp_attribs"""
return RPath.cmp_attribs(rp1, rp2) and RPath.cmp(rp1, rp2)
def rename(rp_source, rp_dest):
"""Rename rp_source to rp_dest"""
assert rp_source.conn is rp_dest.conn
Log(lambda: "Renaming %s to %s" % (rp_source.path, rp_dest.path), 7)
rp_source.conn.os.rename(rp_source.path, rp_dest.path)
rp_dest.data = rp_source.data
rp_source.data = {'type': None}
# If we are moving to a DSRPath, assume that the current times
# are the intended ones. We need to save them now in case
# they are changed later.
if isinstance(rp_dest, DSRPath):
if rp_dest.delay_mtime:
if 'mtime' in rp_dest.data:
rp_dest.setmtime(rp_dest.data['mtime'])
if rp_dest.delay_atime:
if 'atime' in rp_dest.data:
rp_dest.setatime(rp_dest.data['atime'])
def tupled_lstat(filename):
"""Like os.lstat, but return only a tuple, or None if os.error
Later versions of os.lstat return a special lstat object,
which can confuse the pickler and cause errors in remote
operations. This has been fixed in Python 2.2.1.
"""
try: return tuple(os.lstat(filename))
except os.error: return None
def make_socket_local(rpath):
"""Make a local socket at the given path
This takes an rpath so that it will be checked by Security.
(Miscellaneous strings will not be.)
"""
assert rpath.conn is Globals.local_connection
s = socket.socket(socket.AF_UNIX)
try: s.bind(rpath.path)
except socket.error, exc:
raise SkipFileException("Socket error: " + str(exc))
def gzip_open_local_read(rpath):
"""Return open GzipFile. See security note directly above"""
assert rpath.conn is Globals.local_connection
return gzip.GzipFile(rpath.path, "rb")
def open_local_read(rpath):
"""Return open file (provided for security reasons)"""
assert rpath.conn is Globals.local_connection
return open(rpath.path, "rb")
MakeStatic(RPathStatic)
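# Usage sketch (illustrative): after MakeStatic, these methods are called on
# the class without an instance, as elsewhere in this module, e.g.
#   RPath.copy_with_attribs(rpin, rpout)   # copy file, then perms/owner/times
#   RPath.cmp_attribs(rp1, rp2)            # 1 if attributes match, else None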
class RORPath(RPathStatic):
"""Read Only RPath - carry information about a path
These contain information about a file, and possibly the file's
data, but do not have a connection and cannot be written to or
changed. The advantage of these objects is that they can be
communicated by encoding their index and data dictionary.
"""
def __init__(self, index, data = None):
self.index = index
if data: self.data = data
else: self.data = {'type':None} # signify empty file
self.file = None
def __eq__(self, other):
"""True iff the two rorpaths are equivalent"""
if self.index != other.index: return None
for key in self.data.keys(): # compare dicts key by key
if ((key == 'uid' or key == 'gid') and
(not Globals.change_ownership or self.issym())):
# Don't compare gid/uid for symlinks or if not change_ownership
pass
elif key == 'atime' and not Globals.preserve_atime: pass
elif key == 'devloc' or key == 'inode' or key == 'nlink': pass
elif key == 'size' and self.isdir(): pass
elif (not other.data.has_key(key) or
self.data[key] != other.data[key]): return None
return 1
def __ne__(self, other): return not self.__eq__(other)
def __str__(self):
"""Pretty print file statistics"""
return "Index: %s\nData: %s" % (self.index, self.data)
def __getstate__(self):
"""Return picklable state
This is necessary in case the RORPath is carrying around a
file object, which can't/shouldn't be pickled.
"""
return (self.index, self.data)
def __setstate__(self, rorp_state):
"""Reproduce RORPath from __getstate__ output"""
self.index, self.data = rorp_state
def make_placeholder(self):
"""Make rorp into a placeholder
This object doesn't contain any information about the file,
but, when passed along, may show where the previous stages are
in their processing. It is the RORPath equivalent of fiber.
This placeholder size, in conjunction with the placeholder
threshold in HighLevel.generate_dissimilar, seems to yield an
OK tradeoff between unnecessary placeholders and lots of
memory usage, but I'm not sure exactly why.
"""
self.data = {'placeholder': " "*500}
def isplaceholder(self):
"""True if the object is a placeholder"""
return self.data.has_key('placeholder')
def lstat(self):
"""Returns type of file
The allowable types are None if the file doesn't exist, 'reg'
for a regular file, 'dir' for a directory, 'dev' for a device
file, 'fifo' for a fifo, 'sock' for a socket, and 'sym' for a
symlink.
"""
return self.data['type']
gettype = lstat
def isdir(self):
"""True if self is a dir"""
return self.data['type'] == 'dir'
def isreg(self):
"""True if self is a regular file"""
return self.data['type'] == 'reg'
def issym(self):
"""True if path is of a symlink"""
return self.data['type'] == 'sym'
def isfifo(self):
"""True if path is a fifo"""
return self.data['type'] == 'fifo'
def ischardev(self):
"""True if path is a character device file"""
return self.data['type'] == 'dev' and self.data['devnums'][0] == 'c'
def isblkdev(self):
"""True if path is a block device file"""
return self.data['type'] == 'dev' and self.data['devnums'][0] == 'b'
def isdev(self):
"""True if path is a device file"""
return self.data['type'] == 'dev'
def issock(self):
"""True if path is a socket"""
return self.data['type'] == 'sock'
def getperms(self):
"""Return permission block of file"""
return self.data['perms']
def getsize(self):
"""Return length of file in bytes"""
return self.data['size']
def getuidgid(self):
"""Return userid/groupid of file"""
return self.data['uid'], self.data['gid']
def getatime(self):
"""Return access time in seconds"""
return self.data['atime']
def getmtime(self):
"""Return modification time in seconds"""
return self.data['mtime']
def getinode(self):
"""Return inode number of file"""
return self.data['inode']
def getdevloc(self):
"""Device number file resides on"""
return self.data['devloc']
def getnumlinks(self):
"""Number of places inode is linked to"""
return self.data['nlink']
def readlink(self):
"""Wrapper around os.readlink()"""
return self.data['linkname']
def getdevnums(self):
"""Return a devices major/minor numbers from dictionary"""
return self.data['devnums'][1:]
def setfile(self, file):
"""Right now just set self.file to be the already opened file"""
assert file and not self.file
def closing_hook(): self.file_already_open = None
self.file = RPathFileHook(file, closing_hook)
self.file_already_open = None
def get_attached_filetype(self):
"""If there is a file attached, say what it is
Currently the choices are 'snapshot' meaning an exact copy of
something, and 'diff' for an rdiff style diff.
"""
return self.data['filetype']
def set_attached_filetype(self, type):
"""Set the type of the attached file"""
self.data['filetype'] = type
def isflaglinked(self):
"""True if rorp is a signature/diff for a hardlink file
This indicates that a file's data need not be transferred
because it is hardlinked on the remote side.
"""
return self.data.has_key('linked')
def flaglinked(self):
"""Signal that rorp is a signature/diff for a hardlink file"""
self.data['linked'] = 1
def open(self, mode):
"""Return file type object if any was given using self.setfile"""
if mode != "rb": raise RPathException("Bad mode %s" % mode)
if self.file_already_open:
raise RPathException("Attempt to open same file twice")
self.file_already_open = 1
return self.file
def close_if_necessary(self):
"""If file is present, discard data and close"""
if self.file:
while self.file.read(Globals.blocksize): pass
assert not self.file.close(), \
"Error closing file\ndata = %s\nindex = %s\n" % (self.data,
self.index)
self.file_already_open = None
class RPath(RORPath):
"""Remote Path class - wrapper around a possibly non-local pathname
This class contains a dictionary called "data" which should
contain all the information about the file sufficient for
identification (i.e. if two files have the same (==) data
dictionary, they are the same file).
"""
regex_chars_to_quote = re.compile("[\\\\\\\"\\$`]")
def __init__(self, connection, base, index = (), data = None):
"""RPath constructor
connection = self.conn is the Connection the RPath will use to
make system calls, and index is the name of the rpath used for
comparison, and should be a tuple consisting of the parts of
the rpath after the base split up. For instance ("foo",
"bar") for "foo/bar" (no base), and ("local", "bin") for
"/usr/local/bin" if the base is "/usr".
For the root directory "/", the index is empty and the base is
"/".
"""
self.conn = connection
self.index = index
self.base = base
if base is not None:
if base == "/": self.path = "/" + "/".join(index)
else: self.path = "/".join((base,) + index)
self.file = None
if data or base is None: self.data = data
else: self.data = self.conn.C.make_file_dict(self.path)
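# Illustrative example (hypothetical paths): with conn a connection such as
# Globals.local_connection,
#   rp = RPath(conn, "/usr", ("local", "bin"))
#   rp.path                  => "/usr/local/bin"
#   rp.append("env").path    => "/usr/local/bin/env"
# and RPath(conn, "/", ("tmp",)).path is "/tmp" -- the root base is special-
# cased above so the joined path doesn't begin with "//".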
def __str__(self):
return "Path: %s\nIndex: %s\nData: %s" % (self.path, self.index,
self.data)
def __getstate__(self):
"""Return picklable state
The connection must be local because we can't pickle a
connection. Data and any attached file also won't be saved.
"""
assert self.conn is Globals.local_connection
return (self.index, self.base, self.data)
def __setstate__(self, rpath_state):
"""Reproduce RPath from __getstate__ output"""
self.conn = Globals.local_connection
self.index, self.base, self.data = rpath_state
self.path = "/".join((self.base,) + self.index)
def setdata(self):
"""Set data dictionary using C extension"""
self.data = self.conn.C.make_file_dict(self.path)
def make_file_dict_old(self):
"""Create the data dictionary"""
statblock = self.conn.RPathStatic.tupled_lstat(self.path)
if statblock is None:
return {'type':None}
data = {}
mode = statblock[stat.ST_MODE]
if stat.S_ISREG(mode): type = 'reg'
elif stat.S_ISDIR(mode): type = 'dir'
elif stat.S_ISCHR(mode):
type = 'dev'
data['devnums'] = ('c',) + self._getdevnums()
elif stat.S_ISBLK(mode):
type = 'dev'
data['devnums'] = ('b',) + self._getdevnums()
elif stat.S_ISFIFO(mode): type = 'fifo'
elif stat.S_ISLNK(mode):
type = 'sym'
data['linkname'] = self.conn.os.readlink(self.path)
elif stat.S_ISSOCK(mode): type = 'sock'
else: raise C.UnknownFileError(self.path)
data['type'] = type
data['size'] = statblock[stat.ST_SIZE]
data['perms'] = stat.S_IMODE(mode)
data['uid'] = statblock[stat.ST_UID]
data['gid'] = statblock[stat.ST_GID]
data['inode'] = statblock[stat.ST_INO]
data['devloc'] = statblock[stat.ST_DEV]
data['nlink'] = statblock[stat.ST_NLINK]
if not (type == 'sym' or type == 'dev'):
# mtimes on symlinks and dev files don't work consistently
data['mtime'] = long(statblock[stat.ST_MTIME])
data['atime'] = long(statblock[stat.ST_ATIME])
return data
def check_consistency(self):
"""Raise an error if consistency of rp broken
This is useful for debugging when the cache and disk get out
of sync and you need to find out where it happened.
"""
temptype = self.data['type']
self.setdata()
assert temptype == self.data['type'], \
"\nName: %s\nOld: %s --> New: %s\n" % \
(self.path, temptype, self.data['type'])
def _getdevnums(self):
"""Return tuple for special file (major, minor)"""
s = self.conn.reval("lambda path: os.lstat(path).st_rdev", self.path)
return (s >> 8, s & 0xff)
def quote_path(self):
"""Set path from quoted version of index"""
quoted_list = [FilenameMapping.quote(path) for path in self.index]
self.path = "/".join([self.base] + quoted_list)
self.setdata()
def chmod(self, permissions):
"""Wrapper around os.chmod"""
self.conn.os.chmod(self.path, permissions)
self.data['perms'] = permissions
def settime(self, accesstime, modtime):
"""Change file modification times"""
Log("Setting time of %s to %d" % (self.path, modtime), 7)
self.conn.os.utime(self.path, (accesstime, modtime))
self.data['atime'] = accesstime
self.data['mtime'] = modtime
def setmtime(self, modtime):
"""Set only modtime (access time to present)"""
Log(lambda: "Setting time of %s to %d" % (self.path, modtime), 7)
self.conn.os.utime(self.path, (time.time(), modtime))
self.data['mtime'] = modtime
def chown(self, uid, gid):
"""Set file's uid and gid"""
self.conn.os.chown(self.path, uid, gid)
self.data['uid'] = uid
self.data['gid'] = gid
def mkdir(self):
Log("Making directory " + self.path, 6)
self.conn.os.mkdir(self.path)
self.setdata()
def rmdir(self):
Log("Removing directory " + self.path, 6)
self.conn.os.rmdir(self.path)
self.data = {'type': None}
def listdir(self):
"""Return list of string paths returned by os.listdir"""
return self.conn.os.listdir(self.path)
def symlink(self, linktext):
"""Make symlink at self.path pointing to linktext"""
self.conn.os.symlink(linktext, self.path)
self.setdata()
assert self.issym()
def hardlink(self, linkpath):
"""Make self into a hardlink joined to linkpath"""
self.conn.os.link(linkpath, self.path)
self.setdata()
def mkfifo(self):
"""Make a fifo at self.path"""
self.conn.os.mkfifo(self.path)
self.setdata()
assert self.isfifo()
def mksock(self):
"""Make a socket at self.path"""
self.conn.RPathStatic.make_socket_local(self)
self.setdata()
assert self.issock()
def touch(self):
"""Make sure file at self.path exists"""
Log("Touching " + self.path, 7)
self.conn.open(self.path, "w").close()
self.setdata()
assert self.isreg()
def hasfullperms(self):
"""Return true if current process has full permissions on the file"""
if self.isowner(): return self.getperms() % 01000 >= 0700
elif self.isgroup(): return self.getperms() % 0100 >= 070
else: return self.getperms() % 010 >= 07
def readable(self):
"""Return true if current process has read permissions on the file"""
if self.isowner(): return self.getperms() % 01000 >= 0400
elif self.isgroup(): return self.getperms() % 0100 >= 040
else: return self.getperms() % 010 >= 04
def executable(self):
"""Return true if current process has execute permissions"""
if self.isowner(): return self.getperms() % 0200 >= 0100
elif self.isgroup(): return self.getperms() % 020 >= 010
else: return self.getperms() % 02 >= 01
def isowner(self):
"""Return true if current process is owner of rp or root"""
uid = self.conn.os.getuid()
return uid == 0 or uid == self.data['uid']
def isgroup(self):
"""Return true if current process is in group of rp"""
return self.conn.Globals.get('process_gid') == self.data['gid']
def delete(self):
"""Delete file at self.path
The destructive stepping allows this function to delete
directories even if they have files and we lack permissions.
"""
Log("Deleting %s" % self.path, 7)
self.setdata()
if not self.lstat(): return # must have been deleted in meantime
elif self.isdir():
itm = IterTreeReducer(RpathDeleter, [])
for dsrp in Select(DSRPath(None, self)).set_iter():
itm(dsrp.index, dsrp)
itm.Finish()
else: self.conn.os.unlink(self.path)
self.setdata()
def quote(self):
"""Return quoted self.path for use with os.system()"""
return '"%s"' % self.regex_chars_to_quote.sub(
lambda m: "\\"+m.group(0), self.path)
def normalize(self):
"""Return RPath canonical version of self.path
This just means that redundant /'s will be removed, including
the trailing one, even for directories. ".." components will
be retained.
"""
newpath = "/".join(filter(lambda x: x and x != ".",
self.path.split("/")))
if self.path[0] == "/": newpath = "/" + newpath
elif not newpath: newpath = "."
return self.newpath(newpath)
def dirsplit(self):
"""Returns a tuple of strings (dirname, basename)
Basename is never '' unless self is root, so it is unlike
os.path.basename. If path is just above root (so dirname is
root), then dirname is ''. In all other cases dirname is not
the empty string. Also, dirsplit depends on the format of
self, so basename could be ".." and dirname could be a
subdirectory. For an atomic relative path, dirname will be
'.'.
"""
normed = self.normalize()
if normed.path.find("/") == -1: return (".", normed.path)
comps = normed.path.split("/")
return "/".join(comps[:-1]), comps[-1]
def newpath(self, newpath, index = ()):
"""Return new RPath with the same connection but different path"""
return self.__class__(self.conn, newpath, index)
def append(self, ext):
"""Return new RPath with same connection by adjoing ext"""
return self.__class__(self.conn, self.base, self.index + (ext,))
def append_path(self, ext, new_index = ()):
"""Like append, but add ext to path instead of to index"""
assert not self.index # doesn't make sense if index isn't ()
return self.__class__(self.conn, "/".join((self.base, ext)), new_index)
def new_index(self, index):
"""Return similar RPath but with new index"""
return self.__class__(self.conn, self.base, index)
def open(self, mode, compress = None):
"""Return open file. Supports modes "w" and "r".
If compress is true, data written/read will be gzip
compressed/decompressed on the fly. The extra complications
below are for security reasons - try to make the extent of the
risk apparent from the remote call.
"""
if self.conn is Globals.local_connection:
if compress: return gzip.GzipFile(self.path, mode)
else: return open(self.path, mode)
if compress:
if mode == "r" or mode == "rb":
return self.conn.RPathStatic.gzip_open_local_read(self)
else: return self.conn.gzip.GzipFile(self.path, mode)
else:
if mode == "r" or mode == "rb":
return self.conn.RPathStatic.open_local_read(self)
else: return self.conn.open(self.path, mode)
def write_from_fileobj(self, fp, compress = None):
"""Reads fp and writes to self.path. Closes both when done
If compress is true, fp will be gzip compressed before being
written to self.
"""
Log("Writing file object to " + self.path, 7)
assert not self.lstat(), "File %s already exists" % self.path
outfp = self.open("wb", compress = compress)
RPath.copyfileobj(fp, outfp)
if fp.close() or outfp.close():
raise RPathException("Error closing file")
self.setdata()
def isincfile(self):
"""Return true if path looks like an increment file
Also sets various inc information used by the *inc* functions.
"""
if self.index: dotsplit = self.index[-1].split(".")
else: dotsplit = self.base.split(".")
if dotsplit[-1] == "gz":
compressed = 1
if len(dotsplit) < 4: return None
timestring, ext = dotsplit[-3:-1]
else:
compressed = None
if len(dotsplit) < 3: return None
timestring, ext = dotsplit[-2:]
if Time.stringtotime(timestring) is None: return None
if not (ext == "snapshot" or ext == "dir" or
ext == "missing" or ext == "diff" or ext == "data"):
return None
self.inc_timestr = timestring
self.inc_compressed = compressed
self.inc_type = ext
if compressed: self.inc_basestr = ".".join(dotsplit[:-3])
else: self.inc_basestr = ".".join(dotsplit[:-2])
return 1
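# Illustrative example (hypothetical filename): if the last index component
# is "foo.2002-03-05T18:52:12-07:00.diff.gz", isincfile() returns 1 and sets
#   inc_type = "diff", inc_compressed = 1,
#   inc_timestr = "2002-03-05T18:52:12-07:00", inc_basestr = "foo"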
def isinccompressed(self):
"""Return true if inc file is compressed"""
return self.inc_compressed
def getinctype(self):
"""Return type of an increment file"""
return self.inc_type
def getinctime(self):
"""Return timestring of an increment file"""
return self.inc_timestr
def getincbase(self):
"""Return the base filename of an increment file in rp form"""
if self.index:
return self.__class__(self.conn, self.base, self.index[:-1] +
(self.inc_basestr,))
else: return self.__class__(self.conn, self.inc_basestr)
def getincbase_str(self):
"""Return the base filename string of an increment file"""
return self.getincbase().dirsplit()[1]
def makedev(self, type, major, minor):
"""Make a special file with specified type, and major/minor nums"""
cmdlist = ['mknod', self.path, type, str(major), str(minor)]
if self.conn.os.spawnvp(os.P_WAIT, 'mknod', cmdlist) != 0:
raise RPathException("Error running %s" % cmdlist)
if type == 'c': datatype = 'chr'
elif type == 'b': datatype = 'blk'
else: raise RPathException
self.setdata()
def getRORPath(self, include_contents = None):
"""Return read only version of self"""
rorp = RORPath(self.index, self.data)
if include_contents: rorp.setfile(self.open("rb"))
return rorp
class RPathFileHook:
"""Look like a file, but add closing hook"""
def __init__(self, file, closing_thunk):
self.file = file
self.closing_thunk = closing_thunk
def read(self, length = -1): return self.file.read(length)
def write(self, buf): return self.file.write(buf)
def close(self):
"""Close file and then run closing thunk"""
result = self.file.close()
self.closing_thunk()
return result
# Import these late to avoid circular dependencies
import FilenameMapping
from lazy import *
from selection import *
from destructive_stepping import *
from highlevel import *
class RpathDeleter(ITRBranch):
"""Delete a directory. Called by RPath.delete()"""
def start_process(self, index, dsrp):
self.dsrp = dsrp
def end_process(self):
if self.dsrp.isdir(): self.dsrp.rmdir()
else: self.dsrp.delete()
def can_fast_process(self, index, dsrp): return not dsrp.isdir()
def fast_process(self, index, dsrp): dsrp.delete()
"""Iterate exactly the requested files in a directory
Parses includes and excludes to yield correct files. More
documentation on what this code does can be found on the man page.
"""
from __future__ import generators
import re
from log import *
from robust import *
from destructive_stepping import *
import FilenameMapping
class SelectError(Exception):
"""Some error dealing with the Select class"""
pass
class FilePrefixError(SelectError):
"""Signals that a specified file doesn't start with correct prefix"""
pass
class GlobbingError(SelectError):
"""Something has gone wrong when parsing a glob string"""
pass
class Select:
"""Iterate appropriate DSRPaths in given directory
This class acts as an iterator on account of its next() method.
Basically, it just goes through all the files in a directory in
order (depth-first) and subjects each file to a bunch of tests
(selection functions) in order. The first test that includes or
excludes the file means that the file gets included (iterated) or
excluded. The default is include, so with no tests we would just
iterate all the files in the directory in order.
The one complication to this is that sometimes we don't know
whether or not to include a directory until we examine its
contents. For instance, suppose we want to include all the **.py
files: if /home/ben/foo.py exists, we should also include /home
and /home/ben, but if these directories contain no **.py files,
they shouldn't be included. For this reason, a test may not
include or exclude a directory, but merely "scan" it. If later a
file in the directory gets included, so does the directory.
As mentioned above, each test takes the form of a selection
function. The selection function takes a dsrp, and returns:
None - means the test has nothing to say about the related file
0 - the file is excluded by the test
1 - the file is included
2 - the test says the file (must be directory) should be scanned
Also, a selection function f has a variable f.exclude which should
be true iff f could potentially exclude some file. This is used
to signal an error if the last function only includes, which would
be redundant and presumably isn't what the user intends.
"""
# This re should not match normal filenames, but usually just globs
glob_re = re.compile("(.*[*?[]|ignorecase\\:)", re.I | re.S)
def __init__(self, dsrpath, quoted_filenames = None):
"""DSRPIterator initializer. dsrp is the root directory
When files have quoted characters in them, quoted_filenames
should be true. Then RPath's index will be the unquoted
version.
"""
assert isinstance(dsrpath, DSRPath)
self.selection_functions = []
self.dsrpath = dsrpath
self.prefix = self.dsrpath.path
self.quoting_on = Globals.quoting_enabled and quoted_filenames
def set_iter(self, starting_index = None, iterate_parents = None,
sel_func = None):
"""Initialize more variables, get ready to iterate
Will iterate indicies greater than starting_index. If
iterate_parents is true, will also include parents of
starting_index in iteration. Selection function sel_func is
called on each dsrp and is usually self.Select. Returns self
just for convenience.
"""
if not sel_func: sel_func = self.Select
self.dsrpath.setdata() # this may have changed since Select init
if starting_index is not None:
self.starting_index = starting_index
self.iter = self.iterate_starting_from(self.dsrpath,
self.iterate_starting_from, sel_func)
elif self.quoting_on:
self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
else: self.iter = self.Iterate_fast(self.dsrpath, sel_func)
# only iterate parents if we are not starting from beginning
self.iterate_parents = starting_index is not None and iterate_parents
self.next = self.iter.next
self.__iter__ = lambda: self
return self
def Iterate_fast(self, dsrpath, sel_func):
"""Like Iterate, but don't recur, saving time
Only handles standard case (quoting off, starting from
beginning).
"""
def error_handler(exc, filename):
Log("Error initializing file %s/%s" % (dsrpath.path, filename), 2)
return None
def diryield(dsrpath):
"""Generate relevant files in directory dsrpath
Returns (dsrp, num) where num == 0 means dsrp should be
generated normally, num == 1 means the dsrp is a directory
and should be included iff something inside is included.
"""
for filename in Robust.listrp(dsrpath):
new_dsrp = Robust.check_common_error(error_handler,
dsrpath.append, (filename,))
if new_dsrp:
s = sel_func(new_dsrp)
if s == 1: yield (new_dsrp, 0)
elif s == 2 and new_dsrp.isdir(): yield (new_dsrp, 1)
yield dsrpath
diryield_stack = [diryield(dsrpath)]
delayed_dsrp_stack = []
while diryield_stack:
try: dsrp, val = diryield_stack[-1].next()
except StopIteration:
diryield_stack.pop()
if delayed_dsrp_stack: delayed_dsrp_stack.pop()
continue
if val == 0:
if delayed_dsrp_stack:
for delayed_dsrp in delayed_dsrp_stack: yield delayed_dsrp
del delayed_dsrp_stack[:]
yield dsrp
if dsrp.isdir(): diryield_stack.append(diryield(dsrp))
elif val == 1:
delayed_dsrp_stack.append(dsrp)
diryield_stack.append(diryield(dsrp))
def Iterate(self, dsrpath, rec_func, sel_func):
"""Return iterator yielding dsrps in dsrpath
rec_func is usually the same as this function and is what
Iterate uses to find files in subdirectories. It is used in
iterate_starting_from.
sel_func is the selection function to use on the dsrps. It is
usually self.Select.
"""
s = sel_func(dsrpath)
if s == 0: return
elif s == 1: # File is included
yield dsrpath
if dsrpath.isdir():
for dsrp in self.iterate_in_dir(dsrpath, rec_func, sel_func):
yield dsrp
elif s == 2:
if dsrpath.isdir(): # Directory is merely scanned
iid = self.iterate_in_dir(dsrpath, rec_func, sel_func)
try: first = iid.next()
except StopIteration: return # no files inside; skip dsrp
yield dsrpath
yield first
for dsrp in iid: yield dsrp
else: assert 0, "Invalid selection result %s" % (str(s),)
def iterate_in_dir(self, dsrpath, rec_func, sel_func):
"""Iterate the dsrps in directory dsrpath."""
def error_handler(exc, filename):
Log("Error initializing file %s/%s" % (dsrpath.path, filename), 2)
return None
if self.quoting_on:
for subdir in FilenameMapping.get_quoted_dir_children(dsrpath):
for dsrp in rec_func(subdir, rec_func, sel_func):
yield dsrp
else:
for filename in Robust.listrp(dsrpath):
new_dsrp = Robust.check_common_error(
error_handler, dsrpath.append, [filename])
if new_dsrp:
for dsrp in rec_func(new_dsrp, rec_func, sel_func):
yield dsrp
def iterate_starting_from(self, dsrpath, rec_func, sel_func):
"""Like Iterate, but only yield indicies > self.starting_index"""
if dsrpath.index > self.starting_index: # past starting_index
for dsrp in self.Iterate(dsrpath, self.Iterate, sel_func):
yield dsrp
elif (dsrpath.index == self.starting_index[:len(dsrpath.index)]
and dsrpath.isdir()):
# May encounter starting index on this branch
if self.iterate_parents: yield dsrpath
for dsrp in self.iterate_in_dir(dsrpath,
self.iterate_starting_from,
sel_func): yield dsrp
def iterate_with_finalizer(self):
"""Like Iterate, but missing some options, and add finalizer"""
finalize = IterTreeReducer(DestructiveSteppingFinalizer, ())
for dsrp in self:
yield dsrp
finalize(dsrp.index, dsrp)
finalize.Finish()
def Select(self, dsrp):
"""Run through the selection functions and return dominant val 0/1/2"""
for sf in self.selection_functions:
result = sf(dsrp)
if result is not None: return result
return 1
def ParseArgs(self, argtuples, filelists):
"""Create selection functions based on list of tuples
The tuples have the form (option string, additional argument)
and are created when the initial commandline arguments are
read. The reason for the extra level of processing is that
the filelists may only be openable by the main connection, but
the selection functions need to be on the backup reader or
writer side. When the initial arguments are parsed the right
information is sent over the link.
"""
filelists_index = 0
try:
for opt, arg in argtuples:
if opt == "--exclude":
self.add_selection_func(self.glob_get_sf(arg, 0))
elif opt == "--exclude-device-files":
self.add_selection_func(self.devfiles_get_sf(0))
elif opt == "--exclude-filelist":
self.add_selection_func(self.filelist_get_sf(
filelists[filelists_index], 0, arg))
filelists_index += 1
elif opt == "--exclude-globbing-filelist":
map(self.add_selection_func,
self.filelist_globbing_get_sfs(
filelists[filelists_index], 0, arg))
filelists_index += 1
elif opt == "--exclude-other-filesystems":
self.add_selection_func(self.other_filesystems_get_sf(0))
elif opt == "--exclude-regexp":
self.add_selection_func(self.regexp_get_sf(arg, 0))
elif opt == "--exclude-special-files":
self.add_selection_func(self.special_get_sf(0))
elif opt == "--include":
self.add_selection_func(self.glob_get_sf(arg, 1))
elif opt == "--include-filelist":
self.add_selection_func(self.filelist_get_sf(
filelists[filelists_index], 1, arg))
filelists_index += 1
elif opt == "--include-globbing-filelist":
map(self.add_selection_func,
self.filelist_globbing_get_sfs(
filelists[filelists_index], 1, arg))
filelists_index += 1
elif opt == "--include-regexp":
self.add_selection_func(self.regexp_get_sf(arg, 1))
else: assert 0, "Bad selection option %s" % opt
except IOError: pass # formerly: except SelectError, e: self.parse_catch_error(e)
assert filelists_index == len(filelists)
self.parse_last_excludes()
self.parse_rbdir_exclude()
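# Illustrative example (hypothetical arguments): a command line containing
#   --include /usr/local --exclude /usr
# arrives here as argtuples = [("--include", "/usr/local"),
# ("--exclude", "/usr")], producing selection functions that are tried in
# that order, so /usr/local is backed up while the rest of /usr is not.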
def parse_catch_error(self, exc):
"""Deal with selection error exc"""
if isinstance(exc, FilePrefixError):
Log.FatalError(
"""Fatal Error: The file specification
' %s'
cannot match any files in the base directory
' %s'
Useful file specifications begin with the base directory or some
pattern (such as '**') which matches the base directory.""" %
(exc, self.prefix))
elif isinstance(exc, GlobbingError):
Log.FatalError("Fatal Error while processing expression\n"
"%s" % exc)
else: raise
def parse_rbdir_exclude(self):
"""Add exclusion of rdiff-backup-data dir to front of list"""
self.add_selection_func(
self.glob_get_tuple_sf(("rdiff-backup-data",), 0), 1)
def parse_last_excludes(self):
"""Exit with error if last selection function isn't an exclude"""
if (self.selection_functions and
not self.selection_functions[-1].exclude):
Log.FatalError(
"""Last selection expression:
%s
only specifies that files be included. Because the default is to
include all files, the expression is redundant. Exiting because this
probably isn't what you meant.""" %
(self.selection_functions[-1].name,))
def add_selection_func(self, sel_func, add_to_start = None):
"""Add another selection function at the end or beginning"""
if add_to_start: self.selection_functions.insert(0, sel_func)
else: self.selection_functions.append(sel_func)
def filelist_get_sf(self, filelist_fp, inc_default, filelist_name):
"""Return selection function by reading list of files
The format of the filelist is documented in the man page.
filelist_fp should be an (open) file object.
inc_default should be true if this is an include list,
false for an exclude list.
filelist_name is just a string used for logging.
"""
Log("Reading filelist %s" % filelist_name, 4)
tuple_list, something_excluded = \
self.filelist_read(filelist_fp, inc_default, filelist_name)
Log("Sorting filelist %s" % filelist_name, 4)
tuple_list.sort()
i = [0] # We have to put index in list because of stupid scoping rules
def selection_function(dsrp):
while 1:
if i[0] >= len(tuple_list): return None
include, move_on = \
self.filelist_pair_match(dsrp, tuple_list[i[0]])
if move_on:
i[0] += 1
if include is None: continue # later line may match
return include
selection_function.exclude = something_excluded or inc_default == 0
selection_function.name = "Filelist: " + filelist_name
return selection_function
def filelist_read(self, filelist_fp, include, filelist_name):
"""Read filelist from fp, return (tuplelist, something_excluded)"""
prefix_warnings = [0]
def incr_warnings(exc):
"""Warn if prefix is incorrect"""
prefix_warnings[0] += 1
if prefix_warnings[0] < 6:
Log("Warning: file specification '%s' in filelist %s\n"
"doesn't start with correct prefix %s. Ignoring." %
(exc, filelist_name, self.prefix), 2)
if prefix_warnings[0] == 5:
Log("Future prefix errors will not be logged.", 2)
something_excluded, tuple_list = None, []
separator = Globals.null_separator and "\0" or "\n"
for line in filelist_fp.read().split(separator):
if not line: continue # skip blanks
try: tuple = self.filelist_parse_line(line, include)
except FilePrefixError, exc:
incr_warnings(exc)
continue
tuple_list.append(tuple)
if not tuple[1]: something_excluded = 1
if filelist_fp.close():
Log("Error closing filelist %s" % filelist_name, 2)
return (tuple_list, something_excluded)
def filelist_parse_line(self, line, include):
"""Parse a single line of a filelist, returning a pair
pair will be of form (index, include), where index is another
tuple, and include is 1 if the line specifies that we are
including a file. The default is given as an argument.
prefix is the string that the index is relative to.
"""
if line[:2] == "+ ": # Check for "+ "/"- " syntax
include = 1
line = line[2:]
elif line[:2] == "- ":
include = 0
line = line[2:]
if not line.startswith(self.prefix): raise FilePrefixError(line)
line = line[len(self.prefix):] # Discard prefix
index = tuple(filter(lambda x: x, line.split("/"))) # remove empties
return (index, include)
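# Illustrative examples (assuming self.prefix == "/usr"):
#   filelist_parse_line("+ /usr/local/bin", 0) => (("local", "bin"), 1)
#   filelist_parse_line("- /usr/tmp", 1)       => (("tmp",), 0)
#   filelist_parse_line("/usr/lib", 1)         => (("lib",), 1)  # default kept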
def filelist_pair_match(self, dsrp, pair):
"""Matches a filelist tuple against a dsrp
Returns a pair (include, move_on). include is None if the
tuple doesn't match either way, and 0/1 if the tuple excludes
or includes the dsrp.
move_on is true if the tuple cannot match a later index, and
so we should move on to the next tuple in the list.
"""
index, include = pair
if include == 1:
if index < dsrp.index: return (None, 1)
if index == dsrp.index: return (1, 1)
elif index[:len(dsrp.index)] == dsrp.index:
return (1, None) # /foo/bar implicitly includes /foo
else: return (None, None) # dsrp greater, not initial sequence
elif include == 0:
if dsrp.index[:len(index)] == index:
return (0, None) # /foo implicitly excludes /foo/bar
elif index < dsrp.index: return (None, 1)
else: return (None, None) # dsrp greater, not initial sequence
else: assert 0, "Include is %s, should be 0 or 1" % (include,)
def filelist_globbing_get_sfs(self, filelist_fp, inc_default, list_name):
"""Return list of selection functions by reading fileobj
filelist_fp should be an open file object
inc_default is true iff this is an include list
list_name is just the name of the list, used for logging
See the man page on --[include/exclude]-globbing-filelist
"""
Log("Reading globbing filelist %s" % list_name, 4)
separator = Globals.null_separator and "\0" or "\n"
for line in filelist_fp.read().split(separator):
if not line: continue # skip blanks
if line[:2] == "+ ": yield self.glob_get_sf(line[2:], 1)
elif line[:2] == "- ": yield self.glob_get_sf(line[2:], 0)
else: yield self.glob_get_sf(line, inc_default)
def other_filesystems_get_sf(self, include):
"""Return selection function matching files on other filesystems"""
assert include == 0 or include == 1
root_devloc = self.dsrpath.getdevloc()
def sel_func(dsrp):
if dsrp.getdevloc() == root_devloc: return None
else: return include
sel_func.exclude = not include
sel_func.name = "Match other filesystems"
return sel_func
def regexp_get_sf(self, regexp_string, include):
"""Return selection function given by regexp_string"""
assert include == 0 or include == 1
try: regexp = re.compile(regexp_string)
except:
Log("Error compiling regular expression %s" % regexp_string, 1)
raise
def sel_func(dsrp):
if regexp.search(dsrp.path): return include
else: return None
sel_func.exclude = not include
sel_func.name = "Regular expression: %s" % regexp_string
return sel_func
def devfiles_get_sf(self, include):
"""Return a selection function matching all dev files"""
if self.selection_functions:
Log("Warning: exclude-device-files is not the first "
"selector.\nThis may not be what you intended", 3)
def sel_func(dsrp):
if dsrp.isdev(): return include
else: return None
sel_func.exclude = not include
sel_func.name = (include and "include" or "exclude") + " device files"
return sel_func
def special_get_sf(self, include):
"""Return sel function matching sockets, symlinks, sockets, devs"""
if self.selection_functions:
Log("Warning: exclude-special-files is not the first "
"selector.\nThis may not be what you intended", 3)
def sel_func(dsrp):
if dsrp.issym() or dsrp.issock() or dsrp.isfifo() or dsrp.isdev():
return include
else: return None
sel_func.exclude = not include
sel_func.name = (include and "include" or "exclude") + " special files"
return sel_func
def glob_get_sf(self, glob_str, include):
"""Return selection function given by glob string"""
assert include == 0 or include == 1
if glob_str == "**": sel_func = lambda dsrp: include
elif not self.glob_re.match(glob_str): # normal file
sel_func = self.glob_get_filename_sf(glob_str, include)
else: sel_func = self.glob_get_normal_sf(glob_str, include)
sel_func.exclude = not include
sel_func.name = "Command-line %s glob: %s" % \
(include and "include" or "exclude", glob_str)
return sel_func
def glob_get_filename_sf(self, filename, include):
"""Get a selection function given a normal filename
Some of the parsing is better explained in
filelist_parse_line. The reason this is split from normal
globbing is things are a lot less complicated if no special
globbing characters are used.
"""
if not filename.startswith(self.prefix):
raise FilePrefixError(filename)
index = tuple(filter(lambda x: x,
filename[len(self.prefix):].split("/")))
return self.glob_get_tuple_sf(index, include)
def glob_get_tuple_sf(self, tuple, include):
"""Return selection function based on tuple"""
def include_sel_func(dsrp):
if (dsrp.index == tuple[:len(dsrp.index)] or
dsrp.index[:len(tuple)] == tuple):
return 1 # /foo/bar implicitly matches /foo, vice-versa
else: return None
def exclude_sel_func(dsrp):
if dsrp.index[:len(tuple)] == tuple:
return 0 # /foo excludes /foo/bar, not vice-versa
else: return None
if include == 1: sel_func = include_sel_func
elif include == 0: sel_func = exclude_sel_func
sel_func.exclude = not include
sel_func.name = "Tuple select %s" % (tuple,)
return sel_func
def glob_get_normal_sf(self, glob_str, include):
"""Return selection function based on glob_str
The basic idea is to turn glob_str into a regular expression,
and just use the normal regular expression. There is a
complication because the selection function should return '2'
(scan) for directories which may contain a file which matches
the glob_str. So we break up the glob string into parts, and
any file which matches an initial sequence of glob parts gets
scanned.
Thanks to Donovan Baarda who provided some code which did some
things similar to this.
"""
if glob_str.lower().startswith("ignorecase:"):
re_comp = lambda r: re.compile(r, re.I | re.S)
glob_str = glob_str[len("ignorecase:"):]
else: re_comp = lambda r: re.compile(r, re.S)
# matches what glob matches and any files in directory
glob_comp_re = re_comp("^%s($|/)" % self.glob_to_re(glob_str))
if glob_str.find("**") != -1:
glob_str = glob_str[:glob_str.find("**")+2] # truncate after **
scan_comp_re = re_comp("^(%s)$" %
"|".join(self.glob_get_prefix_res(glob_str)))
def include_sel_func(dsrp):
if glob_comp_re.match(dsrp.path): return 1
elif scan_comp_re.match(dsrp.path): return 2
else: return None
def exclude_sel_func(dsrp):
if glob_comp_re.match(dsrp.path): return 0
else: return None
# Check to make sure prefix is ok
if not include_sel_func(self.dsrpath): raise FilePrefixError(glob_str)
if include: return include_sel_func
else: return exclude_sel_func
def glob_get_prefix_res(self, glob_str):
"""Return list of regexps equivalent to prefixes of glob_str"""
glob_parts = glob_str.split("/")
if "" in glob_parts[1:-1]: # "" OK if comes first or last, as in /foo/
raise GlobbingError("Consecutive '/'s found in globbing string "
+ glob_str)
prefixes = map(lambda i: "/".join(glob_parts[:i+1]),
range(len(glob_parts)))
# we must make exception for root "/", only dir to end in slash
if prefixes[0] == "": prefixes[0] = "/"
return map(self.glob_to_re, prefixes)
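# Illustrative example: glob_get_prefix_res("/usr/local/b*") returns regexps
# for "/", "/usr", "/usr/local", and "/usr/local/b[^/]*" -- any directory
# matching one of these prefixes may contain a match for the full glob, so
# glob_get_normal_sf above returns 2 (scan) for it.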
def glob_to_re(self, pat):
"""Returned regular expression equivalent to shell glob pat
Currently only the ?, *, [], and ** expressions are supported.
Ranges like [a-z] are also currently unsupported. There is no
way to quote these special characters.
This function taken with minor modifications from efnmatch.py
by Donovan Baarda.
"""
i, n, res = 0, len(pat), ''
while i < n:
c, s = pat[i], pat[i:i+2]
i = i+1
if s == '**':
res = res + '.*'
i = i + 1
elif c == '*': res = res + '[^/]*'
elif c == '?': res = res + '[^/]'
elif c == '[':
j = i
if j < n and pat[j] in '!^': j = j+1
if j < n and pat[j] == ']': j = j+1
while j < n and pat[j] != ']': j = j+1
if j >= n: res = res + '\\[' # interpret the [ literally
else: # Deal with inside of [..]
stuff = pat[i:j].replace('\\','\\\\')
i = j+1
if stuff[0] in '!^': stuff = '^' + stuff[1:]
res = res + '[' + stuff + ']'
else: res = res + re.escape(c)
return res
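# Illustrative examples (not part of the original source):
#   glob_to_re("**.py")  => ".*\.py"      ("**" crosses directory separators)
#   glob_to_re("*.txt")  => "[^/]*\.txt"  ("*" stops at "/")
#   glob_to_re("f?o")    => "f[^/]o"
#   glob_to_re("[!abc]") => "[^abc]"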
"""MakeStatic and MakeClass
These functions are used to make all the instance methods in a class
into static or class methods.
"""
class StaticMethodsError(Exception): pass
def MakeStatic(cls):
"""turn instance methods into static ones
The methods (that don't begin with _) of any class that
subclasses this will be turned into static methods.
"""
for name in dir(cls):
if name[0] != "_":
cls.__dict__[name] = staticmethod(cls.__dict__[name])
def MakeClass(cls):
"""Turn instance methods into classmethods. Ignore _ like above"""
for name in dir(cls):
if name[0] != "_":
cls.__dict__[name] = classmethod(cls.__dict__[name])
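# Usage sketch (hypothetical class, mirroring how RORPIter and RPathStatic
# use MakeStatic above):
#
#   class Helpers:
#       def double(x): return 2 * x   # note: written without a self argument
#   MakeStatic(Helpers)
#   Helpers.double(21)   => 42, callable directly on the class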
"""Generate and process aggregated backup information"""
from lazy import *
import re
class StatsException(Exception): pass
class StatsObj:
"""Contains various statistics, provide string conversion functions"""
# used when quoting files in get_stats_line
space_regex = re.compile(" ")
stat_file_attrs = ('SourceFiles', 'SourceFileSize',
'MirrorFiles', 'MirrorFileSize',
'NewFiles', 'NewFileSize',
'DeletedFiles', 'DeletedFileSize',
'ChangedFiles',
'ChangedSourceSize', 'ChangedMirrorSize',
'IncrementFiles', 'IncrementFileSize')
stat_misc_attrs = ('Errors', 'TotalDestinationSizeChange')
stat_time_attrs = ('StartTime', 'EndTime', 'ElapsedTime')
stat_attrs = (('Filename',) + stat_time_attrs +
stat_misc_attrs + stat_file_attrs)
# Below, the second value in each pair is true iff the value
# indicates a number of bytes
stat_file_pairs = (('SourceFiles', None), ('SourceFileSize', 1),
('MirrorFiles', None), ('MirrorFileSize', 1),
('NewFiles', None), ('NewFileSize', 1),
('DeletedFiles', None), ('DeletedFileSize', 1),
('ChangedFiles', None),
('ChangedSourceSize', 1), ('ChangedMirrorSize', 1),
('IncrementFiles', None), ('IncrementFileSize', 1))
# This is used in get_byte_summary_string below
byte_abbrev_list = ((1024*1024*1024*1024, "TB"),
(1024*1024*1024, "GB"),
(1024*1024, "MB"),
(1024, "KB"))
def __init__(self):
"""Set attributes to None"""
for attr in self.stat_attrs: self.__dict__[attr] = None
def get_stat(self, attribute):
"""Get a statistic"""
return self.__dict__[attribute]
def set_stat(self, attr, value):
"""Set attribute to given value"""
self.__dict__[attr] = value
def increment_stat(self, attr):
"""Add 1 to value of attribute"""
self.__dict__[attr] += 1
def get_total_dest_size_change(self):
"""Return total destination size change
This represents the total change in the size of the
rdiff-backup destination directory.
"""
addvals = [self.NewFileSize, self.ChangedSourceSize,
self.IncrementFileSize]
subtractvals = [self.DeletedFileSize, self.ChangedMirrorSize]
for val in addvals + subtractvals:
if val is None:
result = None
break
else:
def addlist(l): return reduce(lambda x,y: x+y, l)
result = addlist(addvals) - addlist(subtractvals)
self.TotalDestinationSizeChange = result
return result
def get_stats_line(self, index, use_repr = 1):
"""Return one line abbreviated version of full stats string"""
file_attrs = map(lambda attr: str(self.get_stat(attr)),
self.stat_file_attrs)
if not index: filename = "."
else:
filename = apply(os.path.join, index)
if use_repr:
# use repr to quote newlines in the relative filename, then
# take off the leading and trailing quotes and quote spaces.
filename = self.space_regex.sub("\\x20", repr(filename)[1:-1])
return " ".join([filename,] + file_attrs)
def set_stats_from_line(self, line):
"""Set statistics from given line"""
def error(): raise StatsException("Bad line '%s'" % line)
if line[-1] == "\n": line = line[:-1]
lineparts = line.split(" ")
if len(lineparts) < len(self.stat_file_attrs): error()
for attr, val_string in zip(self.stat_file_attrs,
lineparts[-len(self.stat_file_attrs):]):
try: val = long(val_string)
except ValueError:
try: val = float(val_string)
except ValueError: error()
self.set_stat(attr, val)
return self
def get_stats_string(self):
"""Return extended string printing out statistics"""
return "%s%s%s" % (self.get_timestats_string(),
self.get_filestats_string(),
self.get_miscstats_string())
def get_timestats_string(self):
"""Return portion of statistics string dealing with time"""
timelist = []
if self.StartTime is not None:
timelist.append("StartTime %.2f (%s)\n" %
(self.StartTime, Time.timetopretty(self.StartTime)))
if self.EndTime is not None:
timelist.append("EndTime %.2f (%s)\n" %
(self.EndTime, Time.timetopretty(self.EndTime)))
if self.ElapsedTime or (self.StartTime is not None and
self.EndTime is not None):
if self.ElapsedTime is None:
self.ElapsedTime = self.EndTime - self.StartTime
timelist.append("ElapsedTime %.2f (%s)\n" %
(self.ElapsedTime, Time.inttopretty(self.ElapsedTime)))
return "".join(timelist)
def get_filestats_string(self):
"""Return portion of statistics string about files and bytes"""
def fileline(stat_file_pair):
"""Return zero or one line of the string"""
attr, in_bytes = stat_file_pair
val = self.get_stat(attr)
if val is None: return ""
if in_bytes:
return "%s %s (%s)\n" % (attr, val,
self.get_byte_summary_string(val))
else: return "%s %s\n" % (attr, val)
return "".join(map(fileline, self.stat_file_pairs))
def get_miscstats_string(self):
"""Return portion of extended stat string about misc attributes"""
misc_string = ""
tdsc = self.get_total_dest_size_change()
if tdsc is not None:
misc_string += ("TotalDestinationSizeChange %s (%s)\n" %
(tdsc, self.get_byte_summary_string(tdsc)))
if self.Errors is not None: misc_string += "Errors %d\n" % self.Errors
return misc_string
def get_byte_summary_string(self, byte_count):
"""Turn byte count into human readable string like "7.23GB" """
if byte_count < 0:
sign = "-"
byte_count = -byte_count
else: sign = ""
for abbrev_bytes, abbrev_string in self.byte_abbrev_list:
if byte_count >= abbrev_bytes:
# Now get 3 significant figures
abbrev_count = float(byte_count)/abbrev_bytes
if abbrev_count >= 100: precision = 0
elif abbrev_count >= 10: precision = 1
else: precision = 2
return "%s%%.%df %s" % (sign, precision, abbrev_string) \
% (abbrev_count,)
byte_count = round(byte_count)
if byte_count == 1: return sign + "1 byte"
else: return "%s%d bytes" % (sign, byte_count)
def get_stats_logstring(self, title):
"""Like get_stats_string, but add header and footer"""
header = "--------------[ %s ]--------------" % title
footer = "-" * len(header)
return "%s\n%s%s\n" % (header, self.get_stats_string(), footer)
def set_stats_from_string(self, s):
"""Initialize attributes from string, return self for convenience"""
def error(line): raise StatsException("Bad line '%s'" % line)
for line in s.split("\n"):
if not line: continue
line_parts = line.split()
if len(line_parts) < 2: error(line)
attr, value_string = line_parts[:2]
if attr not in self.stat_attrs: error(line)
try:
try: val1 = long(value_string)
except ValueError: val1 = None
val2 = float(value_string)
if val1 == val2: self.set_stat(attr, val1) # use integer val
else: self.set_stat(attr, val2) # use float
except ValueError: error(line)
return self
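# The string parsed here is the same format get_stats_string writes,
# e.g. (hypothetical excerpt; parenthesized parts are ignored, since
# only the first two whitespace-separated fields of a line are read):
#   StartTime 1021500000.00 (...)
#   SourceFiles 10000
#   SourceFileSize 2000000 (1.91 MB)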
def write_stats_to_rp(self, rp):
"""Write statistics string to given rpath"""
tf = TempFileManager.new(rp)
def init_thunk():
fp = tf.open("w")
fp.write(self.get_stats_string())
fp.close()
Robust.make_tf_robustaction(init_thunk, (tf,), (rp,)).execute()
def read_stats_from_rp(self, rp):
"""Set statistics from rpath, return self for convenience"""
fp = rp.open("r")
self.set_stats_from_string(fp.read())
fp.close()
return self
def stats_equal(self, s):
"""Return true if s has same statistics as self"""
assert isinstance(s, StatsObj)
for attr in self.stat_file_attrs:
if self.get_stat(attr) != s.get_stat(attr): return None
return 1
def set_to_average(self, statobj_list):
"""Set self's attributes to average of those in statobj_list"""
for attr in self.stat_attrs: self.set_stat(attr, 0)
for statobj in statobj_list:
for attr in self.stat_attrs:
if statobj.get_stat(attr) is None: self.set_stat(attr, None)
elif self.get_stat(attr) is not None:
self.set_stat(attr, statobj.get_stat(attr) +
self.get_stat(attr))
# Don't compute average starting/stopping time
self.StartTime = None
self.EndTime = None
for attr in self.stat_attrs:
if self.get_stat(attr) is not None:
self.set_stat(attr,
self.get_stat(attr)/float(len(statobj_list)))
return self
def get_statsobj_copy(self):
"""Return new StatsObj object with same stats as self"""
s = StatsObj()
for attr in self.stat_attrs: s.set_stat(attr, self.get_stat(attr))
return s
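# Round-trip sketch (hypothetical use of the class above): stats
# written out as a string can be read back into an equal object:
#
# s = StatsObj()
# s.set_stat('SourceFiles', 10)
# s2 = StatsObj().set_stats_from_string(s.get_stats_string())
# assert s.stats_equal(s2)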
class StatsITRB(ITRBranch, StatsObj):
"""Keep track of per directory statistics
This is subclassed by the mirroring and incrementing ITRs.
"""
def __init__(self):
"""StatsITR initializer - zero out statistics"""
attr_dict = self.__dict__
for attr in StatsObj.stat_file_attrs: attr_dict[attr] = 0
self.ElapsedTime = self.Filename = None
def start_stats(self, mirror_dsrp):
"""Record status of mirror dsrp
This is called before the mirror is processed so we remember
the old state.
"""
if mirror_dsrp.lstat():
self.mirror_base_exists = 1
self.mirror_base_size = self.stats_getsize(mirror_dsrp)
else: self.mirror_base_exists = None
def stats_getsize(self, rp):
"""Return size of rp, with error checking"""
try: return rp.getsize()
except KeyError: return 0
def end_stats(self, diff_rorp, mirror_dsrp, inc_rp = None):
"""Set various statistics after mirror processed"""
if mirror_dsrp.lstat():
source_size = self.stats_getsize(mirror_dsrp)
self.SourceFiles += 1
self.SourceFileSize += source_size
if self.mirror_base_exists:
self.MirrorFiles += 1
self.MirrorFileSize += self.mirror_base_size
if diff_rorp: # otherwise no change
self.ChangedFiles += 1
self.ChangedSourceSize += source_size
self.ChangedMirrorSize += self.mirror_base_size
self.stats_incr_incfiles(inc_rp)
else: # new file was created
self.NewFiles += 1
self.NewFileSize += source_size
self.stats_incr_incfiles(inc_rp)
else:
if self.mirror_base_exists: # file was deleted from mirror
self.MirrorFiles += 1
self.MirrorFileSize += self.mirror_base_size
self.DeletedFiles += 1
self.DeletedFileSize += self.mirror_base_size
self.stats_incr_incfiles(inc_rp)
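# Accounting sketch (hypothetical figures): for one changed file that
# is now 100 bytes, whose old mirror copy was 80 bytes, and whose
# increment file is 20 bytes, end_stats adds:
#   SourceFiles +1, SourceFileSize +100,
#   MirrorFiles +1, MirrorFileSize +80,
#   ChangedFiles +1, ChangedSourceSize +100, ChangedMirrorSize +80,
#   IncrementFiles +1, IncrementFileSize +20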
def fast_process(self, mirror_rorp):
"""Use when there is no change from source to mirror"""
source_size = self.stats_getsize(mirror_rorp)
self.SourceFiles += 1
self.MirrorFiles += 1
self.SourceFileSize += source_size
self.MirrorFileSize += source_size
def stats_incr_incfiles(self, inc_rp):
"""Increment IncrementFile statistics"""
if inc_rp:
self.IncrementFiles += 1
self.IncrementFileSize += self.stats_getsize(inc_rp)
def add_file_stats(self, branch):
"""Add all file statistics from branch to current totals"""
for attr in self.stat_file_attrs:
self.__dict__[attr] += branch.__dict__[attr]
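# The imports below sit at the bottom of the module, apparently so
# that log, increment, and robust (which in turn depend on this
# module) can be loaded without circular-import errors; names used
# above such as Time, TempFileManager, and Robust presumably arrive
# through these star imports.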
from log import *
from increment import *
from robust import *
import Globals
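The profiling notes below look like the output of Python's standard
profile/pstats modules. A run such as the following sketch (command
string taken from the profile:0 lines; the exact invocation and
namespace are assumed) would reproduce the format. In the "ncalls"
column, entries like 151824/30615 give total and primitive
(non-recursive) call counts for recursive functions.

import profile, pstats
profile.run("Globals.Main.Main(['--no-resume', 'manyfiles/', 'out'])",
            'profile-output')
stats = pstats.Stats('profile-output')
stats.sort_stats('time').print_stats(20)        # "Ordered by: internal time"
stats.sort_stats('cumulative').print_stats(20)  # "Ordered by: cumulative time"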
No changes in 10000 files:
Wed May 15 23:54:53 2002 profile-output
1805774 function calls (1480846 primitive calls) in 45.710 CPU seconds
Ordered by: internal time
List reduced from 259 to 20 due to restriction <20>
ncalls tottime percall cumtime percall filename:lineno(function)
151824/30615 5.620 0.000 18.810 0.001 lazy.py:251(__call__)
91212/30612 4.060 0.000 17.300 0.001 lazy.py:222(process_w_subinstance)
31006 2.630 0.000 4.650 0.000 rpath.py:448(setdata)
101629/20412 1.890 0.000 14.690 0.001 selection.py:96(Iterate)
61218/20410 1.880 0.000 14.370 0.001 selection.py:122(iterate_in_dir)
10206 1.400 0.000 44.620 0.004 highlevel.py:245(error_checked)
30618/10206 1.350 0.000 27.650 0.003 rorpiter.py:124(Collate2Iters)
486 1.280 0.003 19.690 0.041 highlevel.py:165(generate_dissimilar)
30748 1.270 0.000 6.640 0.000 rpath.py:408(__init__)
31006 1.260 0.000 1.260 0.000 rpath.py:182(tupled_lstat)
121209 1.140 0.000 1.140 0.000 lazy.py:214(intree)
20411 1.110 0.000 9.010 0.000 destructive_stepping.py:33(__init__)
10205 1.050 0.000 1.050 0.000 rpath.py:430(__str__)
30612 0.860 0.000 1.410 0.000 lazy.py:218(set_subinstance)
20411 0.840 0.000 2.830 0.000 destructive_stepping.py:77(set_init_perms)
486 0.830 0.002 26.900 0.055 highlevel.py:98(diffs)
30771 0.780 0.000 0.780 0.000 /usr/lib/python2.2/posixpath.py:44(join)
10205 0.720 0.000 3.870 0.000 increment.py:138(start_process)
91845 0.710 0.000 0.710 0.000 rorpiter.py:246(__getitem__)
20616 0.690 0.000 0.930 0.000 rpath.py:593(isowner)
Wed May 15 23:54:53 2002 profile-output
1805774 function calls (1480846 primitive calls) in 45.710 CPU seconds
Ordered by: cumulative time
List reduced from 259 to 20 due to restriction <20>
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.010 0.010 45.710 45.710 profile:0(Globals.Main.Main(['--no-resume', 'manyfiles/', 'out']))
1 0.000 0.000 45.700 45.700 main.py:188(Main)
1 0.000 0.000 45.700 45.700 <string>:1(?)
1 0.000 0.000 45.670 45.670 main.py:169(take_action)
1 0.000 0.000 45.670 45.670 main.py:221(Backup)
1 0.000 0.000 45.650 45.650 highlevel.py:50(Mirror_and_increment)
1 0.240 0.240 45.650 45.650 highlevel.py:239(patch_increment_and_finalize)
10208 0.270 0.000 44.890 0.004 highlevel.py:268(check_skip_error)
10206 1.400 0.000 44.620 0.004 highlevel.py:245(error_checked)
30618/10206 1.350 0.000 27.650 0.003 rorpiter.py:124(Collate2Iters)
486 0.830 0.002 26.900 0.055 highlevel.py:98(diffs)
486 0.020 0.000 19.720 0.041 rorpiter.py:58(Signatures)
486 1.280 0.003 19.690 0.041 highlevel.py:165(generate_dissimilar)
151824/30615 5.620 0.000 18.810 0.001 lazy.py:251(__call__)
91212/30612 4.060 0.000 17.300 0.001 lazy.py:222(process_w_subinstance)
20412 0.520 0.000 14.900 0.001 lazy.py:166(yielda)
101629/20412 1.890 0.000 14.690 0.001 selection.py:96(Iterate)
61218/20410 1.880 0.000 14.370 0.001 selection.py:122(iterate_in_dir)
41229 0.660 0.000 11.180 0.000 robust.py:194(check_common_error)
20409 0.310 0.000 9.810 0.000 selection.py:130(<lambda>)
----------------------------------------------------------------------------
Writing 10000 new small files
Thu May 16 00:05:36 2002 profile-output
2452569 function calls (2249461 primitive calls) in 75.680 CPU seconds
Ordered by: internal time
List reduced from 236 to 20 due to restriction <20>
ncalls tottime percall cumtime percall filename:lineno(function)
71752 4.580 0.000 4.580 0.000 rpath.py:182(tupled_lstat)
71752 4.170 0.000 9.860 0.000 rpath.py:448(setdata)
101216/20410 3.550 0.000 11.070 0.001 lazy.py:251(__call__)
60808/20408 2.690 0.000 9.980 0.000 lazy.py:222(process_w_subinstance)
10206 2.460 0.000 74.630 0.007 highlevel.py:217(error_checked)
20012 2.080 0.000 2.820 0.000 rpath.py:674(open)
40819 2.050 0.000 13.440 0.000 destructive_stepping.py:33(__init__)
10228 2.050 0.000 5.030 0.000 rpath.py:625(normalize)
10000 1.980 0.000 8.060 0.001 rpath.py:684(write_from_fileobj)
51086 1.970 0.000 9.210 0.000 rpath.py:408(__init__)
143692 1.640 0.000 1.640 0.000 connection.py:40(__getattr__)
10206 1.610 0.000 8.920 0.001 highlevel.py:165(generate_dissimilar)
30618/10206 1.550 0.000 29.540 0.003 rorpiter.py:124(Collate2Iters)
10205 1.520 0.000 1.520 0.000 rpath.py:227(__str__)
20410 1.450 0.000 1.710 0.000 rpath.py:525(setmtime)
10226 1.380 0.000 1.670 0.000 rpath.py:163(rename)
20000 1.380 0.000 1.380 0.000 rpath.py:772(read)
10206 1.350 0.000 3.490 0.000 rpath.py:109(copy_attribs)
40819 1.290 0.000 3.100 0.000 destructive_stepping.py:77(set_init_perms)
50817/10208 1.250 0.000 7.890 0.001 selection.py:96(Iterate)
Thu May 16 00:05:36 2002 profile-output
2452569 function calls (2249461 primitive calls) in 75.680 CPU seconds
Ordered by: cumulative time
List reduced from 236 to 20 due to restriction <20>
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 75.680 75.680 profile:0(Globals.Main.Main(['manyfiles/', 'out']))
1 0.000 0.000 75.680 75.680 <string>:1(?)
1 0.000 0.000 75.680 75.680 main.py:188(Main)
1 0.000 0.000 75.670 75.670 main.py:221(Backup)
1 0.000 0.000 75.670 75.670 main.py:169(take_action)
1 0.000 0.000 75.650 75.650 highlevel.py:27(Mirror)
1 0.450 0.450 75.650 75.650 highlevel.py:210(patch_and_finalize)
10206 0.190 0.000 74.820 0.007 highlevel.py:268(check_skip_error)
10206 2.460 0.000 74.630 0.007 highlevel.py:217(error_checked)
30618/10206 1.550 0.000 29.540 0.003 rorpiter.py:124(Collate2Iters)
10206 1.060 0.000 29.020 0.003 highlevel.py:98(diffs)
10216 0.270 0.000 28.800 0.003 robust.py:54(execute)
10205 1.140 0.000 22.760 0.002 robust.py:148(init)
40819 2.050 0.000 13.440 0.000 destructive_stepping.py:33(__init__)
101216/20410 3.550 0.000 11.070 0.001 lazy.py:251(__call__)
60808/20408 2.690 0.000 9.980 0.000 lazy.py:222(process_w_subinstance)
10206 0.570 0.000 9.860 0.001 rorpiter.py:58(Signatures)
71752 4.170 0.000 9.860 0.000 rpath.py:448(setdata)
10226 0.540 0.000 9.770 0.001 robust.py:240(new)
51086 1.970 0.000 9.210 0.000 rpath.py:408(__init__)
----------------------------------------------------------------------------
Nothing changed, source side:
ncalls tottime percall cumtime percall filename:lineno(function)
50608/10205 1.970 0.000 5.420 0.001 lazy.py:251(__call__)
10208/1 1.260 0.000 17.740 17.740 iterfile.py:166(addtobuffer)
30404/10204 1.240 0.000 5.010 0.000 lazy.py:222(process_w_subinstance)
1 1.180 1.180 17.740 17.740 highlevel.py:98(diffs)
10221 1.050 0.000 1.930 0.000 rpath.py:448(setdata)
50814/10206 0.880 0.000 7.610 0.001 selection.py:96(Iterate)
30609/10205 0.780 0.000 7.420 0.001 selection.py:122(iterate_in_dir)
10205 0.600 0.000 0.600 0.000 iterfile.py:197(_l2s)
10204 0.600 0.000 0.700 0.000 lazy.py:218(set_subinstance)
10215 0.550 0.000 2.760 0.000 rpath.py:408(__init__)
10206 0.470 0.000 4.900 0.000 destructive_stepping.py:33(__init__)
10206 0.430 0.000 1.570 0.000 destructive_stepping.py:77(set_init_perms)
10411 0.430 0.000 0.610 0.000 rpath.py:593(isowner)
10206 0.390 0.000 0.390 0.000 lazy.py:177(yieldb)
10206 0.380 0.000 10.860 0.001 rorpiter.py:124(Collate2Iters)
10208 0.370 0.000 0.370 0.000 rpath.py:182(tupled_lstat)
20614 0.360 0.000 6.010 0.000 robust.py:194(check_common_error)
40403 0.330 0.000 0.330 0.000 lazy.py:214(intree)
10205 0.320 0.000 0.980 0.000 rpath.py:581(readable)
10205 0.320 0.000 0.790 0.000 destructive_stepping.py:206(end_process)
Thu May 16 00:21:24 2002 profile-output
646330 function calls (503964 primitive calls) in 17.820 CPU seconds
Ordered by: cumulative time
List reduced from 213 to 20 due to restriction <20>
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 17.820 17.820 <string>:1(?)
1 0.000 0.000 17.820 17.820 profile:0(Globals.Main.Main(['--remote-schema', '%s --server', 'manyfiles/', './rdiff-backup::out']))
1 0.000 0.000 17.820 17.820 main.py:188(Main)
1 0.000 0.000 17.770 17.770 main.py:221(Backup)
1 0.000 0.000 17.770 17.770 main.py:169(take_action)
47/46 0.000 0.000 17.750 0.386 connection.py:425(__call__)
1 0.000 0.000 17.750 17.750 highlevel.py:50(Mirror_and_increment)
47/46 0.000 0.000 17.750 0.386 connection.py:352(reval)
22/1 0.000 0.000 17.740 17.740 connection.py:448(readfromid)
10208/1 1.260 0.000 17.740 17.740 iterfile.py:166(addtobuffer)
22/1 0.140 0.006 17.740 17.740 iterfile.py:153(read)
22/1 0.030 0.001 17.740 17.740 connection.py:326(answer_request)
47/46 0.000 0.000 17.740 0.386 connection.py:307(get_response)
10207/1 0.130 0.000 17.740 17.740 rorpiter.py:23(ToRaw)
1 1.180 1.180 17.740 17.740 highlevel.py:98(diffs)
10206 0.380 0.000 10.860 0.001 rorpiter.py:124(Collate2Iters)
1 0.000 0.000 10.050 10.050 connection.py:480(read)
1 0.000 0.000 10.050 10.050 iterfile.py:221(read)
1 0.000 0.000 10.050 10.050 rorpiter.py:31(FromRaw)
1 0.000 0.000 10.050 10.050 iterfile.py:23(_get)
----------------------------------------------------------------------------
Nothing changed, destination side:
Thu May 16 00:27:58 2002 profile-output
2769004 function calls (2586001 primitive calls) in 113.720 CPU seconds
Ordered by: internal time
List reduced from 325 to 20 due to restriction <20>
ncalls tottime percall cumtime percall filename:lineno(function)
80820/60409 12.080 0.000 26.640 0.000 iterfile.py:221(read)
92424 9.240 0.000 9.240 0.000 rpath.py:182(tupled_lstat)
92426 6.060 0.000 16.750 0.000 rpath.py:448(setdata)
10243 5.360 0.001 10.890 0.001 rpath.py:625(normalize)
40411/30205 4.010 0.000 31.240 0.001 iterfile.py:23(_get)
101216/20410 3.810 0.000 73.120 0.004 lazy.py:251(__call__)
60808/20408 3.300 0.000 72.140 0.004 lazy.py:222(process_w_subinstance)
61337 2.770 0.000 15.330 0.000 rpath.py:408(__init__)
40409 2.630 0.000 2.630 0.000 iterfile.py:16(_s2l)
10000 2.220 0.000 13.140 0.001 rpath.py:684(write_from_fileobj)
10206 2.200 0.000 112.620 0.011 highlevel.py:245(error_checked)
20411 1.940 0.000 2.260 0.000 rpath.py:525(setmtime)
154184 1.910 0.000 1.910 0.000 connection.py:40(__getattr__)
10204 1.890 0.000 57.280 0.006 increment.py:211(init_non_dir)
10207 1.820 0.000 4.090 0.000 rpath.py:568(touch)
30615 1.730 0.000 13.490 0.000 destructive_stepping.py:33(__init__)
71585 1.690 0.000 1.690 0.000 /usr/lib/python2.2/posixpath.py:44(join)
10233 1.440 0.000 1.730 0.000 rpath.py:163(rename)
10204 1.400 0.000 1.400 0.000 rpath.py:227(__str__)
10206 1.330 0.000 3.900 0.000 rpath.py:109(copy_attribs)
Thu May 16 00:27:58 2002 profile-output
2769004 function calls (2586001 primitive calls) in 113.720 CPU seconds
Ordered by: cumulative time
List reduced from 325 to 20 due to restriction <20>
ncalls tottime percall cumtime percall filename:lineno(function)
1 0.000 0.000 113.720 113.720 <string>:1(?)
1 0.000 0.000 113.720 113.720 profile:0(Globals.Main.Main(['--remote-schema', '%s --server', './rdiff-backup::manyfiles/', 'out']))
1 0.000 0.000 113.720 113.720 main.py:188(Main)
1 0.000 0.000 113.690 113.690 main.py:169(take_action)
1 0.000 0.000 113.690 113.690 main.py:221(Backup)
1 0.010 0.010 113.660 113.660 highlevel.py:50(Mirror_and_increment)
1 0.300 0.300 113.650 113.650 highlevel.py:239(patch_increment_and_finalize)
10208 0.300 0.000 112.940 0.011 highlevel.py:268(check_skip_error)
10206 2.200 0.000 112.620 0.011 highlevel.py:245(error_checked)
101216/20410 3.810 0.000 73.120 0.004 lazy.py:251(__call__)
60808/20408 3.300 0.000 72.140 0.004 lazy.py:222(process_w_subinstance)
10205 0.780 0.000 61.080 0.006 increment.py:138(start_process)
10204 1.890 0.000 57.280 0.006 increment.py:211(init_non_dir)
10220 0.520 0.000 45.290 0.004 robust.py:54(execute)
10218 0.420 0.000 34.170 0.003 robust.py:76(init)
10204 1.090 0.000 33.700 0.003 robust.py:148(init)
40411/30205 4.010 0.000 31.240 0.001 iterfile.py:23(_get)
20412/10206 0.800 0.000 30.020 0.003 rorpiter.py:124(Collate2Iters)
20411/10205 1.300 0.000 29.480 0.003 rorpiter.py:31(FromRaw)
30411/20205 0.890 0.000 27.950 0.001 iterfile.py:54(next)