Commit e7252d6d authored by ben

Various changes to switch to new include/exclude syntax


git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup@39 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent f7a6f269
#!/usr/bin/env python #!/usr/bin/env python
# #
# rdiff-backup -- Mirror files while keeping incremental changes # rdiff-backup -- Mirror files while keeping incremental changes
# Version 0.7.1 released March 25, 2002 # Version 0.7.2 released April 30, 2002
# Copyright (C) 2001, 2002 Ben Escoto <bescoto@stanford.edu> # Copyright (C) 2001, 2002 Ben Escoto <bescoto@stanford.edu>
# #
# This program is licensed under the GNU General Public License (GPL). # This program is licensed under the GNU General Public License (GPL).
......
from __future__ import generators
execfile("destructive_stepping.py") execfile("destructive_stepping.py")
import re
####################################################################### #######################################################################
# #
...@@ -47,14 +49,23 @@ class Select: ...@@ -47,14 +49,23 @@ class Select:
be redundant and presumably isn't what the user intends. be redundant and presumably isn't what the user intends.
""" """
# Matches any string containing a glob metacharacter (*, ? or [).
# Plain filenames should not match; glob patterns usually will.
# Written as a raw string so the backslashes reach the re module as-is.
glob_re = re.compile(r".*[\*\?\[]")

def __init__(self, dsrpath):
    """Select initializer

    dsrpath is stored as self.dsrpath and its .path becomes
    self.prefix, the string file specifications are checked
    against.  The list of selection functions starts out empty and
    no iterator is created here.

    """
    self.selection_functions = []
    self.dsrpath = dsrpath
    self.prefix = dsrpath.path
def set_iter(self, starting_index = None):
    """Build the iterator over self.dsrpath and expose it on self

    With starting_index None the tree is walked with Iterate;
    otherwise iterate_starting_from is used.  Afterwards self.next
    and self.__iter__ are set so the Select object can itself be
    used as an iterator.

    """
    if starting_index is None:
        self.iter = self.Iterate(self.dsrpath, self.Iterate)
    else:
        # iterate_starting_from compares against self.starting_index,
        # so record it before the generator is first advanced.
        self.starting_index = starting_index
        self.iter = self.iterate_starting_from(self.dsrpath,
                        starting_index, self.iterate_starting_from)
    self.next = self.iter.next
    self.__iter__ = lambda: self
def Iterate(self, dsrpath, rec_func): def Iterate(self, dsrpath, rec_func):
"""Return iterator yielding dsrps in dsrpath """Return iterator yielding dsrps in dsrpath
...@@ -88,7 +99,7 @@ class Select: ...@@ -88,7 +99,7 @@ class Select:
"""Like Iterate, but only yield indicies > self.starting_index""" """Like Iterate, but only yield indicies > self.starting_index"""
if dsrpath.index > self.starting_index: # past starting_index if dsrpath.index > self.starting_index: # past starting_index
for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp
elif dsrpath.index = self.starting_index[:len(dsrpath.index)]: elif dsrpath.index == self.starting_index[:len(dsrpath.index)]:
# May encounter starting index on this branch # May encounter starting index on this branch
for dsrp in self.Iterate(dsrpath, self.iterate_starting_from): for dsrp in self.Iterate(dsrpath, self.iterate_starting_from):
yield dsrp yield dsrp
...@@ -100,50 +111,99 @@ class Select: ...@@ -100,50 +111,99 @@ class Select:
if result is not None: return result if result is not None: return result
return 1 return 1
def ParseArgs(self, argtuples):
    """Create selection functions based on list of tuples

    The tuples have the form (option string, additional argument)
    and are created when the initial commandline arguments are
    read.  The reason for the extra level of processing is that
    the filelists may only be openable by the main connection, but
    the selection functions need to be on the backup reader or
    writer side.  When the initial arguments are parsed the right
    information is sent over the link.

    For the filelist options the additional argument is a pair
    (filelist name, open file object).  The -stdin variants carry
    the same kind of pair and are handled identically.

    """
    for opt, arg in argtuples:
        if opt == "--exclude":
            sel_func = self.glob_get_sf(arg, 0)
        elif opt == "--exclude-device-files":
            sel_func = self.devfiles_get_sf()
        elif opt in ("--exclude-filelist", "--exclude-filelist-stdin"):
            sel_func = self.filelist_get_sf(arg[1], 0, arg[0])
        elif opt == "--exclude-regexp":
            sel_func = self.regexp_get_sf(arg, 0)
        elif opt == "--include":
            sel_func = self.glob_get_sf(arg, 1)
        elif opt in ("--include-filelist", "--include-filelist-stdin"):
            sel_func = self.filelist_get_sf(arg[1], 1, arg[0])
        elif opt == "--include-regexp":
            sel_func = self.regexp_get_sf(arg, 1)
        else: assert 0, "Bad option %s" % opt
        self.add_selection_func(sel_func)

    # Exclude rdiff-backup-data directory; inserted at the front so it
    # is consulted before any user-supplied selector.
    self.add_selection_func(
        self.glob_get_tuple_sf(("rdiff-backup-data",), 0), 1)
def add_selection_func(self, sel_func, add_to_start = None):
    """Register sel_func, last by default or first if add_to_start"""
    if not add_to_start:
        self.selection_functions.append(sel_func)
        return
    self.selection_functions.insert(0, sel_func)
def filelist_get_sf(self, filelist_fp, inc_default, filelist_name):
    """Return selection function by reading list of files

    The format of the filelist is documented in the man page.
    filelist_fp should be an (open) file object.
    inc_default should be true if this is an include list,
    false for an exclude list.
    filelist_name is just a string used for logging.

    The returned function keeps a cursor into the sorted list of
    entries and only ever moves it forward.  Once every entry has
    been passed it returns inc_default.

    """
    Log("Reading filelist %s" % filelist_name, 4)
    tuple_list, something_excluded = \
                self.filelist_read(filelist_fp, inc_default, filelist_name)
    Log("Sorting filelist %s" % filelist_name, 4)
    tuple_list.sort()
    i = [0] # one-element list so the nested function can advance it

    def selection_function(dsrp):
        while 1:
            # Checked on every pass: the cursor may have just moved
            # past the final entry (or the list may be empty).
            if i[0] >= len(tuple_list): return inc_default
            include, move_on = \
                     self.filelist_pair_match(dsrp, tuple_list[i[0]])
            if move_on:
                i[0] += 1
                if include is None: continue # later line may match
            return include

    selection_function.exclude = something_excluded
    selection_function.name = "Filelist: " + filelist_name
    return selection_function
def filelist_read(self, filelist_fp, include, filelist_name): def filelist_read(self, filelist_fp, include, filelist_name):
"""Read filelist from fp, return (tuplelist, something_excluded)""" """Read filelist from fp, return (tuplelist, something_excluded)"""
something_excluded, tuple_list = None, [] something_excluded, tuple_list = None, []
prefix_warnings = 0 prefix_warnings = 0
while 1: for line in filelist_fp:
line = filelist_fp.readline() if not line.strip(): continue # skip blanks
if not line: break
try: tuple = self.filelist_parse_line(line, include) try: tuple = self.filelist_parse_line(line, include)
except FilePrefixError, exp: except FilePrefixError, exp:
prefix_warnings += 1 prefix_warnings += 1
if prefix_warnings < 6: if prefix_warnings < 6:
Log("Warning: file specification %s in filelist %s\n" Log("Warning: file specification %s in filelist %s\n"
"doesn't start with correct prefix %s, ignoring." % "doesn't start with correct prefix %s, ignoring." %
(exp[0], filelist_name, exp[1]), 2) (exp, filelist_name, self.prefix), 2)
if prefix_warnings == 5: if prefix_warnings == 5:
Log("Future prefix errors will not be logged.", 2) Log("Future prefix errors will not be logged.", 2)
tuple_list.append(tuple) tuple_list.append(tuple)
if not tuple[1]: something_excluded = 1 if not tuple[1]: something_excluded = 1
if filelist_fp.close():
Log("Error closing filelist %s" % filelist_name, 2)
return (tuple_list, something_excluded) return (tuple_list, something_excluded)
def filelist_parse_line(self, line, include, prefix): def filelist_parse_line(self, line, include):
"""Parse a single line of a filelist, returning a pair """Parse a single line of a filelist, returning a pair
pair will be of form (index, include), where index is another pair will be of form (index, include), where index is another
...@@ -160,17 +220,110 @@ class Select: ...@@ -160,17 +220,110 @@ class Select:
include = 0 include = 0
line = line[2:] line = line[2:]
if not line.startswith(prefix+"/"): if not line.startswith(self.prefix): raise FilePrefixError(line)
raise FilePrefixError(line, prefix+"/") line = line[len(self.prefix):] # Discard prefix
index = filter(lambda x: x, line.split("/")) # remove empties index = tuple(filter(lambda x: x, line.split("/"))) # remove empties
return (index, include) return (index, include)
def filelist_pair_match(self, dsrp, pair):
    """Match one filelist entry against a dsrp

    pair is (index, include).  Returns a pair (include, move_on).
    The first element is None if the entry says nothing about
    dsrp, and 0/1 if the entry excludes or includes it.  move_on
    is true if the entry sorts before dsrp and so cannot match any
    later index; the caller should then advance to the next entry.

    """
    index, include = pair
    if include == 1:
        target = dsrp.index
        if index < target: return (None, 1)
        if index == target: return (1, 1)
        if index[:len(target)] == target:
            return (1, None) # /foo/bar implicitly includes /foo
        return (None, None) # entry sorts after dsrp, no relation
    if include == 0:
        target = dsrp.index
        if target[:len(index)] == index:
            return (0, None) # /foo implicitly excludes /foo/bar
        if index < target: return (None, 1)
        return (None, None) # entry sorts after dsrp, no relation
    assert 0, "Include is %s, should be 0 or 1" % (include,)
def regexp_get_sf(self, regexp_string, include):
    """Return selection function given by regexp_string

    The function returns include (0 or 1) when the regular
    expression, matched from the start of dsrp.path, consumes the
    whole path, and None otherwise.  A string that does not
    compile is logged and the compile error is re-raised.

    """
    assert include == 0 or include == 1
    try: compiled = re.compile(regexp_string)
    except:
        Log("Error compiling regular expression %s" % regexp_string, 1)
        raise

    def sel_func(dsrp):
        path = dsrp.path
        found = compiled.match(path)
        if not found or found.end(0) != len(path): return None
        return include

    sel_func.exclude = not include
    sel_func.name = "Regular expression: %s" % regexp_string
    return sel_func
def devfiles_get_sf(self):
    """Return a selection function to exclude all dev files

    Logs a warning if other selection functions have already been
    registered.  The returned function gives 0 when dsrp.isdev()
    is true and None otherwise.

    """
    if self.selection_functions:
        Log("Warning: exclude-device-files is not the first "
            "selector.\nThis may not be what you intended", 3)

    def sel_func(dsrp):
        if not dsrp.isdev(): return None
        return 0

    sel_func.exclude = 1
    sel_func.name = "Exclude device files"
    return sel_func
def glob_get_sf(self, glob_str, include):
    """Return selection function given by glob string

    "**" matches every file.  A string without glob characters
    (as judged by self.glob_re) is treated as a plain filename and
    handed to glob_get_filename_sf.  Any other glob pattern is not
    supported yet and raises NotImplementedError.

    """
    assert include == 0 or include == 1
    if glob_str != "**":
        if not self.glob_re.match(glob_str): # normal file
            return self.glob_get_filename_sf(glob_str, include)
        # Fail with a clear message rather than reaching the
        # attribute assignments below with no function to set them on.
        raise NotImplementedError(
            "Glob pattern %s is not supported, only ** and plain "
            "filenames are" % glob_str)

    sel_func = lambda dsrp: include
    sel_func.exclude = not include
    sel_func.name = "Command-line glob: %s" % glob_str
    return sel_func
def glob_get_filename_sf(self, filename, include):
    """Get a selection function given a normal filename

    Some of the parsing is better explained in
    filelist_parse_line.  The reason this is split from normal
    globbing is so we can check the prefix and give proper
    warning.

    A filename outside self.prefix is logged and yields a dummy
    selection function that always returns None.

    """
    prefix = self.prefix
    rest = filename[len(prefix):]
    # The prefix must end on a path boundary; a bare startswith()
    # would let prefix /usr/local claim /usr/localfoo.
    inside = (filename.startswith(prefix) and
              (not rest or rest.startswith("/") or prefix.endswith("/")))
    if not inside:
        Log("Warning: file specification %s does not start with\n"
            "prefix %s, ignoring" % (filename, self.prefix), 2)
        dummy = lambda x: None # dummy selection function
        # Carry the same attributes as every other selection function
        dummy.exclude = 0 # never excludes anything
        dummy.name = "Ignored file specification: %s" % filename
        return dummy
    index = tuple(filter(lambda x: x, rest.split("/"))) # remove empties
    return self.glob_get_tuple_sf(index, include)
def glob_get_tuple_sf(self, tuple, include):
    """Return selection function based on index tuple

    With include 1 the function returns 1 for the tuple itself,
    anything below it and any of its ancestors.  With include 0 it
    returns 0 for the tuple and anything below it.  In every other
    case the function returns None.

    """
    def include_sel_func(dsrp):
        idx = dsrp.index
        # /foo/bar implicitly matches /foo, vice-versa
        if idx == tuple[:len(idx)] or idx[:len(tuple)] == tuple: return 1
        return None

    def exclude_sel_func(dsrp):
        # /foo excludes /foo/bar, not vice-versa
        if dsrp.index[:len(tuple)] != tuple: return None
        return 0

    if include == 1: sel_func = include_sel_func
    elif include == 0: sel_func = exclude_sel_func
    sel_func.exclude = not include
    sel_func.name = "Tuple select %s" % (tuple,)
    return sel_func
...@@ -8,7 +8,7 @@ import re, os ...@@ -8,7 +8,7 @@ import re, os
class Globals: class Globals:
# The current version of rdiff-backup # The current version of rdiff-backup
version = "0.7.1" version = "0.7.2"
# If this is set, use this value in seconds as the current time # If this is set, use this value in seconds as the current time
# instead of reading it from the clock. # instead of reading it from the clock.
...@@ -45,26 +45,9 @@ class Globals: ...@@ -45,26 +45,9 @@ class Globals:
# If true, try to reset the atimes of the source partition. # If true, try to reset the atimes of the source partition.
preserve_atime = None preserve_atime = None
# This is a list of compiled regular expressions. If one of them
# matches a file in the source area, do not process that file.
exclude_regexps = []
# Another list of compiled regexps; this time the file is excluded
# if it matches something in the destination area.
exclude_mirror_regexps = []
# If this is true, rdiff-backup will exclude any dev files it
# sees, in the same way it excludes files matching the exclude
# regexps.
exclude_device_files = None
# This will be set as soon as the LocalConnection class loads # This will be set as soon as the LocalConnection class loads
local_connection = None local_connection = None
# If this is true, instead of processing whole directory, just
# examine files read in from standard input.
include_from_stdin = None
# All connections should be added to the following list, so # All connections should be added to the following list, so
# further global changes can be propagated to the remote systems. # further global changes can be propagated to the remote systems.
# The first element should be Globals.local_connection. For a # The first element should be Globals.local_connection. For a
...@@ -138,12 +121,16 @@ class Globals: ...@@ -138,12 +121,16 @@ class Globals:
# Increments based on files whose names match this # Increments based on files whose names match this
# case-insensitive regular expression won't be compressed (applies # case-insensitive regular expression won't be compressed (applies
# to .snapshots and .diffs). The second below is the compiled # to .snapshots and .diffs). The second below will be the
# version of the first. # compiled version of the first.
no_compression_regexp_string = ".*\\.(gz|z|bz|bz2|tgz|zip|rpm|deb|" \ no_compression_regexp_string = ".*\\.(gz|z|bz|bz2|tgz|zip|rpm|deb|" \
"jpg|gif|png|mp3|ogg|avi|wmv|mpeg|mpg|rm|mov)$" "jpg|gif|png|mp3|ogg|avi|wmv|mpeg|mpg|rm|mov)$"
no_compression_regexp = None no_compression_regexp = None
# On the reader and writer connections, the following will be
# replaced by the source and mirror Select objects respectively.
select_source, select_mirror = None, None
def get(cls, name): def get(cls, name):
"""Return the value of something in this class""" """Return the value of something in this class"""
return cls.__dict__[name] return cls.__dict__[name]
...@@ -181,19 +168,6 @@ class Globals: ...@@ -181,19 +168,6 @@ class Globals:
cls.__dict__[name][key] = val cls.__dict__[name][key] = val
set_dict_val = classmethod(set_dict_val) set_dict_val = classmethod(set_dict_val)
def add_regexp(cls, regstr, mirror=None):
"""Add a regular expression to the exclude list"""
for conn in Globals.connections:
conn.Globals.add_regexp_local(regstr, mirror)
add_regexp = classmethod(add_regexp)
def add_regexp_local(cls, regstr, mirror):
"""Add the regex only to the local Globals class"""
compiled = re.compile(regstr)
if mirror: Globals.exclude_mirror_regexps.append(compiled)
else: Globals.exclude_regexps.append(compiled)
add_regexp_local = classmethod(add_regexp_local)
def postset_regexp(cls, name, re_string, flags = None): def postset_regexp(cls, name, re_string, flags = None):
"""Compile re_string on all existing connections, set to name""" """Compile re_string on all existing connections, set to name"""
for conn in Globals.connections: for conn in Globals.connections:
...@@ -205,3 +179,13 @@ class Globals: ...@@ -205,3 +179,13 @@ class Globals:
if flags: cls.__dict__[name] = re.compile(re_string, flags) if flags: cls.__dict__[name] = re.compile(re_string, flags)
else: cls.__dict__[name] = re.compile(re_string) else: cls.__dict__[name] = re.compile(re_string)
postset_regexp_local = classmethod(postset_regexp_local) postset_regexp_local = classmethod(postset_regexp_local)
def set_select(cls, source, dsrpath, tuplelist):
    """Create a Select for dsrpath and configure it from tuplelist

    The object is stored as cls.select_source when source is true
    and as cls.select_mirror otherwise, then its ParseArgs is
    called with tuplelist.

    """
    select = Select(dsrpath)
    # Stored before parsing, so the attribute is set even if
    # ParseArgs raises.
    if source: cls.select_source = select
    else: cls.select_mirror = select
    select.ParseArgs(tuplelist)
set_select = classmethod(set_select)
#!/usr/bin/env python #!/usr/bin/env python
# #
# rdiff-backup -- Mirror files while keeping incremental changes # rdiff-backup -- Mirror files while keeping incremental changes
# Version 0.7.1 released March 25, 2002 # Version 0.7.2 released April 30, 2002
# Copyright (C) 2001, 2002 Ben Escoto <bescoto@stanford.edu> # Copyright (C) 2001, 2002 Ben Escoto <bescoto@stanford.edu>
# #
# This program is licensed under the GNU General Public License (GPL). # This program is licensed under the GNU General Public License (GPL).
......
...@@ -13,22 +13,29 @@ class Main: ...@@ -13,22 +13,29 @@ class Main:
self.action = None self.action = None
self.remote_cmd, self.remote_schema = None, None self.remote_cmd, self.remote_schema = None, None
self.force = None self.force = None
self.exclude_regstrs = ["/proc"] self.select_opts, self.select_mirror_opts = [], []
self.exclude_mirror_regstrs = []
def parse_cmdlineoptions(self): def parse_cmdlineoptions(self):
"""Parse argument list and set global preferences""" """Parse argument list and set global preferences"""
try: optlist, self.args = getopt.getopt(sys.argv[1:], "blmv:Vs", def sel_fl(filename):
["backup-mode", "version", "verbosity=", "exclude=", """Helper function for including/excluding filelists below"""
"exclude-mirror=", "server", "test-server", try: return open(filename, "r")
"remote-cmd=", "mirror-only", "force", except IOError: Log.FatalError("Error opening file %s" % filename)
"change-source-perms", "list-increments",
"remove-older-than=", "remote-schema=", try: optlist, self.args = getopt.getopt(sys.argv[1:], "blmsv:V",
"include-from-stdin", "terminal-verbosity=", ["backup-mode", "change-source-perms",
"exclude-device-files", "resume", "no-resume", "checkpoint-interval=", "current-time=", "exclude=",
"resume-window=", "windows-time-format", "exclude-device-files", "exclude-filelist=",
"checkpoint-interval=", "no-hard-links", "current-time=", "exclude-filelist-stdin", "exclude-mirror=",
"no-compression", "no-compression-regexp="]) "exclude-regexp=", "force", "include=",
"include-filelist=", "include-filelist-stdin",
"include-regexp=", "list-increments", "mirror-only",
"no-compression", "no-compression-regexp=",
"no-hard-links", "no-resume", "remote-cmd=",
"remote-schema=", "remove-older-than=", "resume",
"resume-window=", "server", "terminal-verbosity=",
"test-server", "verbosity", "version",
"windows-time-format"])
except getopt.error: except getopt.error:
self.commandline_error("Error parsing commandline options") self.commandline_error("Error parsing commandline options")
...@@ -40,13 +47,24 @@ class Main: ...@@ -40,13 +47,24 @@ class Main:
Globals.set_integer('checkpoint_interval', arg) Globals.set_integer('checkpoint_interval', arg)
elif opt == "--current-time": elif opt == "--current-time":
Globals.set_integer('current_time', arg) Globals.set_integer('current_time', arg)
elif opt == "--exclude": self.exclude_regstrs.append(arg) elif opt == "--exclude": self.select_opts.append((opt, arg))
elif opt == "--exclude-device-files": elif opt == "--exclude-device-files":
Globals.set('exclude_device_files', 1) self.select_opts.append((opt, arg))
elif opt == "--exclude-filelist":
self.select_opts.append((opt, (arg, sel_fl(arg))))
elif opt == "--exclude-filelist-stdin":
self.select_opts.append((opt, ("standard input", sys.stdin)))
elif opt == "--exclude-mirror": elif opt == "--exclude-mirror":
self.exclude_mirror_regstrs.append(arg) self.select_mirror_opts.append(("--exclude", arg))
elif opt == "--exclude-regexp": self.select_opts.append((opt, arg))
elif opt == "--force": self.force = 1 elif opt == "--force": self.force = 1
elif opt == "--include-from-stdin": Globals.include_from_stdin = 1 elif opt == "--include": self.select_opts.append((opt, arg))
elif opt == "--include-filelist":
self.select_opts.append((opt, (arg, sel_fl(arg))))
elif opt == "--include-filelist-stdin":
self.select_opts.append((opt, ("standard input", sys.stdin)))
elif opt == "--include-regexp":
self.select_opts.append((opt, arg))
elif opt == "-l" or opt == "--list-increments": elif opt == "-l" or opt == "--list-increments":
self.action = "list-increments" self.action = "list-increments"
elif opt == "-m" or opt == "--mirror-only": self.action = "mirror" elif opt == "-m" or opt == "--mirror-only": self.action = "mirror"
...@@ -116,10 +134,10 @@ class Main: ...@@ -116,10 +134,10 @@ class Main:
for rp in rps: rp.setdata() # Update with userinfo for rp in rps: rp.setdata() # Update with userinfo
os.umask(077) os.umask(077)
for regex_string in self.exclude_regstrs: rps[0].conn.Globals.set_select(1, rps[0], self.select_opts)
Globals.add_regexp(regex_string, None) if len(rps) == 2:
for regex_string in self.exclude_mirror_regstrs: rps[1].conn.Globals.set_select(None, rps[1],
Globals.add_regexp(regex_string, 1) self.select_mirror_opts)
Globals.postset_regexp('no_compression_regexp', Globals.postset_regexp('no_compression_regexp',
Globals.no_compression_regexp_string, re.I) Globals.no_compression_regexp_string, re.I)
...@@ -217,9 +235,6 @@ rdiff-backup with the --force option.""" % rpout.path) ...@@ -217,9 +235,6 @@ rdiff-backup with the --force option.""" % rpout.path)
except os.error: except os.error:
Log.FatalError("Unable to create directory %s" % rpout.path) Log.FatalError("Unable to create directory %s" % rpout.path)
if not self.datadir.lstat(): self.datadir.mkdir() if not self.datadir.lstat(): self.datadir.mkdir()
Globals.add_regexp(self.datadir.path, 1)
Globals.add_regexp(rpin.append("rdiff-backup-data").path, None)
if Log.verbosity > 0: if Log.verbosity > 0:
Log.open_logfile(self.datadir.append("backup.log")) Log.open_logfile(self.datadir.append("backup.log"))
self.backup_warn_if_infinite_regress(rpin, rpout) self.backup_warn_if_infinite_regress(rpin, rpout)
...@@ -334,7 +349,6 @@ Try restoring from an increment file (the filenames look like ...@@ -334,7 +349,6 @@ Try restoring from an increment file (the filenames look like
else: Log.FatalError("Unable to find rdiff-backup-data dir") else: Log.FatalError("Unable to find rdiff-backup-data dir")
Globals.rbdir = self.datadir = datadirrp Globals.rbdir = self.datadir = datadirrp
Globals.add_regexp(self.datadir.path, 1)
rootrp = RPath(rpin.conn, "/".join(pathcomps[:i])) rootrp = RPath(rpin.conn, "/".join(pathcomps[:i]))
if not rootrp.lstat(): if not rootrp.lstat():
Log.FatalError("Root of mirror area %s does not exist" % Log.FatalError("Root of mirror area %s does not exist" %
......
from __future__ import generators
execfile("destructive_stepping.py") execfile("destructive_stepping.py")
import re
####################################################################### #######################################################################
# #
...@@ -47,14 +49,23 @@ class Select: ...@@ -47,14 +49,23 @@ class Select:
be redundant and presumably isn't what the user intends. be redundant and presumably isn't what the user intends.
""" """
# Matches any string containing a glob metacharacter (*, ? or [).
# Plain filenames should not match; glob patterns usually will.
# Written as a raw string so the backslashes reach the re module as-is.
glob_re = re.compile(r".*[\*\?\[]")

def __init__(self, dsrpath):
    """Select initializer

    dsrpath is stored as self.dsrpath and its .path becomes
    self.prefix, the string file specifications are checked
    against.  The list of selection functions starts out empty and
    no iterator is created here.

    """
    self.selection_functions = []
    self.dsrpath = dsrpath
    self.prefix = dsrpath.path
def set_iter(self, starting_index = None):
    """Build the iterator over self.dsrpath and expose it on self

    With starting_index None the tree is walked with Iterate;
    otherwise iterate_starting_from is used.  Afterwards self.next
    and self.__iter__ are set so the Select object can itself be
    used as an iterator.

    """
    if starting_index is None:
        self.iter = self.Iterate(self.dsrpath, self.Iterate)
    else:
        # iterate_starting_from compares against self.starting_index,
        # so record it before the generator is first advanced.
        self.starting_index = starting_index
        self.iter = self.iterate_starting_from(self.dsrpath,
                        starting_index, self.iterate_starting_from)
    self.next = self.iter.next
    self.__iter__ = lambda: self
def Iterate(self, dsrpath, rec_func): def Iterate(self, dsrpath, rec_func):
"""Return iterator yielding dsrps in dsrpath """Return iterator yielding dsrps in dsrpath
...@@ -88,7 +99,7 @@ class Select: ...@@ -88,7 +99,7 @@ class Select:
"""Like Iterate, but only yield indicies > self.starting_index""" """Like Iterate, but only yield indicies > self.starting_index"""
if dsrpath.index > self.starting_index: # past starting_index if dsrpath.index > self.starting_index: # past starting_index
for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp for dsrp in self.Iterate(dsrpath, self.iterate): yield dsrp
elif dsrpath.index = self.starting_index[:len(dsrpath.index)]: elif dsrpath.index == self.starting_index[:len(dsrpath.index)]:
# May encounter starting index on this branch # May encounter starting index on this branch
for dsrp in self.Iterate(dsrpath, self.iterate_starting_from): for dsrp in self.Iterate(dsrpath, self.iterate_starting_from):
yield dsrp yield dsrp
...@@ -100,50 +111,99 @@ class Select: ...@@ -100,50 +111,99 @@ class Select:
if result is not None: return result if result is not None: return result
return 1 return 1
def ParseArgs(self, argtuples):
    """Create selection functions based on list of tuples

    The tuples have the form (option string, additional argument)
    and are created when the initial commandline arguments are
    read.  The reason for the extra level of processing is that
    the filelists may only be openable by the main connection, but
    the selection functions need to be on the backup reader or
    writer side.  When the initial arguments are parsed the right
    information is sent over the link.

    For the filelist options the additional argument is a pair
    (filelist name, open file object).  The -stdin variants carry
    the same kind of pair and are handled identically.

    """
    for opt, arg in argtuples:
        if opt == "--exclude":
            sel_func = self.glob_get_sf(arg, 0)
        elif opt == "--exclude-device-files":
            sel_func = self.devfiles_get_sf()
        elif opt in ("--exclude-filelist", "--exclude-filelist-stdin"):
            sel_func = self.filelist_get_sf(arg[1], 0, arg[0])
        elif opt == "--exclude-regexp":
            sel_func = self.regexp_get_sf(arg, 0)
        elif opt == "--include":
            sel_func = self.glob_get_sf(arg, 1)
        elif opt in ("--include-filelist", "--include-filelist-stdin"):
            sel_func = self.filelist_get_sf(arg[1], 1, arg[0])
        elif opt == "--include-regexp":
            sel_func = self.regexp_get_sf(arg, 1)
        else: assert 0, "Bad option %s" % opt
        self.add_selection_func(sel_func)

    # Exclude rdiff-backup-data directory; inserted at the front so it
    # is consulted before any user-supplied selector.
    self.add_selection_func(
        self.glob_get_tuple_sf(("rdiff-backup-data",), 0), 1)
def add_selection_func(self, sel_func, add_to_start = None):
    """Register sel_func, last by default or first if add_to_start"""
    if not add_to_start:
        self.selection_functions.append(sel_func)
        return
    self.selection_functions.insert(0, sel_func)
def filelist_get_sf(self, filelist_fp, inc_default, filelist_name):
    """Return selection function by reading list of files

    The format of the filelist is documented in the man page.
    filelist_fp should be an (open) file object.
    inc_default should be true if this is an include list,
    false for an exclude list.
    filelist_name is just a string used for logging.

    The returned function keeps a cursor into the sorted list of
    entries and only ever moves it forward.  Once every entry has
    been passed it returns inc_default.

    """
    Log("Reading filelist %s" % filelist_name, 4)
    tuple_list, something_excluded = \
                self.filelist_read(filelist_fp, inc_default, filelist_name)
    Log("Sorting filelist %s" % filelist_name, 4)
    tuple_list.sort()
    i = [0] # one-element list so the nested function can advance it

    def selection_function(dsrp):
        while 1:
            # Checked on every pass: the cursor may have just moved
            # past the final entry (or the list may be empty).
            if i[0] >= len(tuple_list): return inc_default
            include, move_on = \
                     self.filelist_pair_match(dsrp, tuple_list[i[0]])
            if move_on:
                i[0] += 1
                if include is None: continue # later line may match
            return include

    selection_function.exclude = something_excluded
    selection_function.name = "Filelist: " + filelist_name
    return selection_function
def filelist_read(self, filelist_fp, include, filelist_name): def filelist_read(self, filelist_fp, include, filelist_name):
"""Read filelist from fp, return (tuplelist, something_excluded)""" """Read filelist from fp, return (tuplelist, something_excluded)"""
something_excluded, tuple_list = None, [] something_excluded, tuple_list = None, []
prefix_warnings = 0 prefix_warnings = 0
while 1: for line in filelist_fp:
line = filelist_fp.readline() if not line.strip(): continue # skip blanks
if not line: break
try: tuple = self.filelist_parse_line(line, include) try: tuple = self.filelist_parse_line(line, include)
except FilePrefixError, exp: except FilePrefixError, exp:
prefix_warnings += 1 prefix_warnings += 1
if prefix_warnings < 6: if prefix_warnings < 6:
Log("Warning: file specification %s in filelist %s\n" Log("Warning: file specification %s in filelist %s\n"
"doesn't start with correct prefix %s, ignoring." % "doesn't start with correct prefix %s, ignoring." %
(exp[0], filelist_name, exp[1]), 2) (exp, filelist_name, self.prefix), 2)
if prefix_warnings == 5: if prefix_warnings == 5:
Log("Future prefix errors will not be logged.", 2) Log("Future prefix errors will not be logged.", 2)
tuple_list.append(tuple) tuple_list.append(tuple)
if not tuple[1]: something_excluded = 1 if not tuple[1]: something_excluded = 1
if filelist_fp.close():
Log("Error closing filelist %s" % filelist_name, 2)
return (tuple_list, something_excluded) return (tuple_list, something_excluded)
def filelist_parse_line(self, line, include, prefix): def filelist_parse_line(self, line, include):
"""Parse a single line of a filelist, returning a pair """Parse a single line of a filelist, returning a pair
pair will be of form (index, include), where index is another pair will be of form (index, include), where index is another
...@@ -160,17 +220,110 @@ class Select: ...@@ -160,17 +220,110 @@ class Select:
include = 0 include = 0
line = line[2:] line = line[2:]
if not line.startswith(prefix+"/"): if not line.startswith(self.prefix): raise FilePrefixError(line)
raise FilePrefixError(line, prefix+"/") line = line[len(self.prefix):] # Discard prefix
index = filter(lambda x: x, line.split("/")) # remove empties index = tuple(filter(lambda x: x, line.split("/"))) # remove empties
return (index, include) return (index, include)
def filelist_pair_match(self, dsrp, pair):
    """Match one filelist entry against a dsrp

    pair is (index, include).  Returns a pair (include, move_on).
    The first element is None if the entry says nothing about
    dsrp, and 0/1 if the entry excludes or includes it.  move_on
    is true if the entry sorts before dsrp and so cannot match any
    later index; the caller should then advance to the next entry.

    """
    index, include = pair
    if include == 1:
        target = dsrp.index
        if index < target: return (None, 1)
        if index == target: return (1, 1)
        if index[:len(target)] == target:
            return (1, None) # /foo/bar implicitly includes /foo
        return (None, None) # entry sorts after dsrp, no relation
    if include == 0:
        target = dsrp.index
        if target[:len(index)] == index:
            return (0, None) # /foo implicitly excludes /foo/bar
        if index < target: return (None, 1)
        return (None, None) # entry sorts after dsrp, no relation
    assert 0, "Include is %s, should be 0 or 1" % (include,)
def regexp_get_sf(self, regexp_string, include):
    """Return selection function given by regexp_string

    The function returns include (0 or 1) when the regular
    expression, matched from the start of dsrp.path, consumes the
    whole path, and None otherwise.  A string that does not
    compile is logged and the compile error is re-raised.

    """
    assert include == 0 or include == 1
    try: compiled = re.compile(regexp_string)
    except:
        Log("Error compiling regular expression %s" % regexp_string, 1)
        raise

    def sel_func(dsrp):
        path = dsrp.path
        found = compiled.match(path)
        if not found or found.end(0) != len(path): return None
        return include

    sel_func.exclude = not include
    sel_func.name = "Regular expression: %s" % regexp_string
    return sel_func
def devfiles_get_sf(self):
    """Return a selection function to exclude all dev files

    Logs a warning if other selection functions have already been
    registered.  The returned function gives 0 when dsrp.isdev()
    is true and None otherwise.

    """
    if self.selection_functions:
        Log("Warning: exclude-device-files is not the first "
            "selector.\nThis may not be what you intended", 3)

    def sel_func(dsrp):
        if not dsrp.isdev(): return None
        return 0

    sel_func.exclude = 1
    sel_func.name = "Exclude device files"
    return sel_func
def glob_get_sf(self, glob_str, include):
    """Return selection function given by glob string

    "**" matches every file.  A string without glob characters
    (as judged by self.glob_re) is treated as a plain filename and
    handed to glob_get_filename_sf.  Any other glob pattern is not
    supported yet and raises NotImplementedError.

    """
    assert include == 0 or include == 1
    if glob_str != "**":
        if not self.glob_re.match(glob_str): # normal file
            return self.glob_get_filename_sf(glob_str, include)
        # Fail with a clear message rather than reaching the
        # attribute assignments below with no function to set them on.
        raise NotImplementedError(
            "Glob pattern %s is not supported, only ** and plain "
            "filenames are" % glob_str)

    sel_func = lambda dsrp: include
    sel_func.exclude = not include
    sel_func.name = "Command-line glob: %s" % glob_str
    return sel_func
def glob_get_filename_sf(self, filename, include):
    """Get a selection function given a normal filename

    Some of the parsing is better explained in
    filelist_parse_line.  The reason this is split from normal
    globbing is so we can check the prefix and give proper
    warning.

    A filename outside self.prefix is logged and yields a dummy
    selection function that always returns None.

    """
    prefix = self.prefix
    rest = filename[len(prefix):]
    # The prefix must end on a path boundary; a bare startswith()
    # would let prefix /usr/local claim /usr/localfoo.
    inside = (filename.startswith(prefix) and
              (not rest or rest.startswith("/") or prefix.endswith("/")))
    if not inside:
        Log("Warning: file specification %s does not start with\n"
            "prefix %s, ignoring" % (filename, self.prefix), 2)
        dummy = lambda x: None # dummy selection function
        # Carry the same attributes as every other selection function
        dummy.exclude = 0 # never excludes anything
        dummy.name = "Ignored file specification: %s" % filename
        return dummy
    index = tuple(filter(lambda x: x, rest.split("/"))) # remove empties
    return self.glob_get_tuple_sf(index, include)
def glob_get_tuple_sf(self, tuple, include):
    """Return selection function based on index tuple

    With include 1 the function returns 1 for the tuple itself,
    anything below it and any of its ancestors.  With include 0 it
    returns 0 for the tuple and anything below it.  In every other
    case the function returns None.

    """
    def include_sel_func(dsrp):
        idx = dsrp.index
        # /foo/bar implicitly matches /foo, vice-versa
        if idx == tuple[:len(idx)] or idx[:len(tuple)] == tuple: return 1
        return None

    def exclude_sel_func(dsrp):
        # /foo excludes /foo/bar, not vice-versa
        if dsrp.index[:len(tuple)] != tuple: return None
        return 0

    if include == 1: sel_func = include_sel_func
    elif include == 0: sel_func = exclude_sel_func
    sel_func.exclude = not include
    sel_func.name = "Tuple select %s" % (tuple,)
    return sel_func
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment