Commit 097e2c75 authored by ben

Added statistics.py for more sophisticated statistics handling


git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup@104 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent c9418644
execfile("filename_mapping.py")
execfile("statistics.py")
#######################################################################
#
......@@ -108,7 +108,7 @@ class Inc:
MakeStatic(Inc)
class IncrementITR(IterTreeReducer):
class IncrementITR(StatsITR):
"""Patch and increment iterator of increment triples
This has to be an ITR because directories that have files in them
......@@ -127,13 +127,13 @@ class IncrementITR(IterTreeReducer):
Remember this object needs to be pickable.
"""
directory, directory_replacement = None, None
mirror_isdirectory, directory_replacement = None, None
changed = None
def __init__(self, inc_rpath):
"""Set inc_rpath, an rpath of the base of the tree"""
self.inc_rpath = inc_rpath
IterTreeReducer.__init__(self, inc_rpath)
StatsITR.__init__(self, inc_rpath)
def start_process(self, index, diff_rorp, dsrp):
"""Initial processing of file
......@@ -142,34 +142,15 @@ class IncrementITR(IterTreeReducer):
dsrp is the local file to be incremented
"""
self.init_statistics(diff_rorp, dsrp)
self.start_stats(dsrp)
incpref = self.inc_rpath.new_index(index)
if Globals.quoting_enabled: incpref.quote_path()
if dsrp.isdir():
self.init_dir(dsrp, diff_rorp, incpref)
self.setvals(diff_rorp, dsrp, incpref)
self.mirror_isdirectory = 1
else: self.init_non_dir(dsrp, diff_rorp, incpref)
def init_statistics(self, diff_rorp, dsrp):
"""Set initial values for various statistics
These refer to the old mirror or to new increment files. Note
that changed_file_size could be bigger than total_file_size.
The other statistic, increment_file_size, is set later when we
have that information.
"""
if dsrp.lstat():
self.total_files = 1
self.total_file_size = dsrp.getsize()
else: self.total_files = self.total_file_size = 0
if diff_rorp:
self.changed_files = 1
if dsrp.lstat(): self.changed_file_size = dsrp.getsize()
else: self.changed_file_size = 0
else: self.changed_files = self.changed_file_size = 0
self.increment_file_size = 0
self.setvals(diff_rorp, dsrp, incpref)
def override_changed(self):
"""Set changed flag to true
......@@ -187,7 +168,6 @@ class IncrementITR(IterTreeReducer):
superclass.
"""
self.directory = 1
self.diff_rorp = diff_rorp
self.dsrp = dsrp
self.incpref = incpref
......@@ -224,53 +204,32 @@ class IncrementITR(IterTreeReducer):
Robust.chain([Inc.Increment_action(diff_rorp, dsrp, incpref),
RORPIter.patchonce_action(None, dsrp, diff_rorp)]
).execute()
self.increment_file_size += ((Inc._inc_file and Inc._inc_file.lstat()
and Inc._inc_file.getsize()) or 0)
self.changed = 1
def end_process(self):
"""Do final work when leaving a tree (directory)"""
if not self.directory: return
diff_rorp, dsrp, incpref = self.diff_rorp, self.dsrp, self.incpref
if not diff_rorp and not self.changed: return
if self.directory_replacement:
tf = self.directory_replacement
Inc.Increment(tf, dsrp, incpref)
RORPIter.patchonce_action(None, dsrp, tf).execute()
tf.delete()
else:
Inc.Increment(diff_rorp, dsrp, incpref)
if diff_rorp:
RORPIter.patchonce_action(None, dsrp, diff_rorp).execute()
self.increment_file_size += ((Inc._inc_file and Inc._inc_file.lstat()
and Inc._inc_file.getsize()) or 0)
self.write_statistics()
def write_statistics(self):
"""Write the accumulated totals into file in inc directory"""
if not self.incpref.isdir(): return # only write for directories
statrp = Inc.get_inc_ext(self.incpref.append("directory_statistics"),
"data")
tf = TempFileManager.new(statrp)
def init_thunk():
fp = tf.open("w")
fp.write("TotalFiles %d\n" % self.total_files)
fp.write("TotalFileSize %d\n" % self.total_file_size)
fp.write("ChangedFiles %d\n" % self.changed_files)
fp.write("ChangedFileSize %d\n" % self.changed_file_size)
fp.write("IncrementFileSize %d\n" % self.increment_file_size)
fp.close()
Robust.make_tf_robustaction(init_thunk, (tf,), (statrp,)).execute()
if self.mirror_isdirectory:
if not diff_rorp and not self.changed: return
if self.directory_replacement:
tf = self.directory_replacement
Inc.Increment(tf, dsrp, incpref)
RORPIter.patchonce_action(None, dsrp, tf).execute()
tf.delete()
else:
Inc.Increment(diff_rorp, dsrp, incpref)
if diff_rorp:
RORPIter.patchonce_action(None, dsrp, diff_rorp).execute()
self.end_stats(diff_rorp, dsrp, Inc._inc_file)
if self.incpref.isdir() and (self.mirror_isdirectory or dsrp.isdir()):
self.write_stats_to_rp(Inc.get_inc_ext(
self.incpref.append("directory_statistics"), "data"))
def branch_process(self, subinstance):
"""Update statistics, and the has_changed flag if change in branch"""
if subinstance.changed: self.changed = 1
self.add_file_stats(subinstance)
self.total_files += subinstance.total_files
self.total_file_size += subinstance.total_file_size
self.changed_files += subinstance.changed_files
self.changed_file_size += subinstance.changed_file_size
self.increment_file_size += subinstance.increment_file_size
execfile("filename_mapping.py")
#######################################################################
#
# statistics - Generate and process aggregated backup information
#
class StatsException(Exception):
    """Raised when a line of a statistics string cannot be parsed"""
    pass


class StatsObj:
    """Contains various statistics, provide string conversion functions

    Every name in stat_attrs exists as a class attribute defaulting to
    None, which means "information not available".  Real values are
    stored on the instance by set_stat.

    """
    stat_file_attrs = ('SourceFiles', 'SourceFileSize',
                       'MirrorFiles', 'MirrorFileSize',
                       'NewFiles', 'NewFileSize',
                       'DeletedFiles', 'DeletedFileSize',
                       'ChangedFiles',
                       'ChangedSourceSize', 'ChangedMirrorSize',
                       'IncrementFileSize')
    stat_time_attrs = ('StartTime', 'EndTime')
    stat_attrs = stat_time_attrs + stat_file_attrs

    # Set all stats to None, indicating info not available.  Inside a
    # class body locals() is the namespace the class is built from, so
    # assigning through it creates class attributes.
    for attr in stat_attrs: locals()[attr] = None

    def get_stat(self, attribute):
        """Get a statistic

        The instance value is used if one has been set, otherwise the
        class-level default.  An unknown name raises AttributeError.

        """
        # getattr does the instance-then-class lookup directly; the
        # attribute name is never evaluated as code.
        return getattr(self, attribute)

    def set_stat(self, attr, value):
        """Set attribute to given value"""
        self.__dict__[attr] = value

    def get_stats_string(self):
        """Return string printing out statistics

        One "Name value" line per statistic, in stat_attrs order and
        joined by newlines.  Statistics that are still None are left
        out.

        """
        slist = []
        for attr in self.stat_attrs:
            value = self.get_stat(attr)  # look each stat up only once
            if value is not None:
                slist.append("%s %s" % (attr, value))
        return "\n".join(slist)

    def init_stats_from_string(self, s):
        """Initialize attributes from string, return self for convenience

        s holds one "Name value" line per statistic; empty lines are
        skipped and fields after the second are ignored.  Raises
        StatsException for a line with fewer than two fields, a name
        not in stat_attrs, or a value that is not an integer.

        """
        def error(line): raise StatsException("Bad line '%s'" % line)

        for line in s.split("\n"):
            if not line: continue
            line_parts = line.split()
            if len(line_parts) < 2: error(line)
            attr, value_string = line_parts[:2]
            if attr not in self.stat_attrs: error(line)
            try: self.set_stat(attr, long(value_string))
            except ValueError: error(line)
        return self

    def write_stats_to_rp(self, rp):
        """Write statistics string to given rpath

        The text is written to a temporary file obtained from
        TempFileManager and handed to Robust.make_tf_robustaction
        together with rp.

        """
        tf = TempFileManager.new(rp)
        def init_thunk():
            fp = tf.open("w")
            # close the handle even if the write itself fails
            try: fp.write(self.get_stats_string())
            finally: fp.close()
        Robust.make_tf_robustaction(init_thunk, (tf,), (rp,)).execute()

    def read_stats_from_rp(self, rp):
        """Set statistics from rpath, return self for convenience

        Raises StatsException if the file contents cannot be parsed.

        """
        fp = rp.open("r")
        # close the handle even if parsing raises StatsException
        try: self.init_stats_from_string(fp.read())
        finally: fp.close()
        return self

    def stats_equal(self, s):
        """Return true if s has same statistics as self

        Only the file statistics are compared; StartTime and EndTime
        are ignored.  Returns 1 when equal and None otherwise.

        """
        assert isinstance(s, StatsObj)
        for attr in self.stat_file_attrs:
            if self.get_stat(attr) != s.get_stat(attr): return None
        return 1
class StatsITR(IterTreeReducer, StatsObj):
    """Keep track of per directory statistics

    This is subclassed by the mirroring and incrementing ITRs.

    """
    # zero out file statistics
    for attr in StatsObj.stat_file_attrs: locals()[attr] = 0

    def start_stats(self, mirror_dsrp):
        """Record status of mirror dsrp

        This is called before the mirror is processed so we remember
        the old state.

        """
        if mirror_dsrp.lstat():
            self.mirror_base_exists = 1
            self.mirror_base_size = mirror_dsrp.getsize()
        else:
            self.mirror_base_exists = None
            # keep the attribute defined even when there was no file
            self.mirror_base_size = 0

    def _inc_file_size(self, inc_rp):
        """Return size of increment file inc_rp, or 0 if there is none

        The lstat() test skips getsize() on a path with no stat
        information, as the code this class replaced also did.

        """
        return (inc_rp and inc_rp.lstat() and inc_rp.getsize()) or 0

    def end_stats(self, diff_rorp, mirror_dsrp, inc_rp = None):
        """Set various statistics after mirror processed

        diff_rorp is true if the file changed.  mirror_dsrp is the
        mirror file after processing; its earlier state is the one
        recorded by start_stats.  inc_rp, if given, is the increment
        file whose size is added to IncrementFileSize whenever a
        change, creation or deletion is counted.

        """
        if mirror_dsrp.lstat():
            self.SourceFiles += 1
            self.SourceFileSize += mirror_dsrp.getsize()
            if self.mirror_base_exists:
                self.MirrorFiles += 1
                self.MirrorFileSize += self.mirror_base_size
                if diff_rorp: # otherwise no change
                    self.ChangedFiles += 1
                    self.ChangedSourceSize += mirror_dsrp.getsize()
                    self.ChangedMirrorSize += self.mirror_base_size
                    self.IncrementFileSize += self._inc_file_size(inc_rp)
            else: # new file was created
                self.NewFiles += 1
                self.NewFileSize += mirror_dsrp.getsize()
                self.IncrementFileSize += self._inc_file_size(inc_rp)
        else:
            # One of before and after should exist
            assert self.mirror_base_exists, \
                   "file exists neither before nor after processing"
            # file was deleted from mirror
            self.MirrorFiles += 1
            self.MirrorFileSize += self.mirror_base_size
            self.DeletedFiles += 1
            self.DeletedFileSize += self.mirror_base_size
            self.IncrementFileSize += self._inc_file_size(inc_rp)

    def add_file_stats(self, subinstance):
        """Add all file statistics from subinstance to current totals"""
        for attr in self.stat_file_attrs:
            self.set_stat(attr,
                          self.get_stat(attr) + subinstance.get_stat(attr))
......@@ -24,8 +24,9 @@ files = ["globals.py", "static.py", "lazy.py", "log.py", "ttime.py",
"iterfile.py", "rdiff.py", "connection.py", "rpath.py",
"hardlink.py", "robust.py", "rorpiter.py",
"destructive_stepping.py", "selection.py",
"filename_mapping.py", "increment.py", "restore.py",
"manage.py", "highlevel.py", "setconnections.py", "main.py"]
"filename_mapping.py", "statistics.py", "increment.py",
"restore.py", "manage.py", "highlevel.py",
"setconnections.py", "main.py"]
os.system("cp header.py rdiff-backup")
......
execfile("filename_mapping.py")
execfile("statistics.py")
#######################################################################
#
......@@ -108,7 +108,7 @@ class Inc:
MakeStatic(Inc)
class IncrementITR(IterTreeReducer):
class IncrementITR(StatsITR):
"""Patch and increment iterator of increment triples
This has to be an ITR because directories that have files in them
......@@ -127,13 +127,13 @@ class IncrementITR(IterTreeReducer):
Remember this object needs to be pickable.
"""
directory, directory_replacement = None, None
mirror_isdirectory, directory_replacement = None, None
changed = None
def __init__(self, inc_rpath):
"""Set inc_rpath, an rpath of the base of the tree"""
self.inc_rpath = inc_rpath
IterTreeReducer.__init__(self, inc_rpath)
StatsITR.__init__(self, inc_rpath)
def start_process(self, index, diff_rorp, dsrp):
"""Initial processing of file
......@@ -142,34 +142,15 @@ class IncrementITR(IterTreeReducer):
dsrp is the local file to be incremented
"""
self.init_statistics(diff_rorp, dsrp)
self.start_stats(dsrp)
incpref = self.inc_rpath.new_index(index)
if Globals.quoting_enabled: incpref.quote_path()
if dsrp.isdir():
self.init_dir(dsrp, diff_rorp, incpref)
self.setvals(diff_rorp, dsrp, incpref)
self.mirror_isdirectory = 1
else: self.init_non_dir(dsrp, diff_rorp, incpref)
def init_statistics(self, diff_rorp, dsrp):
"""Set initial values for various statistics
These refer to the old mirror or to new increment files. Note
that changed_file_size could be bigger than total_file_size.
The other statistic, increment_file_size, is set later when we
have that information.
"""
if dsrp.lstat():
self.total_files = 1
self.total_file_size = dsrp.getsize()
else: self.total_files = self.total_file_size = 0
if diff_rorp:
self.changed_files = 1
if dsrp.lstat(): self.changed_file_size = dsrp.getsize()
else: self.changed_file_size = 0
else: self.changed_files = self.changed_file_size = 0
self.increment_file_size = 0
self.setvals(diff_rorp, dsrp, incpref)
def override_changed(self):
"""Set changed flag to true
......@@ -187,7 +168,6 @@ class IncrementITR(IterTreeReducer):
superclass.
"""
self.directory = 1
self.diff_rorp = diff_rorp
self.dsrp = dsrp
self.incpref = incpref
......@@ -224,53 +204,32 @@ class IncrementITR(IterTreeReducer):
Robust.chain([Inc.Increment_action(diff_rorp, dsrp, incpref),
RORPIter.patchonce_action(None, dsrp, diff_rorp)]
).execute()
self.increment_file_size += ((Inc._inc_file and Inc._inc_file.lstat()
and Inc._inc_file.getsize()) or 0)
self.changed = 1
def end_process(self):
"""Do final work when leaving a tree (directory)"""
if not self.directory: return
diff_rorp, dsrp, incpref = self.diff_rorp, self.dsrp, self.incpref
if not diff_rorp and not self.changed: return
if self.directory_replacement:
tf = self.directory_replacement
Inc.Increment(tf, dsrp, incpref)
RORPIter.patchonce_action(None, dsrp, tf).execute()
tf.delete()
else:
Inc.Increment(diff_rorp, dsrp, incpref)
if diff_rorp:
RORPIter.patchonce_action(None, dsrp, diff_rorp).execute()
self.increment_file_size += ((Inc._inc_file and Inc._inc_file.lstat()
and Inc._inc_file.getsize()) or 0)
self.write_statistics()
def write_statistics(self):
"""Write the accumulated totals into file in inc directory"""
if not self.incpref.isdir(): return # only write for directories
statrp = Inc.get_inc_ext(self.incpref.append("directory_statistics"),
"data")
tf = TempFileManager.new(statrp)
def init_thunk():
fp = tf.open("w")
fp.write("TotalFiles %d\n" % self.total_files)
fp.write("TotalFileSize %d\n" % self.total_file_size)
fp.write("ChangedFiles %d\n" % self.changed_files)
fp.write("ChangedFileSize %d\n" % self.changed_file_size)
fp.write("IncrementFileSize %d\n" % self.increment_file_size)
fp.close()
Robust.make_tf_robustaction(init_thunk, (tf,), (statrp,)).execute()
if self.mirror_isdirectory:
if not diff_rorp and not self.changed: return
if self.directory_replacement:
tf = self.directory_replacement
Inc.Increment(tf, dsrp, incpref)
RORPIter.patchonce_action(None, dsrp, tf).execute()
tf.delete()
else:
Inc.Increment(diff_rorp, dsrp, incpref)
if diff_rorp:
RORPIter.patchonce_action(None, dsrp, diff_rorp).execute()
self.end_stats(diff_rorp, dsrp, Inc._inc_file)
if self.incpref.isdir() and (self.mirror_isdirectory or dsrp.isdir()):
self.write_stats_to_rp(Inc.get_inc_ext(
self.incpref.append("directory_statistics"), "data"))
def branch_process(self, subinstance):
"""Update statistics, and the has_changed flag if change in branch"""
if subinstance.changed: self.changed = 1
self.add_file_stats(subinstance)
self.total_files += subinstance.total_files
self.total_file_size += subinstance.total_file_size
self.changed_files += subinstance.changed_files
self.changed_file_size += subinstance.changed_file_size
self.increment_file_size += subinstance.increment_file_size
execfile("filename_mapping.py")
#######################################################################
#
# statistics - Generate and process aggregated backup information
#
class StatsException(Exception):
    """Raised when a line of a statistics string cannot be parsed"""
    pass


class StatsObj:
    """Contains various statistics, provide string conversion functions

    Every name in stat_attrs exists as a class attribute defaulting to
    None, which means "information not available".  Real values are
    stored on the instance by set_stat.

    """
    stat_file_attrs = ('SourceFiles', 'SourceFileSize',
                       'MirrorFiles', 'MirrorFileSize',
                       'NewFiles', 'NewFileSize',
                       'DeletedFiles', 'DeletedFileSize',
                       'ChangedFiles',
                       'ChangedSourceSize', 'ChangedMirrorSize',
                       'IncrementFileSize')
    stat_time_attrs = ('StartTime', 'EndTime')
    stat_attrs = stat_time_attrs + stat_file_attrs

    # Set all stats to None, indicating info not available.  Inside a
    # class body locals() is the namespace the class is built from, so
    # assigning through it creates class attributes.
    for attr in stat_attrs: locals()[attr] = None

    def get_stat(self, attribute):
        """Get a statistic

        The instance value is used if one has been set, otherwise the
        class-level default.  An unknown name raises AttributeError.

        """
        # getattr does the instance-then-class lookup directly; the
        # attribute name is never evaluated as code.
        return getattr(self, attribute)

    def set_stat(self, attr, value):
        """Set attribute to given value"""
        self.__dict__[attr] = value

    def get_stats_string(self):
        """Return string printing out statistics

        One "Name value" line per statistic, in stat_attrs order and
        joined by newlines.  Statistics that are still None are left
        out.

        """
        slist = []
        for attr in self.stat_attrs:
            value = self.get_stat(attr)  # look each stat up only once
            if value is not None:
                slist.append("%s %s" % (attr, value))
        return "\n".join(slist)

    def init_stats_from_string(self, s):
        """Initialize attributes from string, return self for convenience

        s holds one "Name value" line per statistic; empty lines are
        skipped and fields after the second are ignored.  Raises
        StatsException for a line with fewer than two fields, a name
        not in stat_attrs, or a value that is not an integer.

        """
        def error(line): raise StatsException("Bad line '%s'" % line)

        for line in s.split("\n"):
            if not line: continue
            line_parts = line.split()
            if len(line_parts) < 2: error(line)
            attr, value_string = line_parts[:2]
            if attr not in self.stat_attrs: error(line)
            try: self.set_stat(attr, long(value_string))
            except ValueError: error(line)
        return self

    def write_stats_to_rp(self, rp):
        """Write statistics string to given rpath

        The text is written to a temporary file obtained from
        TempFileManager and handed to Robust.make_tf_robustaction
        together with rp.

        """
        tf = TempFileManager.new(rp)
        def init_thunk():
            fp = tf.open("w")
            # close the handle even if the write itself fails
            try: fp.write(self.get_stats_string())
            finally: fp.close()
        Robust.make_tf_robustaction(init_thunk, (tf,), (rp,)).execute()

    def read_stats_from_rp(self, rp):
        """Set statistics from rpath, return self for convenience

        Raises StatsException if the file contents cannot be parsed.

        """
        fp = rp.open("r")
        # close the handle even if parsing raises StatsException
        try: self.init_stats_from_string(fp.read())
        finally: fp.close()
        return self

    def stats_equal(self, s):
        """Return true if s has same statistics as self

        Only the file statistics are compared; StartTime and EndTime
        are ignored.  Returns 1 when equal and None otherwise.

        """
        assert isinstance(s, StatsObj)
        for attr in self.stat_file_attrs:
            if self.get_stat(attr) != s.get_stat(attr): return None
        return 1
class StatsITR(IterTreeReducer, StatsObj):
    """Keep track of per directory statistics

    This is subclassed by the mirroring and incrementing ITRs.

    """
    # zero out file statistics
    for attr in StatsObj.stat_file_attrs: locals()[attr] = 0

    def start_stats(self, mirror_dsrp):
        """Record status of mirror dsrp

        This is called before the mirror is processed so we remember
        the old state.

        """
        if mirror_dsrp.lstat():
            self.mirror_base_exists = 1
            self.mirror_base_size = mirror_dsrp.getsize()
        else:
            self.mirror_base_exists = None
            # keep the attribute defined even when there was no file
            self.mirror_base_size = 0

    def _inc_file_size(self, inc_rp):
        """Return size of increment file inc_rp, or 0 if there is none

        The lstat() test skips getsize() on a path with no stat
        information, as the code this class replaced also did.

        """
        return (inc_rp and inc_rp.lstat() and inc_rp.getsize()) or 0

    def end_stats(self, diff_rorp, mirror_dsrp, inc_rp = None):
        """Set various statistics after mirror processed

        diff_rorp is true if the file changed.  mirror_dsrp is the
        mirror file after processing; its earlier state is the one
        recorded by start_stats.  inc_rp, if given, is the increment
        file whose size is added to IncrementFileSize whenever a
        change, creation or deletion is counted.

        """
        if mirror_dsrp.lstat():
            self.SourceFiles += 1
            self.SourceFileSize += mirror_dsrp.getsize()
            if self.mirror_base_exists:
                self.MirrorFiles += 1
                self.MirrorFileSize += self.mirror_base_size
                if diff_rorp: # otherwise no change
                    self.ChangedFiles += 1
                    self.ChangedSourceSize += mirror_dsrp.getsize()
                    self.ChangedMirrorSize += self.mirror_base_size
                    self.IncrementFileSize += self._inc_file_size(inc_rp)
            else: # new file was created
                self.NewFiles += 1
                self.NewFileSize += mirror_dsrp.getsize()
                self.IncrementFileSize += self._inc_file_size(inc_rp)
        else:
            # One of before and after should exist
            assert self.mirror_base_exists, \
                   "file exists neither before nor after processing"
            # file was deleted from mirror
            self.MirrorFiles += 1
            self.MirrorFileSize += self.mirror_base_size
            self.DeletedFiles += 1
            self.DeletedFileSize += self.mirror_base_size
            self.IncrementFileSize += self._inc_file_size(inc_rp)

    def add_file_stats(self, subinstance):
        """Add all file statistics from subinstance to current totals"""
        for attr in self.stat_file_attrs:
            self.set_stat(attr,
                          self.get_stat(attr) + subinstance.get_stat(attr))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment