Commit 097e2c75 authored by ben

Added statistics.py for more sophisticated statistics handling


git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup@104 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent c9418644
execfile("filename_mapping.py") execfile("statistics.py")
####################################################################### #######################################################################
# #
...@@ -108,7 +108,7 @@ class Inc: ...@@ -108,7 +108,7 @@ class Inc:
MakeStatic(Inc) MakeStatic(Inc)
class IncrementITR(IterTreeReducer): class IncrementITR(StatsITR):
"""Patch and increment iterator of increment triples """Patch and increment iterator of increment triples
This has to be an ITR because directories that have files in them This has to be an ITR because directories that have files in them
...@@ -127,13 +127,13 @@ class IncrementITR(IterTreeReducer): ...@@ -127,13 +127,13 @@ class IncrementITR(IterTreeReducer):
Remember this object needs to be pickable. Remember this object needs to be pickable.
""" """
directory, directory_replacement = None, None mirror_isdirectory, directory_replacement = None, None
changed = None changed = None
def __init__(self, inc_rpath): def __init__(self, inc_rpath):
"""Set inc_rpath, an rpath of the base of the tree""" """Set inc_rpath, an rpath of the base of the tree"""
self.inc_rpath = inc_rpath self.inc_rpath = inc_rpath
IterTreeReducer.__init__(self, inc_rpath) StatsITR.__init__(self, inc_rpath)
def start_process(self, index, diff_rorp, dsrp): def start_process(self, index, diff_rorp, dsrp):
"""Initial processing of file """Initial processing of file
...@@ -142,34 +142,15 @@ class IncrementITR(IterTreeReducer): ...@@ -142,34 +142,15 @@ class IncrementITR(IterTreeReducer):
dsrp is the local file to be incremented dsrp is the local file to be incremented
""" """
self.init_statistics(diff_rorp, dsrp) self.start_stats(dsrp)
incpref = self.inc_rpath.new_index(index) incpref = self.inc_rpath.new_index(index)
if Globals.quoting_enabled: incpref.quote_path() if Globals.quoting_enabled: incpref.quote_path()
if dsrp.isdir(): if dsrp.isdir():
self.init_dir(dsrp, diff_rorp, incpref) self.init_dir(dsrp, diff_rorp, incpref)
self.setvals(diff_rorp, dsrp, incpref) self.mirror_isdirectory = 1
else: self.init_non_dir(dsrp, diff_rorp, incpref) else: self.init_non_dir(dsrp, diff_rorp, incpref)
self.setvals(diff_rorp, dsrp, incpref)
def init_statistics(self, diff_rorp, dsrp):
"""Set initial values for various statistics
These refer to the old mirror or to new increment files. Note
that changed_file_size could be bigger than total_file_size.
The other statistic, increment_file_size, is set later when we
have that information.
"""
if dsrp.lstat():
self.total_files = 1
self.total_file_size = dsrp.getsize()
else: self.total_files = self.total_file_size = 0
if diff_rorp:
self.changed_files = 1
if dsrp.lstat(): self.changed_file_size = dsrp.getsize()
else: self.changed_file_size = 0
else: self.changed_files = self.changed_file_size = 0
self.increment_file_size = 0
def override_changed(self): def override_changed(self):
"""Set changed flag to true """Set changed flag to true
...@@ -187,7 +168,6 @@ class IncrementITR(IterTreeReducer): ...@@ -187,7 +168,6 @@ class IncrementITR(IterTreeReducer):
superclass. superclass.
""" """
self.directory = 1
self.diff_rorp = diff_rorp self.diff_rorp = diff_rorp
self.dsrp = dsrp self.dsrp = dsrp
self.incpref = incpref self.incpref = incpref
...@@ -224,53 +204,32 @@ class IncrementITR(IterTreeReducer): ...@@ -224,53 +204,32 @@ class IncrementITR(IterTreeReducer):
Robust.chain([Inc.Increment_action(diff_rorp, dsrp, incpref), Robust.chain([Inc.Increment_action(diff_rorp, dsrp, incpref),
RORPIter.patchonce_action(None, dsrp, diff_rorp)] RORPIter.patchonce_action(None, dsrp, diff_rorp)]
).execute() ).execute()
self.increment_file_size += ((Inc._inc_file and Inc._inc_file.lstat()
and Inc._inc_file.getsize()) or 0)
self.changed = 1 self.changed = 1
def end_process(self): def end_process(self):
"""Do final work when leaving a tree (directory)""" """Do final work when leaving a tree (directory)"""
if not self.directory: return
diff_rorp, dsrp, incpref = self.diff_rorp, self.dsrp, self.incpref diff_rorp, dsrp, incpref = self.diff_rorp, self.dsrp, self.incpref
if not diff_rorp and not self.changed: return if self.mirror_isdirectory:
if not diff_rorp and not self.changed: return
if self.directory_replacement:
tf = self.directory_replacement if self.directory_replacement:
Inc.Increment(tf, dsrp, incpref) tf = self.directory_replacement
RORPIter.patchonce_action(None, dsrp, tf).execute() Inc.Increment(tf, dsrp, incpref)
tf.delete() RORPIter.patchonce_action(None, dsrp, tf).execute()
else: tf.delete()
Inc.Increment(diff_rorp, dsrp, incpref) else:
if diff_rorp: Inc.Increment(diff_rorp, dsrp, incpref)
RORPIter.patchonce_action(None, dsrp, diff_rorp).execute() if diff_rorp:
RORPIter.patchonce_action(None, dsrp, diff_rorp).execute()
self.increment_file_size += ((Inc._inc_file and Inc._inc_file.lstat()
and Inc._inc_file.getsize()) or 0) self.end_stats(diff_rorp, dsrp, Inc._inc_file)
self.write_statistics() if self.incpref.isdir() and (self.mirror_isdirectory or dsrp.isdir()):
self.write_stats_to_rp(Inc.get_inc_ext(
def write_statistics(self): self.incpref.append("directory_statistics"), "data"))
"""Write the accumulated totals into file in inc directory"""
if not self.incpref.isdir(): return # only write for directories
statrp = Inc.get_inc_ext(self.incpref.append("directory_statistics"),
"data")
tf = TempFileManager.new(statrp)
def init_thunk():
fp = tf.open("w")
fp.write("TotalFiles %d\n" % self.total_files)
fp.write("TotalFileSize %d\n" % self.total_file_size)
fp.write("ChangedFiles %d\n" % self.changed_files)
fp.write("ChangedFileSize %d\n" % self.changed_file_size)
fp.write("IncrementFileSize %d\n" % self.increment_file_size)
fp.close()
Robust.make_tf_robustaction(init_thunk, (tf,), (statrp,)).execute()
def branch_process(self, subinstance): def branch_process(self, subinstance):
"""Update statistics, and the has_changed flag if change in branch""" """Update statistics, and the has_changed flag if change in branch"""
if subinstance.changed: self.changed = 1 if subinstance.changed: self.changed = 1
self.add_file_stats(subinstance)
self.total_files += subinstance.total_files
self.total_file_size += subinstance.total_file_size
self.changed_files += subinstance.changed_files
self.changed_file_size += subinstance.changed_file_size
self.increment_file_size += subinstance.increment_file_size
execfile("filename_mapping.py")
#######################################################################
#
# statistics - Generate and process aggregated backup information
#
class StatsException(Exception):
    """Raised when a statistics string cannot be parsed"""
    pass


class StatsObj:
    """Contains various statistics, provide string conversion functions

    Every name in stat_attrs is available as an attribute.  The class
    level default is None, meaning the information is not available;
    set_stat() overrides the value on a single instance.

    """
    stat_file_attrs = ('SourceFiles', 'SourceFileSize',
                       'MirrorFiles', 'MirrorFileSize',
                       'NewFiles', 'NewFileSize',
                       'DeletedFiles', 'DeletedFileSize',
                       'ChangedFiles',
                       'ChangedSourceSize', 'ChangedMirrorSize',
                       'IncrementFileSize')
    stat_time_attrs = ('StartTime', 'EndTime')
    stat_attrs = stat_time_attrs + stat_file_attrs

    # Set all stats to None, indicating info not available.  Inside a
    # class body locals() is the class namespace, so this creates one
    # class attribute per statistic.
    for attr in stat_attrs: locals()[attr] = None

    def get_stat(self, attribute):
        """Get a statistic

        The instance value is used if one was set, otherwise the class
        default.  Raises AttributeError if there is no such attribute.

        """
        # getattr() performs the instance-then-class lookup directly;
        # no need to build and eval() an expression string.
        return getattr(self, attribute)

    def set_stat(self, attr, value):
        """Set attribute to given value"""
        self.__dict__[attr] = value

    def get_stats_string(self):
        """Return string printing out statistics

        The result has one "Name value" line per statistic, in
        stat_attrs order.  Statistics that are None are left out.

        """
        slist = []
        for attr in self.stat_attrs:
            value = self.get_stat(attr)
            if value is not None:
                slist.append("%s %s" % (attr, value))
        return "\n".join(slist)

    def init_stats_from_string(self, s):
        """Initialize attributes from string, return self for convenience

        Each non-empty line of s must start with a name from stat_attrs
        followed by an integer; anything after the second field is
        ignored.  Raises StatsException on any other line.

        """
        def error(line): raise StatsException("Bad line '%s'" % line)

        for line in s.split("\n"):
            if not line: continue
            line_parts = line.split()
            if len(line_parts) < 2: error(line)
            attr, value_string = line_parts[:2]
            if attr not in self.stat_attrs: error(line)
            try: self.set_stat(attr, long(value_string))
            except ValueError: error(line)
        return self

    def write_stats_to_rp(self, rp):
        """Write statistics string to given rpath

        The string is written to a temporary file obtained from
        TempFileManager; the Robust action is presumably what moves it
        over rp -- confirm against Robust.make_tf_robustaction.

        """
        tf = TempFileManager.new(rp)
        def init_thunk():
            fp = tf.open("w")
            # close the handle even if the write fails
            try: fp.write(self.get_stats_string())
            finally: fp.close()
        Robust.make_tf_robustaction(init_thunk, (tf,), (rp,)).execute()

    def read_stats_from_rp(self, rp):
        """Set statistics from rpath, return self for convenience

        Raises StatsException if the file contents cannot be parsed.

        """
        fp = rp.open("r")
        # close the handle even if parsing raises StatsException
        try: self.init_stats_from_string(fp.read())
        finally: fp.close()
        return self

    def stats_equal(self, s):
        """Return true if s has same statistics as self

        Only the names in stat_file_attrs are compared, so the time
        statistics may differ.  Returns 1 if equal and None otherwise.

        """
        assert isinstance(s, StatsObj)
        for attr in self.stat_file_attrs:
            if self.get_stat(attr) != s.get_stat(attr): return None
        return 1
class StatsITR(IterTreeReducer, StatsObj):
    """Keep track of per directory statistics

    This is subclassed by the mirroring and incrementing ITRs.

    """
    # zero out file statistics, so they can be added to with += before
    # any value has been set on the instance
    for attr in StatsObj.stat_file_attrs: locals()[attr] = 0

    def start_stats(self, mirror_dsrp):
        """Record status of mirror dsrp

        This is called before the mirror is processed so we remember
        the old state.

        """
        if mirror_dsrp.lstat():
            self.mirror_base_exists = 1
            self.mirror_base_size = mirror_dsrp.getsize()
        else: self.mirror_base_exists = None

    def _inc_size(self, inc_rp):
        """Return size of increment file inc_rp, or 0 if there is none"""
        # Only ask for the size of a file that exists.
        # NOTE(review): assumes getsize() is not meaningful when
        # lstat() is false -- confirm against the rpath class.
        if inc_rp and inc_rp.lstat(): return inc_rp.getsize() or 0
        return 0

    def end_stats(self, diff_rorp, mirror_dsrp, inc_rp = None):
        """Set various statistics after mirror processed

        diff_rorp is true if the file changed, mirror_dsrp is the
        mirror file after processing, and inc_rp is the increment file
        if one was written.  start_stats() must have been called first.

        """
        if mirror_dsrp.lstat():
            source_size = mirror_dsrp.getsize()
            self.SourceFiles += 1
            self.SourceFileSize += source_size
            if self.mirror_base_exists:
                self.MirrorFiles += 1
                self.MirrorFileSize += self.mirror_base_size
                if diff_rorp: # otherwise no change
                    self.ChangedFiles += 1
                    self.ChangedSourceSize += source_size
                    self.ChangedMirrorSize += self.mirror_base_size
                    self.IncrementFileSize += self._inc_size(inc_rp)
            else: # new file was created
                self.NewFiles += 1
                self.NewFileSize += source_size
                self.IncrementFileSize += self._inc_size(inc_rp)
        else:
            if self.mirror_base_exists: # file was deleted from mirror
                self.MirrorFiles += 1
                self.MirrorFileSize += self.mirror_base_size
                self.DeletedFiles += 1
                self.DeletedFileSize += self.mirror_base_size
                self.IncrementFileSize += self._inc_size(inc_rp)
            else: assert None # One of before and after should exist

    def add_file_stats(self, subinstance):
        """Add all file statistics from subinstance to current totals"""
        for attr in self.stat_file_attrs:
            self.set_stat(attr,
                          self.get_stat(attr) + subinstance.get_stat(attr))
...@@ -24,8 +24,9 @@ files = ["globals.py", "static.py", "lazy.py", "log.py", "ttime.py", ...@@ -24,8 +24,9 @@ files = ["globals.py", "static.py", "lazy.py", "log.py", "ttime.py",
"iterfile.py", "rdiff.py", "connection.py", "rpath.py", "iterfile.py", "rdiff.py", "connection.py", "rpath.py",
"hardlink.py", "robust.py", "rorpiter.py", "hardlink.py", "robust.py", "rorpiter.py",
"destructive_stepping.py", "selection.py", "destructive_stepping.py", "selection.py",
"filename_mapping.py", "increment.py", "restore.py", "filename_mapping.py", "statistics.py", "increment.py",
"manage.py", "highlevel.py", "setconnections.py", "main.py"] "restore.py", "manage.py", "highlevel.py",
"setconnections.py", "main.py"]
os.system("cp header.py rdiff-backup") os.system("cp header.py rdiff-backup")
......
execfile("filename_mapping.py") execfile("statistics.py")
####################################################################### #######################################################################
# #
...@@ -108,7 +108,7 @@ class Inc: ...@@ -108,7 +108,7 @@ class Inc:
MakeStatic(Inc) MakeStatic(Inc)
class IncrementITR(IterTreeReducer): class IncrementITR(StatsITR):
"""Patch and increment iterator of increment triples """Patch and increment iterator of increment triples
This has to be an ITR because directories that have files in them This has to be an ITR because directories that have files in them
...@@ -127,13 +127,13 @@ class IncrementITR(IterTreeReducer): ...@@ -127,13 +127,13 @@ class IncrementITR(IterTreeReducer):
Remember this object needs to be pickable. Remember this object needs to be pickable.
""" """
directory, directory_replacement = None, None mirror_isdirectory, directory_replacement = None, None
changed = None changed = None
def __init__(self, inc_rpath): def __init__(self, inc_rpath):
"""Set inc_rpath, an rpath of the base of the tree""" """Set inc_rpath, an rpath of the base of the tree"""
self.inc_rpath = inc_rpath self.inc_rpath = inc_rpath
IterTreeReducer.__init__(self, inc_rpath) StatsITR.__init__(self, inc_rpath)
def start_process(self, index, diff_rorp, dsrp): def start_process(self, index, diff_rorp, dsrp):
"""Initial processing of file """Initial processing of file
...@@ -142,34 +142,15 @@ class IncrementITR(IterTreeReducer): ...@@ -142,34 +142,15 @@ class IncrementITR(IterTreeReducer):
dsrp is the local file to be incremented dsrp is the local file to be incremented
""" """
self.init_statistics(diff_rorp, dsrp) self.start_stats(dsrp)
incpref = self.inc_rpath.new_index(index) incpref = self.inc_rpath.new_index(index)
if Globals.quoting_enabled: incpref.quote_path() if Globals.quoting_enabled: incpref.quote_path()
if dsrp.isdir(): if dsrp.isdir():
self.init_dir(dsrp, diff_rorp, incpref) self.init_dir(dsrp, diff_rorp, incpref)
self.setvals(diff_rorp, dsrp, incpref) self.mirror_isdirectory = 1
else: self.init_non_dir(dsrp, diff_rorp, incpref) else: self.init_non_dir(dsrp, diff_rorp, incpref)
self.setvals(diff_rorp, dsrp, incpref)
def init_statistics(self, diff_rorp, dsrp):
"""Set initial values for various statistics
These refer to the old mirror or to new increment files. Note
that changed_file_size could be bigger than total_file_size.
The other statistic, increment_file_size, is set later when we
have that information.
"""
if dsrp.lstat():
self.total_files = 1
self.total_file_size = dsrp.getsize()
else: self.total_files = self.total_file_size = 0
if diff_rorp:
self.changed_files = 1
if dsrp.lstat(): self.changed_file_size = dsrp.getsize()
else: self.changed_file_size = 0
else: self.changed_files = self.changed_file_size = 0
self.increment_file_size = 0
def override_changed(self): def override_changed(self):
"""Set changed flag to true """Set changed flag to true
...@@ -187,7 +168,6 @@ class IncrementITR(IterTreeReducer): ...@@ -187,7 +168,6 @@ class IncrementITR(IterTreeReducer):
superclass. superclass.
""" """
self.directory = 1
self.diff_rorp = diff_rorp self.diff_rorp = diff_rorp
self.dsrp = dsrp self.dsrp = dsrp
self.incpref = incpref self.incpref = incpref
...@@ -224,53 +204,32 @@ class IncrementITR(IterTreeReducer): ...@@ -224,53 +204,32 @@ class IncrementITR(IterTreeReducer):
Robust.chain([Inc.Increment_action(diff_rorp, dsrp, incpref), Robust.chain([Inc.Increment_action(diff_rorp, dsrp, incpref),
RORPIter.patchonce_action(None, dsrp, diff_rorp)] RORPIter.patchonce_action(None, dsrp, diff_rorp)]
).execute() ).execute()
self.increment_file_size += ((Inc._inc_file and Inc._inc_file.lstat()
and Inc._inc_file.getsize()) or 0)
self.changed = 1 self.changed = 1
def end_process(self): def end_process(self):
"""Do final work when leaving a tree (directory)""" """Do final work when leaving a tree (directory)"""
if not self.directory: return
diff_rorp, dsrp, incpref = self.diff_rorp, self.dsrp, self.incpref diff_rorp, dsrp, incpref = self.diff_rorp, self.dsrp, self.incpref
if not diff_rorp and not self.changed: return if self.mirror_isdirectory:
if not diff_rorp and not self.changed: return
if self.directory_replacement:
tf = self.directory_replacement if self.directory_replacement:
Inc.Increment(tf, dsrp, incpref) tf = self.directory_replacement
RORPIter.patchonce_action(None, dsrp, tf).execute() Inc.Increment(tf, dsrp, incpref)
tf.delete() RORPIter.patchonce_action(None, dsrp, tf).execute()
else: tf.delete()
Inc.Increment(diff_rorp, dsrp, incpref) else:
if diff_rorp: Inc.Increment(diff_rorp, dsrp, incpref)
RORPIter.patchonce_action(None, dsrp, diff_rorp).execute() if diff_rorp:
RORPIter.patchonce_action(None, dsrp, diff_rorp).execute()
self.increment_file_size += ((Inc._inc_file and Inc._inc_file.lstat()
and Inc._inc_file.getsize()) or 0) self.end_stats(diff_rorp, dsrp, Inc._inc_file)
self.write_statistics() if self.incpref.isdir() and (self.mirror_isdirectory or dsrp.isdir()):
self.write_stats_to_rp(Inc.get_inc_ext(
def write_statistics(self): self.incpref.append("directory_statistics"), "data"))
"""Write the accumulated totals into file in inc directory"""
if not self.incpref.isdir(): return # only write for directories
statrp = Inc.get_inc_ext(self.incpref.append("directory_statistics"),
"data")
tf = TempFileManager.new(statrp)
def init_thunk():
fp = tf.open("w")
fp.write("TotalFiles %d\n" % self.total_files)
fp.write("TotalFileSize %d\n" % self.total_file_size)
fp.write("ChangedFiles %d\n" % self.changed_files)
fp.write("ChangedFileSize %d\n" % self.changed_file_size)
fp.write("IncrementFileSize %d\n" % self.increment_file_size)
fp.close()
Robust.make_tf_robustaction(init_thunk, (tf,), (statrp,)).execute()
def branch_process(self, subinstance): def branch_process(self, subinstance):
"""Update statistics, and the has_changed flag if change in branch""" """Update statistics, and the has_changed flag if change in branch"""
if subinstance.changed: self.changed = 1 if subinstance.changed: self.changed = 1
self.add_file_stats(subinstance)
self.total_files += subinstance.total_files
self.total_file_size += subinstance.total_file_size
self.changed_files += subinstance.changed_files
self.changed_file_size += subinstance.changed_file_size
self.increment_file_size += subinstance.increment_file_size
execfile("filename_mapping.py")
#######################################################################
#
# statistics - Generate and process aggregated backup information
#
class StatsException(Exception):
    """Raised when a statistics string cannot be parsed"""
    pass


class StatsObj:
    """Contains various statistics, provide string conversion functions

    Every name in stat_attrs is available as an attribute.  The class
    level default is None, meaning the information is not available;
    set_stat() overrides the value on a single instance.

    """
    stat_file_attrs = ('SourceFiles', 'SourceFileSize',
                       'MirrorFiles', 'MirrorFileSize',
                       'NewFiles', 'NewFileSize',
                       'DeletedFiles', 'DeletedFileSize',
                       'ChangedFiles',
                       'ChangedSourceSize', 'ChangedMirrorSize',
                       'IncrementFileSize')
    stat_time_attrs = ('StartTime', 'EndTime')
    stat_attrs = stat_time_attrs + stat_file_attrs

    # Set all stats to None, indicating info not available.  Inside a
    # class body locals() is the class namespace, so this creates one
    # class attribute per statistic.
    for attr in stat_attrs: locals()[attr] = None

    def get_stat(self, attribute):
        """Get a statistic

        The instance value is used if one was set, otherwise the class
        default.  Raises AttributeError if there is no such attribute.

        """
        # getattr() performs the instance-then-class lookup directly;
        # no need to build and eval() an expression string.
        return getattr(self, attribute)

    def set_stat(self, attr, value):
        """Set attribute to given value"""
        self.__dict__[attr] = value

    def get_stats_string(self):
        """Return string printing out statistics

        The result has one "Name value" line per statistic, in
        stat_attrs order.  Statistics that are None are left out.

        """
        slist = []
        for attr in self.stat_attrs:
            value = self.get_stat(attr)
            if value is not None:
                slist.append("%s %s" % (attr, value))
        return "\n".join(slist)

    def init_stats_from_string(self, s):
        """Initialize attributes from string, return self for convenience

        Each non-empty line of s must start with a name from stat_attrs
        followed by an integer; anything after the second field is
        ignored.  Raises StatsException on any other line.

        """
        def error(line): raise StatsException("Bad line '%s'" % line)

        for line in s.split("\n"):
            if not line: continue
            line_parts = line.split()
            if len(line_parts) < 2: error(line)
            attr, value_string = line_parts[:2]
            if attr not in self.stat_attrs: error(line)
            try: self.set_stat(attr, long(value_string))
            except ValueError: error(line)
        return self

    def write_stats_to_rp(self, rp):
        """Write statistics string to given rpath

        The string is written to a temporary file obtained from
        TempFileManager; the Robust action is presumably what moves it
        over rp -- confirm against Robust.make_tf_robustaction.

        """
        tf = TempFileManager.new(rp)
        def init_thunk():
            fp = tf.open("w")
            # close the handle even if the write fails
            try: fp.write(self.get_stats_string())
            finally: fp.close()
        Robust.make_tf_robustaction(init_thunk, (tf,), (rp,)).execute()

    def read_stats_from_rp(self, rp):
        """Set statistics from rpath, return self for convenience

        Raises StatsException if the file contents cannot be parsed.

        """
        fp = rp.open("r")
        # close the handle even if parsing raises StatsException
        try: self.init_stats_from_string(fp.read())
        finally: fp.close()
        return self

    def stats_equal(self, s):
        """Return true if s has same statistics as self

        Only the names in stat_file_attrs are compared, so the time
        statistics may differ.  Returns 1 if equal and None otherwise.

        """
        assert isinstance(s, StatsObj)
        for attr in self.stat_file_attrs:
            if self.get_stat(attr) != s.get_stat(attr): return None
        return 1
class StatsITR(IterTreeReducer, StatsObj):
    """Keep track of per directory statistics

    This is subclassed by the mirroring and incrementing ITRs.

    """
    # zero out file statistics, so they can be added to with += before
    # any value has been set on the instance
    for attr in StatsObj.stat_file_attrs: locals()[attr] = 0

    def start_stats(self, mirror_dsrp):
        """Record status of mirror dsrp

        This is called before the mirror is processed so we remember
        the old state.

        """
        if mirror_dsrp.lstat():
            self.mirror_base_exists = 1
            self.mirror_base_size = mirror_dsrp.getsize()
        else: self.mirror_base_exists = None

    def _inc_size(self, inc_rp):
        """Return size of increment file inc_rp, or 0 if there is none"""
        # Only ask for the size of a file that exists.
        # NOTE(review): assumes getsize() is not meaningful when
        # lstat() is false -- confirm against the rpath class.
        if inc_rp and inc_rp.lstat(): return inc_rp.getsize() or 0
        return 0

    def end_stats(self, diff_rorp, mirror_dsrp, inc_rp = None):
        """Set various statistics after mirror processed

        diff_rorp is true if the file changed, mirror_dsrp is the
        mirror file after processing, and inc_rp is the increment file
        if one was written.  start_stats() must have been called first.

        """
        if mirror_dsrp.lstat():
            source_size = mirror_dsrp.getsize()
            self.SourceFiles += 1
            self.SourceFileSize += source_size
            if self.mirror_base_exists:
                self.MirrorFiles += 1
                self.MirrorFileSize += self.mirror_base_size
                if diff_rorp: # otherwise no change
                    self.ChangedFiles += 1
                    self.ChangedSourceSize += source_size
                    self.ChangedMirrorSize += self.mirror_base_size
                    self.IncrementFileSize += self._inc_size(inc_rp)
            else: # new file was created
                self.NewFiles += 1
                self.NewFileSize += source_size
                self.IncrementFileSize += self._inc_size(inc_rp)
        else:
            if self.mirror_base_exists: # file was deleted from mirror
                self.MirrorFiles += 1
                self.MirrorFileSize += self.mirror_base_size
                self.DeletedFiles += 1
                self.DeletedFileSize += self.mirror_base_size
                self.IncrementFileSize += self._inc_size(inc_rp)
            else: assert None # One of before and after should exist

    def add_file_stats(self, subinstance):
        """Add all file statistics from subinstance to current totals"""
        for attr in self.stat_file_attrs:
            self.set_stat(attr,
                          self.get_stat(attr) + subinstance.get_stat(attr))
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment