Commit acbf3437 authored by bescoto's avatar bescoto

Reduce hardlink memory usage


git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup@719 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent 5aba34df
......@@ -14,6 +14,9 @@ Added supplementary rdiff-backup-statistics utility for parsing
rdiff-backup's statistics files (originally based off perl script by
Dean Gaudet).
rdiff-backup should now use much less memory than v1.1.1-1.1.4 if you
have lots of hard links.
New in v1.1.4 (2005/12/13)
--------------------------
......
......@@ -314,11 +314,8 @@ rdiff-backup-data directory. rdiff-backup will run slightly quicker
and take up a bit less space.
.TP
.BI --no-hard-links
Don't replicate hard links on destination side. Note that because
metadata is written to a separate file, hard link information will not
be lost even if the --no-hard-links option is given (however, mirror
files will not be linked). If many hard-linked files are present,
this option can drastically decrease memory usage.
Don't replicate hard links on destination side. If many hard-linked
files are present, this option can drastically decrease memory usage.
.TP
.B --null-separator
Use nulls (\\0) instead of newlines (\\n) as line separators, which
......
......@@ -31,15 +31,15 @@ source side should only transmit inode information.
"""
from __future__ import generators
import cPickle
import Globals, Time, rpath, log, robust, errno
import Globals, Time, log, robust, errno
# The keys in this dictionary are (inode, devloc) pairs. The values
# are a pair (index, remaining_links, dest_key) where index is the
# rorp index of the first such linked file, remaining_links is the
# number of files hard linked to this one we may see, and key is
# are a tuple (index, remaining_links, dest_key, sha1sum) where index
# is the rorp index of the first such linked file, remaining_links is
# the number of files hard linked to this one we may see, and dest_key
# is either (dest_inode, dest_devloc), "NA" (the destination file has
# only one link), or None (no destination file), and represents the
# hardlink info of the existing file on the destination.  Finally
# sha1sum is the sha1 digest of the source file if it has one, or None.
_inode_index = None
def initialize_dictionaries():
......@@ -64,7 +64,9 @@ def add_rorp(rorp, dest_rorp = None):
if not dest_rorp: dest_key = None
elif dest_rorp.getnumlinks() == 1: dest_key = "NA"
else: dest_key = get_inode_key(dest_rorp)
_inode_index[rp_inode_key] = (rorp.index, rorp.getnumlinks(), dest_key)
digest = rorp.has_sha1() and rorp.get_sha1() or None
_inode_index[rp_inode_key] = (rorp.index, rorp.getnumlinks(),
dest_key, digest)
return rp_inode_key
def del_rorp(rorp):
......@@ -73,12 +75,12 @@ def del_rorp(rorp):
rp_inode_key = get_inode_key(rorp)
val = _inode_index.get(rp_inode_key)
if not val: return
index, remaining, dest_key = val
index, remaining, dest_key, digest = val
if remaining == 1:
del _inode_index[rp_inode_key]
return 1
else:
_inode_index[rp_inode_key] = (index, remaining-1, dest_key)
_inode_index[rp_inode_key] = (index, remaining-1, dest_key, digest)
return 0
def rorp_eq(src_rorp, dest_rorp):
......@@ -95,11 +97,11 @@ def rorp_eq(src_rorp, dest_rorp):
if src_rorp.getnumlinks() < dest_rorp.getnumlinks(): return 0
src_key = get_inode_key(src_rorp)
index, remaining, dest_key = _inode_index[src_key]
index, remaining, dest_key, digest = _inode_index[src_key]
if dest_key == "NA":
# Allow this to be ok for first comparison, but not any
# subsequent ones
_inode_index[src_key] = (index, remaining, None)
_inode_index[src_key] = (index, remaining, None, None)
return 1
return dest_key == get_inode_key(dest_rorp)
......@@ -114,6 +116,10 @@ def get_link_index(rorp):
"""Return first index on target side rorp is already linked to"""
return _inode_index[get_inode_key(rorp)][0]
def get_sha1(rorp):
    """Return the sha1 digest recorded for the file rorp is linked to

    The digest is the fourth field of the _inode_index entry stored
    under rorp's inode key; that field is None when no digest was
    recorded for the entry.
    """
    inode_key = get_inode_key(rorp)
    index_entry = _inode_index[inode_key]
    return index_entry[3]
def link_rp(diff_rorp, dest_rpath, dest_root = None):
"""Make dest_rpath into a link using link flag in diff_rorp"""
if not dest_root: dest_root = dest_rpath # use base of dest_rpath
......
......@@ -295,11 +295,6 @@ class CacheCollatedPostProcess:
# after we're finished with them
self.dir_perms_list = []
# A dictionary of {index: source_rorp}. We use this to
# hold the digest of a hard linked file so it only needs to be
# computed once.
self.inode_digest_dict = {}
# Contains list of (index, (source_rorp, diff_rorp)) pairs for
# the parent directories of the last item in the cache.
self.parent_list = []
......@@ -326,8 +321,7 @@ class CacheCollatedPostProcess:
"""
if Globals.preserve_hardlinks and source_rorp:
if Hardlink.add_rorp(source_rorp, dest_rorp):
self.inode_digest_dict[source_rorp.index] = source_rorp
Hardlink.add_rorp(source_rorp, dest_rorp)
if (dest_rorp and dest_rorp.isdir() and Globals.process_uid != 0
and dest_rorp.getperms() % 01000 < 0700):
self.unreadable_dir_init(source_rorp, dest_rorp)
......@@ -394,8 +388,7 @@ class CacheCollatedPostProcess:
"""
if Globals.preserve_hardlinks and source_rorp:
if Hardlink.del_rorp(source_rorp):
del self.inode_digest_dict[source_rorp.index]
Hardlink.del_rorp(source_rorp)
if not changed or success:
if source_rorp: self.statfileobj.add_source_file(source_rorp)
......@@ -469,10 +462,10 @@ class CacheCollatedPostProcess:
def update_hardlink_hash(self, diff_rorp):
"""Tag associated source_rorp with same hash diff_rorp points to"""
orig_rorp = self.inode_digest_dict[diff_rorp.get_link_flag()]
if orig_rorp.has_sha1():
new_source_rorp = self.get_source_rorp(diff_rorp.index)
new_source_rorp.set_sha1(orig_rorp.get_sha1())
sha1sum = Hardlink.get_sha1(diff_rorp)
if not sha1sum: return
source_rorp = self.get_source_rorp(diff_rorp.index)
source_rorp.set_sha1(sha1sum)
def close(self):
"""Process the remaining elements in the cache"""
......
......@@ -386,4 +386,19 @@ def raise_interpreter(use_locals = None):
else: local_dict = globals()
code.InteractiveConsole(local_dict).interact()
def getrefs(i, depth):
"""Get the i'th object in memory, return objects that reference it"""
import sys, gc, types
o = sys.getobjects(i)[-1]
for d in range(depth):
for ref in gc.get_referrers(o):
if type(ref) in (types.ListType, types.DictType,
types.InstanceType):
if type(ref) is types.DictType and ref.has_key('copyright'):
continue
o = ref
break
else:
print "Max depth ", d
return o
return o
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment