Commit 9fb3b17a authored by bescoto's avatar bescoto

Write SHA1 digests for all regular files


git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup@662 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent 5b216be1
New in v1.1.1 (????/??/??)
--------------------------
rdiff-backup now writes SHA1 sums into its mirror_metadata file for
all regular files.
Applied Alec Berryman's patch to update the no-compression regexp.
Alec Berryman's fs_abilities patch is supposed to help with AFS.
......
# Copyright 2002 Ben Escoto
# Copyright 2002, 2005 Ben Escoto
#
# This file is part of rdiff-backup.
#
......@@ -58,13 +58,14 @@ def get_inode_key(rorp):
def add_rorp(rorp, dest_rorp = None):
    """Process new rorp and update hard link dictionaries

    Returns the rorp's inode key if the file takes part in hard link
    tracking (a regular file with more than one link), else None.
    """
    # Only regular files with multiple links need hard link tracking
    if not rorp.isreg() or rorp.getnumlinks() < 2: return None
    rp_inode_key = get_inode_key(rorp)
    if not _inode_index.has_key(rp_inode_key):
        # First sighting of this inode: record its index, total link
        # count, and the matching destination inode key (if any).
        # "NA" marks a destination that exists but is not hard linked.
        if not dest_rorp: dest_key = None
        elif dest_rorp.getnumlinks() == 1: dest_key = "NA"
        else: dest_key = get_inode_key(dest_rorp)
        _inode_index[rp_inode_key] = (rorp.index, rorp.getnumlinks(), dest_key)
    return rp_inode_key
def del_rorp(rorp):
"""Remove rorp information from dictionary if seen all links"""
......@@ -73,8 +74,12 @@ def del_rorp(rorp):
val = _inode_index.get(rp_inode_key)
if not val: return
index, remaining, dest_key = val
if remaining == 1: del _inode_index[rp_inode_key]
else: _inode_index[rp_inode_key] = (index, remaining-1, dest_key)
if remaining == 1:
del _inode_index[rp_inode_key]
return 1
else:
_inode_index[rp_inode_key] = (index, remaining-1, dest_key)
return 0
def rorp_eq(src_rorp, dest_rorp):
"""Compare hardlinked for equality
......
# Copyright 2002 Ben Escoto
# Copyright 2002, 2005 Ben Escoto
#
# This file is part of rdiff-backup.
#
......@@ -20,7 +20,7 @@
"""Invoke rdiff utility to make signatures, deltas, or patch"""
import os, librsync
import Globals, log, static, TempFile, rpath
import Globals, log, static, TempFile, rpath, hash
def get_signature(rp, blocksize = None):
......@@ -53,6 +53,14 @@ def get_delta_sigrp(rp_signature, rp_new):
(rp_new.path, rp_signature.get_indexpath()), 7)
return librsync.DeltaFile(rp_signature.open("rb"), rp_new.open("rb"))
def get_delta_sigrp_hash(rp_signature, rp_new):
    """Return delta file from rp_signature to rp_new, hashing rp_new

    Like get_delta_sigrp, except rp_new is read through a
    hash.FileWrapper, so closing the returned DeltaFile yields the
    new file's digest report as its close value.
    """
    log.Log("Getting delta with hash of %s with signature %s" %
            (rp_new.path, rp_signature.get_indexpath()), 7)
    sig_fp = rp_signature.open("rb")
    hashed_new_fp = hash.FileWrapper(rp_new.open("rb"))
    return librsync.DeltaFile(sig_fp, hashed_new_fp)
def write_delta(basis, new, delta, compress = None):
"""Write rdiff delta which brings basis to new"""
log.Log("Writing delta %s from %s -> %s" %
......@@ -68,8 +76,9 @@ def write_patched_fp(basis_fp, delta_fp, out_fp):
def write_via_tempfile(fp, rp):
    """Write fileobj fp to rp by writing to tempfile and renaming

    Returns the value of write_from_fileobj (fp's close value), so
    hash information computed during the write can be passed along.
    """
    tf = TempFile.new(rp)
    retval = tf.write_from_fileobj(fp)
    # Only rename over rp once the whole write has succeeded
    rpath.rename(tf, rp)
    return retval
def patch_local(rp_basis, rp_delta, outrp = None, delta_compressed = None):
"""Patch routine that must be run locally, writes to outrp
......@@ -83,8 +92,8 @@ def patch_local(rp_basis, rp_delta, outrp = None, delta_compressed = None):
if delta_compressed: deltafile = rp_delta.open("rb", 1)
else: deltafile = rp_delta.open("rb")
patchfile = librsync.PatchedFile(rp_basis.open("rb"), deltafile)
if outrp: outrp.write_from_fileobj(patchfile)
else: write_via_tempfile(patchfile, rp_basis)
if outrp: return outrp.write_from_fileobj(patchfile)
else: return write_via_tempfile(patchfile, rp_basis)
def copy_local(rpin, rpout, rpnew = None):
"""Write rpnew == rpin using rpout as basis. rpout and rpnew local"""
......
......@@ -23,7 +23,7 @@ from __future__ import generators
import errno
import Globals, metadata, rorpiter, TempFile, Hardlink, robust, increment, \
rpath, static, log, selection, Time, Rdiff, statistics, iterfile, \
eas_acls
eas_acls, hash
def Mirror(src_rpath, dest_rpath):
"""Turn dest_rpath into a copy of src_rpath"""
......@@ -85,14 +85,14 @@ class SourceStruct:
"""Attach file of snapshot to diff_rorp, w/ error checking"""
fileobj = robust.check_common_error(
error_handler, rpath.RPath.open, (src_rp, "rb"))
if fileobj: diff_rorp.setfile(fileobj)
if fileobj: diff_rorp.setfile(hash.FileWrapper(fileobj))
else: diff_rorp.zero()
diff_rorp.set_attached_filetype('snapshot')
def attach_diff(diff_rorp, src_rp, dest_sig):
"""Attach file of diff to diff_rorp, w/ error checking"""
fileobj = robust.check_common_error(
error_handler, Rdiff.get_delta_sigrp, (dest_sig, src_rp))
error_handler, Rdiff.get_delta_sigrp_hash, (dest_sig, src_rp))
if fileobj:
diff_rorp.setfile(fileobj)
diff_rorp.set_attached_filetype('diff')
......@@ -255,6 +255,9 @@ class CacheCollatedPostProcess:
we enter them to compute signatures, and then reset after we
are done patching everything inside them.
4. We need some place to put hashes (like SHA1) after computing
them and before writing them to the metadata.
The class caches older source_rorps and dest_rps so the patch
function can retrieve them if necessary. The patch function can
also update the processed correctly flag. When an item falls out
......@@ -294,6 +297,11 @@ class CacheCollatedPostProcess:
# after we're finished with them
self.dir_perms_list = []
# A dictionary of {index: source_rorp}. We use this to
# hold the digest of a hard linked file so it only needs to be
# computed once.
self.inode_digest_dict = {}
def __iter__(self): return self  # iterator protocol; items are produced by next()
def next(self):
......@@ -316,7 +324,8 @@ class CacheCollatedPostProcess:
"""
if Globals.preserve_hardlinks and source_rorp:
Hardlink.add_rorp(source_rorp, dest_rorp)
if Hardlink.add_rorp(source_rorp, dest_rorp):
self.inode_digest_dict[source_rorp.index] = source_rorp
if (dest_rorp and dest_rorp.isdir() and Globals.process_uid != 0
and dest_rorp.getperms() % 01000 < 0700):
self.unreadable_dir_init(source_rorp, dest_rorp)
......@@ -359,7 +368,8 @@ class CacheCollatedPostProcess:
"""
if Globals.preserve_hardlinks and source_rorp:
Hardlink.del_rorp(source_rorp)
if Hardlink.del_rorp(source_rorp):
del self.inode_digest_dict[source_rorp.index]
if not changed or success:
if source_rorp: self.statfileobj.add_source_file(source_rorp)
......@@ -424,6 +434,17 @@ class CacheCollatedPostProcess:
"""Retrieve mirror_rorp with given index from cache"""
return self.cache_dict[index][1]
def update_hash(self, index, sha1sum):
    """Record sha1sum on the cached source rorp at the given index"""
    source_rorp = self.get_source_rorp(index)
    source_rorp.set_sha1(sha1sum)
def update_hardlink_hash(self, diff_rorp):
    """Tag associated source_rorp with same hash diff_rorp points to"""
    # The first-seen link of this inode was stashed in inode_digest_dict
    linked_rorp = self.inode_digest_dict[diff_rorp.get_link_flag()]
    if not linked_rorp.has_sha1(): return
    target_rorp = self.get_source_rorp(diff_rorp.index)
    target_rorp.set_sha1(linked_rorp.get_sha1())
def close(self):
"""Process the remaining elements in the cache"""
while self.cache_indicies: self.shorten_cache()
......@@ -486,24 +507,52 @@ class PatchITRB(rorpiter.ITRBranch):
if tf.lstat(): tf.delete()
def patch_to_temp(self, basis_rp, diff_rorp, new):
    """Patch basis_rp, writing output in new, which doesn't exist yet

    Returns true if able to write new as desired, false if
    UpdateError or similar gets in the way.
    """
    if diff_rorp.isflaglinked():
        # Hard link to an already-written copy of this inode
        self.patch_hardlink_to_temp(diff_rorp, new)
    elif diff_rorp.get_attached_filetype() == 'snapshot':
        if not self.patch_snapshot_to_temp(diff_rorp, new): return 0
    elif not self.patch_diff_to_temp(basis_rp, diff_rorp, new):
        return 0
    # Flag-linked files skip the attrib copy: a hard link shares its
    # target's inode attributes already
    if new.lstat() and not diff_rorp.isflaglinked():
        rpath.copy_attribs(diff_rorp, new)
    return self.matches_cached_rorp(diff_rorp, new)
def patch_hardlink_to_temp(self, diff_rorp, new):
    """Hardlink diff_rorp to temp, update hash if necessary"""
    # Recreate the hard link at the temp location, relative to basis root
    Hardlink.link_rp(diff_rorp, new, self.basis_root_rp)
    # Reuse the digest computed for the first-seen link of this inode
    # so it only needs to be computed once per inode
    self.CCPP.update_hardlink_hash(diff_rorp)
def patch_snapshot_to_temp(self, diff_rorp, new):
    """Write diff_rorp to new, return true if successful"""
    if diff_rorp.isspecial():
        # Special file (device/socket/fifo): recreate directly,
        # there is no regular data stream to hash
        self.write_special(diff_rorp, new)
        rpath.copy_attribs(diff_rorp, new)
        return 1
    copy_result = robust.check_common_error(
        self.error_handler, rpath.copy, (diff_rorp, new))
    if isinstance(copy_result, hash.Report):
        # Copy produced a hash report -- record the digest for metadata
        self.CCPP.update_hash(diff_rorp.index, copy_result.sha1_digest)
        return 1
    # A result of 0 means the error handler caught something
    return copy_result != 0
def patch_diff_to_temp(self, basis_rp, diff_rorp, new):
    """Apply diff_rorp to basis_rp, write output in new"""
    assert diff_rorp.get_attached_filetype() == 'diff'
    patch_result = robust.check_common_error(
        self.error_handler, Rdiff.patch_local, (basis_rp, diff_rorp, new))
    if isinstance(patch_result, hash.Report):
        # Patch produced a hash report -- record the digest for metadata
        self.CCPP.update_hash(diff_rorp.index, patch_result.sha1_digest)
        return 1
    return patch_result != 0  # 0 means the error handler fired
def matches_cached_rorp(self, diff_rorp, new_rp):
"""Return true if new_rp matches cached src rorp
......
# Copyright 2005 Ben Escoto
#
# This file is part of rdiff-backup.
#
# rdiff-backup is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# rdiff-backup is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rdiff-backup; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
# USA
"""Contains a file wrapper that returns a hash on close"""
import sha
class FileWrapper:
    """Wrapper around a file-like object

    Only use this with files that will be read through in a single
    pass and then closed. (There is no seek().) When you close it,
    the return value is a Report.

    Currently this just calculates a sha1sum of the datastream.
    """
    def __init__(self, fileobj):
        self.fileobj = fileobj   # underlying file-like object
        self.sha1 = sha.new()    # running sha1 of all bytes read
        self.closed = 0          # set once close() has been called
    def read(self, length = -1):
        assert not self.closed
        buf = self.fileobj.read(length)
        self.sha1.update(buf)
        return buf
    def close(self):
        # Fix: mark closed so the assert in read() actually guards
        # against reads after close (the flag was never set before)
        self.closed = 1
        return Report(self.fileobj.close(), self.sha1.hexdigest())
class Report:
    """Hold final information about a byte stream

    Carries the sha1 hex digest of the stream; the wrapped file's
    close value is expected to be falsy (a clean close).
    """
    def __init__(self, close_val, sha1_digest):
        # For now just assume the inner file closes correctly
        assert not close_val
        self.sha1_digest = sha1_digest
# Copyright 2002 Ben Escoto
# Copyright 2002, 2005 Ben Escoto
#
# This file is part of rdiff-backup.
#
......@@ -24,7 +24,8 @@ which is written in C. The goal was to use C as little as possible...
"""
import _librsync, types, array
import types, array
import _librsync
blocksize = _librsync.RS_JOB_BLOCKSIZE
......@@ -95,15 +96,16 @@ class LikeFile:
new_in = self.infile.read(blocksize)
if not new_in:
self.infile_eof = 1
assert not self.infile.close()
self.infile_closeval = self.infile.close()
self.infile_closed = 1
break
self.inbuf += new_in
def close(self):
    """Close infile and pass on infile close value

    If infile was already closed while filling the input buffer,
    return the close value saved at that time; otherwise close it
    now and return that value directly.
    """
    self.closed = 1
    if self.infile_closed: return self.infile_closeval
    else: return self.infile.close()
class SigFile(LikeFile):
......
......@@ -119,6 +119,10 @@ def RORP2Record(rorpath):
str_list.append(" Inode %s\n" % rorpath.getinode())
str_list.append(" DeviceLoc %s\n" % rorpath.getdevloc())
# Save any hashes, if available
if rorpath.has_sha1():
str_list.append(' SHA1Digest %s\n' % rorpath.get_sha1())
elif type == "None": return "".join(str_list)
elif type == "dir" or type == "sock" or type == "fifo": pass
elif type == "sym":
......@@ -166,6 +170,7 @@ def Record2RORP(record_string):
elif field == "CarbonFile":
if data == "None": data_dict['carbonfile'] = None
else: data_dict['carbonfile'] = string2carbonfile(data)
elif field == "SHA1Digest": data_dict['sha1'] = data
elif field == "NumHardLinks": data_dict['nlink'] = int(data)
elif field == "Inode": data_dict['inode'] = long(data)
elif field == "DeviceLoc": data_dict['devloc'] = long(data)
......
......@@ -82,7 +82,12 @@ def move(rpin, rpout):
rpin.delete()
def copy(rpin, rpout, compress = 0):
"""Copy RPath rpin to rpout. Works for symlinks, dirs, etc."""
"""Copy RPath rpin to rpout. Works for symlinks, dirs, etc.
Returns close value of input for regular file, which can be used
to pass hashes on.
"""
log.Log("Regular copying %s to %s" % (rpin.index, rpout.path), 6)
if not rpin.lstat():
if rpout.lstat(): rpout.delete()
......@@ -93,7 +98,7 @@ def copy(rpin, rpout, compress = 0):
rpout.delete() # easier to write than compare
else: return
if rpin.isreg(): copy_reg_file(rpin, rpout, compress)
if rpin.isreg(): return copy_reg_file(rpin, rpout, compress)
elif rpin.isdir(): rpout.mkdir()
elif rpin.issym(): rpout.symlink(rpin.readlink())
elif rpin.ischardev():
......@@ -115,7 +120,7 @@ def copy_reg_file(rpin, rpout, compress = 0):
rpout.setdata()
return
except AttributeError: pass
rpout.write_from_fileobj(rpin.open("rb"), compress = compress)
return rpout.write_from_fileobj(rpin.open("rb"), compress = compress)
def cmp(rpin, rpout):
"""True if rpin has the same data as rpout
......@@ -349,6 +354,7 @@ class RORPath:
elif key == 'carbonfile' and not Globals.carbonfile_write: pass
elif key == 'resourcefork' and not Globals.resource_forks_write:
pass
elif key == 'sha1': pass # one or other may not have set
elif (not other.data.has_key(key) or
self.data[key] != other.data[key]): return 0
......@@ -646,6 +652,18 @@ class RORPath:
"""Record resource fork in dictionary. Does not write"""
self.data['resourcefork'] = rfork
def has_sha1(self):
    """True iff self has its sha1 digest set

    The digest, when present, lives under the 'sha1' key of
    self.data (stored by set_sha1 or read from metadata).
    """
    return self.data.has_key('sha1')
def get_sha1(self):
    """Return the recorded sha1 hex digest

    Raises an exception (KeyError) unless set_sha1 was called first.
    """
    return self.data["sha1"]
def set_sha1(self, digest):
    """Record the sha1 digest (a hexadecimal string) for this path"""
    self.data["sha1"] = digest
class RPath(RORPath):
"""Remote Path class - wrapper around a possibly non-local pathname
......@@ -978,16 +996,16 @@ class RPath(RORPath):
"""Reads fp and writes to self.path. Closes both when done
If compress is true, fp will be gzip compressed before being
written to self.
written to self. Returns closing value of fp.
"""
log.Log("Writing file object to " + self.path, 7)
assert not self.lstat(), "File %s already exists" % self.path
outfp = self.open("wb", compress = compress)
copyfileobj(fp, outfp)
if fp.close() or outfp.close():
raise RPathException("Error closing file")
if outfp.close(): raise RPathException("Error closing file")
self.setdata()
return fp.close()
def write_string(self, s, compress = None):
"""Write string s into rpath"""
......
......@@ -90,7 +90,7 @@ class HashTest(unittest.TestCase):
hashlist = self.extract_hashs(metadata_rp)
assert hashlist == hashlist1, (hashlist1, hashlist)
rdiff_backup(1, 1, in_rp2.path, "testfiles/output", 20000, "-v5")
rdiff_backup(1, 1, in_rp2.path, "testfiles/output", 20000, "-v7")
incs = restore.get_inclist(meta_prefix)
assert len(incs) == 2
metadata_rp.delete() # easy way to find the other one
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment