Commit b22a30aa authored by bescoto

Added iterator operations on metadata file

git-svn-id: 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent c198e27a
......@@ -54,7 +54,9 @@ field names and values.
import re, log, Globals, rpath
from __future__ import generators
import re, gzip
from rdiff_backup import log, Globals, rpath, Time
class ParsingError(Exception):
    """Raised when received data is bad or cannot be parsed"""
......@@ -169,3 +171,139 @@ def unquote_path(quoted_string):
log.Log("Warning, unknown quoted sequence %s found" % two_chars, 2)
return two_chars
return re.sub("\\\\n|\\\\\\\\", replacement_func, quoted_string)
def write_rorp_iter_to_file(rorp_iter, file):
    """Write one record per RORP from rorp_iter to file

    file is a file object that is already open for writing.  Each
    RORP is converted with RORP2Record and written in iteration order.

    """
    for rorp in rorp_iter:
        record = RORP2Record(rorp)
        file.write(record)
class rorp_extractor:
    """Controls iterating rorps from metadata file

    The file is read in blocks of self.blocksize.  self.buf holds the
    text that has been read but not yet handed out as a record.  A new
    record is recognized by a line that begins with "File".

    """
    def __init__(self, fileobj):
        """Set up extractor reading from the (pre-opened) file object"""
        self.fileobj = fileobj # holds file object we are reading from
        self.buf = "" # holds the next part of the file
        self.record_boundary_regexp = re.compile("\\nFile")
        self.at_end = 0 # True if we are at the end of the file
        self.blocksize = 32 * 1024 # amount requested from file per read

    def get_next_pos(self):
        """Return position of next record in buffer

        Blocks are appended to the buffer until it contains the start
        of a following record.  If the file runs out first, at_end is
        set and the length of the whole buffer is returned.

        """
        while 1:
            m = self.record_boundary_regexp.search(self.buf)
            if m: return m.start(0) + 1 # the +1 skips the newline
            # no boundary yet: add next block to the buffer, loop again
            newbuf = self.fileobj.read(self.blocksize)
            if not newbuf:
                self.at_end = 1
                return len(self.buf)
            self.buf += newbuf

    def _parse_record(self, record):
        """Return RORP parsed from record, or None (after logging) on error"""
        try: return Record2RORP(record)
        except ParsingError:
            # Fetch the exception through sys.exc_info() rather than
            # binding it in the except clause, because that clause is
            # spelled differently on old and new Python versions.
            import sys
            log.Log("Error parsing metadata file: %s" %
                    (sys.exc_info()[1],), 2)
            return None

    def iterate(self):
        """Return iterator over all records

        Records that fail to parse are logged and left out.

        """
        while 1:
            next_pos = self.get_next_pos()
            rorp = self._parse_record(self.buf[:next_pos])
            if rorp is not None: yield rorp
            if self.at_end: break
            self.buf = self.buf[next_pos:]

    def skip_to_index(self, index):
        """Scan through the file, set buffer to beginning of index record

        Here we make sure that the buffer always ends in a newline, so
        we will not be splitting lines in half.  If the file ends
        before the record is found, at_end is set and the buffer is
        left empty.

        """
        assert not self.buf or self.buf.endswith("\n")
        if not index: indexpath = "."
        else: indexpath = "/".join(index)
        # Must double all backslashes, because they will be
        # reinterpreted.  For instance, to search for index \n
        # (newline), it will be \\n (backslash n) in the file, so the
        # regular expression is "File \\\\n\\n" (File two backslash n
        # backslash n)
        # NOTE(review): other regular expression metacharacters in the
        # path (such as ".") are not escaped here -- confirm that this
        # is intended.
        double_quote = re.sub("\\\\", "\\\\\\\\", indexpath)
        begin_re = re.compile("(^|\\n)(File %s\\n)" % (double_quote,))
        while 1:
            m = begin_re.search(self.buf)
            if m:
                self.buf = self.buf[m.start(2):]
                return
            # Replace the buffer with the next block, extended to the
            # end of its last line so the buffer ends in a newline.
            self.buf = self.fileobj.read(self.blocksize)
            self.buf += self.fileobj.readline()
            if not self.buf:
                self.at_end = 1
                return

    def iterate_starting_with(self, index):
        """Iterate records whose index starts with given index

        The file is first advanced to the record for index itself.
        Iteration stops at the first parsed record whose index does not
        begin with index.  Unparsable records are logged and skipped.

        """
        self.skip_to_index(index)
        if self.at_end: return
        while 1:
            next_pos = self.get_next_pos()
            rorp = self._parse_record(self.buf[:next_pos])
            if rorp is not None:
                if rorp.index[:len(index)] != index: break
                yield rorp
            if self.at_end: break
            self.buf = self.buf[next_pos:]

    def close(self):
        """Return value of closing associated file"""
        return self.fileobj.close()
# Module-level state for the metadata file being written.  Both names
# start out as None; CloseMetadata sets metadata_fileobj back to None.
metadata_rp = None
metadata_fileobj = None
def OpenMetadata(rp = None, compress = 1):
"""Open the Metadata file for writing"""
# NOTE(review): the global statement names metadata_filename, but the
# assignments below bind metadata_rp, which therefore stays local to
# this call and never updates the module-level metadata_rp -- confirm
# which name was intended.
global metadata_filename, metadata_fileobj
# Refuse to open a second metadata file while one is still open.
assert not metadata_fileobj, "Metadata file already open"
if rp: metadata_rp = rp
# NOTE(review): the next two lines are truncated in this copy.  The
# file name format string with its argument, and the object whose call
# produces metadata_fileobj, are missing; restore them from version
# control before relying on this function.
else: metadata_rp = Globals.rbdir.append("" %
metadata_fileobj ="wb", compress = compress)
def WriteMetadata(rorp):
    """Write metadata of rorp to file

    The record is written to the module-level metadata_fileobj, so a
    metadata file must already be open.

    """
    global metadata_fileobj
    metadata_fileobj.write(RORP2Record(rorp))
def CloseMetadata():
    """Close the open metadata file and forget it

    Returns whatever the file object's close() returned.  The
    module-level metadata_fileobj is reset to None afterwards.

    """
    global metadata_fileobj
    fileobj = metadata_fileobj
    close_status = fileobj.close()
    metadata_fileobj = None
    return close_status
def GetMetadata(rp = None, restrict_index = None, compressed = None):
    """Return iterator of metadata from given metadata file rp

    If restrict_index is given, only records whose index starts with
    it are iterated.  If compressed is None, it is taken from rp: from
    inc_compressed when rp is an increment file (which must then be of
    type "data"), otherwise from whether its path ends in ".gz".

    """
    # NOTE(review): rp defaults to None, but every path below calls
    # methods on it, so callers apparently must always pass an rp.
    if compressed is None:
        if rp.isincfile():
            compressed = rp.inc_compressed
            assert rp.inc_type == "data", rp.inc_type
        else: compressed = rp.get_indexpath().endswith(".gz")

    fileobj = rp.open("rb", compress = compressed)
    extractor = rorp_extractor(fileobj)
    if restrict_index is None: return extractor.iterate()
    else: return extractor.iterate_starting_with(restrict_index)
def GetMetadata_at_time(rpdir, time, restrict_index = None, rplist = None):
    """Scan through rpdir, finding metadata file at given time, iterate

    If rplist is given, use that instead of listing rpdir.  Time here
    is exact, we don't take the next one older or anything.  Returns
    None if no matching metadata found.

    """
    if rplist is None:
        rplist = [rpdir.append(filename) for filename in rpdir.listdir()]

    for rp in rplist:
        # Only "data" increments of mirror_metadata are metadata files
        if (rp.isincfile() and rp.getinctype() == "data" and
            rp.getincbase_str() == "mirror_metadata"):
            if Time.stringtotime(rp.getinctime()) == time:
                return GetMetadata(rp, restrict_index)
    return None
......@@ -271,11 +271,35 @@ class RORPath(RPathStatic):
elif key == 'atime' and not Globals.preserve_atime: pass
elif key == 'devloc' or key == 'inode' or key == 'nlink': pass
elif key == 'size' and self.isdir(): pass
elif key == 'size' and not self.isreg():
pass # size only matters for regular files
elif (not or[key] !=[key]): return None
return 1
def equal_verbose(self, other):
"""Like __eq__, but log more information. Useful when testing"""
# Differing indexes are logged and end the comparison right away.
if self.index != other.index:
Log("Index %s != index %s" % (self.index, other.index), 2)
return None
# NOTE(review): the iterable of this loop is missing in this copy.  Per
# the trailing comment it walks the keys of a dict -- confirm which one.
for key in # compare dicts key by key
if ((key == 'uid' or key == 'gid') and
(not Globals.change_ownership or self.issym())):
# Don't compare gid/uid for symlinks or if not change_ownership
# NOTE(review): no statement follows the comment above in this copy, so
# this branch has no body -- a statement appears to be missing here.
elif key == 'atime' and not Globals.preserve_atime: pass
elif key == 'devloc' or key == 'inode' or key == 'nlink': pass
elif key == 'size' and not self.isreg(): pass
# NOTE(review): the operands of the comparison below, of the following
# "if not" and of the last Log call are truncated in this copy.  The
# log messages indicate the two cases are "key missing from the second
# object" and "values differ" -- restore the expressions from version
# control.
elif (not or[key] !=[key]):
if not
Log("Second is missing key %s" % (key,), 2)
else: Log("Value of %s differs: %s vs %s" %
(key,[key],[key]), 2)
return None
# Reached only when no compared key differed.
return 1
def __ne__(self, other):
    """Return the negation of self.__eq__(other)"""
    is_equal = self.__eq__(other)
    return not is_equal
def __str__(self):
import unittest, os
import unittest, os, cStringIO, time
from rdiff_backup.metadata import *
from rdiff_backup import rpath, Globals
from rdiff_backup import rpath, Globals, selection, destructive_stepping
tempdir = rpath.RPath(Globals.local_connection, "testfiles/output")
class MetadataTest(unittest.TestCase):
def make_temp(self):
"""Make temp directory testfiles/output"""
# NOTE(review): nothing follows the global declaration in this copy, so
# no directory is created here -- the body appears to be missing.
global tempdir
def testQuote(self):
"""Test quoting and unquoting"""
filenames = ["foo", ".", "hello\nthere", "\\", "\\\\\\",
......@@ -13,19 +21,89 @@ class MetadataTest(unittest.TestCase):
result = unquote_path(quoted)
assert result == filename, (quoted, result, filename)
def testRORP2Record(self):
"""Test turning RORPs into records and back again"""
def get_rpaths(self):
"""Return list of rorps"""
# NOTE(review): the call below is cut off after its first argument in
# this copy; the path of the sample directory is missing.
vft = rpath.RPath(Globals.local_connection,
# One rpath for every entry listed in the vft directory.
rpaths = map(lambda x: vft.append(x), vft.listdir())
# A few fixed paths are added as well.  Judging by the names these are
# a regular file, two device files and a presumably nonexistent file
# -- TODO confirm.
extra_rpaths = map(lambda x: rpath.RPath(Globals.local_connection, x),
['/bin/ls', '/dev/ttyS0', '/dev/hda', 'aoeuaou'])
return [vft] + rpaths + extra_rpaths
for rp in [vft] + rpaths + extra_rpaths:
def testRORP2Record(self):
"""Test turning RORPs into records and back again"""
for rp in self.get_rpaths():
record = RORP2Record(rp)
#print record
new_rorp = Record2RORP(record)
assert new_rorp == rp, (new_rorp, rp, record)
def testIterator(self):
    """Test writing RORPs to file and iterating them back"""
    l = self.get_rpaths()
    fp = cStringIO.StringIO()
    write_rorp_iter_to_file(iter(l), fp)
    cstring = fp.getvalue() # text of all records, for debugging below
    fp.seek(0) # rewind, so the extractor reads what was just written

    outlist = list(rorp_extractor(fp).iterate())
    assert len(l) == len(outlist), (len(l), len(outlist))
    for i in range(len(l)):
        if not l[i].equal_verbose(outlist[i]):
            #print cstring
            assert 0, (i, str(l[i]), str(outlist[i]))
def write_metadata_to_temp(self):
    """If necessary, write metadata of bigdir to file metadata.gz

    Returns the rpath of the metadata file.  A file that already
    exists is returned as is, without scanning testfiles/bigdir again.

    """
    global tempdir
    temprp = tempdir.append("metadata.gz")
    if temprp.lstat(): return temprp

    root = rpath.RPath(Globals.local_connection, "testfiles/bigdir")
    dsrp_root = destructive_stepping.DSRPath(1, root)
    rpath_iter = selection.Select(dsrp_root).set_iter()

    start_time = time.time()
    # WriteMetadata writes to the file opened by OpenMetadata, so the
    # target has to be opened first and closed again afterwards.
    OpenMetadata(temprp)
    try:
        for rp in rpath_iter: WriteMetadata(rp)
    finally: CloseMetadata()
    # print with a single parenthesized argument gives the same output
    # whether print is a statement or a function.
    print("Writing metadata took %s seconds" % (time.time() - start_time))
    return temprp
def testSpeed(self):
    """Test testIterator on 10000 files

    Times reading all entries back through GetMetadata, and for
    comparison times just decompressing the same file block by block.

    """
    temprp = self.write_metadata_to_temp()

    start_time = time.time(); i = 0
    for rorp in GetMetadata(temprp): i += 1
    # print with a single parenthesized argument gives the same output
    # whether print is a statement or a function.
    print("Reading %s metadata entries took %s seconds." %
          (i, time.time() - start_time))

    start_time = time.time()
    blocksize = 32 * 1024
    tempfp = temprp.open("rb", compress = 1)
    while 1:
        buf = tempfp.read(blocksize)
        if not buf: break
    assert not tempfp.close()
    print("Simply decompressing metadata file took %s seconds" %
          (time.time() - start_time))
def testIterate_restricted(self):
    """Test getting rorps restricted to certain index

    In this case, we assume subdir (subdir3, subdir10) has 50
    files in it.

    """
    temprp = self.write_metadata_to_temp()
    start_time = time.time(); i = 0
    for rorp in GetMetadata(temprp, ("subdir3", "subdir10")): i += 1
    # print with a single parenthesized argument gives the same output
    # whether print is a statement or a function.
    print("Reading %s metadata entries took %s seconds." %
          (i, time.time() - start_time))
    # presumably the 50 files plus the record of the directory itself
    assert i == 51, i
if __name__ == "__main__": unittest.main()
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment