Commit f2ddb66f authored by ben

Many optimizations - up to 3x speed improvement


git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup@134 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent bb870818
import re, os
# The current version of rdiff-backup
version = "0.8.0"
version = "0.9.0"
# If this is set, use this value in seconds as the current time
# instead of reading it from the clock.
......
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <Python.h>
#include <errno.h>
static PyObject *c_make_file_dict(self, args)
PyObject *self;
PyObject *args;
{
char *filename, filetype[5];
struct stat sbuf;
mode_t mode;
if (!PyArg_ParseTuple(args, "s", &filename)) return NULL;
if (lstat(filename, &sbuf) != 0) {
if (errno == ENOENT || errno == ENOTDIR)
return Py_BuildValue("{s:s}", "type", NULL);
else {
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
}
}
mode = sbuf.st_mode;
/* Build return dictionary from stat struct */
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISSOCK(mode) || S_ISFIFO(mode)) {
/* Regular files, directories, sockets, and fifos */
if (S_ISREG(mode)) strcpy(filetype, "reg");
else if (S_ISDIR(mode)) strcpy(filetype, "dir");
else if (S_ISSOCK(mode)) strcpy(filetype, "sock");
else strcpy(filetype, "fifo");
return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:l,s:l}",
"type", filetype,
"size", (long int)sbuf.st_size,
"perms", (int)(mode & S_IRWXU),
"uid", (int)sbuf.st_uid,
"gid", (int)sbuf.st_gid,
"inode", (long int)sbuf.st_ino,
"devloc", (int)sbuf.st_dev,
"nlink", (int)sbuf.st_nlink,
"mtime", (long int)sbuf.st_mtime,
"atime", (long int)sbuf.st_atime);
} else if (S_ISLNK(mode)) {
/* Symbolic links */
char linkname[1024];
int len_link = readlink(filename, linkname, 1023);
if (len_link < 0) {
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
}
linkname[len_link] = '\0';
return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:s}",
"type", "sym",
"size", (long int)sbuf.st_size,
"perms", (int)(mode & S_IRWXU),
"uid", (int)sbuf.st_uid,
"gid", (int)sbuf.st_gid,
"inode", (long int)sbuf.st_ino,
"devloc", (int)sbuf.st_dev,
"nlink", (int)sbuf.st_nlink,
"linkname", linkname);
} else if (S_ISCHR(mode) || S_ISBLK(mode)) {
/* Device files */
char devtype[2];
int devnums = (int)sbuf.st_rdev;
if (S_ISCHR(mode)) strcpy(devtype, "c");
else strcpy(devtype, "b");
return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:O}",
"type", "dev",
"size", (long int)sbuf.st_size,
"perms", (int)(mode & S_IRWXU),
"uid", (int)sbuf.st_uid,
"gid", (int)sbuf.st_gid,
"inode", (long int)sbuf.st_ino,
"devloc", (int)sbuf.st_dev,
"nlink", (int)sbuf.st_nlink,
"devnums", Py_BuildValue("(s,i,i)", devtype,
devnums >> 8,
devnums & 0xff),
"mtime", (long int)sbuf.st_mtime,
"atime", (long int)sbuf.st_atime);
} else {
/* Unrecognized file type - pretend it isn't there */
errno = ENOENT;
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
}
}
static PyObject *long2str(self, args)
PyObject *self;
PyObject *args;
{
unsigned char s[7];
int sindex;
unsigned long long int l;
PyObject *pylong;
if (!PyArg_ParseTuple(args, "O", &pylong)) return NULL;
l = PyLong_AsUnsignedLongLong(pylong);
for(sindex = 0; sindex <= 6; sindex++) {
s[sindex] = l % 256;
l /= 256;
}
return Py_BuildValue("s#", s, 7);
}
static PyObject *str2long(self, args)
PyObject *self;
PyObject *args;
{
unsigned char *s;
unsigned long long int l = 0;
int sindex, ssize;
if (!PyArg_ParseTuple(args, "s#", &s, &ssize)) return NULL;
if (ssize != 7) return Py_BuildValue("i", -1);
for(sindex=6; sindex >= 0; sindex--)
l = l*256 + s[sindex];
return PyLong_FromLongLong(l);
}
static PyMethodDef CMethods[] = {
{"make_file_dict", c_make_file_dict, METH_VARARGS,
"Make dictionary from file stat"},
{"long2str", long2str, METH_VARARGS,
"Convert long int to 7 byte string"},
{"str2long", str2long, METH_VARARGS,
"Convert 7 byte string to long int"},
{NULL, NULL, 0, NULL}
};
void initC(void)
{
(void) Py_InitModule("C", CMethods);
}
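
long2str and str2long implement a 7-byte little-endian length encoding used for framing data sent over the connection pipes. Below is a minimal pure-Python sketch of the same round trip; the py_* names are illustrative and not part of the C module.

# Pure-Python equivalent of C.long2str / C.str2long: encode a
# non-negative integer as 7 bytes, least-significant byte first.
def py_long2str(l):
    s = ""
    for i in range(7):
        s += chr(l % 256)
        l //= 256
    return s

def py_str2long(s):
    assert len(s) == 7
    l = 0
    for i in range(6, -1, -1):   # most-significant byte is s[6]
        l = l * 256 + ord(s[i])
    return l

assert py_str2long(py_long2str(123456789)) == 123456789
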
from __future__ import generators
import types, os, tempfile, cPickle, shutil, traceback
import types, os, tempfile, cPickle, shutil, traceback, pickle
#######################################################################
#
......@@ -119,7 +119,8 @@ class LowLevelPipeConnection(Connection):
def _putobj(self, obj, req_num):
"""Send a generic python obj down the outpipe"""
self._write("o", cPickle.dumps(obj, 1), req_num)
# for some reason there is an error when cPickle is used below..
self._write("o", pickle.dumps(obj, 1), req_num)
def _putbuf(self, buf, req_num):
"""Send buffer buf down the outpipe"""
......@@ -181,7 +182,8 @@ class LowLevelPipeConnection(Connection):
def _write(self, headerchar, data, req_num):
"""Write header and then data to the pipe"""
self.outpipe.write(headerchar + chr(req_num) + self._l2s(len(data)))
self.outpipe.write(headerchar + chr(req_num) +
C.long2str(long(len(data))))
self.outpipe.write(data)
self.outpipe.flush()
......@@ -189,14 +191,14 @@ class LowLevelPipeConnection(Connection):
"""Read length bytes from inpipe, returning result"""
return self.inpipe.read(length)
def _s2l(self, s):
def _s2l_old(self, s):
"""Convert string to long int"""
assert len(s) == 7
l = 0L
for i in range(7): l = l*256 + ord(s[i])
return l
def _l2s(self, l):
def _l2s_old(self, l):
"""Convert long int to string"""
s = ""
for i in range(7):
......@@ -214,7 +216,7 @@ class LowLevelPipeConnection(Connection):
try:
format_string, req_num, length = (header_string[0],
ord(header_string[1]),
self._s2l(header_string[2:]))
C.str2long(header_string[2:]))
except IndexError: raise ConnectionError()
if format_string == "q": raise ConnectionQuit("Received quit signal")
......@@ -490,7 +492,7 @@ class VirtualFile:
# everything has to be available here for remote connection's use, but
# put at bottom to reduce circularities.
import Globals, Time, Rdiff, Hardlink, FilenameMapping
import Globals, Time, Rdiff, Hardlink, FilenameMapping, C
from static import *
from lazy import *
from log import *
......
......@@ -31,7 +31,7 @@ class DSRPath(RPath):
newmtime - holds the new mtime
"""
def __init__(self, source, *args):
def __init__(self, source, conn_or_rp, base = 0, index = ()):
"""Initialize DSRP
Source should be true iff the DSRPath is taken from the
......@@ -42,10 +42,11 @@ class DSRPath(RPath):
otherwise use the same arguments as the RPath initializer.
"""
if len(args) == 1 and isinstance(args[0], RPath):
rp = args[0]
RPath.__init__(self, rp.conn, rp.base, rp.index)
else: RPath.__init__(self, *args)
if base == 0:
assert isinstance(conn_or_rp, RPath)
RPath.__init__(self, conn_or_rp.conn,
conn_or_rp.base, conn_or_rp.index)
else: RPath.__init__(self, conn_or_rp, base, index)
if source != "bypass":
# "bypass" val is used when unpackaging over connection
......
import cPickle
import Globals
import cPickle, array
import Globals, C
#######################################################################
#
......@@ -13,7 +13,7 @@ class UnwrapFile:
def __init__(self, file):
self.file = file
def _s2l(self, s):
def _s2l_old(self, s):
"""Convert string to long int"""
assert len(s) == 7
l = 0L
......@@ -31,8 +31,9 @@ class UnwrapFile:
"""
header = self.file.read(8)
if not header: return None, None
assert len(header) == 8, "Header is only %d bytes" % len(header)
type, length = header[0], self._s2l(header[1:])
if len(header) != 8:
assert None, "Header %s is only %d bytes" % (header, len(header))
type, length = header[0], C.str2long(header[1:])
buf = self.file.read(length)
if type == "o": return type, cPickle.loads(buf)
else: return type, buf
......@@ -82,29 +83,20 @@ class IterVirtualFile(UnwrapFile):
"""
UnwrapFile.__init__(self, iwf.file)
self.iwf = iwf
self.bufferlist = [initial_data]
self.bufferlen = len(initial_data)
self.buffer = initial_data
self.closed = None
def check_consistency(self):
l = len("".join(self.bufferlist))
assert l == self.bufferlen, \
"Length of IVF bufferlist doesn't match (%s, %s)" % \
(l, self.bufferlen)
def read(self, length):
"""Read length bytes from the file, updating buffers as necessary"""
assert not self.closed
if self.iwf.currently_in_file:
while length >= self.bufferlen:
while length >= len(self.buffer):
if not self.addtobuffer(): break
real_len = min(length, self.bufferlen)
combined_buffer = "".join(self.bufferlist)
assert len(combined_buffer) == self.bufferlen, \
(len(combined_buffer), self.bufferlen)
self.bufferlist = [combined_buffer[real_len:]]
self.bufferlen = self.bufferlen - real_len
return combined_buffer[:real_len]
real_len = min(length, len(self.buffer))
return_val = self.buffer[:real_len]
self.buffer = self.buffer[real_len:]
return return_val
def addtobuffer(self):
"""Read a chunk from the file and add it to the buffer"""
......@@ -112,8 +104,7 @@ class IterVirtualFile(UnwrapFile):
type, data = self._get()
assert type == "c", "Type is %s instead of c" % type
if data:
self.bufferlen = self.bufferlen + len(data)
self.bufferlist.append(data)
self.buffer += data
return 1
else:
self.iwf.currently_in_file = None
......@@ -123,8 +114,7 @@ class IterVirtualFile(UnwrapFile):
"""Currently just reads whats left and discards it"""
while self.iwf.currently_in_file:
self.addtobuffer()
self.bufferlist = []
self.bufferlen = 0
self.buffer = ""
self.closed = 1
......@@ -145,45 +135,43 @@ class FileWrappingIter:
def __init__(self, iter):
"""Initialize with iter"""
self.iter = iter
self.bufferlist = []
self.bufferlen = 0L
self.array_buf = array.array('c')
self.currently_in_file = None
self.closed = None
def read(self, length):
"""Return next length bytes in file"""
assert not self.closed
while self.bufferlen < length:
while len(self.array_buf) < length:
if not self.addtobuffer(): break
combined_buffer = "".join(self.bufferlist)
assert len(combined_buffer) == self.bufferlen
real_len = min(self.bufferlen, length)
self.bufferlen = self.bufferlen - real_len
self.bufferlist = [combined_buffer[real_len:]]
return combined_buffer[:real_len]
result = self.array_buf[:length].tostring()
del self.array_buf[:length]
return result
def addtobuffer(self):
"""Updates self.bufferlist and self.bufferlen, adding on a chunk
"""Updates self.buffer, adding a chunk from the iterator.
Returns None if we have reached the end of the iterator,
otherwise return true.
"""
array_buf = self.array_buf
if self.currently_in_file:
buf = "c" + self.addfromfile()
array_buf.fromstring("c")
array_buf.fromstring(self.addfromfile())
else:
try: currentobj = self.iter.next()
except StopIteration: return None
if hasattr(currentobj, "read") and hasattr(currentobj, "close"):
self.currently_in_file = currentobj
buf = "f" + self.addfromfile()
array_buf.fromstring("f")
array_buf.fromstring(self.addfromfile())
else:
pickle = cPickle.dumps(currentobj, 1)
buf = "o" + self._l2s(len(pickle)) + pickle
self.bufferlist.append(buf)
self.bufferlen = self.bufferlen + len(buf)
array_buf.fromstring("o")
array_buf.fromstring(C.long2str(long(len(pickle))))
array_buf.fromstring(pickle)
return 1
def addfromfile(self):
......@@ -192,9 +180,9 @@ class FileWrappingIter:
if not buf:
assert not self.currently_in_file.close()
self.currently_in_file = None
return self._l2s(len(buf)) + buf
return C.long2str(long(len(buf))) + buf
def _l2s(self, l):
def _l2s_old(self, l):
"""Convert long int to string of 7 characters"""
s = ""
for i in range(7):
......@@ -210,26 +198,28 @@ class BufferedRead:
"""Buffer the .read() calls to the given file
This is used to lessen overhead and latency when a file is sent
over a connection.
over a connection. Profiling said that arrays were faster than
strings here.
"""
def __init__(self, file):
self.file = file
self.buffer = ""
self.array_buf = array.array('c')
self.bufsize = Globals.conn_bufsize
def read(self, l = -1):
array_buf = self.array_buf
if l < 0: # Read as much as possible
result = self.buffer + self.file.read()
self.buffer = ""
result = array_buf.tostring() + self.file.read()
del array_buf[:]
return result
if len(self.buffer) < l: # Try to make buffer as long as l
self.buffer += self.file.read(max(self.bufsize,
l - len(self.buffer)))
actual_size = min(l, len(self.buffer))
result = self.buffer[:actual_size]
self.buffer = self.buffer[actual_size:]
if len(array_buf) < l: # Try to make buffer at least as long as l
array_buf.fromstring(self.file.read(max(self.bufsize, l)))
result = array_buf[:l].tostring()
del array_buf[:l]
return result
def close(self): return self.file.close()
from log import *
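
The BufferedRead docstring above notes that profiling found arrays faster than strings here. The informal timing sketch below illustrates one reason, assuming Python 2 like the rest of the codebase; it is not the project's benchmark, and the chunk size and iteration count are arbitrary.

import array, time

chunk = "x" * 1024

# Building a str with s = s + chunk re-copies the whole buffer each time,
# so total work grows quadratically with the number of chunks.
start = time.time()
s = ""
for i in xrange(2000):
    s = s + chunk
str_secs = time.time() - start

# array('c').fromstring() appends in place with amortized constant cost
# per byte, which is the pattern BufferedRead and FileWrappingIter use.
start = time.time()
a = array.array('c')
for i in xrange(2000):
    a.fromstring(chunk)
arr_secs = time.time() - start

assert a.tostring() == s
print "str accumulation: %.3fs   array accumulation: %.3fs" % (str_secs, arr_secs)
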
......@@ -8,9 +8,9 @@ statistics afterwards.
"""
__no_execute__ = 1
execfile("main.py")
import profile, pstats
profile.run("Globals.Main.Main(%s)" % repr(sys.argv[1:]), "profile-output")
import sys, rdiff_backup.Main, profile, pstats
profile.run("rdiff_backup.Main.Main(%s)" % repr(sys.argv[1:]),
"profile-output")
p = pstats.Stats("profile-output")
p.sort_stats('time')
p.print_stats(40)
......
......@@ -217,6 +217,7 @@ class RORPath(RPathStatic):
(not Globals.change_ownership or self.issym())):
# Don't compare gid/uid for symlinks or if not change_ownership
pass
elif key == 'atime' and not Globals.preserve_atime: pass
elif key == 'devloc' or key == 'inode' or key == 'nlink': pass
elif (not other.data.has_key(key) or
self.data[key] != other.data[key]): return None
......@@ -425,7 +426,7 @@ class RPath(RORPath):
if base is not None: self.path = "/".join((base,) + index)
self.file = None
if data or base is None: self.data = data
else: self.setdata()
else: self.data = self.conn.C.make_file_dict(self.path)
def __str__(self):
return "Path: %s\nIndex: %s\nData: %s" % (self.path, self.index,
......@@ -448,6 +449,10 @@ class RPath(RORPath):
self.path = "/".join((self.base,) + self.index)
def setdata(self):
"""Set data dictionary using C extension"""
self.data = self.conn.C.make_file_dict(self.path)
def setdata_old(self):
"""Create the data dictionary"""
statblock = self.conn.RPathStatic.tupled_lstat(self.path)
if statblock is None:
......
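
For reference when reading the new setdata(), this is the shape of the dictionary c_make_file_dict() returns for a regular file, per the C source earlier in this commit; the concrete values below are invented for illustration.

# Keys from the regular-file branch of c_make_file_dict(); 'perms' holds
# only the owner bits (mode & S_IRWXU).  A missing path instead yields
# {'type': None}.  All values here are made up.
example_data = {
    'type': 'reg',
    'size': 4096L,
    'perms': 0600,
    'uid': 1000,
    'gid': 1000,
    'inode': 123456L,
    'devloc': 2049,
    'nlink': 1,
    'mtime': 1019088000L,
    'atime': 1019088000L,
}
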
......@@ -94,7 +94,9 @@ class Select:
self.starting_index = starting_index
self.iter = self.iterate_starting_from(self.dsrpath,
self.iterate_starting_from, sel_func)
else: self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
elif self.quoting_on:
self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
else: self.iter = self.Iterate_fast(self.dsrpath, sel_func)
# only iterate parents if we are not starting from beginning
self.iterate_parents = starting_index is not None and iterate_parents
......@@ -102,6 +104,52 @@ class Select:
self.__iter__ = lambda: self
return self
def Iterate_fast(self, dsrpath, sel_func):
"""Like Iterate, but don't recur, saving time
This is a bit harder to read than Iterate/iterate_in_dir, but
it should be faster because it only recurs to half as much
depth. It doesn't handle the quoting case.
"""
def error_handler(exc, filename):
Log("Error initializing file %s/%s" % (dsrpath.path, filename), 2)
return None
def diryield(dsrpath):
s = sel_func(dsrpath)
if s == 0: return
elif s == 1:
yield dsrpath
for filename in Robust.listrp(dsrpath):
new_dsrp = Robust.check_common_error(error_handler,
dsrpath.append, [filename])
if new_dsrp:
if new_dsrp.isdir():
for dsrp in diryield(new_dsrp): yield dsrp
elif sel_func(new_dsrp) == 1: yield new_dsrp
elif s == 2:
yielded_something = None
for filename in Robust.listrp(dsrpath):
new_dsrp = Robust.check_common_error(error_handler,
dsrpath.append, [filename])
if new_dsrp:
if new_dsrp.isdir():
for dsrp in diryield(new_dsrp):
if not yielded_something:
yielded_something = 1
yield dsrpath
yield dsrp
elif sel_func(new_dsrp) == 1:
if not yielded_something:
yielded_something = 1
yield dsrpath
yield new_dsrp
if dsrpath.isdir():
for dsrp in diryield(dsrpath): yield dsrp
elif sel_func(dsrpath) == 1: yield dsrpath
def Iterate(self, dsrpath, rec_func, sel_func):
"""Return iterator yielding dsrps in dsrpath
......
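
To make the new Iterate_fast easier to follow, here is an illustrative selection function exercising the three return codes that its diryield() branches on; the helper and its stand-in class are examples only, not part of rdiff-backup.

# Return codes, as read from diryield() above:
#   0 - prune: nothing under this path is yielded
#   1 - include: the path is yielded (and, for a directory, its selected children)
#   2 - scan: children are examined, and the directory itself is yielded
#       only if something beneath it is selected
class _FakeDSRP:
    """Stand-in exposing just the attributes the selector consults."""
    def __init__(self, path, isdir):
        self.path, self._isdir = path, isdir
    def isdir(self):
        return self._isdir

def example_sel_func(dsrp):
    if dsrp.path.endswith(".cache"): return 0
    if dsrp.isdir(): return 2
    return 1

assert example_sel_func(_FakeDSRP("/home/ben/foo.cache", 0)) == 0
assert example_sel_func(_FakeDSRP("/home/ben", 1)) == 2
assert example_sel_func(_FakeDSRP("/home/ben/notes.txt", 0)) == 1
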