Commit f2ddb66f authored by ben

Many optimizations - up to 3x speed improvement


git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup@134 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent bb870818
 import re, os
 # The current version of rdiff-backup
-version = "0.8.0"
+version = "0.9.0"
 # If this is set, use this value in seconds as the current time
 # instead of reading it from the clock.
......
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <Python.h>
#include <errno.h>

/* Return a Python dictionary of lstat() information for filename,
   replacing the slower pure-Python RPath.setdata code. */
static PyObject *c_make_file_dict(self, args)
    PyObject *self;
    PyObject *args;
{
    char *filename, filetype[5];
    struct stat sbuf;
    mode_t mode;

    if (!PyArg_ParseTuple(args, "s", &filename)) return NULL;
    if (lstat(filename, &sbuf) != 0) {
        if (errno == ENOENT || errno == ENOTDIR)
            /* File is missing - signal this with {"type": None} */
            return Py_BuildValue("{s:s}", "type", NULL);
        else {
            PyErr_SetFromErrno(PyExc_OSError);
            return NULL;
        }
    }
    mode = sbuf.st_mode;

    /* Build return dictionary from stat struct */
    if (S_ISREG(mode) || S_ISDIR(mode) || S_ISSOCK(mode) || S_ISFIFO(mode)) {
        /* Regular files, directories, sockets, and fifos */
        if (S_ISREG(mode)) strcpy(filetype, "reg");
        else if (S_ISDIR(mode)) strcpy(filetype, "dir");
        else if (S_ISSOCK(mode)) strcpy(filetype, "sock");
        else strcpy(filetype, "fifo");
        return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:l,s:l}",
                             "type", filetype,
                             "size", (long int)sbuf.st_size,
                             "perms", (int)(mode & S_IRWXU),
                             "uid", (int)sbuf.st_uid,
                             "gid", (int)sbuf.st_gid,
                             "inode", (long int)sbuf.st_ino,
                             "devloc", (int)sbuf.st_dev,
                             "nlink", (int)sbuf.st_nlink,
                             "mtime", (long int)sbuf.st_mtime,
                             "atime", (long int)sbuf.st_atime);
    } else if (S_ISLNK(mode)) {
        /* Symbolic links */
        char linkname[1024];
        int len_link = readlink(filename, linkname, 1023);
        if (len_link < 0) {
            PyErr_SetFromErrno(PyExc_OSError);
            return NULL;
        }
        linkname[len_link] = '\0';
        return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:s}",
                             "type", "sym",
                             "size", (long int)sbuf.st_size,
                             "perms", (int)(mode & S_IRWXU),
                             "uid", (int)sbuf.st_uid,
                             "gid", (int)sbuf.st_gid,
                             "inode", (long int)sbuf.st_ino,
                             "devloc", (int)sbuf.st_dev,
                             "nlink", (int)sbuf.st_nlink,
                             "linkname", linkname);
    } else if (S_ISCHR(mode) || S_ISBLK(mode)) {
        /* Device files */
        char devtype[2];
        int devnums = (int)sbuf.st_rdev;
        if (S_ISCHR(mode)) strcpy(devtype, "c");
        else strcpy(devtype, "b");
        /* The format string must name all eleven key/value pairs, including
           mtime and atime; otherwise Py_BuildValue never consumes the
           trailing arguments and the dictionary comes back incomplete. */
        return Py_BuildValue("{s:s,s:l,s:i,s:i,s:i,s:l,s:i,s:i,s:O,s:l,s:l}",
                             "type", "dev",
                             "size", (long int)sbuf.st_size,
                             "perms", (int)(mode & S_IRWXU),
                             "uid", (int)sbuf.st_uid,
                             "gid", (int)sbuf.st_gid,
                             "inode", (long int)sbuf.st_ino,
                             "devloc", (int)sbuf.st_dev,
                             "nlink", (int)sbuf.st_nlink,
                             "devnums", Py_BuildValue("(s,i,i)", devtype,
                                                      devnums >> 8,
                                                      devnums & 0xff),
                             "mtime", (long int)sbuf.st_mtime,
                             "atime", (long int)sbuf.st_atime);
    } else {
        /* Unrecognized file type - pretend it isn't there */
        errno = ENOENT;
        PyErr_SetFromErrno(PyExc_OSError);
        return NULL;
    }
}

/* Encode a Python long as a 7-byte little-endian string. */
static PyObject *long2str(self, args)
    PyObject *self;
    PyObject *args;
{
    unsigned char s[7];
    int sindex;
    unsigned long long int l;
    PyObject *pylong;

    if (!PyArg_ParseTuple(args, "O", &pylong)) return NULL;
    l = PyLong_AsUnsignedLongLong(pylong);
    for (sindex = 0; sindex <= 6; sindex++) {
        s[sindex] = l % 256;
        l /= 256;
    }
    return Py_BuildValue("s#", s, 7);
}

/* Decode a 7-byte little-endian string back into a Python long. */
static PyObject *str2long(self, args)
    PyObject *self;
    PyObject *args;
{
    unsigned char *s;
    unsigned long long int l = 0;
    int sindex, ssize;

    if (!PyArg_ParseTuple(args, "s#", &s, &ssize)) return NULL;
    if (ssize != 7) return Py_BuildValue("i", -1);
    for (sindex = 6; sindex >= 0; sindex--)
        l = l*256 + s[sindex];
    return PyLong_FromLongLong(l);
}

static PyMethodDef CMethods[] = {
    {"make_file_dict", c_make_file_dict, METH_VARARGS,
     "Make dictionary from file stat"},
    {"long2str", long2str, METH_VARARGS,
     "Convert long int to 7 byte string"},
    {"str2long", str2long, METH_VARARGS,
     "Convert 7 byte string to long int"},
    {NULL, NULL, 0, NULL}
};

void initC(void)
{
    (void) Py_InitModule("C", CMethods);
}
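For reference, here is a rough pure-Python sketch of the 7-byte little-endian encoding that long2str and str2long implement above; the names py_long2str and py_str2long are purely illustrative and are not part of the module.

# Pure-Python sketch of the 7-byte little-endian length encoding;
# byte 0 is the least significant, as in long2str() above.
def py_long2str(l):
    s = ""
    for _ in range(7):
        s += chr(l % 256)
        l //= 256
    return s

def py_str2long(s):
    assert len(s) == 7
    l = 0
    for i in range(6, -1, -1):      # s[6] is the most significant byte
        l = l*256 + ord(s[i])
    return l

assert py_str2long(py_long2str(123456789)) == 123456789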
 from __future__ import generators
-import types, os, tempfile, cPickle, shutil, traceback
+import types, os, tempfile, cPickle, shutil, traceback, pickle
 #######################################################################
 #
@@ -119,7 +119,8 @@ class LowLevelPipeConnection(Connection):
     def _putobj(self, obj, req_num):
         """Send a generic python obj down the outpipe"""
-        self._write("o", cPickle.dumps(obj, 1), req_num)
+        # for some reason there is an error when cPickle is used below..
+        self._write("o", pickle.dumps(obj, 1), req_num)
     def _putbuf(self, buf, req_num):
         """Send buffer buf down the outpipe"""
@@ -181,7 +182,8 @@ class LowLevelPipeConnection(Connection):
     def _write(self, headerchar, data, req_num):
         """Write header and then data to the pipe"""
-        self.outpipe.write(headerchar + chr(req_num) + self._l2s(len(data)))
+        self.outpipe.write(headerchar + chr(req_num) +
+                           C.long2str(long(len(data))))
         self.outpipe.write(data)
         self.outpipe.flush()
@@ -189,14 +191,14 @@ class LowLevelPipeConnection(Connection):
         """Read length bytes from inpipe, returning result"""
         return self.inpipe.read(length)
-    def _s2l(self, s):
+    def _s2l_old(self, s):
         """Convert string to long int"""
         assert len(s) == 7
         l = 0L
         for i in range(7): l = l*256 + ord(s[i])
         return l
-    def _l2s(self, l):
+    def _l2s_old(self, l):
         """Convert long int to string"""
         s = ""
         for i in range(7):
@@ -214,7 +216,7 @@ class LowLevelPipeConnection(Connection):
         try:
             format_string, req_num, length = (header_string[0],
                                               ord(header_string[1]),
-                                              self._s2l(header_string[2:]))
+                                              C.str2long(header_string[2:]))
         except IndexError: raise ConnectionError()
         if format_string == "q": raise ConnectionQuit("Received quit signal")
@@ -490,7 +492,7 @@ class VirtualFile:
 # everything has to be available here for remote connection's use, but
 # put at bottom to reduce circularities.
-import Globals, Time, Rdiff, Hardlink, FilenameMapping
+import Globals, Time, Rdiff, Hardlink, FilenameMapping, C
 from static import *
 from lazy import *
 from log import *
......
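The framing that _write() produces and the header parsing above consumes is: one type character, one request-number byte, a 7-byte length, then the payload. Below is a hedged sketch of that layout (Python 2, like the surrounding code); pack_message and unpack_message are hypothetical helper names for this illustration, not rdiff-backup functions.

import struct
from StringIO import StringIO

def pack_message(headerchar, req_num, data):
    # 7-byte little-endian length, the same layout C.long2str produces
    length7 = struct.pack("<Q", len(data))[:7]
    return headerchar + chr(req_num) + length7 + data

def unpack_message(stream):
    header = stream.read(9)
    length = struct.unpack("<Q", header[2:] + "\0")[0]   # inverse of C.str2long
    return header[0], ord(header[1]), stream.read(length)

msg = pack_message("o", 3, "hello")
assert unpack_message(StringIO(msg)) == ("o", 3, "hello")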
@@ -31,7 +31,7 @@ class DSRPath(RPath):
     newmtime - holds the new mtime
     """
-    def __init__(self, source, *args):
+    def __init__(self, source, conn_or_rp, base = 0, index = ()):
         """Initialize DSRP
         Source should be true iff the DSRPath is taken from the
@@ -42,10 +42,11 @@ class DSRPath(RPath):
         otherwise use the same arguments as the RPath initializer.
         """
-        if len(args) == 1 and isinstance(args[0], RPath):
-            rp = args[0]
-            RPath.__init__(self, rp.conn, rp.base, rp.index)
-        else: RPath.__init__(self, *args)
+        if base == 0:
+            assert isinstance(conn_or_rp, RPath)
+            RPath.__init__(self, conn_or_rp.conn,
+                           conn_or_rp.base, conn_or_rp.index)
+        else: RPath.__init__(self, conn_or_rp, base, index)
         if source != "bypass":
             # "bypass" val is used when unpackaging over connection
......
-import cPickle
-import Globals
+import cPickle, array
+import Globals, C
 #######################################################################
 #
@@ -13,7 +13,7 @@ class UnwrapFile:
     def __init__(self, file):
         self.file = file
-    def _s2l(self, s):
+    def _s2l_old(self, s):
         """Convert string to long int"""
         assert len(s) == 7
         l = 0L
@@ -31,8 +31,9 @@ class UnwrapFile:
         """
         header = self.file.read(8)
         if not header: return None, None
-        assert len(header) == 8, "Header is only %d bytes" % len(header)
-        type, length = header[0], self._s2l(header[1:])
+        if len(header) != 8:
+            assert None, "Header %s is only %d bytes" % (header, len(header))
+        type, length = header[0], C.str2long(header[1:])
         buf = self.file.read(length)
         if type == "o": return type, cPickle.loads(buf)
         else: return type, buf
@@ -82,29 +83,20 @@ class IterVirtualFile(UnwrapFile):
         """
         UnwrapFile.__init__(self, iwf.file)
         self.iwf = iwf
-        self.bufferlist = [initial_data]
-        self.bufferlen = len(initial_data)
+        self.buffer = initial_data
         self.closed = None
-    def check_consistency(self):
-        l = len("".join(self.bufferlist))
-        assert l == self.bufferlen, \
-               "Length of IVF bufferlist doesn't match (%s, %s)" % \
-               (l, self.bufferlen)
     def read(self, length):
+        """Read length bytes from the file, updating buffers as necessary"""
        assert not self.closed
        if self.iwf.currently_in_file:
-           while length >= self.bufferlen:
+           while length >= len(self.buffer):
                if not self.addtobuffer(): break
-       real_len = min(length, self.bufferlen)
-       combined_buffer = "".join(self.bufferlist)
-       assert len(combined_buffer) == self.bufferlen, \
-              (len(combined_buffer), self.bufferlen)
-       self.bufferlist = [combined_buffer[real_len:]]
-       self.bufferlen = self.bufferlen - real_len
-       return combined_buffer[:real_len]
+       real_len = min(length, len(self.buffer))
+       return_val = self.buffer[:real_len]
+       self.buffer = self.buffer[real_len:]
+       return return_val
     def addtobuffer(self):
         """Read a chunk from the file and add it to the buffer"""
@@ -112,8 +104,7 @@ class IterVirtualFile(UnwrapFile):
         type, data = self._get()
         assert type == "c", "Type is %s instead of c" % type
         if data:
-            self.bufferlen = self.bufferlen + len(data)
-            self.bufferlist.append(data)
+            self.buffer += data
             return 1
         else:
             self.iwf.currently_in_file = None
@@ -123,8 +114,7 @@ class IterVirtualFile(UnwrapFile):
         """Currently just reads whats left and discards it"""
         while self.iwf.currently_in_file:
             self.addtobuffer()
-        self.bufferlist = []
-        self.bufferlen = 0
+        self.buffer = ""
         self.closed = 1
@@ -145,45 +135,43 @@ class FileWrappingIter:
     def __init__(self, iter):
         """Initialize with iter"""
         self.iter = iter
-        self.bufferlist = []
-        self.bufferlen = 0L
+        self.array_buf = array.array('c')
         self.currently_in_file = None
         self.closed = None
     def read(self, length):
         """Return next length bytes in file"""
         assert not self.closed
-        while self.bufferlen < length:
+        while len(self.array_buf) < length:
             if not self.addtobuffer(): break
-        combined_buffer = "".join(self.bufferlist)
-        assert len(combined_buffer) == self.bufferlen
-        real_len = min(self.bufferlen, length)
-        self.bufferlen = self.bufferlen - real_len
-        self.bufferlist = [combined_buffer[real_len:]]
-        return combined_buffer[:real_len]
+        result = self.array_buf[:length].tostring()
+        del self.array_buf[:length]
+        return result
     def addtobuffer(self):
-        """Updates self.bufferlist and self.bufferlen, adding on a chunk
+        """Updates self.buffer, adding a chunk from the iterator.
         Returns None if we have reached the end of the iterator,
         otherwise return true.
         """
+        array_buf = self.array_buf
         if self.currently_in_file:
-            buf = "c" + self.addfromfile()
+            array_buf.fromstring("c")
+            array_buf.fromstring(self.addfromfile())
        else:
            try: currentobj = self.iter.next()
            except StopIteration: return None
            if hasattr(currentobj, "read") and hasattr(currentobj, "close"):
                self.currently_in_file = currentobj
-               buf = "f" + self.addfromfile()
+               array_buf.fromstring("f")
+               array_buf.fromstring(self.addfromfile())
            else:
                pickle = cPickle.dumps(currentobj, 1)
-               buf = "o" + self._l2s(len(pickle)) + pickle
-       self.bufferlist.append(buf)
-       self.bufferlen = self.bufferlen + len(buf)
+               array_buf.fromstring("o")
+               array_buf.fromstring(C.long2str(long(len(pickle))))
+               array_buf.fromstring(pickle)
        return 1
     def addfromfile(self):
@@ -192,9 +180,9 @@ class FileWrappingIter:
         if not buf:
             assert not self.currently_in_file.close()
             self.currently_in_file = None
-        return self._l2s(len(buf)) + buf
-    def _l2s(self, l):
+        return C.long2str(long(len(buf))) + buf
+    def _l2s_old(self, l):
         """Convert long int to string of 7 characters"""
         s = ""
         for i in range(7):
@@ -210,26 +198,28 @@ class BufferedRead:
     """Buffer the .read() calls to the given file
     This is used to lessen overhead and latency when a file is sent
-    over a connection.
+    over a connection.  Profiling said that arrays were faster than
+    strings here.
     """
     def __init__(self, file):
         self.file = file
-        self.buffer = ""
+        self.array_buf = array.array('c')
         self.bufsize = Globals.conn_bufsize
     def read(self, l = -1):
+        array_buf = self.array_buf
        if l < 0: # Read as much as possible
-           result = self.buffer + self.file.read()
-           self.buffer = ""
+           result = array_buf.tostring() + self.file.read()
+           del array_buf[:]
            return result
-       if len(self.buffer) < l: # Try to make buffer as long as l
-           self.buffer += self.file.read(max(self.bufsize,
-                                             l - len(self.buffer)))
-       actual_size = min(l, len(self.buffer))
-       result = self.buffer[:actual_size]
-       self.buffer = self.buffer[actual_size:]
+       if len(array_buf) < l: # Try to make buffer at least as long as l
+           array_buf.fromstring(self.file.read(max(self.bufsize, l)))
+       result = array_buf[:l].tostring()
+       del array_buf[:l]
        return result
     def close(self): return self.file.close()
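The change above replaces string and list buffers with array.array('c'), which can be appended to and truncated in place. A minimal Python 2 sketch of that append/drain pattern follows; ArrayBuffer is a made-up name for illustration, not a class in rdiff-backup.

import array

class ArrayBuffer:
    def __init__(self):
        self.buf = array.array('c')      # mutable character buffer
    def append(self, s):
        self.buf.fromstring(s)           # extend in place, no big string copies
    def pop(self, n):
        chunk = self.buf[:n].tostring()  # take up to n bytes...
        del self.buf[:n]                 # ...and drop them from the buffer
        return chunk

b = ArrayBuffer()
b.append("hello ")
b.append("world")
assert b.pop(5) == "hello"
assert b.pop(100) == " world"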
from log import *
@@ -8,9 +8,9 @@ statistics afterwards.
 """
 __no_execute__ = 1
-execfile("main.py")
-import profile, pstats
-profile.run("Globals.Main.Main(%s)" % repr(sys.argv[1:]), "profile-output")
+import sys, rdiff_backup.Main, profile, pstats
+profile.run("rdiff_backup.Main.Main(%s)" % repr(sys.argv[1:]),
+            "profile-output")
 p = pstats.Stats("profile-output")
 p.sort_stats('time')
 p.print_stats(40)
......
@@ -217,6 +217,7 @@ class RORPath(RPathStatic):
                   (not Globals.change_ownership or self.issym())):
                 # Don't compare gid/uid for symlinks or if not change_ownership
                 pass
+            elif key == 'atime' and not Globals.preserve_atime: pass
             elif key == 'devloc' or key == 'inode' or key == 'nlink': pass
             elif (not other.data.has_key(key) or
                   self.data[key] != other.data[key]): return None
@@ -425,7 +426,7 @@ class RPath(RORPath):
         if base is not None: self.path = "/".join((base,) + index)
         self.file = None
         if data or base is None: self.data = data
-        else: self.setdata()
+        else: self.data = self.conn.C.make_file_dict(self.path)
     def __str__(self):
         return "Path: %s\nIndex: %s\nData: %s" % (self.path, self.index,
@@ -448,6 +449,10 @@ class RPath(RORPath):
         self.path = "/".join((self.base,) + self.index)
     def setdata(self):
+        """Set data dictionary using C extension"""
+        self.data = self.conn.C.make_file_dict(self.path)
+
+    def setdata_old(self):
         """Create the data dictionary"""
         statblock = self.conn.RPathStatic.tupled_lstat(self.path)
         if statblock is None:
......
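For comparison, here is a rough pure-Python approximation (via os.lstat) of the dictionary that conn.C.make_file_dict returns for a regular file. This sketch ignores the symlink, device, and special-file branches that the C code handles, and make_file_dict_py is an illustrative name only.

import os, stat

def make_file_dict_py(path):
    try: s = os.lstat(path)
    except OSError:
        return {"type": None}            # the C code does this for ENOENT/ENOTDIR
    if not stat.S_ISREG(s.st_mode):
        raise NotImplementedError("sketch covers regular files only")
    return {"type": "reg", "size": s.st_size,
            "perms": s.st_mode & stat.S_IRWXU,
            "uid": s.st_uid, "gid": s.st_gid,
            "inode": s.st_ino, "devloc": s.st_dev, "nlink": s.st_nlink,
            "mtime": int(s.st_mtime), "atime": int(s.st_atime)}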
@@ -94,7 +94,9 @@ class Select:
             self.starting_index = starting_index
             self.iter = self.iterate_starting_from(self.dsrpath,
                             self.iterate_starting_from, sel_func)
-        else: self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
+        elif self.quoting_on:
+            self.iter = self.Iterate(self.dsrpath, self.Iterate, sel_func)
+        else: self.iter = self.Iterate_fast(self.dsrpath, sel_func)
         # only iterate parents if we are not starting from beginning
         self.iterate_parents = starting_index is not None and iterate_parents
@@ -102,6 +104,52 @@ class Select:
         self.__iter__ = lambda: self
         return self
+    def Iterate_fast(self, dsrpath, sel_func):
+        """Like Iterate, but don't recur, saving time
+
+        This is a bit harder to read than Iterate/iterate_in_dir, but
+        it should be faster because it only recurs to half as much
+        depth.  It doesn't handle the quoting case.
+
+        """
+        def error_handler(exc, filename):
+            Log("Error initializing file %s/%s" % (dsrpath.path, filename), 2)
+            return None
+
+        def diryield(dsrpath):
+            s = sel_func(dsrpath)
+            if s == 0: return
+            elif s == 1:
+                yield dsrpath
+                for filename in Robust.listrp(dsrpath):
+                    new_dsrp = Robust.check_common_error(error_handler,
+                                             dsrpath.append, [filename])
+                    if new_dsrp:
+                        if new_dsrp.isdir():
+                            for dsrp in diryield(new_dsrp): yield dsrp
+                        elif sel_func(new_dsrp) == 1: yield new_dsrp
+            elif s == 2:
+                yielded_something = None
+                for filename in Robust.listrp(dsrpath):
+                    new_dsrp = Robust.check_common_error(error_handler,
+                                             dsrpath.append, [filename])
+                    if new_dsrp:
+                        if new_dsrp.isdir():
+                            for dsrp in diryield(new_dsrp):
+                                if not yielded_something:
+                                    yielded_something = 1
+                                    yield dsrpath
+                                yield dsrp
+                        elif sel_func(new_dsrp) == 1:
+                            if not yielded_something:
+                                yielded_something = 1
+                                yield dsrpath
+                            yield new_dsrp
+
+        if dsrpath.isdir():
+            for dsrp in diryield(dsrpath): yield dsrp
+        elif sel_func(dsrpath) == 1: yield dsrpath
+
     def Iterate(self, dsrpath, rec_func, sel_func):
         """Return iterator yielding dsrps in dsrpath
......
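The interesting part of Iterate_fast is the s == 2 branch: a directory is yielded lazily, only once the first selected entry beneath it turns up, so empty or fully excluded directories never appear in the output. Below is a stripped-down illustration of that pattern over a plain nested-tuple tree; it is not rdiff-backup code, and lazy_parent_walk plus the tree layout are invented for the example.

def lazy_parent_walk(tree, selected):
    """Directories are (name, [children]); files are plain strings."""
    name, children = tree
    yielded_parent = False
    for child in children:
        if isinstance(child, tuple):          # subdirectory: recurse
            for item in lazy_parent_walk(child, selected):
                if not yielded_parent:
                    yielded_parent = True
                    yield name                # yield the parent lazily
                yield item
        elif selected(child):                 # matching file
            if not yielded_parent:
                yielded_parent = True
                yield name
            yield child

tree = ("root", [("empty", []), ("docs", ["a.txt", "b.log"])])
assert list(lazy_parent_walk(tree, lambda f: f.endswith(".txt"))) == \
       ["root", "docs", "a.txt"]             # "empty" is never yielded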