Commit 52b9d2c2 authored by joshn

Use Unicode for paths internally to add support for Unicode on Windows.


git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup@1053 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent a7274518
New in v1.3.4 (????/??/??) New in v1.3.4 (????/??/??)
--------------------------- ---------------------------
Start using Unicode internally for filenames. This fixes Unicode support
on Windows (Josh Nisly)
Don't print "Fatal Error" if --check-destination-dir completed successfully. Don't print "Fatal Error" if --check-destination-dir completed successfully.
Thanks to Serge Zub for the suggestion. (Andrew Ferguson) Thanks to Serge Zub for the suggestion. (Andrew Ferguson)
......
...@@ -158,7 +158,10 @@ class QuotedRPath(rpath.RPath): ...@@ -158,7 +158,10 @@ class QuotedRPath(rpath.RPath):
correctly and append()ed to the current QuotedRPath. correctly and append()ed to the current QuotedRPath.
""" """
return map(unquote, self.conn.os.listdir(self.path)) path = self.path
if type(path) != unicode:
path = unicode(path, 'utf-8')
return map(unquote, self.conn.os.listdir(path))
def __str__(self): def __str__(self):
return "QuotedPath: %s\nIndex: %s\nData: %s" % \ return "QuotedPath: %s\nIndex: %s\nData: %s" % \
......
...@@ -57,7 +57,8 @@ class ExtendedAttributes: ...@@ -57,7 +57,8 @@ class ExtendedAttributes:
def read_from_rp(self, rp): def read_from_rp(self, rp):
"""Set the extended attributes from an rpath""" """Set the extended attributes from an rpath"""
try: try:
attr_list = rp.conn.xattr.listxattr(rp.path, rp.issym()) attr_list = rp.conn.xattr.listxattr(rp.path.encode('utf-8'),
rp.issym())
except IOError, exc: except IOError, exc:
if exc[0] in (errno.EOPNOTSUPP, errno.EPERM, errno.ETXTBSY): if exc[0] in (errno.EOPNOTSUPP, errno.EPERM, errno.ETXTBSY):
return # if not supported, consider empty return # if not supported, consider empty
...@@ -74,7 +75,8 @@ class ExtendedAttributes: ...@@ -74,7 +75,8 @@ class ExtendedAttributes:
continue continue
try: try:
self.attr_dict[attr] = \ self.attr_dict[attr] = \
rp.conn.xattr.getxattr(rp.path, attr, rp.issym()) rp.conn.xattr.getxattr(rp.path.encode('utf-8'),
attr, rp.issym())
except IOError, exc: except IOError, exc:
# File probably modified while reading, just continue # File probably modified while reading, just continue
if exc[0] == errno.ENODATA: continue if exc[0] == errno.ENODATA: continue
...@@ -86,9 +88,11 @@ class ExtendedAttributes: ...@@ -86,9 +88,11 @@ class ExtendedAttributes:
def clear_rp(self, rp): def clear_rp(self, rp):
"""Delete all the extended attributes in rpath""" """Delete all the extended attributes in rpath"""
try: try:
for name in rp.conn.xattr.listxattr(rp.path, rp.issym()): for name in rp.conn.xattr.listxattr(rp.path.encode('utf-8'),
rp.issym()):
try: try:
rp.conn.xattr.removexattr(rp.path, name, rp.issym()) rp.conn.xattr.removexattr(rp.path.encode('utf-8'),
name, rp.issym())
except IOError, exc: except IOError, exc:
# SELinux attributes cannot be removed, and we don't want # SELinux attributes cannot be removed, and we don't want
# to bail out or be too noisy at low log levels. # to bail out or be too noisy at low log levels.
...@@ -111,7 +115,8 @@ class ExtendedAttributes: ...@@ -111,7 +115,8 @@ class ExtendedAttributes:
self.clear_rp(rp) self.clear_rp(rp)
for (name, value) in self.attr_dict.iteritems(): for (name, value) in self.attr_dict.iteritems():
try: try:
rp.conn.xattr.setxattr(rp.path, name, value, 0, rp.issym()) rp.conn.xattr.setxattr(rp.path.encode('utf-8'), name,
value, 0, rp.issym())
except IOError, exc: except IOError, exc:
# Mac and Linux attributes have different namespaces, so # Mac and Linux attributes have different namespaces, so
# fail gracefully if can't call setxattr # fail gracefully if can't call setxattr
...@@ -149,13 +154,14 @@ def ea_compare_rps(rp1, rp2): ...@@ -149,13 +154,14 @@ def ea_compare_rps(rp1, rp2):
def EA2Record(ea): def EA2Record(ea):
"""Convert ExtendedAttributes object to text record""" """Convert ExtendedAttributes object to text record"""
str_list = ['# file: %s' % C.acl_quote(ea.get_indexpath())] str_list = ['# file: %s' % C.acl_quote(ea.get_indexpath().encode('utf-8'))]
for (name, val) in ea.attr_dict.iteritems(): for (name, val) in ea.attr_dict.iteritems():
if not val: str_list.append(name) if not val: str_list.append(name)
else: else:
encoded_val = base64.encodestring(val).replace('\n', '') encoded_val = base64.encodestring(val).replace('\n', '')
try: try:
str_list.append('%s=0s%s' % (C.acl_quote(name), encoded_val)) str_list.append('%s=0s%s' % (C.acl_quote(name.encode('utf-8')),
encoded_val))
except UnicodeEncodeError: except UnicodeEncodeError:
log.Log("Warning: unable to store Unicode extended attribute %s" log.Log("Warning: unable to store Unicode extended attribute %s"
% repr(name), 3) % repr(name), 3)
...@@ -169,7 +175,8 @@ def Record2EA(record): ...@@ -169,7 +175,8 @@ def Record2EA(record):
raise metadata.ParsingError("Bad record beginning: " + first[:8]) raise metadata.ParsingError("Bad record beginning: " + first[:8])
filename = first[8:] filename = first[8:]
if filename == '.': index = () if filename == '.': index = ()
else: index = tuple(C.acl_unquote(filename).split('/')) else: index = tuple(unicode(C.acl_unquote(filename.encode('utf-8')),
'utf-8').split('/'))
ea = ExtendedAttributes(index) ea = ExtendedAttributes(index)
for line in lines: for line in lines:
...@@ -194,7 +201,7 @@ class EAExtractor(metadata.FlatExtractor): ...@@ -194,7 +201,7 @@ class EAExtractor(metadata.FlatExtractor):
def filename_to_index(self, filename): def filename_to_index(self, filename):
"""Convert possibly quoted filename to index tuple""" """Convert possibly quoted filename to index tuple"""
if filename == '.': return () if filename == '.': return ()
else: return tuple(C.acl_unquote(filename).split('/')) else: return tuple(C.acl_unquote(filename.encode('utf-8')).split('/'))
class ExtendedAttributesFile(metadata.FlatFile): class ExtendedAttributesFile(metadata.FlatFile):
"""Store/retrieve EAs from extended_attributes file""" """Store/retrieve EAs from extended_attributes file"""
...@@ -379,7 +386,7 @@ def set_rp_acl(rp, entry_list = None, default_entry_list = None, ...@@ -379,7 +386,7 @@ def set_rp_acl(rp, entry_list = None, default_entry_list = None,
else: acl = posix1e.ACL() else: acl = posix1e.ACL()
try: try:
acl.applyto(rp.path) acl.applyto(rp.path.encode('utf-8'))
except IOError, exc: except IOError, exc:
if exc[0] == errno.EOPNOTSUPP: if exc[0] == errno.EOPNOTSUPP:
log.Log("Warning: unable to set ACL on %s: %s" % log.Log("Warning: unable to set ACL on %s: %s" %
...@@ -391,12 +398,12 @@ def set_rp_acl(rp, entry_list = None, default_entry_list = None, ...@@ -391,12 +398,12 @@ def set_rp_acl(rp, entry_list = None, default_entry_list = None,
if default_entry_list: if default_entry_list:
def_acl = list_to_acl(default_entry_list, map_names) def_acl = list_to_acl(default_entry_list, map_names)
else: def_acl = posix1e.ACL() else: def_acl = posix1e.ACL()
def_acl.applyto(rp.path, posix1e.ACL_TYPE_DEFAULT) def_acl.applyto(rp.path.encode('utf-8'), posix1e.ACL_TYPE_DEFAULT)
def get_acl_lists_from_rp(rp): def get_acl_lists_from_rp(rp):
"""Returns (acl_list, def_acl_list) from an rpath. Call locally""" """Returns (acl_list, def_acl_list) from an rpath. Call locally"""
assert rp.conn is Globals.local_connection assert rp.conn is Globals.local_connection
try: acl = posix1e.ACL(file=rp.path) try: acl = posix1e.ACL(file=rp.path.encode('utf-8'))
except IOError, exc: except IOError, exc:
if exc[0] == errno.EOPNOTSUPP: if exc[0] == errno.EOPNOTSUPP:
acl = None acl = None
...@@ -406,7 +413,7 @@ def get_acl_lists_from_rp(rp): ...@@ -406,7 +413,7 @@ def get_acl_lists_from_rp(rp):
acl = None acl = None
else: raise else: raise
if rp.isdir(): if rp.isdir():
try: def_acl = posix1e.ACL(filedef=rp.path) try: def_acl = posix1e.ACL(filedef=rp.path.encode('utf-8'))
except IOError, exc: except IOError, exc:
if exc[0] == errno.EOPNOTSUPP: if exc[0] == errno.EOPNOTSUPP:
def_acl = None def_acl = None
...@@ -533,7 +540,8 @@ def acl_compare_rps(rp1, rp2): ...@@ -533,7 +540,8 @@ def acl_compare_rps(rp1, rp2):
def ACL2Record(acl): def ACL2Record(acl):
"""Convert an AccessControlLists object into a text record""" """Convert an AccessControlLists object into a text record"""
return '# file: %s\n%s\n' % (C.acl_quote(acl.get_indexpath()), str(acl)) return '# file: %s\n%s\n' % \
(C.acl_quote(acl.get_indexpath().encode('utf-8')), str(acl))
def Record2ACL(record): def Record2ACL(record):
"""Convert text record to an AccessControlLists object""" """Convert text record to an AccessControlLists object"""
...@@ -543,7 +551,8 @@ def Record2ACL(record): ...@@ -543,7 +551,8 @@ def Record2ACL(record):
raise metadata.ParsingError("Bad record beginning: "+ first_line) raise metadata.ParsingError("Bad record beginning: "+ first_line)
filename = first_line[8:] filename = first_line[8:]
if filename == '.': index = () if filename == '.': index = ()
else: index = tuple(C.acl_unquote(filename).split('/')) else: index = tuple(unicode(C.acl_unquote(filename.encode('utf-8')),
'utf-8').split('/'))
return AccessControlLists(index, record[newline_pos:]) return AccessControlLists(index, record[newline_pos:])
class ACLExtractor(EAExtractor): class ACLExtractor(EAExtractor):
......
...@@ -125,7 +125,11 @@ class Logger: ...@@ -125,7 +125,11 @@ class Logger:
"""Write the message to the log file, if possible""" """Write the message to the log file, if possible"""
if self.log_file_open: if self.log_file_open:
if self.log_file_local: if self.log_file_local:
self.logfp.write(self.format(message, self.verbosity)) str = self.format(message, self.verbosity)
if type(str) != unicode:
str = unicode(str, 'utf-8')
str = str.encode('utf-8')
self.logfp.write(str)
self.logfp.flush() self.logfp.flush()
else: self.log_file_conn.log.Log.log_to_file(message) else: self.log_file_conn.log.Log.log_to_file(message)
...@@ -133,7 +137,14 @@ class Logger: ...@@ -133,7 +137,14 @@ class Logger:
"""Write message to stdout/stderr""" """Write message to stdout/stderr"""
if verbosity <= 2 or Globals.server: termfp = sys.stderr if verbosity <= 2 or Globals.server: termfp = sys.stderr
else: termfp = sys.stdout else: termfp = sys.stdout
termfp.write(self.format(message, self.term_verbosity)) str = self.format(message, self.term_verbosity)
if type(str) != unicode:
str = unicode(str, 'utf-8')
try:
# Try to log as unicode, but fall back to ascii (for Windows)
termfp.write(str.encode('utf-8'))
except UnicodeDecodeError:
termfp.write(str.encode('ascii', 'replace'))
def conn(self, direction, result, req_num): def conn(self, direction, result, req_num):
"""Log some data on the connection """Log some data on the connection
...@@ -165,10 +176,17 @@ class Logger: ...@@ -165,10 +176,17 @@ class Logger:
def exception_to_string(self, arglist = []): def exception_to_string(self, arglist = []):
"""Return string version of current exception plus what's in arglist""" """Return string version of current exception plus what's in arglist"""
type, value, tb = sys.exc_info() type, value, tb = sys.exc_info()
s = ("Exception '%s' raised of class '%s':\n%s" % s = (u"Exception '%s' raised of class '%s':\n%s" %
(value, type, "".join(traceback.format_tb(tb)))) (value, type, u"".join(traceback.format_tb(tb))))
s = s.encode('ascii', 'replace')
if arglist: if arglist:
s += "__Arguments:\n" + "\n".join(map(str, arglist)) s += "__Arguments:"
for arg in arglist:
s += "\n"
try:
s += str(arg)
except UnicodeError:
s += unicode(arg).encode('ascii', 'replace')
return s return s
def exception(self, only_terminal = 0, verbosity = 5): def exception(self, only_terminal = 0, verbosity = 5):
...@@ -259,7 +277,8 @@ class ErrorLog: ...@@ -259,7 +277,8 @@ class ErrorLog:
"""Return log string to put in error log""" """Return log string to put in error log"""
assert (error_type == "ListError" or error_type == "UpdateError" or assert (error_type == "ListError" or error_type == "UpdateError" or
error_type == "SpecialFileError"), "Unknown type "+error_type error_type == "SpecialFileError"), "Unknown type "+error_type
return "%s %s %s" % (error_type, cls.get_indexpath(rp), str(exc)) str = u"%s %s %s" % (error_type, cls.get_indexpath(rp), unicode(exc))
return str.encode('utf-8')
def close(cls): def close(cls):
"""Close the error log file""" """Close the error log file"""
......
...@@ -55,7 +55,7 @@ field names and values. ...@@ -55,7 +55,7 @@ field names and values.
""" """
from __future__ import generators from __future__ import generators
import re, gzip, os, binascii import re, gzip, os, binascii, codecs
import log, Globals, rpath, Time, robust, increment, static, rorpiter import log, Globals, rpath, Time, robust, increment, static, rorpiter
class ParsingError(Exception): class ParsingError(Exception):
...@@ -376,16 +376,18 @@ class FlatFile: ...@@ -376,16 +376,18 @@ class FlatFile:
compress = 1 compress = 1
if mode == 'r': if mode == 'r':
self.rp = rp_base self.rp = rp_base
self.fileobj = self.rp.open("rb", compress) self.fileobj = rpath.UnicodeFile(self.rp.open("rb", compress))
else: else:
assert mode == 'w' assert mode == 'w'
if compress and check_path and not rp_base.isinccompressed(): if compress and check_path and not rp_base.isinccompressed():
def callback(rp): self.rp = rp def callback(rp): self.rp = rp
self.fileobj = rpath.MaybeGzip(rp_base, callback) self.fileobj = rpath.UnicodeFile(rpath.MaybeGzip(rp_base,
callback))
else: else:
self.rp = rp_base self.rp = rp_base
assert not self.rp.lstat(), self.rp assert not self.rp.lstat(), self.rp
self.fileobj = self.rp.open("wb", compress = compress) self.fileobj = rpath.UnicodeFile(self.rp.open("wb",
compress = compress))
def write_record(self, record): def write_record(self, record):
"""Write a (text) record into the file""" """Write a (text) record into the file"""
......
...@@ -35,7 +35,7 @@ are dealing with are local or remote. ...@@ -35,7 +35,7 @@ are dealing with are local or remote.
""" """
import os, stat, re, sys, shutil, gzip, socket, time, errno import os, stat, re, sys, shutil, gzip, socket, time, errno, codecs
import Globals, Time, static, log, user_group, C import Globals, Time, static, log, user_group, C
try: try:
...@@ -284,6 +284,8 @@ def make_file_dict(filename): ...@@ -284,6 +284,8 @@ def make_file_dict(filename):
""" """
if os.name != 'nt': if os.name != 'nt':
try: try:
if type(filename) == unicode:
filename = filename.encode('utf-8')
return C.make_file_dict(filename) return C.make_file_dict(filename)
except OSError, error: except OSError, error:
# Unicode filenames should be processed by the Python version # Unicode filenames should be processed by the Python version
...@@ -333,7 +335,7 @@ def make_file_dict_python(filename): ...@@ -333,7 +335,7 @@ def make_file_dict_python(filename):
data['nlink'] = statblock[stat.ST_NLINK] data['nlink'] = statblock[stat.ST_NLINK]
if os.name == 'nt': if os.name == 'nt':
attribs = win32file.GetFileAttributes(filename) attribs = win32file.GetFileAttributesW(filename)
if attribs & winnt.FILE_ATTRIBUTE_REPARSE_POINT: if attribs & winnt.FILE_ATTRIBUTE_REPARSE_POINT:
data['type'] = 'sym' data['type'] = 'sym'
data['linkname'] = None data['linkname'] = None
...@@ -995,7 +997,12 @@ class RPath(RORPath): ...@@ -995,7 +997,12 @@ class RPath(RORPath):
def listdir(self): def listdir(self):
"""Return list of string paths returned by os.listdir""" """Return list of string paths returned by os.listdir"""
return self.conn.os.listdir(self.path) path = self.path
# Pass unicode to os.listdir so that the returned
# entries are unicode as well.
if type(path) != unicode:
path = unicode(path, 'utf-8')
return self.conn.os.listdir(path)
def symlink(self, linktext): def symlink(self, linktext):
"""Make symlink at self.path pointing to linktext""" """Make symlink at self.path pointing to linktext"""
...@@ -1406,6 +1413,23 @@ class RPath(RORPath): ...@@ -1406,6 +1413,23 @@ class RPath(RORPath):
write_win_acl(self, acl) write_win_acl(self, acl)
self.data['win_acl'] = acl self.data['win_acl'] = acl
class UnicodeFile:
    """Wrap a byte-oriented file object so callers read and write unicode.

    Data is stored as UTF-8 in the wrapped file object.  read() returns
    unicode; write() accepts either unicode or UTF-8 encoded byte strings.
    """

    def __init__(self, fileobj):
        """Wrap fileobj, which must provide read(), write() and close()."""
        # Imported locally so the class does not rely on a module-level
        # import being present.
        import codecs
        self.fileobj = fileobj
        # An incremental decoder keeps an incomplete multi-byte sequence
        # from the end of one chunk and finishes it with the next chunk,
        # instead of raising UnicodeDecodeError in the middle of a
        # character.
        self._decoder = codecs.getincrementaldecoder('utf-8')()

    def read(self, length = -1):
        """Read up to length bytes from the file and return them decoded.

        A negative length reads everything that is left.  An empty
        result means end of file.  UnicodeDecodeError is raised for
        invalid UTF-8, including a file that ends inside a character.
        """
        if length < 0:
            return self._decoder.decode(self.fileobj.read(length), True)
        if length == 0:
            return u''
        while True:
            chunk = self.fileobj.read(length)
            # An empty chunk is end of file: flush the decoder so that a
            # truncated trailing character is reported, not dropped.
            text = self._decoder.decode(chunk, not chunk)
            # Keep reading while the bytes seen so far are only part of
            # one character; returning u'' here would look like EOF.
            if text or not chunk:
                return text

    def write(self, buf):
        """Write buf to the file as UTF-8.

        Byte strings are assumed to be UTF-8 and are validated by
        decoding them first.
        """
        if not isinstance(buf, unicode):
            buf = unicode(buf, 'utf-8')
        return self.fileobj.write(buf.encode('utf-8'))

    def close(self):
        """Close the wrapped file object and return its result."""
        return self.fileobj.close()
class RPathFileHook: class RPathFileHook:
"""Look like a file, but add closing hook""" """Look like a file, but add closing hook"""
def __init__(self, file, closing_thunk): def __init__(self, file, closing_thunk):
...@@ -1429,6 +1453,18 @@ class GzipFile(gzip.GzipFile): ...@@ -1429,6 +1453,18 @@ class GzipFile(gzip.GzipFile):
messages. Use this class instead to clean those up. messages. Use this class instead to clean those up.
""" """
def __init__(self, filename=None, mode=None):
    """Open filename as a gzip file, accepting unicode or UTF-8 paths.

    The file itself is opened through its unicode name, while the
    UTF-8 encoded name is what is handed to gzip.GzipFile, which
    needs an encoded filename to write to the file.  A mode without
    'b' has 'b' appended; with no mode the file is opened 'rb'.
    """
    if mode and 'b' not in mode:
        mode += 'b'
    if not isinstance(filename, unicode):
        filename = unicode(filename, 'utf-8')
    fileobj = open(filename, mode or 'rb')
    try:
        gzip.GzipFile.__init__(self, filename.encode('utf-8'),
                               mode=mode, fileobj=fileobj)
    except:
        # Do not leave the file open if the gzip layer fails to
        # initialise; re-raise so the caller sees the original error.
        fileobj.close()
        raise
def __del__(self): pass def __del__(self): pass
def __getattr__(self, name): def __getattr__(self, name):
if name == 'fileno': return self.fileobj.fileno if name == 'fileno': return self.fileobj.fileno
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
"""Generate and process aggregated backup information""" """Generate and process aggregated backup information"""
import re, os, time import re, os, time
import Globals, Time, increment, log, static, metadata import Globals, Time, increment, log, static, metadata, rpath
class StatsException(Exception): pass class StatsException(Exception): pass
...@@ -219,13 +219,13 @@ class StatsObj: ...@@ -219,13 +219,13 @@ class StatsObj:
def write_stats_to_rp(self, rp): def write_stats_to_rp(self, rp):
"""Write statistics string to given rpath""" """Write statistics string to given rpath"""
fp = rp.open("wb") fp = rpath.UnicodeFile(rp.open("wb"))
fp.write(self.get_stats_string()) fp.write(self.get_stats_string())
assert not fp.close() assert not fp.close()
def read_stats_from_rp(self, rp): def read_stats_from_rp(self, rp):
"""Set statistics from rpath, return self for convenience""" """Set statistics from rpath, return self for convenience"""
fp = rp.open("r") fp = rpath.UnicodeFile(rp.open("r"))
self.set_stats_from_string(fp.read()) self.set_stats_from_string(fp.read())
fp.close() fp.close()
return self return self
...@@ -364,7 +364,8 @@ class FileStats: ...@@ -364,7 +364,8 @@ class FileStats:
suffix = Globals.compression and 'data.gz' or 'data' suffix = Globals.compression and 'data.gz' or 'data'
cls._rp = increment.get_inc(rpbase, suffix, Time.curtime) cls._rp = increment.get_inc(rpbase, suffix, Time.curtime)
assert not cls._rp.lstat() assert not cls._rp.lstat()
cls._fileobj = cls._rp.open("wb", compress = Globals.compression) cls._fileobj = rpath.UnicodeFile(cls._rp.open("wb",
compress = Globals.compression))
cls._line_sep = Globals.null_separator and '\0' or '\n' cls._line_sep = Globals.null_separator and '\0' or '\n'
cls.write_docstring() cls.write_docstring()
......
...@@ -181,7 +181,7 @@ class ACL: ...@@ -181,7 +181,7 @@ class ACL:
def __str__(self): def __str__(self):
return '# file: %s\n%s\n' % \ return '# file: %s\n%s\n' % \
(C.acl_quote(self.get_indexpath()), unicode(self.__acl)) (self.get_indexpath(), unicode(self.__acl))
def from_string(self, acl_str): def from_string(self, acl_str):
lines = acl_str.splitlines() lines = acl_str.splitlines()
...@@ -189,7 +189,7 @@ class ACL: ...@@ -189,7 +189,7 @@ class ACL:
raise metadata.ParsingError("Bad record beginning: " + lines[0][:8]) raise metadata.ParsingError("Bad record beginning: " + lines[0][:8])
filename = lines[0][8:] filename = lines[0][8:]
if filename == '.': self.index = () if filename == '.': self.index = ()
else: self.index = tuple(C.acl_unquote(filename).split('/')) else: self.index = tuple(filename.split('/'))
self.__acl = lines[1] self.__acl = lines[1]
def Record2WACL(record): def Record2WACL(record):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment