Commit 52b9d2c2 authored by joshn

Use Unicode for paths internally to add support for Unicode on Windows.


git-svn-id: http://svn.savannah.nongnu.org/svn/rdiff-backup@1053 2b77aa54-bcbc-44c9-a7ec-4f6cf2b41109
parent a7274518
New in v1.3.4 (????/??/??)
---------------------------
Start using Unicode internally for filenames. This fixes Unicode support
on Windows (Josh Nisly)
Don't print "Fatal Error" if --check-destination-dir completed successfully.
Thanks to Serge Zub for the suggestion. (Andrew Ferguson)
......
......@@ -158,7 +158,10 @@ class QuotedRPath(rpath.RPath):
correctly and append()ed to the current QuotedRPath.
"""
return map(unquote, self.conn.os.listdir(self.path))
path = self.path
if type(path) != unicode:
path = unicode(path, 'utf-8')
return map(unquote, self.conn.os.listdir(path))
def __str__(self):
return "QuotedPath: %s\nIndex: %s\nData: %s" % \
......
......@@ -57,7 +57,8 @@ class ExtendedAttributes:
def read_from_rp(self, rp):
"""Set the extended attributes from an rpath"""
try:
attr_list = rp.conn.xattr.listxattr(rp.path, rp.issym())
attr_list = rp.conn.xattr.listxattr(rp.path.encode('utf-8'),
rp.issym())
except IOError, exc:
if exc[0] in (errno.EOPNOTSUPP, errno.EPERM, errno.ETXTBSY):
return # if not supported, consider empty
......@@ -74,7 +75,8 @@ class ExtendedAttributes:
continue
try:
self.attr_dict[attr] = \
rp.conn.xattr.getxattr(rp.path, attr, rp.issym())
rp.conn.xattr.getxattr(rp.path.encode('utf-8'),
attr, rp.issym())
except IOError, exc:
# File probably modified while reading, just continue
if exc[0] == errno.ENODATA: continue
......@@ -86,9 +88,11 @@ class ExtendedAttributes:
def clear_rp(self, rp):
"""Delete all the extended attributes in rpath"""
try:
for name in rp.conn.xattr.listxattr(rp.path, rp.issym()):
for name in rp.conn.xattr.listxattr(rp.path.encode('utf-8'),
rp.issym()):
try:
rp.conn.xattr.removexattr(rp.path, name, rp.issym())
rp.conn.xattr.removexattr(rp.path.encode('utf-8'),
name, rp.issym())
except IOError, exc:
# SELinux attributes cannot be removed, and we don't want
# to bail out or be too noisy at low log levels.
......@@ -111,7 +115,8 @@ class ExtendedAttributes:
self.clear_rp(rp)
for (name, value) in self.attr_dict.iteritems():
try:
rp.conn.xattr.setxattr(rp.path, name, value, 0, rp.issym())
rp.conn.xattr.setxattr(rp.path.encode('utf-8'), name,
value, 0, rp.issym())
except IOError, exc:
# Mac and Linux attributes have different namespaces, so
# fail gracefully if can't call setxattr
......@@ -149,13 +154,14 @@ def ea_compare_rps(rp1, rp2):
def EA2Record(ea):
"""Convert ExtendedAttributes object to text record"""
str_list = ['# file: %s' % C.acl_quote(ea.get_indexpath())]
str_list = ['# file: %s' % C.acl_quote(ea.get_indexpath().encode('utf-8'))]
for (name, val) in ea.attr_dict.iteritems():
if not val: str_list.append(name)
else:
encoded_val = base64.encodestring(val).replace('\n', '')
try:
str_list.append('%s=0s%s' % (C.acl_quote(name), encoded_val))
str_list.append('%s=0s%s' % (C.acl_quote(name.encode('utf-8')),
encoded_val))
except UnicodeEncodeError:
log.Log("Warning: unable to store Unicode extended attribute %s"
% repr(name), 3)
......@@ -169,7 +175,8 @@ def Record2EA(record):
raise metadata.ParsingError("Bad record beginning: " + first[:8])
filename = first[8:]
if filename == '.': index = ()
else: index = tuple(C.acl_unquote(filename).split('/'))
else: index = tuple(unicode(C.acl_unquote(filename.encode('utf-8')),
'utf-8').split('/'))
ea = ExtendedAttributes(index)
for line in lines:
......@@ -194,7 +201,7 @@ class EAExtractor(metadata.FlatExtractor):
def filename_to_index(self, filename):
"""Convert possibly quoted filename to index tuple"""
if filename == '.': return ()
else: return tuple(C.acl_unquote(filename).split('/'))
else: return tuple(C.acl_unquote(filename.encode('utf-8')).split('/'))
class ExtendedAttributesFile(metadata.FlatFile):
"""Store/retrieve EAs from extended_attributes file"""
......@@ -379,7 +386,7 @@ def set_rp_acl(rp, entry_list = None, default_entry_list = None,
else: acl = posix1e.ACL()
try:
acl.applyto(rp.path)
acl.applyto(rp.path.encode('utf-8'))
except IOError, exc:
if exc[0] == errno.EOPNOTSUPP:
log.Log("Warning: unable to set ACL on %s: %s" %
......@@ -391,12 +398,12 @@ def set_rp_acl(rp, entry_list = None, default_entry_list = None,
if default_entry_list:
def_acl = list_to_acl(default_entry_list, map_names)
else: def_acl = posix1e.ACL()
def_acl.applyto(rp.path, posix1e.ACL_TYPE_DEFAULT)
def_acl.applyto(rp.path.encode('utf-8'), posix1e.ACL_TYPE_DEFAULT)
def get_acl_lists_from_rp(rp):
"""Returns (acl_list, def_acl_list) from an rpath. Call locally"""
assert rp.conn is Globals.local_connection
try: acl = posix1e.ACL(file=rp.path)
try: acl = posix1e.ACL(file=rp.path.encode('utf-8'))
except IOError, exc:
if exc[0] == errno.EOPNOTSUPP:
acl = None
......@@ -406,7 +413,7 @@ def get_acl_lists_from_rp(rp):
acl = None
else: raise
if rp.isdir():
try: def_acl = posix1e.ACL(filedef=rp.path)
try: def_acl = posix1e.ACL(filedef=rp.path.encode('utf-8'))
except IOError, exc:
if exc[0] == errno.EOPNOTSUPP:
def_acl = None
......@@ -533,7 +540,8 @@ def acl_compare_rps(rp1, rp2):
def ACL2Record(acl):
"""Convert an AccessControlLists object into a text record"""
return '# file: %s\n%s\n' % (C.acl_quote(acl.get_indexpath()), str(acl))
return '# file: %s\n%s\n' % \
(C.acl_quote(acl.get_indexpath().encode('utf-8')), str(acl))
def Record2ACL(record):
"""Convert text record to an AccessControlLists object"""
......@@ -543,7 +551,8 @@ def Record2ACL(record):
raise metadata.ParsingError("Bad record beginning: "+ first_line)
filename = first_line[8:]
if filename == '.': index = ()
else: index = tuple(C.acl_unquote(filename).split('/'))
else: index = tuple(unicode(C.acl_unquote(filename.encode('utf-8')),
'utf-8').split('/'))
return AccessControlLists(index, record[newline_pos:])
class ACLExtractor(EAExtractor):
......
......@@ -125,7 +125,11 @@ class Logger:
"""Write the message to the log file, if possible"""
if self.log_file_open:
if self.log_file_local:
self.logfp.write(self.format(message, self.verbosity))
str = self.format(message, self.verbosity)
if type(str) != unicode:
str = unicode(str, 'utf-8')
str = str.encode('utf-8')
self.logfp.write(str)
self.logfp.flush()
else: self.log_file_conn.log.Log.log_to_file(message)
......@@ -133,7 +137,14 @@ class Logger:
"""Write message to stdout/stderr"""
if verbosity <= 2 or Globals.server: termfp = sys.stderr
else: termfp = sys.stdout
termfp.write(self.format(message, self.term_verbosity))
str = self.format(message, self.term_verbosity)
if type(str) != unicode:
str = unicode(str, 'utf-8')
try:
# Try to log as unicode, but fall back to ascii (for Windows)
termfp.write(str.encode('utf-8'))
except UnicodeDecodeError:
termfp.write(str.encode('ascii', 'replace'))
def conn(self, direction, result, req_num):
"""Log some data on the connection
......@@ -165,10 +176,17 @@ class Logger:
def exception_to_string(self, arglist = []):
"""Return string version of current exception plus what's in arglist"""
type, value, tb = sys.exc_info()
s = ("Exception '%s' raised of class '%s':\n%s" %
(value, type, "".join(traceback.format_tb(tb))))
s = (u"Exception '%s' raised of class '%s':\n%s" %
(value, type, u"".join(traceback.format_tb(tb))))
s = s.encode('ascii', 'replace')
if arglist:
s += "__Arguments:\n" + "\n".join(map(str, arglist))
s += "__Arguments:"
for arg in arglist:
s += "\n"
try:
s += str(arg)
except UnicodeError:
s += unicode(arg).encode('ascii', 'replace')
return s
def exception(self, only_terminal = 0, verbosity = 5):
......@@ -259,7 +277,8 @@ class ErrorLog:
"""Return log string to put in error log"""
assert (error_type == "ListError" or error_type == "UpdateError" or
error_type == "SpecialFileError"), "Unknown type "+error_type
return "%s %s %s" % (error_type, cls.get_indexpath(rp), str(exc))
str = u"%s %s %s" % (error_type, cls.get_indexpath(rp), unicode(exc))
return str.encode('utf-8')
def close(cls):
"""Close the error log file"""
......
......@@ -55,7 +55,7 @@ field names and values.
"""
from __future__ import generators
import re, gzip, os, binascii
import re, gzip, os, binascii, codecs
import log, Globals, rpath, Time, robust, increment, static, rorpiter
class ParsingError(Exception):
......@@ -376,16 +376,18 @@ class FlatFile:
compress = 1
if mode == 'r':
self.rp = rp_base
self.fileobj = self.rp.open("rb", compress)
self.fileobj = rpath.UnicodeFile(self.rp.open("rb", compress))
else:
assert mode == 'w'
if compress and check_path and not rp_base.isinccompressed():
def callback(rp): self.rp = rp
self.fileobj = rpath.MaybeGzip(rp_base, callback)
self.fileobj = rpath.UnicodeFile(rpath.MaybeGzip(rp_base,
callback))
else:
self.rp = rp_base
assert not self.rp.lstat(), self.rp
self.fileobj = self.rp.open("wb", compress = compress)
self.fileobj = rpath.UnicodeFile(self.rp.open("wb",
compress = compress))
def write_record(self, record):
"""Write a (text) record into the file"""
......
......@@ -35,7 +35,7 @@ are dealing with are local or remote.
"""
import os, stat, re, sys, shutil, gzip, socket, time, errno
import os, stat, re, sys, shutil, gzip, socket, time, errno, codecs
import Globals, Time, static, log, user_group, C
try:
......@@ -284,6 +284,8 @@ def make_file_dict(filename):
"""
if os.name != 'nt':
try:
if type(filename) == unicode:
filename = filename.encode('utf-8')
return C.make_file_dict(filename)
except OSError, error:
# Unicode filenames should be processed by the Python version
......@@ -333,7 +335,7 @@ def make_file_dict_python(filename):
data['nlink'] = statblock[stat.ST_NLINK]
if os.name == 'nt':
attribs = win32file.GetFileAttributes(filename)
attribs = win32file.GetFileAttributesW(filename)
if attribs & winnt.FILE_ATTRIBUTE_REPARSE_POINT:
data['type'] = 'sym'
data['linkname'] = None
......@@ -995,7 +997,12 @@ class RPath(RORPath):
def listdir(self):
"""Return list of string paths returned by os.listdir"""
return self.conn.os.listdir(self.path)
path = self.path
# Pass unicode to os.listdir, so that the returned
# entries are in unicode.
if type(path) != unicode:
path = unicode(path, 'utf-8')
return self.conn.os.listdir(path)
def symlink(self, linktext):
"""Make symlink at self.path pointing to linktext"""
......@@ -1406,6 +1413,23 @@ class RPath(RORPath):
write_win_acl(self, acl)
self.data['win_acl'] = acl
class UnicodeFile:
    """Wrap a byte-oriented file object and read/write unicode through it.

    Text is stored as UTF-8 in the wrapped file object: read() hands
    back unicode, and write() accepts either unicode or a UTF-8
    encoded byte string.
    """

    def __init__(self, fileobj):
        """Wrap fileobj; read(), write() and close() are delegated to it."""
        self.fileobj = fileobj
        # A sized read() can stop in the middle of a multi-byte UTF-8
        # character.  The incremental decoder holds such leading bytes
        # back and joins them with the next chunk instead of raising
        # UnicodeDecodeError on otherwise valid data.
        self._decoder = codecs.getincrementaldecoder('utf-8')()

    def read(self, length = -1):
        """Read from the wrapped file and return the data as unicode.

        length is passed straight to the wrapped read(), so it counts
        bytes of the underlying file, not characters; a negative value
        reads everything that is left.  An empty result is returned
        only when the underlying file is exhausted.

        Raises UnicodeDecodeError if the data is not valid UTF-8 or
        ends in the middle of a character.
        """
        data = self.fileobj.read(length)
        # An unsized or empty read means nothing more will follow, so
        # tell the decoder to reject any dangling partial character.
        text = self._decoder.decode(data, length < 0 or not data)
        # An empty result conventionally signals end of file.  If the
        # chunk held only the start of a character, pull in the rest
        # rather than returning a misleading empty string.
        while data and not text:
            data = self.fileobj.read(1)
            text = self._decoder.decode(data, not data)
        return text

    def write(self, buf):
        """Write buf to the wrapped file encoded as UTF-8.

        A byte string is first decoded as UTF-8 (raising
        UnicodeDecodeError if it is not valid) and then re-encoded.
        Returns whatever the wrapped write() returns.
        """
        if not isinstance(buf, unicode):
            buf = unicode(buf, 'utf-8')
        return self.fileobj.write(buf.encode('utf-8'))

    def close(self):
        """Close the wrapped file object and return its close() result."""
        return self.fileobj.close()
class RPathFileHook:
"""Look like a file, but add closing hook"""
def __init__(self, file, closing_thunk):
......@@ -1429,6 +1453,18 @@ class GzipFile(gzip.GzipFile):
messages. Use this class instead to clean those up.
"""
def __init__(self, filename=None, mode=None):
""" This is needed because we need to write an
encoded filename to the file, but use normal
unicode with the filename."""
if mode and 'b' not in mode:
mode += 'b'
if type(filename) != unicode:
filename = unicode(filename, 'utf-8')
fileobj = open(filename, mode or 'rb')
gzip.GzipFile.__init__(self, filename.encode('utf-8'),
mode=mode, fileobj=fileobj)
def __del__(self): pass
def __getattr__(self, name):
if name == 'fileno': return self.fileobj.fileno
......
......@@ -20,7 +20,7 @@
"""Generate and process aggregated backup information"""
import re, os, time
import Globals, Time, increment, log, static, metadata
import Globals, Time, increment, log, static, metadata, rpath
class StatsException(Exception): pass
......@@ -219,13 +219,13 @@ class StatsObj:
def write_stats_to_rp(self, rp):
"""Write statistics string to given rpath"""
fp = rp.open("wb")
fp = rpath.UnicodeFile(rp.open("wb"))
fp.write(self.get_stats_string())
assert not fp.close()
def read_stats_from_rp(self, rp):
"""Set statistics from rpath, return self for convenience"""
fp = rp.open("r")
fp = rpath.UnicodeFile(rp.open("r"))
self.set_stats_from_string(fp.read())
fp.close()
return self
......@@ -364,7 +364,8 @@ class FileStats:
suffix = Globals.compression and 'data.gz' or 'data'
cls._rp = increment.get_inc(rpbase, suffix, Time.curtime)
assert not cls._rp.lstat()
cls._fileobj = cls._rp.open("wb", compress = Globals.compression)
cls._fileobj = rpath.UnicodeFile(cls._rp.open("wb",
compress = Globals.compression))
cls._line_sep = Globals.null_separator and '\0' or '\n'
cls.write_docstring()
......
......@@ -181,7 +181,7 @@ class ACL:
def __str__(self):
return '# file: %s\n%s\n' % \
(C.acl_quote(self.get_indexpath()), unicode(self.__acl))
(self.get_indexpath(), unicode(self.__acl))
def from_string(self, acl_str):
lines = acl_str.splitlines()
......@@ -189,7 +189,7 @@ class ACL:
raise metadata.ParsingError("Bad record beginning: " + lines[0][:8])
filename = lines[0][8:]
if filename == '.': self.index = ()
else: self.index = tuple(C.acl_unquote(filename).split('/'))
else: self.index = tuple(filename.split('/'))
self.__acl = lines[1]
def Record2WACL(record):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment