Commit 5bf0ca88 authored by Jason Madden's avatar Jason Madden

Consistently handle text and binary modes in the three FileObject classes.

And add tests.

Fixes #1441.
parent 625c88b0
......@@ -11,6 +11,19 @@
3.5.9, 3.6.9, 3.7.5 and 3.8.0 (final). It is also tested with PyPy2
7.2 and PyPy 3.6 7.2
- The file objects (FileObjectPosix, FileObjectThread) now
consistently text and binary modes. If neither 'b' nor 't' is given
in the mode, they will read and write native strings. If 't' is
given, they will always work with unicode strings, and 'b' will
always work with byte strings. See :issue:`1441`.
- The file objects accept *encoding*, *errors* and *newline*
arguments. On Python 2, these are only used if 't' is in the mode.
- The default mode for FileObjectPosix changed from ``rb`` to simply
``r``, for consistency with the other file objects and the standard
``open`` and ``io.open`` functions.
1.5a2 (2019-10-21)
==================
......
......@@ -5,8 +5,7 @@ try:
except ImportError:
EBADF = 9
import os
from io import TextIOWrapper
import io
import functools
import sys
......@@ -14,6 +13,7 @@ import sys
from gevent.hub import _get_hub_noargs as get_hub
from gevent._compat import integer_types
from gevent._compat import reraise
from gevent._compat import fspath
from gevent.lock import Semaphore, DummySemaphore
class cancel_wait_ex(IOError):
......@@ -32,7 +32,8 @@ class FileObjectClosed(IOError):
class FileObjectBase(object):
"""
Internal base class to ensure a level of consistency
between FileObjectPosix and FileObjectThread
between :class:`~.FileObjectPosix`, :class:`~.FileObjectThread`
and :class:`~.FileObjectBlock`.
"""
# List of methods we delegate to the wrapping IO object, if they
......@@ -60,17 +61,18 @@ class FileObjectBase(object):
)
# Whether we are translating universal newlines or not.
# Whether we should apply a TextWrapper (the names are historical).
# Subclasses should set these before calling our constructor.
_translate = False
_translate_encoding = None
_translate_errors = None
_translate_newline = None # None means universal
def __init__(self, io, closefd):
def __init__(self, fobj, closefd):
"""
:param io: An io.IOBase-like object.
:param fobj: An io.IOBase-like object.
"""
self._io = io
self._io = fobj
# We don't actually use this property ourself, but we save it (and
# pass it along) for compatibility.
self._close = closefd
......@@ -78,7 +80,9 @@ class FileObjectBase(object):
if self._translate:
# This automatically handles delegation by assigning to
# self.io
self.translate_newlines(None, self._translate_encoding, self._translate_errors)
self.translate_newlines(None,
self._translate_encoding,
self._translate_errors)
else:
self._do_delegate_methods()
......@@ -108,7 +112,7 @@ class FileObjectBase(object):
return method
def translate_newlines(self, mode, *text_args, **text_kwargs):
wrapper = TextIOWrapper(self._io, *text_args, **text_kwargs)
wrapper = io.TextIOWrapper(self._io, *text_args, **text_kwargs)
if mode:
wrapper.mode = mode
self.io = wrapper
......@@ -123,9 +127,9 @@ class FileObjectBase(object):
if self._io is None:
return
io = self._io
fobj = self._io
self._io = None
self._do_close(io, self._close)
self._do_close(fobj, self._close)
def _do_close(self, fobj, closefd):
raise NotImplementedError()
......@@ -147,22 +151,71 @@ class FileObjectBase(object):
def __exit__(self, *args):
self.close()
@classmethod
def _open_raw(cls, fobj, mode='r', buffering=-1,
encoding=None, errors=None, newline=None, closefd=True):
"""
Uses :func:`io.open` on *fobj* and returns the IO object.
This is a compatibility wrapper for Python 2 and Python 3. It
supports PathLike objects for *fobj* on all versions.
If the object is already an object with a ``fileno`` method,
it is returned unchanged.
On Python 2, if the mode only specifies read, write or append,
and encoding and errors are not specified, then
:obj:`io.FileIO` is used to get the IO object. This ensures we
return native strings unless explicitly requested.
.. versionchanged: 1.5
Support closefd for Python 2 native string readers.
"""
if hasattr(fobj, 'fileno'):
return fobj
if not isinstance(fobj, integer_types):
# Not an integer. Support PathLike on Python 2 and Python <= 3.5.
fobj = fspath(fobj)
closefd = True
if bytes is str and mode in ('r', 'r+', 'w', 'w+', 'a', 'a+') \
and encoding is None and errors is None:
# Python 2, default open. Return native str type, not unicode, which
# is what would happen with io.open('r'), but we don't want to open the file
# in binary mode since that skips newline conversion.
fobj = io.FileIO(fobj, mode, closefd=closefd)
if '+' in mode:
BufFactory = io.BufferedRandom
elif mode[0] == 'r':
BufFactory = io.BufferedReader
else:
BufFactory = io.BufferedWriter
if buffering == -1:
fobj = BufFactory(fobj)
elif buffering != 0:
fobj = BufFactory(fobj, buffering)
else:
# Python 3, or we have a mode that Python 2's os.fdopen/open can't handle
# (x) or they explicitly asked for binary or text mode.
fobj = io.open(fobj, mode, buffering, encoding, errors, newline, closefd)
return fobj
class FileObjectBlock(FileObjectBase):
def __init__(self, fobj, *args, **kwargs):
closefd = kwargs.pop('close', True)
if kwargs:
raise TypeError('Unexpected arguments: %r' % kwargs.keys())
if isinstance(fobj, integer_types):
if not closefd:
# we cannot do this, since fdopen object will close the descriptor
raise TypeError('FileObjectBlock does not support close=False on an fd.')
fobj = os.fdopen(fobj, *args)
closefd = kwargs['closefd'] = kwargs.pop('close', True)
if 'bufsize' in kwargs: # compat with other constructors
kwargs['buffering'] = kwargs.pop('bufsize')
fobj = self._open_raw(fobj, *args, **kwargs)
super(FileObjectBlock, self).__init__(fobj, closefd)
def _do_close(self, fobj, closefd):
fobj.close()
class FileObjectThread(FileObjectBase):
"""
A file-like object wrapping another file-like object, performing all blocking
......@@ -176,12 +229,18 @@ class FileObjectThread(FileObjectBase):
The file object is closed using the threadpool. Note that whether or
not this action is synchronous or asynchronous is not documented.
.. versionchanged:: 1.5
Accept str and ``PathLike`` objects for *fobj* on all versions of Python.
.. versionchanged:: 1.5
Add *encoding*, *errors* and *newline* arguments.
"""
def __init__(self, fobj, mode=None, bufsize=-1, close=True, threadpool=None, lock=True):
def __init__(self, fobj, mode='r', bufsize=-1, close=True, threadpool=None, lock=True,
encoding=None, errors=None, newline=None):
"""
:param fobj: The underlying file-like object to wrap, or an integer fileno
that will be pass to :func:`os.fdopen` along with *mode* and *bufsize*.
:param fobj: The underlying file-like object to wrap, or something
acceptable to :func:`io.open` (along with *mode* and *bufsize*, which is translated
to *buffering*).
:keyword bool lock: If True (the default) then all operations will
be performed one-by-one. Note that this does not guarantee that, if using
this file object from multiple threads/greenlets, operations will be performed
......@@ -200,16 +259,9 @@ class FileObjectThread(FileObjectBase):
self.lock = DummySemaphore()
if not hasattr(self.lock, '__enter__'):
raise TypeError('Expected a Semaphore or boolean, got %r' % type(self.lock))
if isinstance(fobj, integer_types):
if not closefd:
# we cannot do this, since fdopen object will close the descriptor
raise TypeError('FileObjectThread does not support close=False on an fd.')
if mode is None:
assert bufsize == -1, "If you use the default mode, you can't choose a bufsize"
fobj = os.fdopen(fobj)
else:
fobj = os.fdopen(fobj, mode, bufsize)
fobj = self._open_raw(fobj, mode, bufsize,
encoding=encoding, errors=errors, newline=newline,
closefd=close)
self.__io_holder = [fobj] # signal for _wrap_method
super(FileObjectThread, self).__init__(fobj, closefd)
......@@ -244,8 +296,8 @@ class FileObjectThread(FileObjectBase):
def _do_delegate_methods(self):
super(FileObjectThread, self)._do_delegate_methods()
if not hasattr(self, 'read1') and 'r' in getattr(self._io, 'mode', ''):
self.read1 = self.read
# if not hasattr(self, 'read1') and 'r' in getattr(self._io, 'mode', ''):
# self.read1 = self.read
self.__io_holder[0] = self._io
def _extra_repr(self):
......
......@@ -10,6 +10,8 @@ from io import RawIOBase
from io import UnsupportedOperation
from gevent._compat import reraise
from gevent._compat import PY2
from gevent._compat import PY3
from gevent._fileobjectcommon import cancel_wait_ex
from gevent._fileobjectcommon import FileObjectBase
from gevent.hub import get_hub
......@@ -183,6 +185,8 @@ class FlushingBufferedWriter(BufferedWriter):
return ret
_marker = object()
class FileObjectPosix(FileObjectBase):
"""
A file-like object that operates on non-blocking files but
......@@ -234,12 +238,21 @@ class FileObjectPosix(FileObjectBase):
better file-like semantics (and portability to Python 3).
.. versionchanged:: 1.2a1
Document the ``fileio`` attribute for non-blocking reads.
.. versionchanged:: 1.5
The default value for *mode* was changed from ``rb`` to ``r``.
.. versionchanged:: 1.5
Support strings and ``PathLike`` objects for ``fobj`` on all versions
of Python. Note that caution above.
.. versionchanged:: 1.5
Add *encoding*, *errors* and *newline* argument.
"""
#: platform specific default for the *bufsize* parameter
default_bufsize = io.DEFAULT_BUFFER_SIZE
def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
def __init__(self, fobj, mode='r', bufsize=-1, close=True,
encoding=None, errors=None, newline=_marker):
# pylint:disable=too-many-locals
"""
:param fobj: Either an integer fileno, or an object supporting the
usual :meth:`socket.fileno` method. The file *will* be
......@@ -248,7 +261,7 @@ class FileObjectPosix(FileObjectBase):
(where the "b" or "U" can be omitted).
If "U" is part of the mode, universal newlines will be used. On Python 2,
if 't' is not in the mode, this will result in returning byte (native) strings;
putting 't' in the mode will return text strings. This may cause
putting 't' in the mode will return text (unicode) strings. This may cause
:exc:`UnicodeDecodeError` to be raised.
:keyword int bufsize: If given, the size of the buffer to use. The default
value means to use a platform-specific default
......@@ -274,15 +287,40 @@ class FileObjectPosix(FileObjectBase):
fileno = fobj
fobj = None
else:
# The underlying object, if we have to open it, should be
# in binary mode. We'll take care of any coding needed.
raw_mode = mode.replace('t', 'b')
raw_mode = raw_mode + 'b' if 'b' not in raw_mode else raw_mode
new_fobj = self._open_raw(fobj, raw_mode, bufsize, closefd=False)
if new_fobj is not fobj:
close = True
fobj = new_fobj
fileno = fobj.fileno()
if not isinstance(fileno, int):
raise TypeError('fileno must be int: %r' % fileno)
orig_mode = mode
mode = (mode or 'rb').replace('b', '')
self.__fobj = fobj
assert isinstance(fileno, int)
# We want text mode if:
# - We're on Python 3, and no 'b' is in the mode.
# - A 't' is in the mode on any version.
# - We're on Python 2 and no 'b' is in the mode, and an encoding or errors is
# given.
text_mode = (
't' in mode
or (PY3 and 'b' not in mode)
or (PY2 and 'b' not in mode and (encoding, errors) != (None, None))
)
if text_mode:
self._translate = True
self._translate_newline = os.linesep if newline is _marker else newline
self._translate_encoding = encoding
self._transalate_errors = errors
if 'U' in mode:
self._translate = True
if bytes is str and 't' not in mode:
self._translate_newline = None
if PY2 and not text_mode:
# We're going to be producing unicode objects, but
# universal newlines doesn't do that in the stdlib,
# so fix that to return str objects. The fix is two parts:
......@@ -301,20 +339,6 @@ class FileObjectPosix(FileObjectBase):
return wrapped
return m
self._wrap_method = wrap_method
mode = mode.replace('U', '')
else:
self._translate = False
mode = mode.replace('t', '')
if len(mode) != 1 and mode not in 'rw': # pragma: no cover
# Python 3 builtin `open` raises a ValueError for invalid modes;
# Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
# enabled (which it usually was). Match Python 3 because it makes more sense
# and because __debug__ may not be enabled.
# NOTE: This is preventing a mode like 'rwb' for binary random access;
# that code was never tested and was explicitly marked as "not used"
raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode,))
self._orig_bufsize = bufsize
......@@ -323,10 +347,10 @@ class FileObjectPosix(FileObjectBase):
elif bufsize == 0:
bufsize = 1
if mode == 'r':
if mode[0] == 'r':
IOFamily = BufferedReader
else:
assert mode == 'w'
assert mode[0] == 'w'
IOFamily = BufferedWriter
if self._orig_bufsize == 0:
# We could also simply pass self.fileio as *io*, but this way
......@@ -335,7 +359,7 @@ class FileObjectPosix(FileObjectBase):
IOFamily = FlushingBufferedWriter
self._fobj = fobj
# This attribute is documented as available for non-blocking reads.
self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)
......@@ -349,8 +373,13 @@ class FileObjectPosix(FileObjectBase):
# self.fileio already knows whether or not to close the
# file descriptor
self.fileio.close()
if closefd and self.__fobj is not None:
try:
self.__fobj.close()
except IOError:
pass
finally:
self._fobj = None
self.__fobj = None
self.fileio = None
def __iter__(self):
......
......@@ -7,6 +7,7 @@ import unittest
import gevent
from gevent import fileobject
from gevent._compat import PY2
import gevent.testing as greentest
from gevent.testing.sysinfo import PY3
......@@ -21,7 +22,7 @@ except NameError:
"Python 2 fallback"
def writer(fobj, line):
def Writer(fobj, line):
for character in line:
fobj.write(character)
fobj.flush()
......@@ -34,7 +35,7 @@ def close_fd_quietly(fd):
except (IOError, OSError):
pass
class TestFileObjectBlock(greentest.TestCase):
class TestFileObjectBlock(greentest.TestCase): # serves as a base for the concurrent tests too
def _getTargetClass(self):
return fileobject.FileObjectBlock
......@@ -127,6 +128,34 @@ class TestFileObjectBlock(greentest.TestCase):
self.assertEqual(native_data, s)
self.assertEqual(native_data, fileobj_data)
def test_str_default_to_native(self):
# With no 'b' or 't' given, read and write native str.
fileno, path = tempfile.mkstemp('.gevent_test_str_default')
self.addCleanup(os.remove, path)
os.write(fileno, b'abcdefg')
os.close(fileno)
with open(path, 'r') as f:
native_data = f.read()
with self._makeOne(path, 'r') as f:
gevent_data = f.read()
self.assertEqual(native_data, gevent_data)
def test_text_encoding(self):
fileno, path = tempfile.mkstemp('.gevent_test_str_default')
self.addCleanup(os.remove, path)
os.write(fileno, u'\N{SNOWMAN}'.encode('utf-8'))
os.close(fileno)
with self._makeOne(path, 'r+', bufsize=5, encoding='utf-8') as f:
gevent_data = f.read()
self.assertEqual(u'\N{SNOWMAN}', gevent_data)
def test_close_pipe(self):
# Issue #190, 203
r, w = os.pipe()
......@@ -140,14 +169,31 @@ class ConcurrentFileObjectMixin(object):
# Additional tests for fileobjects that cooperate
# and we have full control of the implementation
def test_read1(self):
def test_read1_binary_present(self):
# Issue #840
r, w = os.pipe()
x = self._makeOne(r)
y = self._makeOne(w, 'w')
self._close_on_teardown(x)
self._close_on_teardown(y)
self.assertTrue(hasattr(x, 'read1'))
reader = self._makeOne(r, 'rb')
self._close_on_teardown(reader)
writer = self._makeOne(w, 'w')
self._close_on_teardown(writer)
self.assertTrue(hasattr(reader, 'read1'), dir(reader))
def test_read1_text_not_present(self):
# Only defined for binary.
r, w = os.pipe()
reader = self._makeOne(r, 'rt')
self._close_on_teardown(reader)
self.addCleanup(os.close, w)
self.assertFalse(hasattr(reader, 'read1'), dir(reader))
def test_read1_default(self):
# If just 'r' is given, whether it has one or not
# depends on if we're Python 2 or 3.
r, w = os.pipe()
self.addCleanup(os.close, w)
reader = self._makeOne(r)
self._close_on_teardown(reader)
self.assertEqual(PY2, hasattr(reader, 'read1'))
def test_bufsize_0(self):
# Issue #840
......@@ -168,7 +214,7 @@ class ConcurrentFileObjectMixin(object):
import warnings
r, w = os.pipe()
lines = [b'line1\n', b'line2\r', b'line3\r\n', b'line4\r\nline5', b'\nline6']
g = gevent.spawn(writer, self._makeOne(w, 'wb'), lines)
g = gevent.spawn(Writer, self._makeOne(w, 'wb'), lines)
try:
with warnings.catch_warnings():
......@@ -188,13 +234,12 @@ class TestFileObjectThread(ConcurrentFileObjectMixin,
def _getTargetClass(self):
return fileobject.FileObjectThread
# FileObjectThread uses os.fdopen() when passed a file-descriptor,
# which returns an object with a destructor that can't be
# bypassed, so we can't even create one that way
def test_del_noclose(self):
with self.assertRaisesRegex(TypeError,
'FileObjectThread does not support close=False on an fd.'):
self._test_del(close=False)
# In the past, we used os.fdopen() when given a file descriptor,
# and that has a destructor that can't be bypassed, so
# close=false wasn't allowed. Now that we do everything with the
# io module, it is allowed.
self._test_del(close=False)
# We don't test this with FileObjectThread. Sometimes the
# visibility of the 'close' operation, which happens in a
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment