Commit 2e233774 authored by Jason Madden's avatar Jason Madden Committed by GitHub

Merge pull request #1485 from gevent/issue1441

More simplification and unification of the file objects
parents 83726b6d 79021f13
......@@ -15,7 +15,8 @@
consistently text and binary modes. If neither 'b' nor 't' is given
in the mode, they will read and write native strings. If 't' is
given, they will always work with unicode strings, and 'b' will
always work with byte strings. See :issue:`1441`.
always work with byte strings. (FileObjectPosix already worked this
way.) See :issue:`1441`.
- The file objects accept *encoding*, *errors* and *newline*
arguments. On Python 2, these are only used if 't' is in the mode.
......@@ -24,6 +25,10 @@
``r``, for consistency with the other file objects and the standard
``open`` and ``io.open`` functions.
- Fix ``FileObjectPosix`` improperly being used from multiple
  greenlets. Previously this was hidden by forcing buffering, which
  raised ``RuntimeError``.
1.5a2 (2019-10-21)
==================
......
"""
gevent internals.
"""
from __future__ import absolute_import, print_function, division
try:
......@@ -8,7 +11,7 @@ except ImportError:
import io
import functools
import sys
import os
from gevent.hub import _get_hub_noargs as get_hub
from gevent._compat import PY2
......@@ -23,35 +26,256 @@ class cancel_wait_ex(IOError):
super(cancel_wait_ex, self).__init__(
EBADF, 'File descriptor was closed in another greenlet')
class FileObjectClosed(IOError):
def __init__(self):
super(FileObjectClosed, self).__init__(
EBADF, 'Bad file descriptor (FileObject was closed)')
class _UniversalNewlineBytesWrapper(io.TextIOWrapper):
class UniversalNewlineBytesWrapper(io.TextIOWrapper):
"""
Uses TextWrapper to decode universal newlines, but returns the
results as bytes.
This is for Python 2 where the 'rU' mode did that.
"""
def __init__(self, fobj):
io.TextIOWrapper.__init__(self, fobj, encoding='latin-1', newline=None)
mode = None
def __init__(self, fobj, line_buffering):
# latin-1 has the ability to round-trip arbitrary bytes.
io.TextIOWrapper.__init__(self, fobj, encoding='latin-1',
newline=None,
line_buffering=line_buffering)
def read(self, *args, **kwargs):
result = io.TextIOWrapper.read(self, *args, **kwargs)
return result.encode('latin-1')
def readline(self, *args, **kwargs):
result = io.TextIOWrapper.readline(self, *args, **kwargs)
def readline(self, limit=-1):
result = io.TextIOWrapper.readline(self, limit)
return result.encode('latin-1')
def readlines(self, *args, **kwargs):
result = io.TextIOWrapper.readlines(self, *args, **kwargs)
return [x.encode('latin-1') for x in result]
def __iter__(self):
# readlines() is implemented in terms of __iter__
# and TextIOWrapper.__iter__ checks that readline returns
# a unicode object, which we don't, so we override
return self
def __next__(self):
line = self.readline()
if not line:
raise StopIteration
return line
next = __next__
class FlushingBufferedWriter(io.BufferedWriter):
    """
    A ``BufferedWriter`` that flushes to the underlying raw stream after
    every ``write``, giving effectively unbuffered behaviour while still
    presenting the buffered-writer interface.
    """

    def write(self, b):
        written = io.BufferedWriter.write(self, b)
        self.flush()
        return written
class OpenDescriptor(object): # pylint:disable=too-many-instance-attributes
    """
    Interprets the arguments to `open`. Internal use only.

    Originally based on code in the stdlib's _pyio.py (Python implementation of
    the :mod:`io` module), but modified for gevent:

    - Native strings are returned on Python 2 when neither
      'b' nor 't' are in the mode string and no encoding is specified.
    - Universal newlines work in that mode.
    - Allows unbuffered text IO.
    """

    @staticmethod
    def _collapse_arg(preferred_val, old_val, default):
        # Merge a new-style keyword argument with its legacy alias
        # (e.g. *closefd* vs *close*, *buffering* vs *bufsize*).
        # Supplying both is an error; supplying neither yields *default*.
        if preferred_val is not None and old_val is not None:
            raise TypeError
        if preferred_val is None and old_val is None:
            return default
        return preferred_val if preferred_val is not None else old_val

    def __init__(self, fobj, mode='r', bufsize=None, close=None,
                 encoding=None, errors=None, newline=None,
                 buffering=None, closefd=None):
        # Parse and validate the ``open()``-style arguments, storing the
        # results as attributes for `open_raw`/`wrapped` to consume later.
        # Based on code in the stdlib's _pyio.py from 3.8.
        # pylint:disable=too-many-locals,too-many-branches,too-many-statements

        # *close* and *bufsize* are legacy aliases kept for backwards
        # compatibility with the older FileObject constructors.
        closefd = self._collapse_arg(closefd, close, True)
        del close
        buffering = self._collapse_arg(buffering, bufsize, -1)
        del bufsize

        if not hasattr(fobj, 'fileno'):
            if not isinstance(fobj, integer_types):
                # Not a fd. Support PathLike on Python 2 and Python <= 3.5.
                fobj = fspath(fobj)
            if not isinstance(fobj, (str, bytes) + integer_types): # pragma: no cover
                raise TypeError("invalid file: %r" % fobj)
            if isinstance(fobj, (str, bytes)):
                # A path means we will open the descriptor ourselves,
                # so we always own (and must close) it.
                closefd = True

        if not isinstance(mode, str):
            raise TypeError("invalid mode: %r" % mode)
        if not isinstance(buffering, integer_types):
            raise TypeError("invalid buffering: %r" % buffering)
        if encoding is not None and not isinstance(encoding, str):
            raise TypeError("invalid encoding: %r" % encoding)
        if errors is not None and not isinstance(errors, str):
            raise TypeError("invalid errors: %r" % errors)

        # Reject unknown mode characters and duplicates in one pass.
        modes = set(mode)
        if modes - set("axrwb+tU") or len(mode) > len(modes):
            raise ValueError("invalid mode: %r" % mode)

        creating = "x" in modes
        reading = "r" in modes
        writing = "w" in modes
        appending = "a" in modes
        updating = "+" in modes
        text = "t" in modes
        binary = "b" in modes
        universal = 'U' in modes

        can_write = creating or writing or appending or updating

        if universal:
            # 'U' (deprecated) implies reading with universal newlines.
            if can_write:
                raise ValueError("mode U cannot be combined with 'x', 'w', 'a', or '+'")
            import warnings
            warnings.warn("'U' mode is deprecated",
                          DeprecationWarning, 4)
            reading = True
        if text and binary:
            raise ValueError("can't have text and binary mode at once")
        if creating + reading + writing + appending > 1:
            raise ValueError("can't have read/write/append mode at once")
        if not (creating or reading or writing or appending):
            raise ValueError("must have exactly one of read/write/append mode")
        if binary and encoding is not None:
            raise ValueError("binary mode doesn't take an encoding argument")
        if binary and errors is not None:
            raise ValueError("binary mode doesn't take an errors argument")
        if binary and newline is not None:
            raise ValueError("binary mode doesn't take a newline argument")
        if binary and buffering == 1:
            import warnings
            warnings.warn("line buffering (buffering=1) isn't supported in binary "
                          "mode, the default buffer size will be used",
                          RuntimeWarning, 4)

        self.fobj = fobj
        # The subset of the mode string understood by io.FileIO
        # (no 't', 'b' or 'U').
        self.fileio_mode = (
            (creating and "x" or "")
            + (reading and "r" or "")
            + (writing and "w" or "")
            + (appending and "a" or "")
            + (updating and "+" or "")
        )
        self.mode = self.fileio_mode + ('t' if text else '') + ('b' if binary else '')
        self.creating = creating
        self.reading = reading
        self.writing = writing
        self.appending = appending
        self.updating = updating
        self.text = text
        self.binary = binary
        self.can_write = can_write
        self.can_read = reading or updating
        # "Native" mode: Python 2 byte (native-str) IO — neither 't' nor
        # 'b' given, and no encoding/error handler requested.
        self.native = (
            not self.text and not self.binary # Neither t nor b given.
            and not encoding and not errors # And no encoding or error handling either.
        )
        self.universal = universal
        self.buffering = buffering
        self.encoding = encoding
        self.errors = errors
        self.newline = newline
        self.closefd = closefd

    default_buffer_size = io.DEFAULT_BUFFER_SIZE

    def is_fd(self):
        # True when we were given a raw integer file descriptor.
        return isinstance(self.fobj, integer_types)

    def open(self):
        # Convenience: return only the fully wrapped IO object.
        return self.open_raw_and_wrapped()[1]

    def open_raw_and_wrapped(self):
        # Return (raw, wrapped). If wrapping fails, close the raw object
        # so a freshly opened descriptor is not leaked.
        raw = self.open_raw()
        try:
            return raw, self.wrapped(raw)
        except:
            raw.close()
            raise

    def open_raw(self):
        # An existing file-like object is used as-is; an fd or path is
        # opened with io.FileIO using the parsed mode.
        if hasattr(self.fobj, 'fileno'):
            return self.fobj
        return io.FileIO(self.fobj, self.fileio_mode, self.closefd)

    def wrapped(self, raw):
        """
        Wraps the raw IO object (`RawIOBase`) in buffers, text decoding,
        and newline handling.
        """
        # pylint:disable=too-many-branches
        result = raw
        buffering = self.buffering

        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            # Line buffering is implemented by the text layer; the byte
            # layer below it gets the default buffer size.
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = self.default_buffer_size
            try:
                # Prefer the filesystem's block size when available.
                bs = os.fstat(raw.fileno()).st_blksize
            except (OSError, AttributeError):
                pass
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0: # pragma: no cover
            raise ValueError("invalid buffering size")

        if buffering != 0:
            # Pick the buffer class matching the access pattern.
            if self.updating:
                Buffer = io.BufferedRandom
            elif self.creating or self.writing or self.appending:
                Buffer = io.BufferedWriter
            elif self.reading:
                Buffer = io.BufferedReader
            else: # pragma: no cover
                raise ValueError("unknown mode: %r" % self.mode)

            try:
                result = Buffer(raw, buffering)
            except AttributeError:
                # Python 2 file() objects don't have the readable/writable
                # attributes. But they handle their own buffering.
                result = raw

        if self.binary:
            return result

        if PY2 and self.native:
            # Neither text mode nor binary mode specified.
            if self.universal:
                # universal was requested, e.g., 'rU'
                result = UniversalNewlineBytesWrapper(result, line_buffering)
        else:
            result = io.TextIOWrapper(result, self.encoding, self.errors, self.newline,
                                      line_buffering)

        try:
            result.mode = self.mode
        except (AttributeError, TypeError):
            # AttributeError: No such attribute
            # TypeError: Readonly attribute (py2)
            pass
        return result
class FileObjectBase(object):
......@@ -86,30 +310,14 @@ class FileObjectBase(object):
)
# Whether we should apply a TextWrapper (the names are historical).
# Subclasses should set these before calling our constructor.
_translate = False
_translate_mode = None
_translate_encoding = None
_translate_errors = None
_translate_newline = None # None means universal
_io = None
def __init__(self, fobj, closefd):
"""
:param fobj: An io.IOBase-like object.
"""
self._io = fobj
# We don't actually use this property ourself, but we save it (and
# pass it along) for compatibility.
self._close = closefd
if self._translate:
# This automatically handles delegation by assigning to
# self.io
self.translate_newlines(self._translate_mode,
self._translate_encoding,
self._translate_errors)
else:
self._do_delegate_methods()
......@@ -137,17 +345,6 @@ class FileObjectBase(object):
"""
return method
    def translate_newlines(self, mode, *text_args, **text_kwargs):
        # Replace our delegate IO object with a text/newline-translating
        # wrapper.
        #
        # :param mode: The special string ``'byte_newlines'`` means decode
        #     universal newlines but keep returning bytes (Python 2 'rU'
        #     behaviour); any other true value is recorded as the wrapper's
        #     ``mode`` attribute; a false value leaves the mode alone.
        # Remaining arguments are forwarded to ``io.TextIOWrapper``.
        if mode == 'byte_newlines':
            wrapper = _UniversalNewlineBytesWrapper(self._io)
            mode = None
        else:
            wrapper = io.TextIOWrapper(self._io, *text_args, **text_kwargs)
        if mode:
            wrapper.mode = mode # pylint:disable=attribute-defined-outside-init
        # Assigning to ``io`` (not ``_io``) so delegation is re-established.
        self.io = wrapper
        self._translate = True
@property
def closed(self):
"""True if the file is closed"""
......@@ -181,78 +378,35 @@ class FileObjectBase(object):
def __exit__(self, *args):
self.close()
# Modes that work with native strings on Python 2
_NATIVE_PY2_MODES = ('r', 'r+', 'w', 'w+', 'a', 'a+')
def __iter__(self):
return self
if PY2:
@classmethod
def _use_FileIO(cls, mode, encoding, errors):
return mode in cls._NATIVE_PY2_MODES \
and encoding is None and errors is None
else:
@classmethod
def _use_FileIO(cls, mode, encoding, errors): # pylint:disable=unused-argument
return False
def __next__(self):
line = self.readline()
if not line:
raise StopIteration
return line
@classmethod
def _open_raw(cls, fobj, mode='r', buffering=-1,
encoding=None, errors=None, newline=None, closefd=True):
"""
Uses :func:`io.open` on *fobj* and returns the IO object.
next = __next__
This is a compatibility wrapper for Python 2 and Python 3. It
supports PathLike objects for *fobj* on all versions.
def __bool__(self):
return True
If the object is already an object with a ``fileno`` method,
it is returned unchanged.
__nonzero__ = __bool__
On Python 2, if the mode only specifies read, write or append,
and encoding and errors are not specified, then
:obj:`io.FileIO` is used to get the IO object. This ensures we
return native strings unless explicitly requested.
.. versionchanged: 1.5
Support closefd for Python 2 native string readers.
class FileObjectBlock(FileObjectBase):
"""
if hasattr(fobj, 'fileno'):
return fobj
if not isinstance(fobj, integer_types):
# Not an integer. Support PathLike on Python 2 and Python <= 3.5.
fobj = fspath(fobj)
closefd = True
if cls._use_FileIO(mode, encoding, errors):
# Python 2, default open. Return native str type, not unicode, which
# is what would happen with io.open('r'), but we don't want to open the file
# in binary mode since that skips newline conversion.
fobj = io.FileIO(fobj, mode, closefd=closefd)
if '+' in mode:
BufFactory = io.BufferedRandom
elif mode[0] == 'r':
BufFactory = io.BufferedReader
else:
BufFactory = io.BufferedWriter
FileObjectBlock()
if buffering == -1:
fobj = BufFactory(fobj)
elif buffering != 0:
fobj = BufFactory(fobj, buffering)
else:
# Python 3, or we have a mode that Python 2's os.fdopen/open can't handle
# (x) or they explicitly asked for binary or text mode.
fobj = io.open(fobj, mode, buffering, encoding, errors, newline, closefd)
return fobj
A simple synchronous wrapper around a file object.
class FileObjectBlock(FileObjectBase):
Adds no concurrency or gevent compatibility.
"""
def __init__(self, fobj, *args, **kwargs):
closefd = kwargs['closefd'] = kwargs.pop('close', True)
if 'bufsize' in kwargs: # compat with other constructors
kwargs['buffering'] = kwargs.pop('bufsize')
fobj = self._open_raw(fobj, *args, **kwargs)
super(FileObjectBlock, self).__init__(fobj, closefd)
descriptor = OpenDescriptor(fobj, *args, **kwargs)
super(FileObjectBlock, self).__init__(descriptor.open(), descriptor.closefd)
def _do_close(self, fobj, closefd):
fobj.close()
......@@ -260,6 +414,8 @@ class FileObjectBlock(FileObjectBase):
class FileObjectThread(FileObjectBase):
"""
FileObjectThread()
A file-like object wrapping another file-like object, performing all blocking
operations on that object in a background thread.
......@@ -270,29 +426,25 @@ class FileObjectThread(FileObjectBase):
.. versionchanged:: 1.1b1
The file object is closed using the threadpool. Note that whether or
not this action is synchronous or asynchronous is not documented.
.. versionchanged:: 1.5
Accept str and ``PathLike`` objects for *fobj* on all versions of Python.
.. versionchanged:: 1.5
Add *encoding*, *errors* and *newline* arguments.
"""
def __init__(self, fobj, mode='r', bufsize=-1, close=True, threadpool=None, lock=True,
encoding=None, errors=None, newline=None):
def __init__(self, *args, **kwargs):
"""
:param fobj: The underlying file-like object to wrap, or something
acceptable to :func:`io.open` (along with *mode* and *bufsize*, which is translated
to *buffering*).
:keyword bool lock: If True (the default) then all operations will
be performed one-by-one. Note that this does not guarantee that, if using
this file object from multiple threads/greenlets, operations will be performed
in any particular order, only that no two operations will be attempted at the
same time. You can also pass your own :class:`gevent.lock.Semaphore` to synchronize
file operations with an external resource.
:keyword bool close: If True (the default) then when this object is closed,
the underlying object is closed as well.
:keyword bool closefd: If True (the default) then when this object is closed,
the underlying object is closed as well. If *fobj* is a path, then
*closefd* must be True.
"""
closefd = close
lock = kwargs.pop('lock', True)
threadpool = kwargs.pop('threadpool', None)
descriptor = OpenDescriptor(*args, **kwargs)
self.threadpool = threadpool or get_hub().threadpool
self.lock = lock
if self.lock is True:
......@@ -301,18 +453,9 @@ class FileObjectThread(FileObjectBase):
self.lock = DummySemaphore()
if not hasattr(self.lock, '__enter__'):
raise TypeError('Expected a Semaphore or boolean, got %r' % type(self.lock))
using_fileio = self._use_FileIO(mode.replace('U', ''), encoding, errors)
universal_newline = 'U' in mode or (not using_fileio and newline is None)
mode = mode.replace('U', '')
fobj = self._open_raw(fobj, mode, bufsize,
encoding=encoding, errors=errors, newline=newline,
closefd=close)
if self._use_FileIO(mode, encoding, errors) and universal_newline:
self._translate_mode = 'byte_newlines'
self._translate = True
self.__io_holder = [fobj] # signal for _wrap_method
super(FileObjectThread, self).__init__(fobj, closefd)
self.__io_holder = [descriptor.open()] # signal for _wrap_method
super(FileObjectThread, self).__init__(self.__io_holder[0], descriptor.closefd)
def _do_close(self, fobj, closefd):
self.__io_holder[0] = None # for _wrap_method
......
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
import io
from io import BufferedReader
from io import BufferedWriter
from io import BytesIO
from io import DEFAULT_BUFFER_SIZE
from io import FileIO
from io import RawIOBase
from io import UnsupportedOperation
from gevent._compat import reraise
from gevent._compat import PY2
from gevent._compat import PY3
from gevent._fileobjectcommon import cancel_wait_ex
from gevent._fileobjectcommon import FileObjectBase
from gevent._fileobjectcommon import OpenDescriptor
from gevent._hub_primitives import wait_on_watcher
from gevent.hub import get_hub
from gevent.os import _read
from gevent.os import _write
......@@ -22,34 +23,38 @@ from gevent.os import make_nonblocking
class GreenFileDescriptorIO(RawIOBase):
# Internal, undocumented, class. All that's documented is that this
# is a IOBase object. Constructor is private.
# Note that RawIOBase has a __del__ method that calls
# self.close(). (In C implementations like CPython, this is
# the type's tp_dealloc slot; prior to Python 3, the object doesn't
# appear to have a __del__ method, even though it functionally does)
_read_event = None
_write_event = None
_read_watcher = None
_write_watcher = None
_closed = False
_seekable = None
_keep_alive = None # An object that needs to live as long as we do.
def __init__(self, fileno, mode='r', closefd=True):
RawIOBase.__init__(self) # Python 2: pylint:disable=no-member,non-parent-init-called
def __init__(self, fileno, open_descriptor, closefd=True):
RawIOBase.__init__(self)
self._closefd = closefd
self._fileno = fileno
self.mode = open_descriptor.fileio_mode
make_nonblocking(fileno)
readable = 'r' in mode
writable = 'w' in mode
readable = open_descriptor.can_read
writable = open_descriptor.can_write
self.hub = get_hub()
io_watcher = self.hub.loop.io
try:
if readable:
self._read_event = io_watcher(fileno, 1)
self._read_watcher = io_watcher(fileno, 1)
if writable:
self._write_event = io_watcher(fileno, 2)
self._write_watcher = io_watcher(fileno, 2)
except:
# If anything goes wrong, it's important to go ahead and
# close these watchers *now*, especially under libuv, so
......@@ -67,11 +72,19 @@ class GreenFileDescriptorIO(RawIOBase):
self.close()
raise
def isatty(self):
# TODO: Couldn't we just subclass FileIO?
f = FileIO(self._fileno, 'r', False)
try:
return f.isatty()
finally:
f.close()
def readable(self):
return self._read_event is not None
return self._read_watcher is not None
def writable(self):
return self._write_event is not None
return self._write_watcher is not None
def seekable(self):
if self._seekable is None:
......@@ -91,10 +104,10 @@ class GreenFileDescriptorIO(RawIOBase):
return self._closed
def __destroy_events(self):
read_event = self._read_event
write_event = self._write_event
read_event = self._read_watcher
write_event = self._write_watcher
hub = self.hub
self.hub = self._read_event = self._write_event = None
self.hub = self._read_watcher = self._write_watcher = None
if read_event is not None:
hub.cancel_wait(read_event, cancel_wait_ex, True)
......@@ -110,9 +123,14 @@ class GreenFileDescriptorIO(RawIOBase):
self._closed = True
self.__destroy_events()
fileno = self._fileno
keep_alive = self._keep_alive
self._fileno = self._keep_alive = None
try:
if self._closefd:
self._fileno = None
os.close(fileno)
finally:
if hasattr(keep_alive, 'close'):
keep_alive.close()
# RawIOBase provides a 'read' method that will call readall() if
# the `size` was missing or -1 and otherwise call readinto(). We
......@@ -122,20 +140,26 @@ class GreenFileDescriptorIO(RawIOBase):
# this was fixed in Python 3.3, but we still need our workaround for 2.7. See
# https://github.com/gevent/gevent/issues/675)
def __read(self, n):
if self._read_event is None:
if self._read_watcher is None:
raise UnsupportedOperation('read')
while True:
while 1:
try:
return _read(self._fileno, n)
except (IOError, OSError) as ex:
if ex.args[0] not in ignored_errors:
raise
self.hub.wait(self._read_event)
wait_on_watcher(self._read_watcher, None, None, self.hub)
def readall(self):
ret = BytesIO()
while True:
try:
data = self.__read(DEFAULT_BUFFER_SIZE)
except cancel_wait_ex:
# We were closed while reading. A buffered reader
# just returns what it has handy at that point,
# so we do to.
data = None
if not data:
break
ret.write(data)
......@@ -154,7 +178,7 @@ class GreenFileDescriptorIO(RawIOBase):
return n
def write(self, b):
if self._write_event is None:
if self._write_watcher is None:
raise UnsupportedOperation('write')
while True:
try:
......@@ -162,7 +186,7 @@ class GreenFileDescriptorIO(RawIOBase):
except (IOError, OSError) as ex:
if ex.args[0] not in ignored_errors:
raise
self.hub.wait(self._write_event)
wait_on_watcher(self._write_watcher, None, None, self.hub)
def seek(self, offset, whence=0):
try:
......@@ -176,19 +200,38 @@ class GreenFileDescriptorIO(RawIOBase):
# See https://github.com/gevent/gevent/issues/1323
reraise(IOError, IOError(*ex.args), sys.exc_info()[2])
def __repr__(self):
return "<%s at 0x%x fileno=%s mode=%r>" % (
type(self).__name__, id(self), self._fileno, self.mode
)
class FlushingBufferedWriter(BufferedWriter):
    """
    A ``BufferedWriter`` that flushes after every ``write``, so data
    reaches the raw stream immediately while the buffered interface is
    preserved.
    """

    def write(self, b):
        count = BufferedWriter.write(self, b)
        self.flush()
        return count
class GreenOpenDescriptor(OpenDescriptor):
    """
    An ``OpenDescriptor`` whose raw layer is a cooperative
    ``GreenFileDescriptorIO`` instead of a blocking ``io.FileIO``.
    """

    def open_raw(self):
        if self.is_fd():
            # We were given the descriptor directly; honor the caller's
            # closefd choice.
            fileio = GreenFileDescriptorIO(self.fobj, self, closefd=self.closefd)
        else:
            # Either an existing file object or a path string (which
            # we open to get a file object). In either case, the other object
            # owns the descriptor and we must not close it.
            closefd = False
            if hasattr(self.fobj, 'fileno'):
                raw = self.fobj
            else:
                raw = OpenDescriptor.open_raw(self)
            fileno = raw.fileno()
            fileio = GreenFileDescriptorIO(fileno, self, closefd=closefd)
            # Keep the owning object alive as long as the descriptor is in use.
            fileio._keep_alive = raw
        return fileio
_marker = object()
class FileObjectPosix(FileObjectBase):
"""
FileObjectPosix()
A file-like object that operates on non-blocking files but
provides a synchronous, cooperative interface.
......@@ -213,11 +256,6 @@ class FileObjectPosix(FileObjectBase):
:func:`~gevent.os.tp_read` and :func:`~gevent.os.tp_write` to bypass this
concern.
.. note::
Random read/write (e.g., ``mode='rwb'``) is not supported.
For that, use :class:`io.BufferedRWPair` around two instance of this
class.
.. tip::
Although this object provides a :meth:`fileno` method and so
can itself be passed to :func:`fcntl.fcntl`, setting the
......@@ -238,41 +276,6 @@ class FileObjectPosix(FileObjectBase):
better file-like semantics (and portability to Python 3).
.. versionchanged:: 1.2a1
Document the ``fileio`` attribute for non-blocking reads.
.. versionchanged:: 1.5
The default value for *mode* was changed from ``rb`` to ``r``.
.. versionchanged:: 1.5
Support strings and ``PathLike`` objects for ``fobj`` on all versions
of Python. Note that caution above.
.. versionchanged:: 1.5
Add *encoding*, *errors* and *newline* argument.
"""
#: platform specific default for the *bufsize* parameter
default_bufsize = io.DEFAULT_BUFFER_SIZE
def __init__(self, fobj, mode='r', bufsize=-1, close=True,
encoding=None, errors=None, newline=_marker):
# pylint:disable=too-many-locals
"""
:param fobj: Either an integer fileno, or an object supporting the
usual :meth:`socket.fileno` method. The file *will* be
put in non-blocking mode using :func:`gevent.os.make_nonblocking`.
:keyword str mode: The manner of access to the file, one of "rb", "rU" or "wb"
(where the "b" or "U" can be omitted).
If "U" is part of the mode, universal newlines will be used. On Python 2,
if 't' is not in the mode, this will result in returning byte (native) strings;
putting 't' in the mode will return text (unicode) strings. This may cause
:exc:`UnicodeDecodeError` to be raised.
:keyword int bufsize: If given, the size of the buffer to use. The default
value means to use a platform-specific default
Other values are interpreted as for the :mod:`io` package.
Buffering is ignored in text mode.
.. versionchanged:: 1.3a1
On Python 2, enabling universal newlines no longer forces unicode
IO.
.. versionchanged:: 1.2a1
A bufsize of 0 in write mode is no longer forced to be 1.
......@@ -281,73 +284,27 @@ class FileObjectPosix(FileObjectBase):
bufsize of 0 was flushed when a newline was written, while
in gevent 1.1 it was flushed when more than one byte was
written. Note that this may have performance impacts.
.. versionchanged:: 1.3a1
On Python 2, enabling universal newlines no longer forces unicode
IO.
.. versionchanged:: 1.5
The default value for *mode* was changed from ``rb`` to ``r``. This is consistent
with :func:`open`, :func:`io.open`, and :class:`~.FileObjectThread`, which is the
default ``FileObject`` on some platforms.
.. versionchanged:: 1.5
Stop forcing buffering. Previously, given a ``buffering=0`` argument,
*buffering* would be set to 1, and ``buffering=1`` would be forced to
the default buffer size. This was a workaround for a long-standing concurrency
issue. Now the *buffering* argument is interpreted as intended.
"""
if isinstance(fobj, int):
fileno = fobj
fobj = None
else:
# The underlying object, if we have to open it, should be
# in binary mode. We'll take care of any coding needed.
raw_mode = mode.replace('t', 'b')
raw_mode = raw_mode + 'b' if 'b' not in raw_mode else raw_mode
new_fobj = self._open_raw(fobj, raw_mode, bufsize, closefd=False)
if new_fobj is not fobj:
close = True
fobj = new_fobj
fileno = fobj.fileno()
self.__fobj = fobj
assert isinstance(fileno, int)
# We want text mode if:
# - We're on Python 3, and no 'b' is in the mode.
# - A 't' is in the mode on any version.
# - We're on Python 2 and no 'b' is in the mode, and an encoding or errors is
# given.
text_mode = (
't' in mode
or (PY3 and 'b' not in mode)
or (PY2 and 'b' not in mode and (encoding, errors) != (None, None))
)
if text_mode:
self._translate = True
self._translate_newline = os.linesep if newline is _marker else newline
self._translate_encoding = encoding
self._transalate_errors = errors
if 'U' in mode:
self._translate = True
self._translate_newline = None
if PY2 and not text_mode:
self._translate_mode = 'byte_newlines'
self._orig_bufsize = bufsize
if bufsize < 0 or bufsize == 1:
bufsize = self.default_bufsize
elif bufsize == 0:
bufsize = 1
if mode[0] == 'r':
IOFamily = BufferedReader
else:
assert mode[0] == 'w'
IOFamily = BufferedWriter
if self._orig_bufsize == 0:
# We could also simply pass self.fileio as *io*, but this way
# we at least consistently expose a BufferedWriter in our *io*
# attribute.
IOFamily = FlushingBufferedWriter
default_bufsize = DEFAULT_BUFFER_SIZE
def __init__(self, *args, **kwargs):
descriptor = GreenOpenDescriptor(*args, **kwargs)
# This attribute is documented as available for non-blocking reads.
self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)
buffered_fobj = IOFamily(self.fileio, bufsize)
super(FileObjectPosix, self).__init__(buffered_fobj, close)
self.fileio, buffered_fobj = descriptor.open_raw_and_wrapped()
super(FileObjectPosix, self).__init__(buffered_fobj, descriptor.closefd)
def _do_close(self, fobj, closefd):
try:
......@@ -355,14 +312,5 @@ class FileObjectPosix(FileObjectBase):
# self.fileio already knows whether or not to close the
# file descriptor
self.fileio.close()
if closefd and self.__fobj is not None:
try:
self.__fobj.close()
except IOError:
pass
finally:
self.__fobj = None
self.fileio = None
def __iter__(self):
return self._io
......@@ -261,7 +261,9 @@ class socket(object):
except the only mode characters supported are 'r', 'w' and 'b'.
The semantics are similar too.
"""
# (XXX refactor to share code?)
# XXX refactor to share code? We ought to be able to use our FileObject,
# adding the appropriate amount of refcounting. At the very least we can use our
# OpenDescriptor to handle the parsing.
for c in mode:
if c not in {"r", "w", "b"}:
raise ValueError("invalid mode %r (only r, w, b allowed)")
......
......@@ -56,14 +56,16 @@ if 'namedtuple' in __all__:
# See notes in _socket2.py. Python 3 returns much nicer
# `io` object wrapped around a SocketIO class.
assert not hasattr(__ssl__._fileobject, '__enter__') # pylint:disable=no-member
if hasattr(__ssl__, '_fileobject'):
assert not hasattr(__ssl__._fileobject, '__enter__') # pylint:disable=no-member
class _fileobject(__ssl__._fileobject): # pylint:disable=no-member
class _fileobject(getattr(__ssl__, '_fileobject', object)): # pylint:disable=no-member
def __enter__(self):
return self
def __exit__(self, *args):
# pylint:disable=no-member
if not self.closed:
self.close()
......@@ -96,14 +98,14 @@ def create_default_context(purpose=Purpose.SERVER_AUTH, cafile=None,
context = SSLContext(PROTOCOL_SSLv23)
# SSLv2 considered harmful.
context.options |= OP_NO_SSLv2
context.options |= OP_NO_SSLv2 # pylint:disable=no-member
# SSLv3 has problematic security and is only required for really old
# clients such as IE6 on Windows XP
context.options |= OP_NO_SSLv3
context.options |= OP_NO_SSLv3 # pylint:disable=no-member
# disable compression to prevent CRIME attacks (OpenSSL 1.0+)
context.options |= getattr(_ssl, "OP_NO_COMPRESSION", 0)
context.options |= getattr(_ssl, "OP_NO_COMPRESSION", 0) # pylint:disable=no-member
if purpose == Purpose.SERVER_AUTH:
# verify certs and host name in client mode
......@@ -112,11 +114,11 @@ def create_default_context(purpose=Purpose.SERVER_AUTH, cafile=None,
elif purpose == Purpose.CLIENT_AUTH:
# Prefer the server's ciphers by default so that we get stronger
# encryption
context.options |= getattr(_ssl, "OP_CIPHER_SERVER_PREFERENCE", 0)
context.options |= getattr(_ssl, "OP_CIPHER_SERVER_PREFERENCE", 0) # pylint:disable=no-member
# Use single use keys in order to improve forward secrecy
context.options |= getattr(_ssl, "OP_SINGLE_DH_USE", 0)
context.options |= getattr(_ssl, "OP_SINGLE_ECDH_USE", 0)
context.options |= getattr(_ssl, "OP_SINGLE_DH_USE", 0) # pylint:disable=no-member
context.options |= getattr(_ssl, "OP_SINGLE_ECDH_USE", 0) # pylint:disable=no-member
# disallow ciphers with known vulnerabilities
context.set_ciphers(_RESTRICTED_SERVER_CIPHERS)
......@@ -146,10 +148,10 @@ def _create_unverified_context(protocol=PROTOCOL_SSLv23, cert_reqs=None,
context = SSLContext(protocol)
# SSLv2 considered harmful.
context.options |= OP_NO_SSLv2
context.options |= OP_NO_SSLv2 # pylint:disable=no-member
# SSLv3 has problematic security and is only required for really old
# clients such as IE6 on Windows XP
context.options |= OP_NO_SSLv3
context.options |= OP_NO_SSLv3 # pylint:disable=no-member
if cert_reqs is not None:
context.verify_mode = cert_reqs
......
"""
Wrappers to make file-like objects cooperative.
.. class:: FileObject
.. class:: FileObject(fobj, mode='r', buffering=-1, closefd=True, encoding=None, errors=None, newline=None)
The main entry point to the file-like gevent-compatible behaviour. It will be defined
to be the best available implementation.
The main entry point to the file-like gevent-compatible behaviour. It
will be defined to be the best available implementation.
All the parameters are as for :func:`io.open`.
:param fobj: Usually a file descriptor of a socket. Can also be
another object with a ``fileno()`` method, or an object that can
be passed to ``io.open()`` (e.g., a file system path). If the object
is not a socket, the results will vary based on the platform and the
type of object being opened.
All supported versions of Python allow :class:`os.PathLike` objects.
.. versionchanged:: 1.5
Accept str and ``PathLike`` objects for *fobj* on all versions of Python.
.. versionchanged:: 1.5
Add *encoding*, *errors* and *newline* arguments.
.. versionchanged:: 1.5
Accept *closefd* and *buffering* instead of *close* and *bufsize* arguments.
The latter remain for backwards compatibility.
There are two main implementations of ``FileObject``. On all systems,
there is :class:`FileObjectThread` which uses the built-in native
......@@ -12,9 +30,11 @@ threadpool to avoid blocking the entire interpreter. On UNIX systems
(those that support the :mod:`fcntl` module), there is also
:class:`FileObjectPosix` which uses native non-blocking semantics.
A third class, :class:`FileObjectBlock`, is simply a wrapper that executes everything
synchronously (and so is not gevent-compatible). It is provided for testing and debugging
purposes.
A third class, :class:`FileObjectBlock`, is simply a wrapper that
executes everything synchronously (and so is not gevent-compatible).
It is provided for testing and debugging purposes.
All classes have the same signature; some may accept extra keyword arguments.
Configuration
=============
......@@ -27,7 +47,9 @@ You may also set it to the fully qualified class name of another
object that implements the file interface to use one of your own
objects.
.. note:: The environment variable must be set at the time this module
.. note::
The environment variable must be set at the time this module
is first imported.
Classes
......@@ -58,4 +80,6 @@ from gevent._fileobjectcommon import FileObjectBlock
# None of the possible objects can live in this module because
# we would get an import cycle and the config couldn't be set from code.
# TODO: zope.hookable would be great for allowing this to be imported
# without requiring configuration but still being very fast.
FileObject = config.fileobject
......@@ -37,7 +37,9 @@ import os
import signal
import sys
import traceback
from gevent.event import AsyncResult
from gevent.exceptions import ConcurrentObjectUseError
from gevent.hub import _get_hub_noargs as get_hub
from gevent.hub import linkproxy
from gevent.hub import sleep
......@@ -428,7 +430,7 @@ else:
_set_inheritable = lambda i, v: True
def FileObject(*args):
def FileObject(*args, **kwargs):
# Defer importing FileObject until we need it
# to allow it to be configured more easily.
from gevent.fileobject import FileObject as _FileObject
......@@ -611,26 +613,20 @@ class Popen(object):
if PY3 and text_mode:
# Under Python 3, if we left on the 'b' we'd get different results
# depending on whether we used FileObjectPosix or FileObjectThread
self.stdin = FileObject(p2cwrite, 'wb', bufsize)
self.stdin.translate_newlines(None,
write_through=True,
line_buffering=(bufsize == 1),
self.stdin = FileObject(p2cwrite, 'w', bufsize,
encoding=self.encoding, errors=self.errors)
else:
self.stdin = FileObject(p2cwrite, 'wb', bufsize)
if c2pread != -1:
if universal_newlines or text_mode:
if PY3:
# FileObjectThread doesn't support the 'U' qualifier
# with a bufsize of 0
self.stdout = FileObject(c2pread, 'rb', bufsize)
self.stdout = FileObject(c2pread, 'r', bufsize,
encoding=self.encoding, errors=self.errors)
# NOTE: Universal Newlines are broken on Windows/Py3, at least
# in some cases. This is true in the stdlib subprocess module
# as well; the following line would fix the test cases in
# test__subprocess.py that depend on python_universal_newlines,
# but would be inconsistent with the stdlib:
#msvcrt.setmode(self.stdout.fileno(), os.O_TEXT)
self.stdout.translate_newlines('r', encoding=self.encoding, errors=self.errors)
else:
self.stdout = FileObject(c2pread, 'rU', bufsize)
else:
......@@ -638,8 +634,8 @@ class Popen(object):
if errread != -1:
if universal_newlines or text_mode:
if PY3:
self.stderr = FileObject(errread, 'rb', bufsize)
self.stderr.translate_newlines(None, encoding=encoding, errors=errors)
self.stderr = FileObject(errread, 'r', bufsize,
encoding=encoding, errors=errors)
else:
self.stderr = FileObject(errread, 'rU', bufsize)
else:
......@@ -756,7 +752,13 @@ class Popen(object):
def _read():
try:
data = pipe.read()
except RuntimeError:
except (
# io.Buffered* can raise RuntimeError: 'reentrant call'
RuntimeError,
# unbuffered Posix IO that we're already waiting on
# can raise this. Closing the pipe will free those greenlets up.
ConcurrentObjectUseError
):
return
if not data:
return
......
......@@ -24,9 +24,13 @@ import sys
import gevent.core
from gevent import _compat as gsysinfo
VERBOSE = sys.argv.count('-v') > 1
# Python implementations
PYPY = gsysinfo.PYPY
CPYTHON = not PYPY
VERBOSE = sys.argv.count('-v') > 1
# Platform/operating system
WIN = gsysinfo.WIN
LINUX = gsysinfo.LINUX
OSX = gsysinfo.OSX
......
from __future__ import print_function
import functools
import gc
import io
import os
import sys
import tempfile
import gc
import unittest
import gevent
from gevent import fileobject
from gevent._fileobjectcommon import OpenDescriptor
from gevent._compat import PY2
from gevent._compat import PY3
from gevent._compat import text_type
import gevent.testing as greentest
from gevent.testing.sysinfo import PY3
from gevent.testing.flaky import reraiseFlakyTestRaceConditionLibuv
from gevent.testing.skipping import skipOnLibuvOnCIOnPyPy
from gevent.testing.skipping import skipOnLibuv
from gevent.testing import sysinfo
try:
ResourceWarning
......@@ -35,8 +39,18 @@ def close_fd_quietly(fd):
except (IOError, OSError):
pass
def skipUnlessWorksWithRegularFiles(func):
    """
    Method decorator: skip the wrapped test when the test class
    sets ``WORKS_WITH_REGULAR_FILES`` to a false value.
    """
    @functools.wraps(func)
    def wrapper(self):
        if not self.WORKS_WITH_REGULAR_FILES:
            # skipTest raises unittest.SkipTest, so the wrapped
            # test body is never reached.
            self.skipTest("Doesn't work with regular files")
        func(self)
    return wrapper
class TestFileObjectBlock(greentest.TestCase): # serves as a base for the concurrent tests too
WORKS_WITH_REGULAR_FILES = True
def _getTargetClass(self):
return fileobject.FileObjectBlock
......@@ -86,8 +100,7 @@ class TestFileObjectBlock(greentest.TestCase): # serves as a base for the concur
def test_del_close(self):
self._test_del(close=True)
@skipOnLibuvOnCIOnPyPy("This appears to crash on libuv/pypy/travis.")
# No idea why, can't duplicate locally.
@skipUnlessWorksWithRegularFiles
def test_seek(self):
fileno, path = tempfile.mkstemp('.gevent.test__fileobject.test_seek')
self.addCleanup(os.remove, path)
......@@ -102,16 +115,7 @@ class TestFileObjectBlock(greentest.TestCase): # serves as a base for the concur
native_data = f.read(1024)
with open(path, 'rb') as f_raw:
try:
f = self._makeOne(f_raw, 'rb', close=False)
except ValueError:
# libuv on Travis can raise EPERM
# from FileObjectPosix. I can't produce it on mac os locally,
# don't know what the issue is. This started happening on Jan 19,
# in the branch that caused all watchers to be explicitly closed.
# That shouldn't have any effect on io watchers, though, which were
# already being explicitly closed.
reraiseFlakyTestRaceConditionLibuv()
if PY3 or hasattr(f, 'seekable'):
# On Python 3, all objects should have seekable.
......@@ -128,33 +132,66 @@ class TestFileObjectBlock(greentest.TestCase): # serves as a base for the concur
self.assertEqual(native_data, s)
self.assertEqual(native_data, fileobj_data)
def test_str_default_to_native(self):
# With no 'b' or 't' given, read and write native str.
fileno, path = tempfile.mkstemp('.gevent_test_str_default')
def __check_native_matches(self, byte_data, open_mode,
meth='read', **open_kwargs):
fileno, path = tempfile.mkstemp('.gevent_test_' + open_mode)
self.addCleanup(os.remove, path)
os.write(fileno, b'abcdefg')
os.write(fileno, byte_data)
os.close(fileno)
with open(path, 'r') as f:
native_data = f.read()
with io.open(path, open_mode, **open_kwargs) as f:
native_data = getattr(f, meth)()
with self._makeOne(path, 'r') as f:
gevent_data = f.read()
with self._makeOne(path, open_mode, **open_kwargs) as f:
gevent_data = getattr(f, meth)()
self.assertEqual(native_data, gevent_data)
return gevent_data
def test_text_encoding(self):
fileno, path = tempfile.mkstemp('.gevent_test_str_default')
self.addCleanup(os.remove, path)
@skipUnlessWorksWithRegularFiles
def test_str_default_to_native(self):
    # A mode with neither 'b' nor 't' reads and writes native ``str``.
    data = self.__check_native_matches(b'abcdefg', 'r')
    self.assertIsInstance(data, str)
os.write(fileno, u'\N{SNOWMAN}'.encode('utf-8'))
os.close(fileno)
@skipUnlessWorksWithRegularFiles
def test_text_encoding(self):
    # Reading with an explicit encoding and buffering produces
    # text (unicode) data that matches what io.open returns.
    data = self.__check_native_matches(
        u'\N{SNOWMAN}'.encode('utf-8'),
        'r+',
        buffering=5, encoding='utf-8',
    )
    self.assertIsInstance(data, text_type)
@skipUnlessWorksWithRegularFiles
def test_does_not_leak_on_exception(self):
    # If an exception occurs during opening,
    # everything still gets cleaned up.
    # NOTE(review): placeholder only -- the body is empty, so this
    # test currently asserts nothing and always passes; implement
    # the leak check or mark it as an expected failure.
    pass
with self._makeOne(path, 'r+', bufsize=5, encoding='utf-8') as f:
gevent_data = f.read()
@skipUnlessWorksWithRegularFiles
def test_rbU_produces_bytes(self):
    # Including U in rb still produces bytes; the universal-newline
    # behaviour is essentially ignored in explicit bytes mode.
    lines = self.__check_native_matches(
        b'line1\nline2\r\nline3\rlastline\n\n',
        'rbU',
        meth='readlines',
    )
    self.assertIsInstance(lines[0], bytes)
    # Binary readlines splits only on \n, giving four lines here.
    self.assertEqual(len(lines), 4)
@skipUnlessWorksWithRegularFiles
def test_rU_produces_native(self):
    # Universal-newline mode without 'b' reads native ``str`` lines.
    lines = self.__check_native_matches(
        b'line1\nline2\r\nline3\rlastline\n\n',
        'rU',
        meth='readlines',
    )
    self.assertIsInstance(lines[0], str)
self.assertEqual(u'\N{SNOWMAN}', gevent_data)
def test_close_pipe(self):
# Issue #190, 203
......@@ -264,6 +301,12 @@ class TestFileObjectThread(ConcurrentFileObjectMixin,
class TestFileObjectPosix(ConcurrentFileObjectMixin,
TestFileObjectBlock):
if sysinfo.LIBUV and sysinfo.LINUX:
# On Linux, initializing the watcher for a regular
# file results in libuv raising EPERM. But that works
# fine on other platforms.
WORKS_WITH_REGULAR_FILES = False
def _getTargetClass(self):
return fileobject.FileObjectPosix
......@@ -293,14 +336,6 @@ class TestFileObjectPosix(ConcurrentFileObjectMixin,
self.assertEqual(io_ex.args, os_ex.args)
self.assertEqual(str(io_ex), str(os_ex))
@skipOnLibuv("libuv on linux raises EPERM ") # but works fine on macOS
def test_str_default_to_native(self):
TestFileObjectBlock.test_str_default_to_native(self)
@skipOnLibuv("libuv in linux raises EPERM")
def test_text_encoding(self):
TestFileObjectBlock.test_text_encoding(self)
class TestTextMode(unittest.TestCase):
def test_default_mode_writes_linesep(self):
......@@ -323,6 +358,39 @@ class TestTextMode(unittest.TestCase):
self.assertEqual(data, os.linesep.encode('ascii'))
class TestOpenDescriptor(greentest.TestCase):
    """
    Tests the argument validation performed by ``OpenDescriptor``.

    The individual ``test_*`` methods for the ``CASES`` table are
    generated at import time.
    """

    def _makeOne(self, *args, **kwargs):
        return OpenDescriptor(*args, **kwargs)

    def _check(self, regex, kind, *args, **kwargs):
        # Creating the descriptor with these arguments must raise
        # *kind* with a message matching *regex*.
        with self.assertRaisesRegex(kind, regex):
            self._makeOne(*args, **kwargs)

    # Table helpers: each produces (message-regex, exception-class, kwargs).
    def case(re, **kwargs): # pylint:disable=no-self-argument
        return (re, TypeError, kwargs)

    def vase(re, **kwargs): # pylint:disable=no-self-argument
        return (re, ValueError, kwargs)

    CASES = (
        case('mode', mode=42),
        case('buffering', buffering='nope'),
        case('encoding', encoding=42),
        case('errors', errors=42),
        vase('mode', mode='aoeug'),
        vase('mode U cannot be combined', mode='wU'),
        vase('text and binary', mode='rtb'),
        vase('append mode at once', mode='rw'),
        vase('exactly one', mode='+'),
        vase('take an encoding', mode='rb', encoding='ascii'),
        vase('take an errors', mode='rb', errors='strict'),
        vase('take a newline', mode='rb', newline='\n'),
    )
def pop():
    """
    Attach a ``test_<regex>`` method to :class:`TestOpenDescriptor`
    for every entry in its ``CASES`` table.
    """
    def make_test(regex, kind, kwargs):
        # Bind the loop variables eagerly so each generated test
        # checks its own case.
        def test(self):
            self._check(regex, kind, 1, **kwargs)
        return test

    for regex, kind, kwargs in TestOpenDescriptor.CASES:
        setattr(TestOpenDescriptor, 'test_' + regex,
                make_test(regex, kind, kwargs))
pop()
if __name__ == '__main__':
......
......@@ -57,7 +57,7 @@ class BlockingTestMixin(object):
self.fail("blocking function '%r' appeared not to block" %
block_func)
self.t.join(10) # make sure the thread terminates
if self.t.isAlive():
if self.t.is_alive():
self.fail("trigger function '%r' appeared to not return" %
trigger_func)
return self.result
......@@ -72,7 +72,7 @@ class BlockingTestMixin(object):
block_func(*block_args)
finally:
self.t.join(10) # make sure the thread terminates
if self.t.isAlive():
if self.t.is_alive():
self.fail("trigger function '%r' appeared to not return" %
trigger_func)
if not self.t.startedEvent.isSet():
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment