Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gevent
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
gevent
Commits
5bf0ca88
Commit
5bf0ca88
authored
Dec 02, 2019
by
Jason Madden
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Consistently handle text and binary modes in the three FileObject classes.
And add tests. Fixes #1441.
parent
625c88b0
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
214 additions
and
75 deletions
+214
-75
CHANGES.rst
CHANGES.rst
+13
-0
src/gevent/_fileobjectcommon.py
src/gevent/_fileobjectcommon.py
+87
-35
src/gevent/_fileobjectposix.py
src/gevent/_fileobjectposix.py
+54
-25
src/gevent/tests/test__fileobject.py
src/gevent/tests/test__fileobject.py
+60
-15
No files found.
CHANGES.rst
View file @
5bf0ca88
...
...
@@ -11,6 +11,19 @@
3.5.9, 3.6.9, 3.7.5 and 3.8.0 (final). It is also tested with PyPy2
7.2 and PyPy 3.6 7.2
- The file objects (FileObjectPosix, FileObjectThread) now
consistently text and binary modes. If neither 'b' nor 't' is given
in the mode, they will read and write native strings. If 't' is
given, they will always work with unicode strings, and 'b' will
always work with byte strings. See :issue:`1441`.
- The file objects accept *encoding*, *errors* and *newline*
arguments. On Python 2, these are only used if 't' is in the mode.
- The default mode for FileObjectPosix changed from ``rb`` to simply
``r``, for consistency with the other file objects and the standard
``open`` and ``io.open`` functions.
1.5a2 (2019-10-21)
==================
...
...
src/gevent/_fileobjectcommon.py
View file @
5bf0ca88
...
...
@@ -5,8 +5,7 @@ try:
except
ImportError
:
EBADF
=
9
import
os
from
io
import
TextIOWrapper
import
io
import
functools
import
sys
...
...
@@ -14,6 +13,7 @@ import sys
from
gevent.hub
import
_get_hub_noargs
as
get_hub
from
gevent._compat
import
integer_types
from
gevent._compat
import
reraise
from
gevent._compat
import
fspath
from
gevent.lock
import
Semaphore
,
DummySemaphore
class
cancel_wait_ex
(
IOError
):
...
...
@@ -32,7 +32,8 @@ class FileObjectClosed(IOError):
class
FileObjectBase
(
object
):
"""
Internal base class to ensure a level of consistency
between FileObjectPosix and FileObjectThread
between :class:`~.FileObjectPosix`, :class:`~.FileObjectThread`
and :class:`~.FileObjectBlock`.
"""
# List of methods we delegate to the wrapping IO object, if they
...
...
@@ -60,17 +61,18 @@ class FileObjectBase(object):
)
# Whether we are translating universal newlines or not.
# Whether we should apply a TextWrapper (the names are historical).
# Subclasses should set these before calling our constructor.
_translate
=
False
_translate_encoding
=
None
_translate_errors
=
None
_translate_newline
=
None
# None means universal
def
__init__
(
self
,
io
,
closefd
):
def
__init__
(
self
,
fobj
,
closefd
):
"""
:param
io
: An io.IOBase-like object.
:param
fobj
: An io.IOBase-like object.
"""
self
.
_io
=
io
self
.
_io
=
fobj
# We don't actually use this property ourself, but we save it (and
# pass it along) for compatibility.
self
.
_close
=
closefd
...
...
@@ -78,7 +80,9 @@ class FileObjectBase(object):
if
self
.
_translate
:
# This automatically handles delegation by assigning to
# self.io
self
.
translate_newlines
(
None
,
self
.
_translate_encoding
,
self
.
_translate_errors
)
self
.
translate_newlines
(
None
,
self
.
_translate_encoding
,
self
.
_translate_errors
)
else
:
self
.
_do_delegate_methods
()
...
...
@@ -108,7 +112,7 @@ class FileObjectBase(object):
return
method
def
translate_newlines
(
self
,
mode
,
*
text_args
,
**
text_kwargs
):
wrapper
=
TextIOWrapper
(
self
.
_io
,
*
text_args
,
**
text_kwargs
)
wrapper
=
io
.
TextIOWrapper
(
self
.
_io
,
*
text_args
,
**
text_kwargs
)
if
mode
:
wrapper
.
mode
=
mode
self
.
io
=
wrapper
...
...
@@ -123,9 +127,9 @@ class FileObjectBase(object):
if
self
.
_io
is
None
:
return
io
=
self
.
_io
fobj
=
self
.
_io
self
.
_io
=
None
self
.
_do_close
(
io
,
self
.
_close
)
self
.
_do_close
(
fobj
,
self
.
_close
)
def
_do_close
(
self
,
fobj
,
closefd
):
raise
NotImplementedError
()
...
...
@@ -147,22 +151,71 @@ class FileObjectBase(object):
def
__exit__
(
self
,
*
args
):
self
.
close
()
@
classmethod
def
_open_raw
(
cls
,
fobj
,
mode
=
'r'
,
buffering
=-
1
,
encoding
=
None
,
errors
=
None
,
newline
=
None
,
closefd
=
True
):
"""
Uses :func:`io.open` on *fobj* and returns the IO object.
This is a compatibility wrapper for Python 2 and Python 3. It
supports PathLike objects for *fobj* on all versions.
If the object is already an object with a ``fileno`` method,
it is returned unchanged.
On Python 2, if the mode only specifies read, write or append,
and encoding and errors are not specified, then
:obj:`io.FileIO` is used to get the IO object. This ensures we
return native strings unless explicitly requested.
.. versionchanged: 1.5
Support closefd for Python 2 native string readers.
"""
if
hasattr
(
fobj
,
'fileno'
):
return
fobj
if
not
isinstance
(
fobj
,
integer_types
):
# Not an integer. Support PathLike on Python 2 and Python <= 3.5.
fobj
=
fspath
(
fobj
)
closefd
=
True
if
bytes
is
str
and
mode
in
(
'r'
,
'r+'
,
'w'
,
'w+'
,
'a'
,
'a+'
)
\
and
encoding
is
None
and
errors
is
None
:
# Python 2, default open. Return native str type, not unicode, which
# is what would happen with io.open('r'), but we don't want to open the file
# in binary mode since that skips newline conversion.
fobj
=
io
.
FileIO
(
fobj
,
mode
,
closefd
=
closefd
)
if
'+'
in
mode
:
BufFactory
=
io
.
BufferedRandom
elif
mode
[
0
]
==
'r'
:
BufFactory
=
io
.
BufferedReader
else
:
BufFactory
=
io
.
BufferedWriter
if
buffering
==
-
1
:
fobj
=
BufFactory
(
fobj
)
elif
buffering
!=
0
:
fobj
=
BufFactory
(
fobj
,
buffering
)
else
:
# Python 3, or we have a mode that Python 2's os.fdopen/open can't handle
# (x) or they explicitly asked for binary or text mode.
fobj
=
io
.
open
(
fobj
,
mode
,
buffering
,
encoding
,
errors
,
newline
,
closefd
)
return
fobj
class
FileObjectBlock
(
FileObjectBase
):
def
__init__
(
self
,
fobj
,
*
args
,
**
kwargs
):
closefd
=
kwargs
.
pop
(
'close'
,
True
)
if
kwargs
:
raise
TypeError
(
'Unexpected arguments: %r'
%
kwargs
.
keys
())
if
isinstance
(
fobj
,
integer_types
):
if
not
closefd
:
# we cannot do this, since fdopen object will close the descriptor
raise
TypeError
(
'FileObjectBlock does not support close=False on an fd.'
)
fobj
=
os
.
fdopen
(
fobj
,
*
args
)
closefd
=
kwargs
[
'closefd'
]
=
kwargs
.
pop
(
'close'
,
True
)
if
'bufsize'
in
kwargs
:
# compat with other constructors
kwargs
[
'buffering'
]
=
kwargs
.
pop
(
'bufsize'
)
fobj
=
self
.
_open_raw
(
fobj
,
*
args
,
**
kwargs
)
super
(
FileObjectBlock
,
self
).
__init__
(
fobj
,
closefd
)
def
_do_close
(
self
,
fobj
,
closefd
):
fobj
.
close
()
class
FileObjectThread
(
FileObjectBase
):
"""
A file-like object wrapping another file-like object, performing all blocking
...
...
@@ -176,12 +229,18 @@ class FileObjectThread(FileObjectBase):
The file object is closed using the threadpool. Note that whether or
not this action is synchronous or asynchronous is not documented.
.. versionchanged:: 1.5
Accept str and ``PathLike`` objects for *fobj* on all versions of Python.
.. versionchanged:: 1.5
Add *encoding*, *errors* and *newline* arguments.
"""
def
__init__
(
self
,
fobj
,
mode
=
None
,
bufsize
=-
1
,
close
=
True
,
threadpool
=
None
,
lock
=
True
):
def
__init__
(
self
,
fobj
,
mode
=
'r'
,
bufsize
=-
1
,
close
=
True
,
threadpool
=
None
,
lock
=
True
,
encoding
=
None
,
errors
=
None
,
newline
=
None
):
"""
:param fobj: The underlying file-like object to wrap, or an integer fileno
that will be pass to :func:`os.fdopen` along with *mode* and *bufsize*.
:param fobj: The underlying file-like object to wrap, or something
acceptable to :func:`io.open` (along with *mode* and *bufsize*, which is translated
to *buffering*).
:keyword bool lock: If True (the default) then all operations will
be performed one-by-one. Note that this does not guarantee that, if using
this file object from multiple threads/greenlets, operations will be performed
...
...
@@ -200,16 +259,9 @@ class FileObjectThread(FileObjectBase):
self
.
lock
=
DummySemaphore
()
if
not
hasattr
(
self
.
lock
,
'__enter__'
):
raise
TypeError
(
'Expected a Semaphore or boolean, got %r'
%
type
(
self
.
lock
))
if
isinstance
(
fobj
,
integer_types
):
if
not
closefd
:
# we cannot do this, since fdopen object will close the descriptor
raise
TypeError
(
'FileObjectThread does not support close=False on an fd.'
)
if
mode
is
None
:
assert
bufsize
==
-
1
,
"If you use the default mode, you can't choose a bufsize"
fobj
=
os
.
fdopen
(
fobj
)
else
:
fobj
=
os
.
fdopen
(
fobj
,
mode
,
bufsize
)
fobj
=
self
.
_open_raw
(
fobj
,
mode
,
bufsize
,
encoding
=
encoding
,
errors
=
errors
,
newline
=
newline
,
closefd
=
close
)
self
.
__io_holder
=
[
fobj
]
# signal for _wrap_method
super
(
FileObjectThread
,
self
).
__init__
(
fobj
,
closefd
)
...
...
@@ -244,8 +296,8 @@ class FileObjectThread(FileObjectBase):
def
_do_delegate_methods
(
self
):
super
(
FileObjectThread
,
self
).
_do_delegate_methods
()
if
not
hasattr
(
self
,
'read1'
)
and
'r'
in
getattr
(
self
.
_io
,
'mode'
,
''
):
self
.
read1
=
self
.
read
#
if not hasattr(self, 'read1') and 'r' in getattr(self._io, 'mode', ''):
#
self.read1 = self.read
self
.
__io_holder
[
0
]
=
self
.
_io
def
_extra_repr
(
self
):
...
...
src/gevent/_fileobjectposix.py
View file @
5bf0ca88
...
...
@@ -10,6 +10,8 @@ from io import RawIOBase
from
io
import
UnsupportedOperation
from
gevent._compat
import
reraise
from
gevent._compat
import
PY2
from
gevent._compat
import
PY3
from
gevent._fileobjectcommon
import
cancel_wait_ex
from
gevent._fileobjectcommon
import
FileObjectBase
from
gevent.hub
import
get_hub
...
...
@@ -183,6 +185,8 @@ class FlushingBufferedWriter(BufferedWriter):
return
ret
_marker
=
object
()
class
FileObjectPosix
(
FileObjectBase
):
"""
A file-like object that operates on non-blocking files but
...
...
@@ -234,12 +238,21 @@ class FileObjectPosix(FileObjectBase):
better file-like semantics (and portability to Python 3).
.. versionchanged:: 1.2a1
Document the ``fileio`` attribute for non-blocking reads.
.. versionchanged:: 1.5
The default value for *mode* was changed from ``rb`` to ``r``.
.. versionchanged:: 1.5
Support strings and ``PathLike`` objects for ``fobj`` on all versions
of Python. Note that caution above.
.. versionchanged:: 1.5
Add *encoding*, *errors* and *newline* argument.
"""
#: platform specific default for the *bufsize* parameter
default_bufsize
=
io
.
DEFAULT_BUFFER_SIZE
def
__init__
(
self
,
fobj
,
mode
=
'rb'
,
bufsize
=-
1
,
close
=
True
):
def
__init__
(
self
,
fobj
,
mode
=
'r'
,
bufsize
=-
1
,
close
=
True
,
encoding
=
None
,
errors
=
None
,
newline
=
_marker
):
# pylint:disable=too-many-locals
"""
:param fobj: Either an integer fileno, or an object supporting the
usual :meth:`socket.fileno` method. The file *will* be
...
...
@@ -248,7 +261,7 @@ class FileObjectPosix(FileObjectBase):
(where the "b" or "U" can be omitted).
If "U" is part of the mode, universal newlines will be used. On Python 2,
if 't' is not in the mode, this will result in returning byte (native) strings;
putting 't' in the mode will return text strings. This may cause
putting 't' in the mode will return text
(unicode)
strings. This may cause
:exc:`UnicodeDecodeError` to be raised.
:keyword int bufsize: If given, the size of the buffer to use. The default
value means to use a platform-specific default
...
...
@@ -274,15 +287,40 @@ class FileObjectPosix(FileObjectBase):
fileno
=
fobj
fobj
=
None
else
:
# The underlying object, if we have to open it, should be
# in binary mode. We'll take care of any coding needed.
raw_mode
=
mode
.
replace
(
't'
,
'b'
)
raw_mode
=
raw_mode
+
'b'
if
'b'
not
in
raw_mode
else
raw_mode
new_fobj
=
self
.
_open_raw
(
fobj
,
raw_mode
,
bufsize
,
closefd
=
False
)
if
new_fobj
is
not
fobj
:
close
=
True
fobj
=
new_fobj
fileno
=
fobj
.
fileno
()
if
not
isinstance
(
fileno
,
int
):
raise
TypeError
(
'fileno must be int: %r'
%
fileno
)
orig_mode
=
mode
mode
=
(
mode
or
'rb'
).
replace
(
'b'
,
''
)
self
.
__fobj
=
fobj
assert
isinstance
(
fileno
,
int
)
# We want text mode if:
# - We're on Python 3, and no 'b' is in the mode.
# - A 't' is in the mode on any version.
# - We're on Python 2 and no 'b' is in the mode, and an encoding or errors is
# given.
text_mode
=
(
't'
in
mode
or
(
PY3
and
'b'
not
in
mode
)
or
(
PY2
and
'b'
not
in
mode
and
(
encoding
,
errors
)
!=
(
None
,
None
))
)
if
text_mode
:
self
.
_translate
=
True
self
.
_translate_newline
=
os
.
linesep
if
newline
is
_marker
else
newline
self
.
_translate_encoding
=
encoding
self
.
_transalate_errors
=
errors
if
'U'
in
mode
:
self
.
_translate
=
True
if
bytes
is
str
and
't'
not
in
mode
:
self
.
_translate_newline
=
None
if
PY2
and
not
text_mode
:
# We're going to be producing unicode objects, but
# universal newlines doesn't do that in the stdlib,
# so fix that to return str objects. The fix is two parts:
...
...
@@ -301,20 +339,6 @@ class FileObjectPosix(FileObjectBase):
return
wrapped
return
m
self
.
_wrap_method
=
wrap_method
mode
=
mode
.
replace
(
'U'
,
''
)
else
:
self
.
_translate
=
False
mode
=
mode
.
replace
(
't'
,
''
)
if
len
(
mode
)
!=
1
and
mode
not
in
'rw'
:
# pragma: no cover
# Python 3 builtin `open` raises a ValueError for invalid modes;
# Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
# enabled (which it usually was). Match Python 3 because it makes more sense
# and because __debug__ may not be enabled.
# NOTE: This is preventing a mode like 'rwb' for binary random access;
# that code was never tested and was explicitly marked as "not used"
raise
ValueError
(
'mode can only be [rb, rU, wb], not %r'
%
(
orig_mode
,))
self
.
_orig_bufsize
=
bufsize
...
...
@@ -323,10 +347,10 @@ class FileObjectPosix(FileObjectBase):
elif
bufsize
==
0
:
bufsize
=
1
if
mode
==
'r'
:
if
mode
[
0
]
==
'r'
:
IOFamily
=
BufferedReader
else
:
assert
mode
==
'w'
assert
mode
[
0
]
==
'w'
IOFamily
=
BufferedWriter
if
self
.
_orig_bufsize
==
0
:
# We could also simply pass self.fileio as *io*, but this way
...
...
@@ -335,7 +359,7 @@ class FileObjectPosix(FileObjectBase):
IOFamily
=
FlushingBufferedWriter
self
.
_fobj
=
fobj
# This attribute is documented as available for non-blocking reads.
self
.
fileio
=
GreenFileDescriptorIO
(
fileno
,
mode
,
closefd
=
close
)
...
...
@@ -349,8 +373,13 @@ class FileObjectPosix(FileObjectBase):
# self.fileio already knows whether or not to close the
# file descriptor
self
.
fileio
.
close
()
if
closefd
and
self
.
__fobj
is
not
None
:
try
:
self
.
__fobj
.
close
()
except
IOError
:
pass
finally
:
self
.
_fobj
=
None
self
.
_
_
fobj
=
None
self
.
fileio
=
None
def
__iter__
(
self
):
...
...
src/gevent/tests/test__fileobject.py
View file @
5bf0ca88
...
...
@@ -7,6 +7,7 @@ import unittest
import
gevent
from
gevent
import
fileobject
from
gevent._compat
import
PY2
import
gevent.testing
as
greentest
from
gevent.testing.sysinfo
import
PY3
...
...
@@ -21,7 +22,7 @@ except NameError:
"Python 2 fallback"
def
w
riter
(
fobj
,
line
):
def
W
riter
(
fobj
,
line
):
for
character
in
line
:
fobj
.
write
(
character
)
fobj
.
flush
()
...
...
@@ -34,7 +35,7 @@ def close_fd_quietly(fd):
except
(
IOError
,
OSError
):
pass
class
TestFileObjectBlock
(
greentest
.
TestCase
):
class
TestFileObjectBlock
(
greentest
.
TestCase
):
# serves as a base for the concurrent tests too
def
_getTargetClass
(
self
):
return
fileobject
.
FileObjectBlock
...
...
@@ -127,6 +128,34 @@ class TestFileObjectBlock(greentest.TestCase):
self
.
assertEqual
(
native_data
,
s
)
self
.
assertEqual
(
native_data
,
fileobj_data
)
def
test_str_default_to_native
(
self
):
# With no 'b' or 't' given, read and write native str.
fileno
,
path
=
tempfile
.
mkstemp
(
'.gevent_test_str_default'
)
self
.
addCleanup
(
os
.
remove
,
path
)
os
.
write
(
fileno
,
b'abcdefg'
)
os
.
close
(
fileno
)
with
open
(
path
,
'r'
)
as
f
:
native_data
=
f
.
read
()
with
self
.
_makeOne
(
path
,
'r'
)
as
f
:
gevent_data
=
f
.
read
()
self
.
assertEqual
(
native_data
,
gevent_data
)
def
test_text_encoding
(
self
):
fileno
,
path
=
tempfile
.
mkstemp
(
'.gevent_test_str_default'
)
self
.
addCleanup
(
os
.
remove
,
path
)
os
.
write
(
fileno
,
u'
\
N{SNOWMAN}
'
.
encode
(
'utf-8'
))
os
.
close
(
fileno
)
with
self
.
_makeOne
(
path
,
'r+'
,
bufsize
=
5
,
encoding
=
'utf-8'
)
as
f
:
gevent_data
=
f
.
read
()
self
.
assertEqual
(
u'
\
N{SNOWMAN}
'
,
gevent_data
)
def
test_close_pipe
(
self
):
# Issue #190, 203
r
,
w
=
os
.
pipe
()
...
...
@@ -140,14 +169,31 @@ class ConcurrentFileObjectMixin(object):
# Additional tests for fileobjects that cooperate
# and we have full control of the implementation
def
test_read1
(
self
):
def
test_read1
_binary_present
(
self
):
# Issue #840
r
,
w
=
os
.
pipe
()
x
=
self
.
_makeOne
(
r
)
y
=
self
.
_makeOne
(
w
,
'w'
)
self
.
_close_on_teardown
(
x
)
self
.
_close_on_teardown
(
y
)
self
.
assertTrue
(
hasattr
(
x
,
'read1'
))
reader
=
self
.
_makeOne
(
r
,
'rb'
)
self
.
_close_on_teardown
(
reader
)
writer
=
self
.
_makeOne
(
w
,
'w'
)
self
.
_close_on_teardown
(
writer
)
self
.
assertTrue
(
hasattr
(
reader
,
'read1'
),
dir
(
reader
))
def
test_read1_text_not_present
(
self
):
# Only defined for binary.
r
,
w
=
os
.
pipe
()
reader
=
self
.
_makeOne
(
r
,
'rt'
)
self
.
_close_on_teardown
(
reader
)
self
.
addCleanup
(
os
.
close
,
w
)
self
.
assertFalse
(
hasattr
(
reader
,
'read1'
),
dir
(
reader
))
def
test_read1_default
(
self
):
# If just 'r' is given, whether it has one or not
# depends on if we're Python 2 or 3.
r
,
w
=
os
.
pipe
()
self
.
addCleanup
(
os
.
close
,
w
)
reader
=
self
.
_makeOne
(
r
)
self
.
_close_on_teardown
(
reader
)
self
.
assertEqual
(
PY2
,
hasattr
(
reader
,
'read1'
))
def
test_bufsize_0
(
self
):
# Issue #840
...
...
@@ -168,7 +214,7 @@ class ConcurrentFileObjectMixin(object):
import
warnings
r
,
w
=
os
.
pipe
()
lines
=
[
b'line1
\
n
'
,
b'line2
\
r
'
,
b'line3
\
r
\
n
'
,
b'line4
\
r
\
n
line5'
,
b'
\
n
line6'
]
g
=
gevent
.
spawn
(
w
riter
,
self
.
_makeOne
(
w
,
'wb'
),
lines
)
g
=
gevent
.
spawn
(
W
riter
,
self
.
_makeOne
(
w
,
'wb'
),
lines
)
try
:
with
warnings
.
catch_warnings
():
...
...
@@ -188,13 +234,12 @@ class TestFileObjectThread(ConcurrentFileObjectMixin,
def
_getTargetClass
(
self
):
return
fileobject
.
FileObjectThread
# FileObjectThread uses os.fdopen() when passed a file-descriptor,
# which returns an object with a destructor that can't be
# bypassed, so we can't even create one that way
def
test_del_noclose
(
self
):
with
self
.
assertRaisesRegex
(
TypeError
,
'FileObjectThread does not support close=False on an fd.'
):
self
.
_test_del
(
close
=
False
)
# In the past, we used os.fdopen() when given a file descriptor,
# and that has a destructor that can't be bypassed, so
# close=false wasn't allowed. Now that we do everything with the
# io module, it is allowed.
self
.
_test_del
(
close
=
False
)
# We don't test this with FileObjectThread. Sometimes the
# visibility of the 'close' operation, which happens in a
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment