Commit 422a7126 authored by Jason Madden's avatar Jason Madden

Performance and correctness improvements for Greenlets.

- Rework getting the stack, since we're no longer arbitrarily extending it. This is faster, and I found and fixed a bug on CPython.
- Let Cython do more of the work making sure we really have a hub as our parent.
parent 45d00068
......@@ -51,6 +51,19 @@
- Win: Make ``examples/process.py`` do something useful. See
:pr:`1378` by Robert Iannucci.
- Fix certain operations on a Greenlet in an invalid state (with an
invalid parent) to raise a `TypeError` sooner rather than an
`AttributeError` later. This is also slightly faster on CPython with
Cython. Inspired by :issue:`1363` as reported by Carson Ip. This
means that some extreme corner cases that might have passed by
replacing a Greenlet's parent with something that's not a gevent hub
now no longer will.
- Fix: The ``spawning_stack`` for Greenlets on CPython should now have
correct line numbers in more cases.
- Spawning greenlets can be up to 10% faster.
1.4.0 (2019-01-04)
==================
......
......@@ -39,7 +39,7 @@ cdef inline void greenlet_init():
cdef class WaitOperationsGreenlet(SwitchOutGreenletWithLoop):
# The Hub will extend this class.
cpdef wait(self, watcher)
cpdef cancel_wait(self, watcher, error, close_watcher=*)
cpdef _cancel_wait(self, watcher, error, close_watcher)
......
......@@ -21,7 +21,7 @@ _version_info = namedtuple('version_info',
#: .. deprecated:: 1.2
#: Use ``pkg_resources.parse_version(__version__)`` (or the equivalent
#: ``packaging.version.Version(__version__)``).
version_info = _version_info(1, 4, 0, 'dev', 0)
version_info = _version_info(1, 5, 0, 'dev', 0)
#: The human-readable PEP 440 version identifier.
#: Use ``pkg_resources.parse_version(__version__)`` or
......
......@@ -4,6 +4,7 @@ cimport cython
from gevent.__ident cimport IdentRegistry
from gevent.__hub_local cimport get_hub_noargs as get_hub
from gevent.__waiter cimport Waiter
from gevent.__greenlet_primitives cimport SwitchOutGreenletWithLoop
cdef bint _PYPY
cdef sys_getframe
......@@ -15,7 +16,12 @@ cdef InvalidSwitchError
cdef extern from "greenlet/greenlet.h":
ctypedef class greenlet.greenlet [object PyGreenlet]:
pass
# Defining this as a void* means we can't access it as a python attribute
# in the Python code; but we can't define it as a greenlet because that doesn't
# properly handle the case that it can be NULL. So instead we inline a getparent
# function that does the same thing as the green_getparent accessor but without
# going through the overhead of generic attribute lookup.
cdef void* parent
# These are actually macros and so much be included
# (defined) in each .pxd, as are the two functions
......@@ -27,6 +33,17 @@ cdef extern from "greenlet/greenlet.h":
cdef inline greenlet getcurrent():
return PyGreenlet_GetCurrent()
cdef inline object get_generic_parent(greenlet s):
# We don't use any typed functions on the return of this,
# so save the type check by making it just an object.
if s.parent != NULL:
return <object>s.parent
cdef inline SwitchOutGreenletWithLoop get_my_hub(greenlet s):
# Must not be called with s = None
if s.parent != NULL:
return <object>s.parent
cdef bint _greenlet_imported
cdef inline void greenlet_init():
......@@ -44,12 +61,25 @@ cdef extern from "frameobject.h":
ctypedef class types.FrameType [object PyFrameObject]:
cdef CodeType f_code
cdef int f_lineno
# We can't declare this in the object, because it's
# Accessing the f_lineno directly doesn't work. There is an accessor
# function, PyFrame_GetLineNumber that is needed to turn the raw line number
# into the executing line number.
# cdef int f_lineno
# We can't declare this in the object as an object, because it's
# allowed to be NULL, and Cython can't handle that.
# We have to go through the python machinery to get a
# proper None instead.
# cdef FrameType f_back
# proper None instead, or use an inline function.
cdef void* f_back
int PyFrame_GetLineNumber(FrameType frame)
@cython.nonecheck(False)
cdef inline FrameType get_f_back(FrameType frame):
if frame.f_back != NULL:
return <FrameType>frame.f_back
cdef inline int get_f_lineno(FrameType frame):
return PyFrame_GetLineNumber(frame)
cdef void _init()
......@@ -75,25 +105,20 @@ cdef class _Frame:
@cython.final
@cython.locals(frames=list,frame=FrameType)
cdef inline list _extract_stack(int limit)
@cython.final
@cython.locals(previous=_Frame, frame=tuple, f=_Frame)
cdef _Frame _Frame_from_list(list frames)
@cython.locals(frame=FrameType,
newest_Frame=_Frame,
newer_Frame=_Frame,
older_Frame=_Frame)
cdef inline _Frame _extract_stack(int limit)
cdef class Greenlet(greenlet):
cdef readonly object value
cdef readonly tuple args
cdef readonly dict kwargs
cdef readonly object spawning_greenlet
cdef readonly _Frame spawning_stack
cdef public dict spawn_tree_locals
# This is accessed with getattr() dynamically so it
# must be visible to Python
cdef readonly list _spawning_stack_frames
cdef list _links
cdef tuple _exc_info
cdef object _notifier
......@@ -108,10 +133,11 @@ cdef class Greenlet(greenlet):
cpdef rawlink(self, object callback)
cpdef str _formatinfo(self)
# This is a helper function for a @property getter;
# defining locals() for a @property doesn't seem to work.
@cython.locals(reg=IdentRegistry)
cdef _get_minimal_ident(self)
cdef bint __started_but_aborted(self)
cdef bint __start_cancelled_by_kill(self)
cdef bint __start_pending(self)
......@@ -132,8 +158,6 @@ cdef class Greenlet(greenlet):
# TypeError: wrap() takes exactly one argument (0 given)
# cpdef _raise_exception(self)
# Declare a bunch of imports as cdefs so they can
# be accessed directly as static vars without
# doing a module global lookup. This is especially important
......@@ -165,6 +189,7 @@ cdef class _dummy_event:
cdef _dummy_event _cancelled_start_event
cdef _dummy_event _start_completed_event
cpdef _kill(Greenlet glet, object exception, object waiter)
@cython.locals(diehards=list)
cdef _killall3(list greenlets, object exception, object waiter)
......
......@@ -54,9 +54,15 @@ class WaitOperationsGreenlet(SwitchOutGreenletWithLoop): # pylint:disable=undefi
try:
result = waiter.get()
if result is not waiter:
raise InvalidSwitchError('Invalid switch into %s: %r (expected %r)' % (
raise InvalidSwitchError(
'Invalid switch into %s: got %r (expected %r; waiting on %r with %r)' % (
getcurrent(), # pylint:disable=undefined-variable
result, waiter))
result,
waiter,
self,
watcher
)
)
finally:
watcher.stop()
......
......@@ -27,7 +27,7 @@ class ValuedWeakRef(ref):
class IdentRegistry(object):
"""
Maintains a unique mapping of (small) positive integer identifiers
Maintains a unique mapping of (small) non-negative integer identifiers
to objects that can be weakly referenced.
It is guaranteed that no two objects will have the the same
......
......@@ -25,7 +25,6 @@ from gevent._hub_primitives import wait_on_objects as wait
from gevent.timeout import Timeout
from gevent._config import config as GEVENT_CONFIG
from gevent._util import Lazy
from gevent._util import readproperty
from gevent._hub_local import get_hub_noargs as get_hub
from gevent import _waiter
......@@ -45,7 +44,20 @@ __all__ = [
locals()['getcurrent'] = __import__('greenlet').getcurrent
locals()['greenlet_init'] = lambda: None
locals()['Waiter'] = _waiter.Waiter
# With Cython, this raises a TypeError if the parent is *not*
# the hub (SwitchOutGreenletWithLoop); in pure-Python, we will
# very likely get an AttributeError immediately after when we access `loop`;
# The TypeError message is more informative on Python 2.
# This must ONLY be called when we know that `s` is not None and is in fact a greenlet
# object (e.g., when called on `self`)
locals()['get_my_hub'] = lambda s: s.parent
# This must also ONLY be called when we know that S is not None and is in fact a greenlet
# object (including the result of getcurrent())
locals()['get_generic_parent'] = lambda s: s.parent
# Frame access
locals()['get_f_back'] = lambda frame: frame.f_back
locals()['get_f_lineno'] = lambda frame: frame.f_lineno
if _PYPY:
import _continuation # pylint:disable=import-error
......@@ -113,21 +125,15 @@ class _Frame(object):
__slots__ = ('f_code', 'f_lineno', 'f_back')
def __init__(self, f_code, f_lineno, f_back):
self.f_code = f_code
self.f_lineno = f_lineno
self.f_back = f_back
def __init__(self):
self.f_code = None
self.f_back = None
self.f_lineno = 0
@property
def f_globals(self):
return None
def _Frame_from_list(frames):
previous = None
for frame in reversed(frames):
f = _Frame(frame[0], frame[1], previous)
previous = f
return previous
def _extract_stack(limit):
try:
......@@ -142,14 +148,26 @@ def _extract_stack(limit):
# See https://github.com/gevent/gevent/issues/1212
frame = None
frames = []
newest_Frame = None
newer_Frame = None
while limit and frame is not None:
limit -= 1
frames.append((frame.f_code, frame.f_lineno))
frame = frame.f_back
older_Frame = _Frame()
# Arguments are always passed to the constructor as Python objects,
# meaning we wind up boxing the f_lineno just to unbox it if we pass it.
# It's faster to simply assign once the object is created.
older_Frame.f_code = frame.f_code
older_Frame.f_lineno = get_f_lineno(frame) # pylint:disable=undefined-variable
if newer_Frame is not None:
newer_Frame.f_back = older_Frame
newer_Frame = older_Frame
if newest_Frame is None:
newest_Frame = newer_Frame
return frames
frame = get_f_back(frame) # pylint:disable=undefined-variable
return newest_Frame
_greenlet__init__ = greenlet.__init__
......@@ -180,6 +198,10 @@ class Greenlet(greenlet):
a false value to disable ``spawn_tree_locals``, ``spawning_greenlet``,
and ``spawning_stack``. The first two will be None in that case, and the
latter will be empty.
.. versionchanged:: 1.5
Greenlet objects are now more careful to verify that their ``parent`` is really
a gevent hub, raising a ``TypeError`` earlier instead of an ``AttributeError`` later.
"""
# The attributes are documented in the .rst file
......@@ -209,6 +231,15 @@ class Greenlet(greenlet):
# 2.7.14 : Mean +- std dev: 3.37 us +- 0.20 us
# PyPy2 5.10.0 : Mean +- std dev: 4.44 us +- 0.28 us
# Switching to reified frames and some more tuning gets us here:
# 3.7.2 : Mean +- std dev: 2.53 us +- 0.15 us
# 2.7.16 : Mean +- std dev: 2.35 us +- 0.12 us
# PyPy2 7.1 : Mean +- std dev: 11.6 us +- 0.4 us
# Compared to the released 1.4 (tested at the same time):
# 3.7.2 : Mean +- std dev: 3.21 us +- 0.32 us
# 2.7.16 : Mean +- std dev: 3.11 us +- 0.19 us
# PyPy2 7.1 : Mean +- std dev: 12.3 us +- 0.8 us
_greenlet__init__(self, None, get_hub())
......@@ -250,41 +281,44 @@ class Greenlet(greenlet):
self.spawn_tree_locals = spawner.spawn_tree_locals
except AttributeError:
self.spawn_tree_locals = {}
if spawner.parent is not None:
if get_generic_parent(spawner) is not None: # pylint:disable=undefined-variable
# The main greenlet has no parent.
# Its children get separate locals.
spawner.spawn_tree_locals = self.spawn_tree_locals
self._spawning_stack_frames = _extract_stack(self.spawning_stack_limit)
self.spawning_stack = _extract_stack(self.spawning_stack_limit)
# Don't copy the spawning greenlet's
# '_spawning_stack_frames' into ours. That's somewhat
# confusing, and, if we're not careful, a deep spawn tree
# can lead to excessive memory usage (an infinite spawning
# tree could lead to unbounded memory usage without care
# --- see https://github.com/gevent/gevent/issues/1371)
# The _spawning_stack_frames may be cleared out later if we access spawning_stack
else:
# None is the default for all of these in Cython, but we
# need to declare them for pure-Python mode.
self.spawning_greenlet = None
self.spawn_tree_locals = None
self._spawning_stack_frames = None
@Lazy
def spawning_stack(self):
# Store this in the __dict__. We don't use it from the C
# code. It's tempting to discard _spawning_stack_frames
# after this, but child greenlets may still be created
# that need it.
return _Frame_from_list(self._spawning_stack_frames or [])
self.spawning_stack = None
def _get_minimal_ident(self):
reg = self.parent.ident_registry
# Helper function for cython, to allow typing `reg` and making a
# C call to get_ident.
# If we're being accessed from a hub different than the one running
# us, aka get_hub() is not self.parent, then calling hub.ident_registry.get_ident()
# may be quietly broken: it's not thread safe.
# If our parent is no longer the hub for whatever reason, this will raise a
# AttributeError or TypeError.
hub = get_my_hub(self) # type:SwitchOutGreenletWithLoop pylint:disable=undefined-variable
reg = hub.ident_registry
return reg.get_ident(self)
@property
def minimal_ident(self):
"""
A small, unique integer that identifies this object.
A small, unique non-negative integer that identifies this object.
This is similar to :attr:`threading.Thread.ident` (and `id`)
in that as long as this object is alive, no other greenlet *in
......@@ -294,10 +328,16 @@ class Greenlet(greenlet):
will be available for reuse.
To get ids that are unique across all hubs, combine this with
the hub's ``minimal_ident``.
the hub's (``self.parent``) ``minimal_ident``.
Accessing this property from threads other than the thread running
this greenlet is not defined.
.. versionadded:: 1.3a2
"""
# Not @Lazy, implemented manually because _ident is in the structure
# of the greenlet for fast access
if self._ident is None:
self._ident = self._get_minimal_ident()
return self._ident
......@@ -324,7 +364,8 @@ class Greenlet(greenlet):
@property
def loop(self):
# needed by killall
return self.parent.loop
hub = get_my_hub(self) # type:SwitchOutGreenletWithLoop pylint:disable=undefined-variable
return hub.loop
def __nonzero__(self):
return self._start_event is not None and self._exc_info is None
......@@ -526,7 +567,8 @@ class Greenlet(greenlet):
"""Schedule the greenlet to run in this loop iteration"""
if self._start_event is None:
_call_spawn_callbacks(self)
self._start_event = self.parent.loop.run_callback(self.switch)
hub = get_my_hub(self) # type:SwitchOutGreenletWithLoop pylint:disable=undefined-variable
self._start_event = hub.loop.run_callback(self.switch)
def start_later(self, seconds):
"""
......@@ -537,7 +579,8 @@ class Greenlet(greenlet):
"""
if self._start_event is None:
_call_spawn_callbacks(self)
self._start_event = self.parent.loop.timer(seconds)
hub = get_my_hub(self) # pylint:disable=undefined-variable
self._start_event = hub.loop.timer(seconds)
self._start_event.start(self.switch)
@staticmethod
......@@ -662,8 +705,9 @@ class Greenlet(greenlet):
self.__handle_death_before_start((exception,))
else:
waiter = Waiter() if block else None # pylint:disable=undefined-variable
self.parent.loop.run_callback(_kill, self, exception, waiter)
if block:
hub = get_my_hub(self) # pylint:disable=undefined-variable
hub.loop.run_callback(_kill, self, exception, waiter)
if waiter is not None:
waiter.get()
self.join(timeout)
# it should be OK to use kill() in finally or kill a greenlet from more than one place;
......@@ -694,7 +738,7 @@ class Greenlet(greenlet):
try:
t = Timeout._start_new_or_dummy(timeout)
try:
result = self.parent.switch()
result = get_my_hub(self).switch()
if result is not self:
raise InvalidSwitchError('Invalid switch into Greenlet.get(): %r' % (result, ))
finally:
......@@ -728,7 +772,7 @@ class Greenlet(greenlet):
try:
t = Timeout._start_new_or_dummy(timeout)
try:
result = self.parent.switch()
result = get_my_hub(self).switch()
if result is not self:
raise InvalidSwitchError('Invalid switch into Greenlet.join(): %r' % (result, ))
finally:
......@@ -745,7 +789,8 @@ class Greenlet(greenlet):
self._exc_info = (None, None, None)
self.value = result
if self._links and not self._notifier:
self._notifier = self.parent.loop.run_callback(self._notify_links)
hub = get_my_hub(self) # pylint:disable=undefined-variable
self._notifier = hub.loop.run_callback(self._notify_links)
def _report_error(self, exc_info):
if isinstance(exc_info[1], GreenletExit):
......@@ -754,11 +799,12 @@ class Greenlet(greenlet):
self._exc_info = exc_info[0], exc_info[1], dump_traceback(exc_info[2])
hub = get_my_hub(self) # pylint:disable=undefined-variable
if self._links and not self._notifier:
self._notifier = self.parent.loop.run_callback(self._notify_links)
self._notifier = hub.loop.run_callback(self._notify_links)
try:
self.parent.handle_error(self, *exc_info)
hub.handle_error(self, *exc_info)
finally:
del exc_info
......@@ -809,7 +855,8 @@ class Greenlet(greenlet):
raise TypeError('Expected callable: %r' % (callback, ))
self._links.append(callback) # pylint:disable=no-member
if self.ready() and self._links and not self._notifier:
self._notifier = self.parent.loop.run_callback(self._notify_links)
hub = get_my_hub(self) # pylint:disable=undefined-variable
self._notifier = hub.loop.run_callback(self._notify_links)
def link(self, callback, SpawnedLink=SpawnedLink):
"""
......@@ -868,8 +915,8 @@ class Greenlet(greenlet):
link = self._links.pop(0)
try:
link(self)
except: # pylint:disable=bare-except
self.parent.handle_error((link, self), *sys_exc_info())
except: # pylint:disable=bare-except, undefined-variable
get_my_hub(self).handle_error((link, self), *sys_exc_info())
class _dummy_event(object):
......@@ -891,12 +938,14 @@ _cancelled_start_event = _dummy_event()
_start_completed_event = _dummy_event()
# This is *only* called as a callback from the hub via Greenlet.kill(),
# and its first argument is the Greenlet. So we can be sure about the types.
def _kill(glet, exception, waiter):
try:
glet.throw(exception)
except: # pylint:disable=bare-except
except: # pylint:disable=bare-except, undefined-variable
# XXX do we need this here?
glet.parent.handle_error(glet, *sys_exc_info())
get_my_hub(glet).handle_error(glet, *sys_exc_info())
if waiter is not None:
waiter.switch(None)
......@@ -930,8 +979,8 @@ def _killall3(greenlets, exception, waiter):
if not g.dead:
try:
g.throw(exception)
except: # pylint:disable=bare-except
g.parent.handle_error(g, *sys_exc_info())
except: # pylint:disable=bare-except, undefined-variable
get_my_hub(g).handle_error(g, *sys_exc_info())
if not g.dead:
diehards.append(g)
waiter.switch(diehards)
......@@ -942,8 +991,8 @@ def _killall(greenlets, exception):
if not g.dead:
try:
g.throw(exception)
except: # pylint:disable=bare-except
g.parent.handle_error(g, *sys_exc_info())
except: # pylint:disable=bare-except, undefined-variable
get_my_hub(g).handle_error(g, *sys_exc_info())
def _call_spawn_callbacks(gr):
......@@ -963,7 +1012,8 @@ def killall(greenlets, exception=GreenletExit, block=True, timeout=None):
this could result in corrupted state.
:param greenlets: A **bounded** iterable of the non-None greenlets to terminate.
*All* the items in this iterable must be greenlets that belong to the same thread.
*All* the items in this iterable must be greenlets that belong to the same hub,
which should be the hub for this current thread.
:keyword exception: The exception to raise in the greenlets. By default this is
:class:`GreenletExit`.
:keyword bool block: If True (the default) then this function only returns when all the
......
......@@ -531,22 +531,22 @@ class TestBasic(greentest.TestCase):
g = gevent.Greenlet(func, 0.01, return_value=5)
g.rawlink(link_test.append) # use rawlink to avoid timing issues on Appveyor/Travis (not always successful)
assert not g, bool(g)
assert not g.dead
assert not g.started
assert not g.ready()
assert not g.successful()
assert g.value is None
assert g.exception is None
self.assertFalse(g, g)
self.assertFalse(g.dead, g)
self.assertFalse(g.started, g)
self.assertFalse(g.ready(), g)
self.assertFalse(g.successful(), g)
self.assertIsNone(g.value, g)
self.assertIsNone(g.exception, g)
g.start()
assert g # changed
assert not g.dead
assert g.started # changed
assert not g.ready()
assert not g.successful()
assert g.value is None
assert g.exception is None
self.assertTrue(g, g) # changed
self.assertFalse(g.dead, g)
self.assertTrue(g.started, g) # changed
self.assertFalse(g.ready(), g)
self.assertFalse(g.successful(), g)
self.assertIsNone(g.value, g)
self.assertIsNone(g.exception, g)
gevent.sleep(0.001)
self.assertTrue(g)
......@@ -559,14 +559,15 @@ class TestBasic(greentest.TestCase):
self.assertFalse(link_test)
gevent.sleep(0.02)
assert not g
assert g.dead
assert not g.started
assert g.ready()
assert g.successful()
assert g.value == 5
assert g.exception is None # not changed
assert link_test == [g] or greentest.RUNNING_ON_CI, link_test # changed
self.assertFalse(g, g) # changed
self.assertTrue(g.dead, g) # changed
self.assertFalse(g.started, g) # changed
self.assertTrue(g.ready(), g) # changed
self.assertTrue(g.successful(), g) # changed
self.assertEqual(g.value, 5) # changed
self.assertIsNone(g.exception, g)
self.assertTrue(link_test == [g] or greentest.RUNNING_ON_CI, link_test) # changed
def test_error_exit(self):
link_test = []
......@@ -755,28 +756,24 @@ class TestBasic(greentest.TestCase):
finally:
greenlet.sys_getframe = ogf
def test_spawn_length_nested_doesnt_grow(self):
# https://github.com/gevent/gevent/pull/1374
def doit1():
getcurrent()._spawning_stack_frames.append('not copied')
return gevent.spawn(doit2)
def doit2():
return gevent.spawn(doit3)
def test_minimal_ident_parent_not_hub(self):
def doit3():
current = gevent.getcurrent()
return current
g1 = gevent.spawn(doit1)
g1.join()
g2 = g1.value
g2.join()
g = gevent.spawn(lambda: 1)
self.assertIs(g.parent, gevent.get_hub())
g.parent = getcurrent()
try:
self.assertIsNot(g.parent, gevent.get_hub())
g3 = g2.value
self.assertNotIn('not copied', g3._spawning_stack_frames)
with self.assertRaisesRegex((TypeError, # Cython
AttributeError), # PyPy
'Cannot convert|ident_registry'):
getattr(g, 'minimal_ident')
finally:
# Attempting to switch into this later, when we next cycle the
# loop, would raise an InvalidSwitchError if we don't put
# things back the way they were (or kill the greenlet)
g.parent = gevent.get_hub()
g.kill()
......
......@@ -417,9 +417,11 @@ class GreenletTree(object):
def __render_children(self, tree):
children = sorted(self.child_trees,
key=lambda c: (
# raw greenlets first
# raw greenlets first. Note that we could be accessing
# minimal_ident for a hub from a different thread, which isn't
# technically thread safe.
getattr(c, 'minimal_ident', -1),
# running greenlets first
# running greenlets next
getattr(c, 'ready', _ready)(),
id(c.parent)))
for n, child in enumerate(children):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment