Commit b4db40b8 authored by Jason Madden's avatar Jason Madden Committed by GitHub

Merge pull request #1153 from gevent/threadpool-opts

Optimizations for threadpool
parents 277e34ca c21db37f
......@@ -42,6 +42,9 @@ Enhancements
- Hub objects now include the value of their ``name`` attribute in
their repr.
- Pools for greenlets and threads have lower overhead, especially for
``map``. See :pr:`1153`.
Monitoring and Debugging
------------------------
......
# -*- coding: utf-8 -*-
"""
Benchmarks for greenlet pool.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import gevent.pool
import bench_threadpool
bench_threadpool.ThreadPool = gevent.pool.Pool
if __name__ == '__main__':
bench_threadpool.main()
# -*- coding: utf-8 -*-
"""
Benchmarks for thread pool.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import perf
from gevent.threadpool import ThreadPool
try:
xrange = xrange
except NameError:
xrange = range
def noop():
"Does nothing"
def identity(i):
return i
PAR_COUNT = 5
N = 20
def bench_apply(loops):
pool = ThreadPool(1)
t0 = perf.perf_counter()
for _ in xrange(loops):
for _ in xrange(N):
pool.apply(noop)
pool.join()
pool.kill()
return perf.perf_counter() - t0
def bench_spawn_wait(loops):
pool = ThreadPool(1)
t0 = perf.perf_counter()
for _ in xrange(loops):
for _ in xrange(N):
r = pool.spawn(noop)
r.get()
pool.join()
pool.kill()
return perf.perf_counter() - t0
def _map(pool, pool_func, loops):
data = [1] * N
t0 = perf.perf_counter()
# Must collect for imap to finish
for _ in xrange(loops):
list(pool_func(identity, data))
pool.join()
pool.kill()
return perf.perf_counter() - t0
def _ppool():
pool = ThreadPool(PAR_COUNT)
pool.size = PAR_COUNT
return pool
def bench_map_seq(loops):
pool = ThreadPool(1)
return _map(pool, pool.map, loops)
def bench_map_par(loops):
pool = _ppool()
return _map(pool, pool.map, loops)
def bench_imap_seq(loops):
pool = ThreadPool(1)
return _map(pool, pool.imap, loops)
def bench_imap_par(loops):
pool = _ppool()
return _map(pool, pool.imap, loops)
def bench_imap_un_seq(loops):
pool = ThreadPool(1)
return _map(pool, pool.imap_unordered, loops)
def bench_imap_un_par(loops):
pool = _ppool()
return _map(pool, pool.imap_unordered, loops)
def main():
runner = perf.Runner()
runner.bench_time_func('imap_unordered_seq',
bench_imap_un_seq)
runner.bench_time_func('imap_unordered_par',
bench_imap_un_par)
runner.bench_time_func('imap_seq',
bench_imap_seq)
runner.bench_time_func('imap_par',
bench_imap_par)
runner.bench_time_func('map_seq',
bench_map_seq)
runner.bench_time_func('map_par',
bench_map_par)
runner.bench_time_func('apply',
bench_apply)
runner.bench_time_func('spawn',
bench_spawn_wait)
if __name__ == '__main__':
main()
......@@ -8,14 +8,12 @@ This module is missing 'Thread' class, but includes 'Queue'.
from __future__ import absolute_import
from collections import deque
from itertools import islice as _islice
from gevent import monkey
from gevent._compat import thread_mod_name
__all__ = [
'Condition',
'Lock',
'Queue',
]
......@@ -26,14 +24,13 @@ start_new_thread, Lock, get_thread_ident, = monkey.get_original(thread_mod_name,
])
class Condition(object):
class _Condition(object):
# pylint:disable=method-hidden
def __init__(self, lock):
self.__lock = lock
# Export the lock's acquire() and release() methods
self.acquire = lock.acquire
self.release = lock.release
self.__waiters = []
# If the lock defines _release_save() and/or _acquire_restore(),
# these override the default implementations (which just call
# release() and acquire() on the lock). Ditto for _is_owned().
......@@ -49,13 +46,12 @@ class Condition(object):
self._is_owned = lock._is_owned
except AttributeError:
pass
self.__waiters = []
def __enter__(self):
return self.__lock.__enter__()
def __exit__(self, *args):
return self.__lock.__exit__(*args)
def __exit__(self, t, v, tb):
return self.__lock.__exit__(t, v, tb)
def __repr__(self):
return "<Condition(%s, %d)>" % (self.__lock, len(self.__waiters))
......@@ -75,8 +71,7 @@ class Condition(object):
return True
def wait(self):
if not self._is_owned():
raise RuntimeError("cannot wait on un-acquired lock")
# The condition MUST be owned, but we don't check that.
waiter = Lock()
waiter.acquire()
self.__waiters.append(waiter)
......@@ -86,19 +81,15 @@ class Condition(object):
finally:
self._acquire_restore(saved_state)
def notify(self, n=1):
if not self._is_owned():
raise RuntimeError("cannot notify on un-acquired lock")
all_waiters = self.__waiters
waiters_to_notify = deque(_islice(all_waiters, n))
if not waiters_to_notify:
return
for waiter in waiters_to_notify:
def notify_one(self):
# The condition MUST be owned, but we don't check that.
try:
waiter = self.__waiters.pop()
except IndexError:
# Nobody around
pass
else:
waiter.release()
try:
all_waiters.remove(waiter)
except ValueError:
pass
class Queue(object):
......@@ -107,17 +98,18 @@ class Queue(object):
The queue is always infinite size.
"""
__slots__ = ('_queue', '_mutex', '_not_empty', 'unfinished_tasks')
def __init__(self):
self.queue = deque()
self._queue = deque()
# mutex must be held whenever the queue is mutating. All methods
# that acquire mutex must release it before returning. mutex
# is shared between the three conditions, so acquiring and
# releasing the conditions also acquires and releases mutex.
self.mutex = Lock()
self._mutex = Lock()
# Notify not_empty whenever an item is added to the queue; a
# thread waiting to get is notified then.
self.not_empty = Condition(self.mutex)
self._not_empty = _Condition(self._mutex)
self.unfinished_tasks = 0
......@@ -135,7 +127,7 @@ class Queue(object):
Raises a ValueError if called more times than there were items
placed in the queue.
"""
with self.mutex:
with self._mutex:
unfinished = self.unfinished_tasks - 1
if unfinished <= 0:
if unfinished < 0:
......@@ -144,8 +136,7 @@ class Queue(object):
def qsize(self, len=len):
"""Return the approximate size of the queue (not reliable!)."""
with self.mutex:
return len(self.queue)
return len(self._queue)
def empty(self):
"""Return True if the queue is empty, False otherwise (not reliable!)."""
......@@ -158,16 +149,16 @@ class Queue(object):
def put(self, item):
"""Put an item into the queue.
"""
with self.mutex:
self.queue.append(item)
with self._not_empty:
self._queue.append(item)
self.unfinished_tasks += 1
self.not_empty.notify()
self._not_empty.notify_one()
def get(self):
"""Remove and return an item from the queue.
"""
with self.mutex:
while not self.queue:
self.not_empty.wait()
item = self.queue.popleft()
with self._not_empty:
while not self._queue:
self._not_empty.wait()
item = self._queue.popleft()
return item
This diff is collapsed.
......@@ -597,7 +597,7 @@ class Channel(object):
self.getters.remove(waiter)
raise
finally:
timeout.cancel()
timeout.close()
def get_nowait(self):
return self.get(False)
......
......@@ -219,7 +219,7 @@ class ThreadPool(GroupMappingMixin):
# we get LoopExit (why?). Previously it was done with a rawlink on the
# AsyncResult and the comment that it is "competing for order with get(); this is not
# good, just make ThreadResult release the semaphore before doing anything else"
thread_result = ThreadResult(result, hub=self.hub, call_when_ready=semaphore.release)
thread_result = ThreadResult(result, self.hub, semaphore.release)
task_queue.put((func, args, kwargs, thread_result))
self.adjust()
except:
......@@ -333,6 +333,21 @@ class ThreadPool(GroupMappingMixin):
# Always go to Greenlet because our self.spawn uses threads
return True
class _FakeAsync(object):
def send(self):
pass
close = stop = send
def __call_(self, result):
"fake out for 'receiver'"
def __bool__(self):
return False
__nonzero__ = __bool__
_FakeAsync = _FakeAsync()
class ThreadResult(object):
......@@ -340,9 +355,7 @@ class ThreadResult(object):
__slots__ = ('exc_info', 'async_watcher', '_call_when_ready', 'value',
'context', 'hub', 'receiver')
def __init__(self, receiver, hub=None, call_when_ready=None):
if hub is None:
hub = get_hub()
def __init__(self, receiver, hub, call_when_ready):
self.receiver = receiver
self.hub = hub
self.context = None
......@@ -359,48 +372,45 @@ class ThreadResult(object):
def _on_async(self):
self.async_watcher.stop()
self.async_watcher.close()
if self._call_when_ready:
# Typically this is pool.semaphore.release and we have to
# call this in the Hub; if we don't we get the dreaded
# LoopExit (XXX: Why?)
self._call_when_ready()
# Typically this is pool.semaphore.release and we have to
# call this in the Hub; if we don't we get the dreaded
# LoopExit (XXX: Why?)
self._call_when_ready()
try:
if self.exc_info:
self.hub.handle_error(self.context, *self.exc_info)
self.context = None
self.async_watcher = None
self.async_watcher = _FakeAsync
self.hub = None
self._call_when_ready = None
if self.receiver is not None:
self.receiver(self)
self._call_when_ready = _FakeAsync
self.receiver(self)
finally:
self.receiver = None
self.receiver = _FakeAsync
self.value = None
if self.exc_info:
self.exc_info = (self.exc_info[0], self.exc_info[1], None)
def destroy(self):
if self.async_watcher is not None:
self.async_watcher.stop()
self.async_watcher.close()
self.async_watcher = None
self.async_watcher.stop()
self.async_watcher.close()
self.async_watcher = _FakeAsync
self.context = None
self.hub = None
self._call_when_ready = None
self.receiver = None
def _ready(self):
if self.async_watcher is not None:
self.async_watcher.send()
self._call_when_ready = _FakeAsync
self.receiver = _FakeAsync
def set(self, value):
self.value = value
self._ready()
self.async_watcher.send()
def handle_error(self, context, exc_info):
self.context = context
self.exc_info = exc_info
self._ready()
self.async_watcher.send()
# link protocol:
def successful(self):
......
......@@ -375,13 +375,15 @@ class TestPool(greentest.TestCase): # pylint:disable=too-many-public-methods
it = self.pool.imap_unordered(sqr_random_sleep, range(SMALL_RANGE))
self.assertEqual(sorted(it), list(map(squared, range(SMALL_RANGE))))
def test_empty(self):
def test_empty_imap_unordered(self):
it = self.pool.imap_unordered(sqr, [])
self.assertEqual(list(it), [])
def test_empty_imap(self):
it = self.pool.imap(sqr, [])
self.assertEqual(list(it), [])
def test_empty_map(self):
self.assertEqual(self.pool.map(sqr, []), [])
def test_terminate(self):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment