Commit da85d191 authored by Linus Torvalds

Merge branch 'for-3.16' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue updates from Tejun Heo:
 "Lai simplified worker destruction path and internal workqueue locking
  and there are some other minor changes.

  Except for the removal of some long-deprecated interfaces which
  haven't had any in-kernel user for quite a while, there shouldn't be
  any difference to workqueue users"

* 'for-3.16' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  kernel/workqueue.c: pr_warning/pr_warn & printk/pr_info
  workqueue: remove the confusing POOL_FREEZING
  workqueue: rename first_worker() to first_idle_worker()
  workqueue: remove unused work_clear_pending()
  workqueue: remove unused WORK_CPU_END
  workqueue: declare system_highpri_wq
  workqueue: use generic attach/detach routine for rescuers
  workqueue: separate pool-attaching code out from create_worker()
  workqueue: rename manager_mutex to attach_mutex
  workqueue: narrow the protection range of manager_mutex
  workqueue: convert worker_idr to worker_ida
  workqueue: separate iteration role from worker_idr
  workqueue: destroy worker directly in the idle timeout handler
  workqueue: async worker destruction
  workqueue: destroy_worker() should destroy idle workers only
  workqueue: use manager lock only to protect worker_idr
  workqueue: Remove deprecated system_nrt[_freezable]_wq
  workqueue: Remove deprecated flush[_delayed]_work_sync()
  kernel/workqueue.c: pr_warning/pr_warn & printk/pr_info
  workqueue: simplify wq_update_unbound_numa() by jumping to use_dfl_pwq if the target cpumask equals wq's
parents 68a29ef2 015af06e
...@@ -56,9 +56,8 @@ enum { ...@@ -56,9 +56,8 @@ enum {
WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS) - 1, WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS) - 1,
WORK_NO_COLOR = WORK_NR_COLORS, WORK_NO_COLOR = WORK_NR_COLORS,
/* special cpu IDs */ /* not bound to any CPU, prefer the local CPU */
WORK_CPU_UNBOUND = NR_CPUS, WORK_CPU_UNBOUND = NR_CPUS,
WORK_CPU_END = NR_CPUS + 1,
/* /*
* Reserve 7 bits off of pwq pointer w/ debugobjects turned off. * Reserve 7 bits off of pwq pointer w/ debugobjects turned off.
...@@ -274,13 +273,6 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } ...@@ -274,13 +273,6 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
#define delayed_work_pending(w) \ #define delayed_work_pending(w) \
work_pending(&(w)->work) work_pending(&(w)->work)
/**
* work_clear_pending - for internal use only, mark a work item as not pending
* @work: The work item in question
*/
#define work_clear_pending(work) \
clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))
/* /*
* Workqueue flags and constants. For details, please refer to * Workqueue flags and constants. For details, please refer to
* Documentation/workqueue.txt. * Documentation/workqueue.txt.
...@@ -340,6 +332,9 @@ enum { ...@@ -340,6 +332,9 @@ enum {
* short queue flush time. Don't queue works which can run for too * short queue flush time. Don't queue works which can run for too
* long. * long.
* *
* system_highpri_wq is similar to system_wq but for work items which
* require WQ_HIGHPRI.
*
* system_long_wq is similar to system_wq but may host long running * system_long_wq is similar to system_wq but may host long running
* works. Queue flushing might take relatively long. * works. Queue flushing might take relatively long.
* *
...@@ -358,26 +353,13 @@ enum { ...@@ -358,26 +353,13 @@ enum {
* 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info. * 'wq_power_efficient' is disabled. See WQ_POWER_EFFICIENT for more info.
*/ */
extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_wq;
extern struct workqueue_struct *system_highpri_wq;
extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_long_wq;
extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct *system_unbound_wq;
extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct *system_freezable_wq;
extern struct workqueue_struct *system_power_efficient_wq; extern struct workqueue_struct *system_power_efficient_wq;
extern struct workqueue_struct *system_freezable_power_efficient_wq; extern struct workqueue_struct *system_freezable_power_efficient_wq;
static inline struct workqueue_struct * __deprecated __system_nrt_wq(void)
{
return system_wq;
}
static inline struct workqueue_struct * __deprecated __system_nrt_freezable_wq(void)
{
return system_freezable_wq;
}
/* equivalent to system_wq and system_freezable_wq, deprecated */
#define system_nrt_wq __system_nrt_wq()
#define system_nrt_freezable_wq __system_nrt_freezable_wq()
extern struct workqueue_struct * extern struct workqueue_struct *
__alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6); struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6);
...@@ -587,18 +569,6 @@ static inline bool keventd_up(void) ...@@ -587,18 +569,6 @@ static inline bool keventd_up(void)
return system_wq != NULL; return system_wq != NULL;
} }
/* used to be different but now identical to flush_work(), deprecated */
static inline bool __deprecated flush_work_sync(struct work_struct *work)
{
return flush_work(work);
}
/* used to be different but now identical to flush_delayed_work(), deprecated */
static inline bool __deprecated flush_delayed_work_sync(struct delayed_work *dwork)
{
return flush_delayed_work(dwork);
}
#ifndef CONFIG_SMP #ifndef CONFIG_SMP
static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg) static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
{ {
......
...@@ -65,15 +65,12 @@ enum { ...@@ -65,15 +65,12 @@ enum {
* be executing on any CPU. The pool behaves as an unbound one. * be executing on any CPU. The pool behaves as an unbound one.
* *
* Note that DISASSOCIATED should be flipped only while holding * Note that DISASSOCIATED should be flipped only while holding
* manager_mutex to avoid changing binding state while * attach_mutex to avoid changing binding state while
* create_worker() is in progress. * worker_attach_to_pool() is in progress.
*/ */
POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
POOL_FREEZING = 1 << 3, /* freeze in progress */
/* worker flags */ /* worker flags */
WORKER_STARTED = 1 << 0, /* started */
WORKER_DIE = 1 << 1, /* die die die */ WORKER_DIE = 1 << 1, /* die die die */
WORKER_IDLE = 1 << 2, /* is idle */ WORKER_IDLE = 1 << 2, /* is idle */
WORKER_PREP = 1 << 3, /* preparing to run works */ WORKER_PREP = 1 << 3, /* preparing to run works */
...@@ -124,8 +121,7 @@ enum { ...@@ -124,8 +121,7 @@ enum {
* cpu or grabbing pool->lock is enough for read access. If * cpu or grabbing pool->lock is enough for read access. If
* POOL_DISASSOCIATED is set, it's identical to L. * POOL_DISASSOCIATED is set, it's identical to L.
* *
* MG: pool->manager_mutex and pool->lock protected. Writes require both * A: pool->attach_mutex protected.
* locks. Reads can happen under either lock.
* *
* PL: wq_pool_mutex protected. * PL: wq_pool_mutex protected.
* *
...@@ -163,8 +159,11 @@ struct worker_pool { ...@@ -163,8 +159,11 @@ struct worker_pool {
/* see manage_workers() for details on the two manager mutexes */ /* see manage_workers() for details on the two manager mutexes */
struct mutex manager_arb; /* manager arbitration */ struct mutex manager_arb; /* manager arbitration */
struct mutex manager_mutex; /* manager exclusion */ struct mutex attach_mutex; /* attach/detach exclusion */
struct idr worker_idr; /* MG: worker IDs and iteration */ struct list_head workers; /* A: attached workers */
struct completion *detach_completion; /* all workers detached */
struct ida worker_ida; /* worker IDs for task name */
struct workqueue_attrs *attrs; /* I: worker attributes */ struct workqueue_attrs *attrs; /* I: worker attributes */
struct hlist_node hash_node; /* PL: unbound_pool_hash node */ struct hlist_node hash_node; /* PL: unbound_pool_hash node */
...@@ -340,16 +339,6 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, ...@@ -340,16 +339,6 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
lockdep_is_held(&wq->mutex), \ lockdep_is_held(&wq->mutex), \
"sched RCU or wq->mutex should be held") "sched RCU or wq->mutex should be held")
#ifdef CONFIG_LOCKDEP
#define assert_manager_or_pool_lock(pool) \
WARN_ONCE(debug_locks && \
!lockdep_is_held(&(pool)->manager_mutex) && \
!lockdep_is_held(&(pool)->lock), \
"pool->manager_mutex or ->lock should be held")
#else
#define assert_manager_or_pool_lock(pool) do { } while (0)
#endif
#define for_each_cpu_worker_pool(pool, cpu) \ #define for_each_cpu_worker_pool(pool, cpu) \
for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
(pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
...@@ -375,17 +364,16 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, ...@@ -375,17 +364,16 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
/** /**
* for_each_pool_worker - iterate through all workers of a worker_pool * for_each_pool_worker - iterate through all workers of a worker_pool
* @worker: iteration cursor * @worker: iteration cursor
* @wi: integer used for iteration
* @pool: worker_pool to iterate workers of * @pool: worker_pool to iterate workers of
* *
* This must be called with either @pool->manager_mutex or ->lock held. * This must be called with @pool->attach_mutex.
* *
* The if/else clause exists only for the lockdep assertion and can be * The if/else clause exists only for the lockdep assertion and can be
* ignored. * ignored.
*/ */
#define for_each_pool_worker(worker, wi, pool) \ #define for_each_pool_worker(worker, pool) \
idr_for_each_entry(&(pool)->worker_idr, (worker), (wi)) \ list_for_each_entry((worker), &(pool)->workers, node) \
if (({ assert_manager_or_pool_lock((pool)); false; })) { } \ if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \
else else
/** /**
...@@ -763,13 +751,6 @@ static bool need_to_create_worker(struct worker_pool *pool) ...@@ -763,13 +751,6 @@ static bool need_to_create_worker(struct worker_pool *pool)
return need_more_worker(pool) && !may_start_working(pool); return need_more_worker(pool) && !may_start_working(pool);
} }
/* Do I need to be the manager? */
static bool need_to_manage_workers(struct worker_pool *pool)
{
return need_to_create_worker(pool) ||
(pool->flags & POOL_MANAGE_WORKERS);
}
/* Do we have too many workers and should some go away? */ /* Do we have too many workers and should some go away? */
static bool too_many_workers(struct worker_pool *pool) static bool too_many_workers(struct worker_pool *pool)
{ {
...@@ -791,8 +772,8 @@ static bool too_many_workers(struct worker_pool *pool) ...@@ -791,8 +772,8 @@ static bool too_many_workers(struct worker_pool *pool)
* Wake up functions. * Wake up functions.
*/ */
/* Return the first worker. Safe with preemption disabled */ /* Return the first idle worker. Safe with preemption disabled */
static struct worker *first_worker(struct worker_pool *pool) static struct worker *first_idle_worker(struct worker_pool *pool)
{ {
if (unlikely(list_empty(&pool->idle_list))) if (unlikely(list_empty(&pool->idle_list)))
return NULL; return NULL;
...@@ -811,7 +792,7 @@ static struct worker *first_worker(struct worker_pool *pool) ...@@ -811,7 +792,7 @@ static struct worker *first_worker(struct worker_pool *pool)
*/ */
static void wake_up_worker(struct worker_pool *pool) static void wake_up_worker(struct worker_pool *pool)
{ {
struct worker *worker = first_worker(pool); struct worker *worker = first_idle_worker(pool);
if (likely(worker)) if (likely(worker))
wake_up_process(worker->task); wake_up_process(worker->task);
...@@ -885,7 +866,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu) ...@@ -885,7 +866,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
*/ */
if (atomic_dec_and_test(&pool->nr_running) && if (atomic_dec_and_test(&pool->nr_running) &&
!list_empty(&pool->worklist)) !list_empty(&pool->worklist))
to_wakeup = first_worker(pool); to_wakeup = first_idle_worker(pool);
return to_wakeup ? to_wakeup->task : NULL; return to_wakeup ? to_wakeup->task : NULL;
} }
...@@ -1621,91 +1602,84 @@ static void worker_leave_idle(struct worker *worker) ...@@ -1621,91 +1602,84 @@ static void worker_leave_idle(struct worker *worker)
list_del_init(&worker->entry); list_del_init(&worker->entry);
} }
static struct worker *alloc_worker(void)
{
struct worker *worker;
worker = kzalloc(sizeof(*worker), GFP_KERNEL);
if (worker) {
INIT_LIST_HEAD(&worker->entry);
INIT_LIST_HEAD(&worker->scheduled);
INIT_LIST_HEAD(&worker->node);
/* on creation a worker is in !idle && prep state */
worker->flags = WORKER_PREP;
}
return worker;
}
/** /**
* worker_maybe_bind_and_lock - try to bind %current to worker_pool and lock it * worker_attach_to_pool() - attach a worker to a pool
* @pool: target worker_pool * @worker: worker to be attached
* * @pool: the target pool
* Bind %current to the cpu of @pool if it is associated and lock @pool.
*
* Works which are scheduled while the cpu is online must at least be
* scheduled to a worker which is bound to the cpu so that if they are
* flushed from cpu callbacks while cpu is going down, they are
* guaranteed to execute on the cpu.
*
* This function is to be used by unbound workers and rescuers to bind
* themselves to the target cpu and may race with cpu going down or
* coming online. kthread_bind() can't be used because it may put the
* worker to already dead cpu and set_cpus_allowed_ptr() can't be used
* verbatim as it's best effort and blocking and pool may be
* [dis]associated in the meantime.
*
* This function tries set_cpus_allowed() and locks pool and verifies the
* binding against %POOL_DISASSOCIATED which is set during
* %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker
* enters idle state or fetches works without dropping lock, it can
* guarantee the scheduling requirement described in the first paragraph.
*
* CONTEXT:
* Might sleep. Called without any lock but returns with pool->lock
* held.
* *
* Return: * Attach @worker to @pool. Once attached, the %WORKER_UNBOUND flag and
* %true if the associated pool is online (@worker is successfully * cpu-binding of @worker are kept coordinated with the pool across
* bound), %false if offline. * cpu-[un]hotplugs.
*/ */
static bool worker_maybe_bind_and_lock(struct worker_pool *pool) static void worker_attach_to_pool(struct worker *worker,
__acquires(&pool->lock) struct worker_pool *pool)
{ {
while (true) { mutex_lock(&pool->attach_mutex);
/* /*
* The following call may fail, succeed or succeed * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
* without actually migrating the task to the cpu if * online CPUs. It'll be re-applied when any of the CPUs come up.
* it races with cpu hotunplug operation. Verify
* against POOL_DISASSOCIATED.
*/ */
if (!(pool->flags & POOL_DISASSOCIATED)) set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
set_cpus_allowed_ptr(current, pool->attrs->cpumask);
spin_lock_irq(&pool->lock);
if (pool->flags & POOL_DISASSOCIATED)
return false;
if (task_cpu(current) == pool->cpu &&
cpumask_equal(&current->cpus_allowed, pool->attrs->cpumask))
return true;
spin_unlock_irq(&pool->lock);
/* /*
* We've raced with CPU hot[un]plug. Give it a breather * The pool->attach_mutex ensures %POOL_DISASSOCIATED remains
* and retry migration. cond_resched() is required here; * stable across this function. See the comments above the
* otherwise, we might deadlock against cpu_stop trying to * flag definition for details.
* bring down the CPU on non-preemptive kernel.
*/ */
cpu_relax(); if (pool->flags & POOL_DISASSOCIATED)
cond_resched(); worker->flags |= WORKER_UNBOUND;
}
list_add_tail(&worker->node, &pool->workers);
mutex_unlock(&pool->attach_mutex);
} }
static struct worker *alloc_worker(void) /**
* worker_detach_from_pool() - detach a worker from its pool
* @worker: worker which is attached to its pool
* @pool: the pool @worker is attached to
*
* Undo the attaching which had been done in worker_attach_to_pool(). The
* caller worker shouldn't access the pool after being detached unless it
* has another reference to the pool.
*/
static void worker_detach_from_pool(struct worker *worker,
struct worker_pool *pool)
{ {
struct worker *worker; struct completion *detach_completion = NULL;
worker = kzalloc(sizeof(*worker), GFP_KERNEL); mutex_lock(&pool->attach_mutex);
if (worker) { list_del(&worker->node);
INIT_LIST_HEAD(&worker->entry); if (list_empty(&pool->workers))
INIT_LIST_HEAD(&worker->scheduled); detach_completion = pool->detach_completion;
/* on creation a worker is in !idle && prep state */ mutex_unlock(&pool->attach_mutex);
worker->flags = WORKER_PREP;
} if (detach_completion)
return worker; complete(detach_completion);
} }
/** /**
* create_worker - create a new workqueue worker * create_worker - create a new workqueue worker
* @pool: pool the new worker will belong to * @pool: pool the new worker will belong to
* *
* Create a new worker which is bound to @pool. The returned worker * Create a new worker which is attached to @pool. The new worker must be
* can be started by calling start_worker() or destroyed using * started by start_worker().
* destroy_worker().
* *
* CONTEXT: * CONTEXT:
* Might sleep. Does GFP_KERNEL allocations. * Might sleep. Does GFP_KERNEL allocations.
...@@ -1719,19 +1693,8 @@ static struct worker *create_worker(struct worker_pool *pool) ...@@ -1719,19 +1693,8 @@ static struct worker *create_worker(struct worker_pool *pool)
int id = -1; int id = -1;
char id_buf[16]; char id_buf[16];
lockdep_assert_held(&pool->manager_mutex); /* ID is needed to determine kthread name */
id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
/*
* ID is needed to determine kthread name. Allocate ID first
* without installing the pointer.
*/
idr_preload(GFP_KERNEL);
spin_lock_irq(&pool->lock);
id = idr_alloc(&pool->worker_idr, NULL, 0, 0, GFP_NOWAIT);
spin_unlock_irq(&pool->lock);
idr_preload_end();
if (id < 0) if (id < 0)
goto fail; goto fail;
...@@ -1758,33 +1721,14 @@ static struct worker *create_worker(struct worker_pool *pool) ...@@ -1758,33 +1721,14 @@ static struct worker *create_worker(struct worker_pool *pool)
/* prevent userland from meddling with cpumask of workqueue workers */ /* prevent userland from meddling with cpumask of workqueue workers */
worker->task->flags |= PF_NO_SETAFFINITY; worker->task->flags |= PF_NO_SETAFFINITY;
/* /* successful, attach the worker to the pool */
* set_cpus_allowed_ptr() will fail if the cpumask doesn't have any worker_attach_to_pool(worker, pool);
* online CPUs. It'll be re-applied when any of the CPUs come up.
*/
set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
/*
* The caller is responsible for ensuring %POOL_DISASSOCIATED
* remains stable across this function. See the comments above the
* flag definition for details.
*/
if (pool->flags & POOL_DISASSOCIATED)
worker->flags |= WORKER_UNBOUND;
/* successful, commit the pointer to idr */
spin_lock_irq(&pool->lock);
idr_replace(&pool->worker_idr, worker, worker->id);
spin_unlock_irq(&pool->lock);
return worker; return worker;
fail: fail:
if (id >= 0) { if (id >= 0)
spin_lock_irq(&pool->lock); ida_simple_remove(&pool->worker_ida, id);
idr_remove(&pool->worker_idr, id);
spin_unlock_irq(&pool->lock);
}
kfree(worker); kfree(worker);
return NULL; return NULL;
} }
...@@ -1800,7 +1744,6 @@ static struct worker *create_worker(struct worker_pool *pool) ...@@ -1800,7 +1744,6 @@ static struct worker *create_worker(struct worker_pool *pool)
*/ */
static void start_worker(struct worker *worker) static void start_worker(struct worker *worker)
{ {
worker->flags |= WORKER_STARTED;
worker->pool->nr_workers++; worker->pool->nr_workers++;
worker_enter_idle(worker); worker_enter_idle(worker);
wake_up_process(worker->task); wake_up_process(worker->task);
...@@ -1818,8 +1761,6 @@ static int create_and_start_worker(struct worker_pool *pool) ...@@ -1818,8 +1761,6 @@ static int create_and_start_worker(struct worker_pool *pool)
{ {
struct worker *worker; struct worker *worker;
mutex_lock(&pool->manager_mutex);
worker = create_worker(pool); worker = create_worker(pool);
if (worker) { if (worker) {
spin_lock_irq(&pool->lock); spin_lock_irq(&pool->lock);
...@@ -1827,8 +1768,6 @@ static int create_and_start_worker(struct worker_pool *pool) ...@@ -1827,8 +1768,6 @@ static int create_and_start_worker(struct worker_pool *pool)
spin_unlock_irq(&pool->lock); spin_unlock_irq(&pool->lock);
} }
mutex_unlock(&pool->manager_mutex);
return worker ? 0 : -ENOMEM; return worker ? 0 : -ENOMEM;
} }
...@@ -1836,46 +1775,30 @@ static int create_and_start_worker(struct worker_pool *pool) ...@@ -1836,46 +1775,30 @@ static int create_and_start_worker(struct worker_pool *pool)
* destroy_worker - destroy a workqueue worker * destroy_worker - destroy a workqueue worker
* @worker: worker to be destroyed * @worker: worker to be destroyed
* *
* Destroy @worker and adjust @pool stats accordingly. * Destroy @worker and adjust @pool stats accordingly. The worker should
* be idle.
* *
* CONTEXT: * CONTEXT:
* spin_lock_irq(pool->lock) which is released and regrabbed. * spin_lock_irq(pool->lock).
*/ */
static void destroy_worker(struct worker *worker) static void destroy_worker(struct worker *worker)
{ {
struct worker_pool *pool = worker->pool; struct worker_pool *pool = worker->pool;
lockdep_assert_held(&pool->manager_mutex);
lockdep_assert_held(&pool->lock); lockdep_assert_held(&pool->lock);
/* sanity check frenzy */ /* sanity check frenzy */
if (WARN_ON(worker->current_work) || if (WARN_ON(worker->current_work) ||
WARN_ON(!list_empty(&worker->scheduled))) WARN_ON(!list_empty(&worker->scheduled)) ||
WARN_ON(!(worker->flags & WORKER_IDLE)))
return; return;
if (worker->flags & WORKER_STARTED)
pool->nr_workers--; pool->nr_workers--;
if (worker->flags & WORKER_IDLE)
pool->nr_idle--; pool->nr_idle--;
/*
* Once WORKER_DIE is set, the kworker may destroy itself at any
* point. Pin to ensure the task stays until we're done with it.
*/
get_task_struct(worker->task);
list_del_init(&worker->entry); list_del_init(&worker->entry);
worker->flags |= WORKER_DIE; worker->flags |= WORKER_DIE;
wake_up_process(worker->task);
idr_remove(&pool->worker_idr, worker->id);
spin_unlock_irq(&pool->lock);
kthread_stop(worker->task);
put_task_struct(worker->task);
kfree(worker);
spin_lock_irq(&pool->lock);
} }
static void idle_worker_timeout(unsigned long __pool) static void idle_worker_timeout(unsigned long __pool)
...@@ -1884,7 +1807,7 @@ static void idle_worker_timeout(unsigned long __pool) ...@@ -1884,7 +1807,7 @@ static void idle_worker_timeout(unsigned long __pool)
spin_lock_irq(&pool->lock); spin_lock_irq(&pool->lock);
if (too_many_workers(pool)) { while (too_many_workers(pool)) {
struct worker *worker; struct worker *worker;
unsigned long expires; unsigned long expires;
...@@ -1892,13 +1815,12 @@ static void idle_worker_timeout(unsigned long __pool) ...@@ -1892,13 +1815,12 @@ static void idle_worker_timeout(unsigned long __pool)
worker = list_entry(pool->idle_list.prev, struct worker, entry); worker = list_entry(pool->idle_list.prev, struct worker, entry);
expires = worker->last_active + IDLE_WORKER_TIMEOUT; expires = worker->last_active + IDLE_WORKER_TIMEOUT;
if (time_before(jiffies, expires)) if (time_before(jiffies, expires)) {
mod_timer(&pool->idle_timer, expires); mod_timer(&pool->idle_timer, expires);
else { break;
/* it's been idle for too long, wake up manager */
pool->flags |= POOL_MANAGE_WORKERS;
wake_up_worker(pool);
} }
destroy_worker(worker);
} }
spin_unlock_irq(&pool->lock); spin_unlock_irq(&pool->lock);
...@@ -2016,44 +1938,6 @@ __acquires(&pool->lock) ...@@ -2016,44 +1938,6 @@ __acquires(&pool->lock)
return true; return true;
} }
/**
* maybe_destroy_worker - destroy workers which have been idle for a while
* @pool: pool to destroy workers for
*
* Destroy @pool workers which have been idle for longer than
* IDLE_WORKER_TIMEOUT.
*
* LOCKING:
* spin_lock_irq(pool->lock) which may be released and regrabbed
* multiple times. Called only from manager.
*
* Return:
* %false if no action was taken and pool->lock stayed locked, %true
* otherwise.
*/
static bool maybe_destroy_workers(struct worker_pool *pool)
{
bool ret = false;
while (too_many_workers(pool)) {
struct worker *worker;
unsigned long expires;
worker = list_entry(pool->idle_list.prev, struct worker, entry);
expires = worker->last_active + IDLE_WORKER_TIMEOUT;
if (time_before(jiffies, expires)) {
mod_timer(&pool->idle_timer, expires);
break;
}
destroy_worker(worker);
ret = true;
}
return ret;
}
/** /**
* manage_workers - manage worker pool * manage_workers - manage worker pool
* @worker: self * @worker: self
...@@ -2083,8 +1967,6 @@ static bool manage_workers(struct worker *worker) ...@@ -2083,8 +1967,6 @@ static bool manage_workers(struct worker *worker)
bool ret = false; bool ret = false;
/* /*
* Managership is governed by two mutexes - manager_arb and
* manager_mutex. manager_arb handles arbitration of manager role.
* Anyone who successfully grabs manager_arb wins the arbitration * Anyone who successfully grabs manager_arb wins the arbitration
* and becomes the manager. mutex_trylock() on pool->manager_arb * and becomes the manager. mutex_trylock() on pool->manager_arb
* failure while holding pool->lock reliably indicates that someone * failure while holding pool->lock reliably indicates that someone
...@@ -2093,40 +1975,12 @@ static bool manage_workers(struct worker *worker) ...@@ -2093,40 +1975,12 @@ static bool manage_workers(struct worker *worker)
* grabbing manager_arb is responsible for actually performing * grabbing manager_arb is responsible for actually performing
* manager duties. If manager_arb is grabbed and released without * manager duties. If manager_arb is grabbed and released without
* actual management, the pool may stall indefinitely. * actual management, the pool may stall indefinitely.
*
* manager_mutex is used for exclusion of actual management
* operations. The holder of manager_mutex can be sure that none
* of management operations, including creation and destruction of
* workers, won't take place until the mutex is released. Because
* manager_mutex doesn't interfere with manager role arbitration,
* it is guaranteed that the pool's management, while may be
* delayed, won't be disturbed by someone else grabbing
* manager_mutex.
*/ */
if (!mutex_trylock(&pool->manager_arb)) if (!mutex_trylock(&pool->manager_arb))
return ret; return ret;
/*
* With manager arbitration won, manager_mutex would be free in
* most cases. trylock first without dropping @pool->lock.
*/
if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
spin_unlock_irq(&pool->lock);
mutex_lock(&pool->manager_mutex);
spin_lock_irq(&pool->lock);
ret = true;
}
pool->flags &= ~POOL_MANAGE_WORKERS;
/*
* Destroy and then create so that may_start_working() is true
* on return.
*/
ret |= maybe_destroy_workers(pool);
ret |= maybe_create_worker(pool); ret |= maybe_create_worker(pool);
mutex_unlock(&pool->manager_mutex);
mutex_unlock(&pool->manager_arb); mutex_unlock(&pool->manager_arb);
return ret; return ret;
} }
...@@ -2314,6 +2168,11 @@ static int worker_thread(void *__worker) ...@@ -2314,6 +2168,11 @@ static int worker_thread(void *__worker)
spin_unlock_irq(&pool->lock); spin_unlock_irq(&pool->lock);
WARN_ON_ONCE(!list_empty(&worker->entry)); WARN_ON_ONCE(!list_empty(&worker->entry));
worker->task->flags &= ~PF_WQ_WORKER; worker->task->flags &= ~PF_WQ_WORKER;
set_task_comm(worker->task, "kworker/dying");
ida_simple_remove(&pool->worker_ida, worker->id);
worker_detach_from_pool(worker, pool);
kfree(worker);
return 0; return 0;
} }
...@@ -2361,9 +2220,6 @@ static int worker_thread(void *__worker) ...@@ -2361,9 +2220,6 @@ static int worker_thread(void *__worker)
worker_set_flags(worker, WORKER_PREP, false); worker_set_flags(worker, WORKER_PREP, false);
sleep: sleep:
if (unlikely(need_to_manage_workers(pool)) && manage_workers(worker))
goto recheck;
/* /*
* pool->lock is held and there's no work to process and no need to * pool->lock is held and there's no work to process and no need to
* manage, sleep. Workers are woken up only while holding * manage, sleep. Workers are woken up only while holding
...@@ -2440,8 +2296,9 @@ static int rescuer_thread(void *__rescuer) ...@@ -2440,8 +2296,9 @@ static int rescuer_thread(void *__rescuer)
spin_unlock_irq(&wq_mayday_lock); spin_unlock_irq(&wq_mayday_lock);
/* migrate to the target cpu if possible */ worker_attach_to_pool(rescuer, pool);
worker_maybe_bind_and_lock(pool);
spin_lock_irq(&pool->lock);
rescuer->pool = pool; rescuer->pool = pool;
/* /*
...@@ -2454,6 +2311,11 @@ static int rescuer_thread(void *__rescuer) ...@@ -2454,6 +2311,11 @@ static int rescuer_thread(void *__rescuer)
move_linked_works(work, scheduled, &n); move_linked_works(work, scheduled, &n);
process_scheduled_works(rescuer); process_scheduled_works(rescuer);
spin_unlock_irq(&pool->lock);
worker_detach_from_pool(rescuer, pool);
spin_lock_irq(&pool->lock);
/* /*
* Put the reference grabbed by send_mayday(). @pool won't * Put the reference grabbed by send_mayday(). @pool won't
...@@ -3550,9 +3412,10 @@ static int init_worker_pool(struct worker_pool *pool) ...@@ -3550,9 +3412,10 @@ static int init_worker_pool(struct worker_pool *pool)
(unsigned long)pool); (unsigned long)pool);
mutex_init(&pool->manager_arb); mutex_init(&pool->manager_arb);
mutex_init(&pool->manager_mutex); mutex_init(&pool->attach_mutex);
idr_init(&pool->worker_idr); INIT_LIST_HEAD(&pool->workers);
ida_init(&pool->worker_ida);
INIT_HLIST_NODE(&pool->hash_node); INIT_HLIST_NODE(&pool->hash_node);
pool->refcnt = 1; pool->refcnt = 1;
...@@ -3567,7 +3430,7 @@ static void rcu_free_pool(struct rcu_head *rcu) ...@@ -3567,7 +3430,7 @@ static void rcu_free_pool(struct rcu_head *rcu)
{ {
struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu); struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
idr_destroy(&pool->worker_idr); ida_destroy(&pool->worker_ida);
free_workqueue_attrs(pool->attrs); free_workqueue_attrs(pool->attrs);
kfree(pool); kfree(pool);
} }
...@@ -3585,6 +3448,7 @@ static void rcu_free_pool(struct rcu_head *rcu) ...@@ -3585,6 +3448,7 @@ static void rcu_free_pool(struct rcu_head *rcu)
*/ */
static void put_unbound_pool(struct worker_pool *pool) static void put_unbound_pool(struct worker_pool *pool)
{ {
DECLARE_COMPLETION_ONSTACK(detach_completion);
struct worker *worker; struct worker *worker;
lockdep_assert_held(&wq_pool_mutex); lockdep_assert_held(&wq_pool_mutex);
...@@ -3605,18 +3469,24 @@ static void put_unbound_pool(struct worker_pool *pool) ...@@ -3605,18 +3469,24 @@ static void put_unbound_pool(struct worker_pool *pool)
/* /*
* Become the manager and destroy all workers. Grabbing * Become the manager and destroy all workers. Grabbing
* manager_arb prevents @pool's workers from blocking on * manager_arb prevents @pool's workers from blocking on
* manager_mutex. * attach_mutex.
*/ */
mutex_lock(&pool->manager_arb); mutex_lock(&pool->manager_arb);
mutex_lock(&pool->manager_mutex);
spin_lock_irq(&pool->lock);
while ((worker = first_worker(pool))) spin_lock_irq(&pool->lock);
while ((worker = first_idle_worker(pool)))
destroy_worker(worker); destroy_worker(worker);
WARN_ON(pool->nr_workers || pool->nr_idle); WARN_ON(pool->nr_workers || pool->nr_idle);
spin_unlock_irq(&pool->lock); spin_unlock_irq(&pool->lock);
mutex_unlock(&pool->manager_mutex);
mutex_lock(&pool->attach_mutex);
if (!list_empty(&pool->workers))
pool->detach_completion = &detach_completion;
mutex_unlock(&pool->attach_mutex);
if (pool->detach_completion)
wait_for_completion(pool->detach_completion);
mutex_unlock(&pool->manager_arb); mutex_unlock(&pool->manager_arb);
/* shut down the timers */ /* shut down the timers */
...@@ -3662,9 +3532,6 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) ...@@ -3662,9 +3532,6 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
if (!pool || init_worker_pool(pool) < 0) if (!pool || init_worker_pool(pool) < 0)
goto fail; goto fail;
if (workqueue_freezing)
pool->flags |= POOL_FREEZING;
lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */ lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */
copy_workqueue_attrs(pool->attrs, attrs); copy_workqueue_attrs(pool->attrs, attrs);
...@@ -3771,7 +3638,12 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) ...@@ -3771,7 +3638,12 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
spin_lock_irq(&pwq->pool->lock); spin_lock_irq(&pwq->pool->lock);
if (!freezable || !(pwq->pool->flags & POOL_FREEZING)) { /*
* During [un]freezing, the caller is responsible for ensuring that
* this function is called at least once after @workqueue_freezing
* is updated and visible.
*/
if (!freezable || !workqueue_freezing) {
pwq->max_active = wq->saved_max_active; pwq->max_active = wq->saved_max_active;
while (!list_empty(&pwq->delayed_works) && while (!list_empty(&pwq->delayed_works) &&
...@@ -4103,16 +3975,12 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, ...@@ -4103,16 +3975,12 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
* Let's determine what needs to be done. If the target cpumask is * Let's determine what needs to be done. If the target cpumask is
* different from wq's, we need to compare it to @pwq's and create * different from wq's, we need to compare it to @pwq's and create
* a new one if they don't match. If the target cpumask equals * a new one if they don't match. If the target cpumask equals
* wq's, the default pwq should be used. If @pwq is already the * wq's, the default pwq should be used.
* default one, nothing to do; otherwise, install the default one.
*/ */
if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) { if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) {
if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask)) if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
goto out_unlock; goto out_unlock;
} else { } else {
if (pwq == wq->dfl_pwq)
goto out_unlock;
else
goto use_dfl_pwq; goto use_dfl_pwq;
} }
...@@ -4121,7 +3989,7 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, ...@@ -4121,7 +3989,7 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
/* create a new pwq */ /* create a new pwq */
pwq = alloc_unbound_pwq(wq, target_attrs); pwq = alloc_unbound_pwq(wq, target_attrs);
if (!pwq) { if (!pwq) {
pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
wq->name); wq->name);
mutex_lock(&wq->mutex); mutex_lock(&wq->mutex);
goto use_dfl_pwq; goto use_dfl_pwq;
...@@ -4599,28 +4467,27 @@ static void wq_unbind_fn(struct work_struct *work) ...@@ -4599,28 +4467,27 @@ static void wq_unbind_fn(struct work_struct *work)
int cpu = smp_processor_id(); int cpu = smp_processor_id();
struct worker_pool *pool; struct worker_pool *pool;
struct worker *worker; struct worker *worker;
int wi;
for_each_cpu_worker_pool(pool, cpu) { for_each_cpu_worker_pool(pool, cpu) {
WARN_ON_ONCE(cpu != smp_processor_id()); WARN_ON_ONCE(cpu != smp_processor_id());
mutex_lock(&pool->manager_mutex); mutex_lock(&pool->attach_mutex);
spin_lock_irq(&pool->lock); spin_lock_irq(&pool->lock);
/* /*
* We've blocked all manager operations. Make all workers * We've blocked all attach/detach operations. Make all workers
* unbound and set DISASSOCIATED. Before this, all workers * unbound and set DISASSOCIATED. Before this, all workers
* except for the ones which are still executing works from * except for the ones which are still executing works from
* before the last CPU down must be on the cpu. After * before the last CPU down must be on the cpu. After
* this, they may become diasporas. * this, they may become diasporas.
*/ */
for_each_pool_worker(worker, wi, pool) for_each_pool_worker(worker, pool)
worker->flags |= WORKER_UNBOUND; worker->flags |= WORKER_UNBOUND;
pool->flags |= POOL_DISASSOCIATED; pool->flags |= POOL_DISASSOCIATED;
spin_unlock_irq(&pool->lock); spin_unlock_irq(&pool->lock);
mutex_unlock(&pool->manager_mutex); mutex_unlock(&pool->attach_mutex);
/* /*
* Call schedule() so that we cross rq->lock and thus can * Call schedule() so that we cross rq->lock and thus can
...@@ -4660,9 +4527,8 @@ static void wq_unbind_fn(struct work_struct *work) ...@@ -4660,9 +4527,8 @@ static void wq_unbind_fn(struct work_struct *work)
static void rebind_workers(struct worker_pool *pool) static void rebind_workers(struct worker_pool *pool)
{ {
struct worker *worker; struct worker *worker;
int wi;
lockdep_assert_held(&pool->manager_mutex); lockdep_assert_held(&pool->attach_mutex);
/* /*
* Restore CPU affinity of all workers. As all idle workers should * Restore CPU affinity of all workers. As all idle workers should
...@@ -4671,13 +4537,13 @@ static void rebind_workers(struct worker_pool *pool) ...@@ -4671,13 +4537,13 @@ static void rebind_workers(struct worker_pool *pool)
* of all workers first and then clear UNBOUND. As we're called * of all workers first and then clear UNBOUND. As we're called
* from CPU_ONLINE, the following shouldn't fail. * from CPU_ONLINE, the following shouldn't fail.
*/ */
for_each_pool_worker(worker, wi, pool) for_each_pool_worker(worker, pool)
WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
pool->attrs->cpumask) < 0); pool->attrs->cpumask) < 0);
spin_lock_irq(&pool->lock); spin_lock_irq(&pool->lock);
for_each_pool_worker(worker, wi, pool) { for_each_pool_worker(worker, pool) {
unsigned int worker_flags = worker->flags; unsigned int worker_flags = worker->flags;
/* /*
...@@ -4729,9 +4595,8 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu) ...@@ -4729,9 +4595,8 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
{ {
static cpumask_t cpumask; static cpumask_t cpumask;
struct worker *worker; struct worker *worker;
int wi;
lockdep_assert_held(&pool->manager_mutex); lockdep_assert_held(&pool->attach_mutex);
/* is @cpu allowed for @pool? */ /* is @cpu allowed for @pool? */
if (!cpumask_test_cpu(cpu, pool->attrs->cpumask)) if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
...@@ -4743,7 +4608,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu) ...@@ -4743,7 +4608,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
return; return;
/* as we're called from CPU_ONLINE, the following shouldn't fail */ /* as we're called from CPU_ONLINE, the following shouldn't fail */
for_each_pool_worker(worker, wi, pool) for_each_pool_worker(worker, pool)
WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
pool->attrs->cpumask) < 0); pool->attrs->cpumask) < 0);
} }
...@@ -4776,7 +4641,7 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb, ...@@ -4776,7 +4641,7 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb,
mutex_lock(&wq_pool_mutex); mutex_lock(&wq_pool_mutex);
for_each_pool(pool, pi) { for_each_pool(pool, pi) {
mutex_lock(&pool->manager_mutex); mutex_lock(&pool->attach_mutex);
if (pool->cpu == cpu) { if (pool->cpu == cpu) {
spin_lock_irq(&pool->lock); spin_lock_irq(&pool->lock);
...@@ -4788,7 +4653,7 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb, ...@@ -4788,7 +4653,7 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb,
restore_unbound_workers_cpumask(pool, cpu); restore_unbound_workers_cpumask(pool, cpu);
} }
mutex_unlock(&pool->manager_mutex); mutex_unlock(&pool->attach_mutex);
} }
/* update NUMA affinity of unbound workqueues */ /* update NUMA affinity of unbound workqueues */
...@@ -4887,24 +4752,14 @@ EXPORT_SYMBOL_GPL(work_on_cpu); ...@@ -4887,24 +4752,14 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
*/ */
void freeze_workqueues_begin(void) void freeze_workqueues_begin(void)
{ {
struct worker_pool *pool;
struct workqueue_struct *wq; struct workqueue_struct *wq;
struct pool_workqueue *pwq; struct pool_workqueue *pwq;
int pi;
mutex_lock(&wq_pool_mutex); mutex_lock(&wq_pool_mutex);
WARN_ON_ONCE(workqueue_freezing); WARN_ON_ONCE(workqueue_freezing);
workqueue_freezing = true; workqueue_freezing = true;
/* set FREEZING */
for_each_pool(pool, pi) {
spin_lock_irq(&pool->lock);
WARN_ON_ONCE(pool->flags & POOL_FREEZING);
pool->flags |= POOL_FREEZING;
spin_unlock_irq(&pool->lock);
}
list_for_each_entry(wq, &workqueues, list) { list_for_each_entry(wq, &workqueues, list) {
mutex_lock(&wq->mutex); mutex_lock(&wq->mutex);
for_each_pwq(pwq, wq) for_each_pwq(pwq, wq)
...@@ -4974,21 +4829,13 @@ void thaw_workqueues(void) ...@@ -4974,21 +4829,13 @@ void thaw_workqueues(void)
{ {
struct workqueue_struct *wq; struct workqueue_struct *wq;
struct pool_workqueue *pwq; struct pool_workqueue *pwq;
struct worker_pool *pool;
int pi;
mutex_lock(&wq_pool_mutex); mutex_lock(&wq_pool_mutex);
if (!workqueue_freezing) if (!workqueue_freezing)
goto out_unlock; goto out_unlock;
/* clear FREEZING */ workqueue_freezing = false;
for_each_pool(pool, pi) {
spin_lock_irq(&pool->lock);
WARN_ON_ONCE(!(pool->flags & POOL_FREEZING));
pool->flags &= ~POOL_FREEZING;
spin_unlock_irq(&pool->lock);
}
/* restore max_active and repopulate worklist */ /* restore max_active and repopulate worklist */
list_for_each_entry(wq, &workqueues, list) { list_for_each_entry(wq, &workqueues, list) {
...@@ -4998,7 +4845,6 @@ void thaw_workqueues(void) ...@@ -4998,7 +4845,6 @@ void thaw_workqueues(void)
mutex_unlock(&wq->mutex); mutex_unlock(&wq->mutex);
} }
workqueue_freezing = false;
out_unlock: out_unlock:
mutex_unlock(&wq_pool_mutex); mutex_unlock(&wq_pool_mutex);
} }
......
...@@ -37,6 +37,8 @@ struct worker { ...@@ -37,6 +37,8 @@ struct worker {
struct task_struct *task; /* I: worker task */ struct task_struct *task; /* I: worker task */
struct worker_pool *pool; /* I: the associated pool */ struct worker_pool *pool; /* I: the associated pool */
/* L: for rescuers */ /* L: for rescuers */
struct list_head node; /* A: anchored at pool->workers */
/* A: runs through worker->node */
unsigned long last_active; /* L: last active timestamp */ unsigned long last_active; /* L: last active timestamp */
unsigned int flags; /* X: flags */ unsigned int flags; /* X: flags */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment