Commit 67cb104b authored by Linus Torvalds

Merge branch 'for-3.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue changes from Tejun Heo:
 "A lot of reorganization is going on mostly to prepare for worker pools
  with custom attributes so that workqueue can replace custom pool
  implementations in places including writeback and btrfs and make CPU
  assignment in crypto more flexible.

  workqueue evolved from a purely per-cpu design and implementation, so
  there are a lot of assumptions about being bound to CPUs, and even
  unbound workqueues are implemented as an extension of that model -
  workqueues running on a special unbound CPU.  The bulk of the changes
  this round is about promoting worker_pool to the top-level
  abstraction, replacing global_cwq (the global cpu workqueue).  At this
  point, I'm fairly confident about getting custom worker pools working
  soon and ready for the next merge window.

  Lai's patches replace the convoluted mb() dancing workqueue has been
  doing with a much simpler mechanism that only depends on the
  assignment atomicity of a long.  For details, please read the commit
  message of 0b3dae68 ("workqueue: simplify is-work-item-queued-here
  test").  While the change ends up adding one pointer to struct
  delayed_work, the inflation is less than five percent, and it
  decouples the delayed_work logic much more cleanly from the usual
  work handling, removes the unusual memory barrier dancing, and allows
  further simplification, so I think the trade-off is acceptable.

  There will be two more workqueue-related pull requests, and there are
  some shared commits among them.  I'll write the remaining pull
  requests assuming this pull request is pulled first."

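As background for Lai's series, here is a minimal userspace sketch of the
off-queue encoding it relies on: while a work item is not queued, its data
word packs flag bits in the low bits and the ID of the last worker pool it
ran on in the high bits, so a single long assignment updates both at once.
The constants and helpers below are invented for illustration and ignore
the low WORK_STRUCT_FLAG_BITS that the real encoding reserves; see the
workqueue.h and workqueue.c hunks further down for the actual definitions.

#include <assert.h>
#include <stdio.h>

/* Illustrative stand-ins for WORK_OFFQ_* (simplified, not the kernel values) */
#define OFFQ_CANCELING    (1UL << 0)          /* work item is being canceled */
#define OFFQ_FLAG_BITS    1
#define OFFQ_POOL_SHIFT   OFFQ_FLAG_BITS
#define OFFQ_POOL_NONE    ((1UL << 31) - 1)   /* "no associated pool" sentinel */

/* pack the last pool ID plus off-queue flags into one word */
static unsigned long pack_offq_data(unsigned long pool_id, unsigned long flags)
{
        return (pool_id << OFFQ_POOL_SHIFT) | flags;
}

/* recover the pool ID, mirroring what get_work_pool_id() does off queue */
static unsigned long offq_pool_id(unsigned long data)
{
        return data >> OFFQ_POOL_SHIFT;
}

int main(void)
{
        /* one plain assignment records both the pool and the CANCELING flag */
        unsigned long data = pack_offq_data(5, OFFQ_CANCELING);

        assert(offq_pool_id(data) == 5);
        assert(data & OFFQ_CANCELING);

        data = pack_offq_data(OFFQ_POOL_NONE, 0);  /* never been on a pool */
        printf("pool id when none: %lu\n", offq_pool_id(data));
        return 0;
}
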
* 'for-3.9' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq: (37 commits)
  workqueue: un-GPL function delayed_work_timer_fn()
  workqueue: rename cpu_workqueue to pool_workqueue
  workqueue: reimplement is_chained_work() using current_wq_worker()
  workqueue: fix is_chained_work() regression
  workqueue: pick cwq instead of pool in __queue_work()
  workqueue: make get_work_pool_id() cheaper
  workqueue: move nr_running into worker_pool
  workqueue: cosmetic update in try_to_grab_pending()
  workqueue: simplify is-work-item-queued-here test
  workqueue: make work->data point to pool after try_to_grab_pending()
  workqueue: add delayed_work->wq to simplify reentrancy handling
  workqueue: make work_busy() test WORK_STRUCT_PENDING first
  workqueue: replace WORK_CPU_NONE/LAST with WORK_CPU_END
  workqueue: post global_cwq removal cleanups
  workqueue: rename nr_running variables
  workqueue: remove global_cwq
  workqueue: remove worker_pool->gcwq
  workqueue: replace for_each_worker_pool() with for_each_std_worker_pool()
  workqueue: make freezing/thawing per-pool
  workqueue: make hotplug processing per-pool
  ...
parents 1eaec821 1438ade5
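
The diff below also adds a workqueue back-pointer to struct delayed_work
("workqueue: add delayed_work->wq to simplify reentrancy handling"), so
delayed_work_timer_fn() can requeue the work directly instead of decoding
the target workqueue from work->data.  A standalone sketch of that shape,
with toy names standing in for the kernel types:

#include <stdio.h>

struct toy_wq { const char *name; };          /* ~ struct workqueue_struct */

struct toy_work {
        void (*func)(struct toy_work *work);
};

struct toy_delayed_work {
        struct toy_work work;
        struct toy_wq *wq;     /* target workqueue the timer queues ->work on */
        int cpu;               /* CPU the timer should queue on */
};

static void toy_queue_work(int cpu, struct toy_wq *wq, struct toy_work *work)
{
        printf("queueing %p on %s, cpu %d\n", (void *)work, wq->name, cpu);
}

/* what the timer callback boils down to once dwork->wq exists */
static void toy_delayed_work_timer_fn(struct toy_delayed_work *dwork)
{
        /* no work->data lookup needed; the back-pointer has the answer */
        toy_queue_work(dwork->cpu, dwork->wq, &dwork->work);
}

int main(void)
{
        struct toy_wq system_wq = { "system_wq" };
        struct toy_delayed_work dw = { .wq = &system_wq, .cpu = 0 };

        toy_delayed_work_timer_fn(&dw);
        return 0;
}
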
...@@ -52,4 +52,5 @@ extern void async_synchronize_full_domain(struct async_domain *domain); ...@@ -52,4 +52,5 @@ extern void async_synchronize_full_domain(struct async_domain *domain);
extern void async_synchronize_cookie(async_cookie_t cookie); extern void async_synchronize_cookie(async_cookie_t cookie);
extern void async_synchronize_cookie_domain(async_cookie_t cookie, extern void async_synchronize_cookie_domain(async_cookie_t cookie,
struct async_domain *domain); struct async_domain *domain);
extern bool current_is_async(void);
#endif #endif
...@@ -27,7 +27,7 @@ void delayed_work_timer_fn(unsigned long __data); ...@@ -27,7 +27,7 @@ void delayed_work_timer_fn(unsigned long __data);
enum { enum {
WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */
WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */ WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */
WORK_STRUCT_CWQ_BIT = 2, /* data points to cwq */ WORK_STRUCT_PWQ_BIT = 2, /* data points to pwq */
WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */
#ifdef CONFIG_DEBUG_OBJECTS_WORK #ifdef CONFIG_DEBUG_OBJECTS_WORK
WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */
...@@ -40,7 +40,7 @@ enum { ...@@ -40,7 +40,7 @@ enum {
WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT,
WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT, WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT,
WORK_STRUCT_CWQ = 1 << WORK_STRUCT_CWQ_BIT, WORK_STRUCT_PWQ = 1 << WORK_STRUCT_PWQ_BIT,
WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT,
#ifdef CONFIG_DEBUG_OBJECTS_WORK #ifdef CONFIG_DEBUG_OBJECTS_WORK
WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT, WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT,
...@@ -57,29 +57,36 @@ enum { ...@@ -57,29 +57,36 @@ enum {
/* special cpu IDs */ /* special cpu IDs */
WORK_CPU_UNBOUND = NR_CPUS, WORK_CPU_UNBOUND = NR_CPUS,
WORK_CPU_NONE = NR_CPUS + 1, WORK_CPU_END = NR_CPUS + 1,
WORK_CPU_LAST = WORK_CPU_NONE,
/* /*
* Reserve 7 bits off of cwq pointer w/ debugobjects turned * Reserve 7 bits off of pwq pointer w/ debugobjects turned off.
* off. This makes cwqs aligned to 256 bytes and allows 15 * This makes pwqs aligned to 256 bytes and allows 15 workqueue
* workqueue flush colors. * flush colors.
*/ */
WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT +
WORK_STRUCT_COLOR_BITS, WORK_STRUCT_COLOR_BITS,
/* data contains off-queue information when !WORK_STRUCT_CWQ */ /* data contains off-queue information when !WORK_STRUCT_PWQ */
WORK_OFFQ_FLAG_BASE = WORK_STRUCT_FLAG_BITS, WORK_OFFQ_FLAG_BASE = WORK_STRUCT_FLAG_BITS,
WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE), WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE),
/*
* When a work item is off queue, its high bits point to the last
* pool it was on. Cap at 31 bits and use the highest number to
* indicate that no pool is associated.
*/
WORK_OFFQ_FLAG_BITS = 1, WORK_OFFQ_FLAG_BITS = 1,
WORK_OFFQ_CPU_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT,
WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31,
WORK_OFFQ_POOL_NONE = (1LU << WORK_OFFQ_POOL_BITS) - 1,
/* convenience constants */ /* convenience constants */
WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1, WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1,
WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK,
WORK_STRUCT_NO_CPU = (unsigned long)WORK_CPU_NONE << WORK_OFFQ_CPU_SHIFT, WORK_STRUCT_NO_POOL = (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT,
/* bit mask for work_busy() return values */ /* bit mask for work_busy() return values */
WORK_BUSY_PENDING = 1 << 0, WORK_BUSY_PENDING = 1 << 0,
...@@ -95,13 +102,16 @@ struct work_struct { ...@@ -95,13 +102,16 @@ struct work_struct {
#endif #endif
}; };
#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU) #define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL)
#define WORK_DATA_STATIC_INIT() \ #define WORK_DATA_STATIC_INIT() \
ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU | WORK_STRUCT_STATIC) ATOMIC_LONG_INIT(WORK_STRUCT_NO_POOL | WORK_STRUCT_STATIC)
struct delayed_work { struct delayed_work {
struct work_struct work; struct work_struct work;
struct timer_list timer; struct timer_list timer;
/* target workqueue and CPU ->timer uses to queue ->work */
struct workqueue_struct *wq;
int cpu; int cpu;
}; };
...@@ -426,7 +436,6 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork); ...@@ -426,7 +436,6 @@ extern bool cancel_delayed_work_sync(struct delayed_work *dwork);
extern void workqueue_set_max_active(struct workqueue_struct *wq, extern void workqueue_set_max_active(struct workqueue_struct *wq,
int max_active); int max_active);
extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq); extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq);
extern unsigned int work_cpu(struct work_struct *work);
extern unsigned int work_busy(struct work_struct *work); extern unsigned int work_busy(struct work_struct *work);
/* /*
......
...@@ -27,7 +27,7 @@ DECLARE_EVENT_CLASS(workqueue_work, ...@@ -27,7 +27,7 @@ DECLARE_EVENT_CLASS(workqueue_work,
/** /**
* workqueue_queue_work - called when a work gets queued * workqueue_queue_work - called when a work gets queued
* @req_cpu: the requested cpu * @req_cpu: the requested cpu
* @cwq: pointer to struct cpu_workqueue_struct * @pwq: pointer to struct pool_workqueue
* @work: pointer to struct work_struct * @work: pointer to struct work_struct
* *
* This event occurs when a work is queued immediately or once a * This event occurs when a work is queued immediately or once a
...@@ -36,10 +36,10 @@ DECLARE_EVENT_CLASS(workqueue_work, ...@@ -36,10 +36,10 @@ DECLARE_EVENT_CLASS(workqueue_work,
*/ */
TRACE_EVENT(workqueue_queue_work, TRACE_EVENT(workqueue_queue_work,
TP_PROTO(unsigned int req_cpu, struct cpu_workqueue_struct *cwq, TP_PROTO(unsigned int req_cpu, struct pool_workqueue *pwq,
struct work_struct *work), struct work_struct *work),
TP_ARGS(req_cpu, cwq, work), TP_ARGS(req_cpu, pwq, work),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( void *, work ) __field( void *, work )
...@@ -52,9 +52,9 @@ TRACE_EVENT(workqueue_queue_work, ...@@ -52,9 +52,9 @@ TRACE_EVENT(workqueue_queue_work,
TP_fast_assign( TP_fast_assign(
__entry->work = work; __entry->work = work;
__entry->function = work->func; __entry->function = work->func;
__entry->workqueue = cwq->wq; __entry->workqueue = pwq->wq;
__entry->req_cpu = req_cpu; __entry->req_cpu = req_cpu;
__entry->cpu = cwq->pool->gcwq->cpu; __entry->cpu = pwq->pool->cpu;
), ),
TP_printk("work struct=%p function=%pf workqueue=%p req_cpu=%u cpu=%u", TP_printk("work struct=%p function=%pf workqueue=%p req_cpu=%u cpu=%u",
......
...@@ -57,6 +57,8 @@ asynchronous and synchronous parts of the kernel. ...@@ -57,6 +57,8 @@ asynchronous and synchronous parts of the kernel.
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include "workqueue_internal.h"
static async_cookie_t next_cookie = 1; static async_cookie_t next_cookie = 1;
#define MAX_WORK 32768 #define MAX_WORK 32768
...@@ -353,3 +355,15 @@ void async_synchronize_cookie(async_cookie_t cookie) ...@@ -353,3 +355,15 @@ void async_synchronize_cookie(async_cookie_t cookie)
async_synchronize_cookie_domain(cookie, &async_running); async_synchronize_cookie_domain(cookie, &async_running);
} }
EXPORT_SYMBOL_GPL(async_synchronize_cookie); EXPORT_SYMBOL_GPL(async_synchronize_cookie);
/**
* current_is_async - is %current an async worker task?
*
* Returns %true if %current is an async worker task.
*/
bool current_is_async(void)
{
struct worker *worker = current_wq_worker();
return worker && worker->current_func == async_run_entry_fn;
}
...@@ -83,7 +83,7 @@ ...@@ -83,7 +83,7 @@
#endif #endif
#include "sched.h" #include "sched.h"
#include "../workqueue_sched.h" #include "../workqueue_internal.h"
#include "../smpboot.h" #include "../smpboot.h"
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
......
...@@ -41,32 +41,31 @@ ...@@ -41,32 +41,31 @@
#include <linux/debug_locks.h> #include <linux/debug_locks.h>
#include <linux/lockdep.h> #include <linux/lockdep.h>
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/hashtable.h>
#include "workqueue_sched.h" #include "workqueue_internal.h"
enum { enum {
/* /*
* global_cwq flags * worker_pool flags
* *
* A bound gcwq is either associated or disassociated with its CPU. * A bound pool is either associated or disassociated with its CPU.
* While associated (!DISASSOCIATED), all workers are bound to the * While associated (!DISASSOCIATED), all workers are bound to the
* CPU and none has %WORKER_UNBOUND set and concurrency management * CPU and none has %WORKER_UNBOUND set and concurrency management
* is in effect. * is in effect.
* *
* While DISASSOCIATED, the cpu may be offline and all workers have * While DISASSOCIATED, the cpu may be offline and all workers have
* %WORKER_UNBOUND set and concurrency management disabled, and may * %WORKER_UNBOUND set and concurrency management disabled, and may
* be executing on any CPU. The gcwq behaves as an unbound one. * be executing on any CPU. The pool behaves as an unbound one.
* *
* Note that DISASSOCIATED can be flipped only while holding * Note that DISASSOCIATED can be flipped only while holding
* assoc_mutex of all pools on the gcwq to avoid changing binding * assoc_mutex to avoid changing binding state while
* state while create_worker() is in progress. * create_worker() is in progress.
*/ */
GCWQ_DISASSOCIATED = 1 << 0, /* cpu can't serve workers */
GCWQ_FREEZING = 1 << 1, /* freeze in progress */
/* pool flags */
POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */ POOL_MANAGING_WORKERS = 1 << 1, /* managing workers */
POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
POOL_FREEZING = 1 << 3, /* freeze in progress */
/* worker flags */ /* worker flags */
WORKER_STARTED = 1 << 0, /* started */ WORKER_STARTED = 1 << 0, /* started */
...@@ -79,11 +78,9 @@ enum { ...@@ -79,11 +78,9 @@ enum {
WORKER_NOT_RUNNING = WORKER_PREP | WORKER_UNBOUND | WORKER_NOT_RUNNING = WORKER_PREP | WORKER_UNBOUND |
WORKER_CPU_INTENSIVE, WORKER_CPU_INTENSIVE,
NR_WORKER_POOLS = 2, /* # worker pools per gcwq */ NR_STD_WORKER_POOLS = 2, /* # standard pools per cpu */
BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */ BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */
BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER,
BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1,
MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */
IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */
...@@ -111,48 +108,24 @@ enum { ...@@ -111,48 +108,24 @@ enum {
* P: Preemption protected. Disabling preemption is enough and should * P: Preemption protected. Disabling preemption is enough and should
* only be modified and accessed from the local cpu. * only be modified and accessed from the local cpu.
* *
* L: gcwq->lock protected. Access with gcwq->lock held. * L: pool->lock protected. Access with pool->lock held.
* *
* X: During normal operation, modification requires gcwq->lock and * X: During normal operation, modification requires pool->lock and should
* should be done only from local cpu. Either disabling preemption * be done only from local cpu. Either disabling preemption on local
* on local cpu or grabbing gcwq->lock is enough for read access. * cpu or grabbing pool->lock is enough for read access. If
* If GCWQ_DISASSOCIATED is set, it's identical to L. * POOL_DISASSOCIATED is set, it's identical to L.
* *
* F: wq->flush_mutex protected. * F: wq->flush_mutex protected.
* *
* W: workqueue_lock protected. * W: workqueue_lock protected.
*/ */
struct global_cwq; /* struct worker is defined in workqueue_internal.h */
struct worker_pool;
/*
* The poor guys doing the actual heavy lifting. All on-duty workers
* are either serving the manager role, on idle list or on busy hash.
*/
struct worker {
/* on idle list while idle, on busy hash table while busy */
union {
struct list_head entry; /* L: while idle */
struct hlist_node hentry; /* L: while busy */
};
struct work_struct *current_work; /* L: work being processed */
struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */
struct list_head scheduled; /* L: scheduled works */
struct task_struct *task; /* I: worker task */
struct worker_pool *pool; /* I: the associated pool */
/* 64 bytes boundary on 64bit, 32 on 32bit */
unsigned long last_active; /* L: last active timestamp */
unsigned int flags; /* X: flags */
int id; /* I: worker id */
/* for rebinding worker to CPU */
struct work_struct rebind_work; /* L: for busy worker */
};
struct worker_pool { struct worker_pool {
struct global_cwq *gcwq; /* I: the owning gcwq */ spinlock_t lock; /* the pool lock */
unsigned int cpu; /* I: the associated cpu */
int id; /* I: pool ID */
unsigned int flags; /* X: flags */ unsigned int flags; /* X: flags */
struct list_head worklist; /* L: list of pending works */ struct list_head worklist; /* L: list of pending works */
...@@ -165,34 +138,28 @@ struct worker_pool { ...@@ -165,34 +138,28 @@ struct worker_pool {
struct timer_list idle_timer; /* L: worker idle timeout */ struct timer_list idle_timer; /* L: worker idle timeout */
struct timer_list mayday_timer; /* L: SOS timer for workers */ struct timer_list mayday_timer; /* L: SOS timer for workers */
struct mutex assoc_mutex; /* protect GCWQ_DISASSOCIATED */ /* workers are chained either in busy_hash or idle_list */
DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);
/* L: hash of busy workers */
struct mutex assoc_mutex; /* protect POOL_DISASSOCIATED */
struct ida worker_ida; /* L: for worker IDs */ struct ida worker_ida; /* L: for worker IDs */
};
/* /*
* Global per-cpu workqueue. There's one and only one for each cpu * The current concurrency level. As it's likely to be accessed
* and all works are queued and processed here regardless of their * from other CPUs during try_to_wake_up(), put it in a separate
* target workqueues. * cacheline.
*/ */
struct global_cwq { atomic_t nr_running ____cacheline_aligned_in_smp;
spinlock_t lock; /* the gcwq lock */
unsigned int cpu; /* I: the associated cpu */
unsigned int flags; /* L: GCWQ_* flags */
/* workers are chained either in busy_hash or pool idle_list */
struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE];
/* L: hash of busy workers */
struct worker_pool pools[NR_WORKER_POOLS];
/* normal and highpri pools */
} ____cacheline_aligned_in_smp; } ____cacheline_aligned_in_smp;
/* /*
* The per-CPU workqueue. The lower WORK_STRUCT_FLAG_BITS of * The per-pool workqueue. While queued, the lower WORK_STRUCT_FLAG_BITS
* work_struct->data are used for flags and thus cwqs need to be * of work_struct->data are used for flags and the remaining high bits
* aligned at two's power of the number of flag bits. * point to the pwq; thus, pwqs need to be aligned at two's power of the
* number of flag bits.
*/ */
struct cpu_workqueue_struct { struct pool_workqueue {
struct worker_pool *pool; /* I: the associated pool */ struct worker_pool *pool; /* I: the associated pool */
struct workqueue_struct *wq; /* I: the owning workqueue */ struct workqueue_struct *wq; /* I: the owning workqueue */
int work_color; /* L: current color */ int work_color; /* L: current color */
...@@ -241,16 +208,16 @@ typedef unsigned long mayday_mask_t; ...@@ -241,16 +208,16 @@ typedef unsigned long mayday_mask_t;
struct workqueue_struct { struct workqueue_struct {
unsigned int flags; /* W: WQ_* flags */ unsigned int flags; /* W: WQ_* flags */
union { union {
struct cpu_workqueue_struct __percpu *pcpu; struct pool_workqueue __percpu *pcpu;
struct cpu_workqueue_struct *single; struct pool_workqueue *single;
unsigned long v; unsigned long v;
} cpu_wq; /* I: cwq's */ } pool_wq; /* I: pwq's */
struct list_head list; /* W: list of all workqueues */ struct list_head list; /* W: list of all workqueues */
struct mutex flush_mutex; /* protects wq flushing */ struct mutex flush_mutex; /* protects wq flushing */
int work_color; /* F: current work color */ int work_color; /* F: current work color */
int flush_color; /* F: current flush color */ int flush_color; /* F: current flush color */
atomic_t nr_cwqs_to_flush; /* flush in progress */ atomic_t nr_pwqs_to_flush; /* flush in progress */
struct wq_flusher *first_flusher; /* F: first flusher */ struct wq_flusher *first_flusher; /* F: first flusher */
struct list_head flusher_queue; /* F: flush waiters */ struct list_head flusher_queue; /* F: flush waiters */
struct list_head flusher_overflow; /* F: flush overflow list */ struct list_head flusher_overflow; /* F: flush overflow list */
...@@ -259,7 +226,7 @@ struct workqueue_struct { ...@@ -259,7 +226,7 @@ struct workqueue_struct {
struct worker *rescuer; /* I: rescue worker */ struct worker *rescuer; /* I: rescue worker */
int nr_drainers; /* W: drain in progress */ int nr_drainers; /* W: drain in progress */
int saved_max_active; /* W: saved cwq max_active */ int saved_max_active; /* W: saved pwq max_active */
#ifdef CONFIG_LOCKDEP #ifdef CONFIG_LOCKDEP
struct lockdep_map lockdep_map; struct lockdep_map lockdep_map;
#endif #endif
...@@ -280,15 +247,14 @@ EXPORT_SYMBOL_GPL(system_freezable_wq); ...@@ -280,15 +247,14 @@ EXPORT_SYMBOL_GPL(system_freezable_wq);
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/workqueue.h> #include <trace/events/workqueue.h>
#define for_each_worker_pool(pool, gcwq) \ #define for_each_std_worker_pool(pool, cpu) \
for ((pool) = &(gcwq)->pools[0]; \ for ((pool) = &std_worker_pools(cpu)[0]; \
(pool) < &(gcwq)->pools[NR_WORKER_POOLS]; (pool)++) (pool) < &std_worker_pools(cpu)[NR_STD_WORKER_POOLS]; (pool)++)
#define for_each_busy_worker(worker, i, pos, gcwq) \ #define for_each_busy_worker(worker, i, pos, pool) \
for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ hash_for_each(pool->busy_hash, i, pos, worker, hentry)
hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry)
static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask, static inline int __next_wq_cpu(int cpu, const struct cpumask *mask,
unsigned int sw) unsigned int sw)
{ {
if (cpu < nr_cpu_ids) { if (cpu < nr_cpu_ids) {
...@@ -300,42 +266,42 @@ static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask, ...@@ -300,42 +266,42 @@ static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask,
if (sw & 2) if (sw & 2)
return WORK_CPU_UNBOUND; return WORK_CPU_UNBOUND;
} }
return WORK_CPU_NONE; return WORK_CPU_END;
} }
static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, static inline int __next_pwq_cpu(int cpu, const struct cpumask *mask,
struct workqueue_struct *wq) struct workqueue_struct *wq)
{ {
return __next_gcwq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2); return __next_wq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2);
} }
/* /*
* CPU iterators * CPU iterators
* *
* An extra gcwq is defined for an invalid cpu number * An extra cpu number is defined using an invalid cpu number
* (WORK_CPU_UNBOUND) to host workqueues which are not bound to any * (WORK_CPU_UNBOUND) to host workqueues which are not bound to any
* specific CPU. The following iterators are similar to * specific CPU. The following iterators are similar to for_each_*_cpu()
* for_each_*_cpu() iterators but also considers the unbound gcwq. * iterators but also considers the unbound CPU.
* *
* for_each_gcwq_cpu() : possible CPUs + WORK_CPU_UNBOUND * for_each_wq_cpu() : possible CPUs + WORK_CPU_UNBOUND
* for_each_online_gcwq_cpu() : online CPUs + WORK_CPU_UNBOUND * for_each_online_wq_cpu() : online CPUs + WORK_CPU_UNBOUND
* for_each_cwq_cpu() : possible CPUs for bound workqueues, * for_each_pwq_cpu() : possible CPUs for bound workqueues,
* WORK_CPU_UNBOUND for unbound workqueues * WORK_CPU_UNBOUND for unbound workqueues
*/ */
#define for_each_gcwq_cpu(cpu) \ #define for_each_wq_cpu(cpu) \
for ((cpu) = __next_gcwq_cpu(-1, cpu_possible_mask, 3); \ for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, 3); \
(cpu) < WORK_CPU_NONE; \ (cpu) < WORK_CPU_END; \
(cpu) = __next_gcwq_cpu((cpu), cpu_possible_mask, 3)) (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, 3))
#define for_each_online_gcwq_cpu(cpu) \ #define for_each_online_wq_cpu(cpu) \
for ((cpu) = __next_gcwq_cpu(-1, cpu_online_mask, 3); \ for ((cpu) = __next_wq_cpu(-1, cpu_online_mask, 3); \
(cpu) < WORK_CPU_NONE; \ (cpu) < WORK_CPU_END; \
(cpu) = __next_gcwq_cpu((cpu), cpu_online_mask, 3)) (cpu) = __next_wq_cpu((cpu), cpu_online_mask, 3))
#define for_each_cwq_cpu(cpu, wq) \ #define for_each_pwq_cpu(cpu, wq) \
for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, (wq)); \ for ((cpu) = __next_pwq_cpu(-1, cpu_possible_mask, (wq)); \
(cpu) < WORK_CPU_NONE; \ (cpu) < WORK_CPU_END; \
(cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq))) (cpu) = __next_pwq_cpu((cpu), cpu_possible_mask, (wq)))
#ifdef CONFIG_DEBUG_OBJECTS_WORK #ifdef CONFIG_DEBUG_OBJECTS_WORK
...@@ -459,57 +425,69 @@ static LIST_HEAD(workqueues); ...@@ -459,57 +425,69 @@ static LIST_HEAD(workqueues);
static bool workqueue_freezing; /* W: have wqs started freezing? */ static bool workqueue_freezing; /* W: have wqs started freezing? */
/* /*
* The almighty global cpu workqueues. nr_running is the only field * The CPU and unbound standard worker pools. The unbound ones have
* which is expected to be used frequently by other cpus via * POOL_DISASSOCIATED set, and their workers have WORKER_UNBOUND set.
* try_to_wake_up(). Put it in a separate cacheline.
*/ */
static DEFINE_PER_CPU(struct global_cwq, global_cwq); static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, pool_nr_running[NR_WORKER_POOLS]); cpu_std_worker_pools);
static struct worker_pool unbound_std_worker_pools[NR_STD_WORKER_POOLS];
/* /* idr of all pools */
* Global cpu workqueue and nr_running counter for unbound gcwq. The static DEFINE_MUTEX(worker_pool_idr_mutex);
* gcwq is always online, has GCWQ_DISASSOCIATED set, and all its static DEFINE_IDR(worker_pool_idr);
* workers have WORKER_UNBOUND set.
*/
static struct global_cwq unbound_global_cwq;
static atomic_t unbound_pool_nr_running[NR_WORKER_POOLS] = {
[0 ... NR_WORKER_POOLS - 1] = ATOMIC_INIT(0), /* always 0 */
};
static int worker_thread(void *__worker); static int worker_thread(void *__worker);
static int worker_pool_pri(struct worker_pool *pool) static struct worker_pool *std_worker_pools(int cpu)
{ {
return pool - pool->gcwq->pools; if (cpu != WORK_CPU_UNBOUND)
return per_cpu(cpu_std_worker_pools, cpu);
else
return unbound_std_worker_pools;
} }
static struct global_cwq *get_gcwq(unsigned int cpu) static int std_worker_pool_pri(struct worker_pool *pool)
{ {
if (cpu != WORK_CPU_UNBOUND) return pool - std_worker_pools(pool->cpu);
return &per_cpu(global_cwq, cpu);
else
return &unbound_global_cwq;
} }
static atomic_t *get_pool_nr_running(struct worker_pool *pool) /* allocate ID and assign it to @pool */
static int worker_pool_assign_id(struct worker_pool *pool)
{ {
int cpu = pool->gcwq->cpu; int ret;
int idx = worker_pool_pri(pool);
if (cpu != WORK_CPU_UNBOUND) mutex_lock(&worker_pool_idr_mutex);
return &per_cpu(pool_nr_running, cpu)[idx]; idr_pre_get(&worker_pool_idr, GFP_KERNEL);
else ret = idr_get_new(&worker_pool_idr, pool, &pool->id);
return &unbound_pool_nr_running[idx]; mutex_unlock(&worker_pool_idr_mutex);
return ret;
} }
static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, /*
* Lookup worker_pool by id. The idr currently is built during boot and
* never modified. Don't worry about locking for now.
*/
static struct worker_pool *worker_pool_by_id(int pool_id)
{
return idr_find(&worker_pool_idr, pool_id);
}
static struct worker_pool *get_std_worker_pool(int cpu, bool highpri)
{
struct worker_pool *pools = std_worker_pools(cpu);
return &pools[highpri];
}
static struct pool_workqueue *get_pwq(unsigned int cpu,
struct workqueue_struct *wq) struct workqueue_struct *wq)
{ {
if (!(wq->flags & WQ_UNBOUND)) { if (!(wq->flags & WQ_UNBOUND)) {
if (likely(cpu < nr_cpu_ids)) if (likely(cpu < nr_cpu_ids))
return per_cpu_ptr(wq->cpu_wq.pcpu, cpu); return per_cpu_ptr(wq->pool_wq.pcpu, cpu);
} else if (likely(cpu == WORK_CPU_UNBOUND)) } else if (likely(cpu == WORK_CPU_UNBOUND))
return wq->cpu_wq.single; return wq->pool_wq.single;
return NULL; return NULL;
} }
...@@ -530,19 +508,19 @@ static int work_next_color(int color) ...@@ -530,19 +508,19 @@ static int work_next_color(int color)
} }
/* /*
* While queued, %WORK_STRUCT_CWQ is set and non flag bits of a work's data * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data
* contain the pointer to the queued cwq. Once execution starts, the flag * contain the pointer to the queued pwq. Once execution starts, the flag
* is cleared and the high bits contain OFFQ flags and CPU number. * is cleared and the high bits contain OFFQ flags and pool ID.
* *
* set_work_cwq(), set_work_cpu_and_clear_pending(), mark_work_canceling() * set_work_pwq(), set_work_pool_and_clear_pending(), mark_work_canceling()
* and clear_work_data() can be used to set the cwq, cpu or clear * and clear_work_data() can be used to set the pwq, pool or clear
* work->data. These functions should only be called while the work is * work->data. These functions should only be called while the work is
* owned - ie. while the PENDING bit is set. * owned - ie. while the PENDING bit is set.
* *
* get_work_[g]cwq() can be used to obtain the gcwq or cwq corresponding to * get_work_pool() and get_work_pwq() can be used to obtain the pool or pwq
* a work. gcwq is available once the work has been queued anywhere after * corresponding to a work. Pool is available once the work has been
* initialization until it is sync canceled. cwq is available only while * queued anywhere after initialization until it is sync canceled. pwq is
* the work item is queued. * available only while the work item is queued.
* *
* %WORK_OFFQ_CANCELING is used to mark a work item which is being * %WORK_OFFQ_CANCELING is used to mark a work item which is being
* canceled. While being canceled, a work item may have its PENDING set * canceled. While being canceled, a work item may have its PENDING set
...@@ -556,16 +534,22 @@ static inline void set_work_data(struct work_struct *work, unsigned long data, ...@@ -556,16 +534,22 @@ static inline void set_work_data(struct work_struct *work, unsigned long data,
atomic_long_set(&work->data, data | flags | work_static(work)); atomic_long_set(&work->data, data | flags | work_static(work));
} }
static void set_work_cwq(struct work_struct *work, static void set_work_pwq(struct work_struct *work, struct pool_workqueue *pwq,
struct cpu_workqueue_struct *cwq,
unsigned long extra_flags) unsigned long extra_flags)
{ {
set_work_data(work, (unsigned long)cwq, set_work_data(work, (unsigned long)pwq,
WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags); WORK_STRUCT_PENDING | WORK_STRUCT_PWQ | extra_flags);
} }
static void set_work_cpu_and_clear_pending(struct work_struct *work, static void set_work_pool_and_keep_pending(struct work_struct *work,
unsigned int cpu) int pool_id)
{
set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT,
WORK_STRUCT_PENDING);
}
static void set_work_pool_and_clear_pending(struct work_struct *work,
int pool_id)
{ {
/* /*
* The following wmb is paired with the implied mb in * The following wmb is paired with the implied mb in
...@@ -574,67 +558,92 @@ static void set_work_cpu_and_clear_pending(struct work_struct *work, ...@@ -574,67 +558,92 @@ static void set_work_cpu_and_clear_pending(struct work_struct *work,
* owner. * owner.
*/ */
smp_wmb(); smp_wmb();
set_work_data(work, (unsigned long)cpu << WORK_OFFQ_CPU_SHIFT, 0); set_work_data(work, (unsigned long)pool_id << WORK_OFFQ_POOL_SHIFT, 0);
} }
static void clear_work_data(struct work_struct *work) static void clear_work_data(struct work_struct *work)
{ {
smp_wmb(); /* see set_work_cpu_and_clear_pending() */ smp_wmb(); /* see set_work_pool_and_clear_pending() */
set_work_data(work, WORK_STRUCT_NO_CPU, 0); set_work_data(work, WORK_STRUCT_NO_POOL, 0);
} }
static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work) static struct pool_workqueue *get_work_pwq(struct work_struct *work)
{ {
unsigned long data = atomic_long_read(&work->data); unsigned long data = atomic_long_read(&work->data);
if (data & WORK_STRUCT_CWQ) if (data & WORK_STRUCT_PWQ)
return (void *)(data & WORK_STRUCT_WQ_DATA_MASK); return (void *)(data & WORK_STRUCT_WQ_DATA_MASK);
else else
return NULL; return NULL;
} }
static struct global_cwq *get_work_gcwq(struct work_struct *work) /**
* get_work_pool - return the worker_pool a given work was associated with
* @work: the work item of interest
*
* Return the worker_pool @work was last associated with. %NULL if none.
*/
static struct worker_pool *get_work_pool(struct work_struct *work)
{ {
unsigned long data = atomic_long_read(&work->data); unsigned long data = atomic_long_read(&work->data);
unsigned int cpu; struct worker_pool *pool;
int pool_id;
if (data & WORK_STRUCT_CWQ) if (data & WORK_STRUCT_PWQ)
return ((struct cpu_workqueue_struct *) return ((struct pool_workqueue *)
(data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq; (data & WORK_STRUCT_WQ_DATA_MASK))->pool;
cpu = data >> WORK_OFFQ_CPU_SHIFT; pool_id = data >> WORK_OFFQ_POOL_SHIFT;
if (cpu == WORK_CPU_NONE) if (pool_id == WORK_OFFQ_POOL_NONE)
return NULL; return NULL;
BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND); pool = worker_pool_by_id(pool_id);
return get_gcwq(cpu); WARN_ON_ONCE(!pool);
return pool;
}
/**
* get_work_pool_id - return the worker pool ID a given work is associated with
* @work: the work item of interest
*
* Return the worker_pool ID @work was last associated with.
* %WORK_OFFQ_POOL_NONE if none.
*/
static int get_work_pool_id(struct work_struct *work)
{
unsigned long data = atomic_long_read(&work->data);
if (data & WORK_STRUCT_PWQ)
return ((struct pool_workqueue *)
(data & WORK_STRUCT_WQ_DATA_MASK))->pool->id;
return data >> WORK_OFFQ_POOL_SHIFT;
} }
static void mark_work_canceling(struct work_struct *work) static void mark_work_canceling(struct work_struct *work)
{ {
struct global_cwq *gcwq = get_work_gcwq(work); unsigned long pool_id = get_work_pool_id(work);
unsigned long cpu = gcwq ? gcwq->cpu : WORK_CPU_NONE;
set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING, pool_id <<= WORK_OFFQ_POOL_SHIFT;
WORK_STRUCT_PENDING); set_work_data(work, pool_id | WORK_OFFQ_CANCELING, WORK_STRUCT_PENDING);
} }
static bool work_is_canceling(struct work_struct *work) static bool work_is_canceling(struct work_struct *work)
{ {
unsigned long data = atomic_long_read(&work->data); unsigned long data = atomic_long_read(&work->data);
return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING); return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
} }
/* /*
* Policy functions. These define the policies on how the global worker * Policy functions. These define the policies on how the global worker
* pools are managed. Unless noted otherwise, these functions assume that * pools are managed. Unless noted otherwise, these functions assume that
* they're being called with gcwq->lock held. * they're being called with pool->lock held.
*/ */
static bool __need_more_worker(struct worker_pool *pool) static bool __need_more_worker(struct worker_pool *pool)
{ {
return !atomic_read(get_pool_nr_running(pool)); return !atomic_read(&pool->nr_running);
} }
/* /*
...@@ -642,7 +651,7 @@ static bool __need_more_worker(struct worker_pool *pool) ...@@ -642,7 +651,7 @@ static bool __need_more_worker(struct worker_pool *pool)
* running workers. * running workers.
* *
* Note that, because unbound workers never contribute to nr_running, this * Note that, because unbound workers never contribute to nr_running, this
* function will always return %true for unbound gcwq as long as the * function will always return %true for unbound pools as long as the
* worklist isn't empty. * worklist isn't empty.
*/ */
static bool need_more_worker(struct worker_pool *pool) static bool need_more_worker(struct worker_pool *pool)
...@@ -659,9 +668,8 @@ static bool may_start_working(struct worker_pool *pool) ...@@ -659,9 +668,8 @@ static bool may_start_working(struct worker_pool *pool)
/* Do I need to keep working? Called from currently running workers. */ /* Do I need to keep working? Called from currently running workers. */
static bool keep_working(struct worker_pool *pool) static bool keep_working(struct worker_pool *pool)
{ {
atomic_t *nr_running = get_pool_nr_running(pool); return !list_empty(&pool->worklist) &&
atomic_read(&pool->nr_running) <= 1;
return !list_empty(&pool->worklist) && atomic_read(nr_running) <= 1;
} }
/* Do we need a new worker? Called from manager. */ /* Do we need a new worker? Called from manager. */
...@@ -714,7 +722,7 @@ static struct worker *first_worker(struct worker_pool *pool) ...@@ -714,7 +722,7 @@ static struct worker *first_worker(struct worker_pool *pool)
* Wake up the first idle worker of @pool. * Wake up the first idle worker of @pool.
* *
* CONTEXT: * CONTEXT:
* spin_lock_irq(gcwq->lock). * spin_lock_irq(pool->lock).
*/ */
static void wake_up_worker(struct worker_pool *pool) static void wake_up_worker(struct worker_pool *pool)
{ {
...@@ -740,8 +748,8 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) ...@@ -740,8 +748,8 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu)
struct worker *worker = kthread_data(task); struct worker *worker = kthread_data(task);
if (!(worker->flags & WORKER_NOT_RUNNING)) { if (!(worker->flags & WORKER_NOT_RUNNING)) {
WARN_ON_ONCE(worker->pool->gcwq->cpu != cpu); WARN_ON_ONCE(worker->pool->cpu != cpu);
atomic_inc(get_pool_nr_running(worker->pool)); atomic_inc(&worker->pool->nr_running);
} }
} }
...@@ -764,12 +772,18 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, ...@@ -764,12 +772,18 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
unsigned int cpu) unsigned int cpu)
{ {
struct worker *worker = kthread_data(task), *to_wakeup = NULL; struct worker *worker = kthread_data(task), *to_wakeup = NULL;
struct worker_pool *pool = worker->pool; struct worker_pool *pool;
atomic_t *nr_running = get_pool_nr_running(pool);
/*
* Rescuers, which may not have all the fields set up like normal
* workers, also reach here, let's not access anything before
* checking NOT_RUNNING.
*/
if (worker->flags & WORKER_NOT_RUNNING) if (worker->flags & WORKER_NOT_RUNNING)
return NULL; return NULL;
pool = worker->pool;
/* this can only happen on the local cpu */ /* this can only happen on the local cpu */
BUG_ON(cpu != raw_smp_processor_id()); BUG_ON(cpu != raw_smp_processor_id());
...@@ -781,10 +795,11 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, ...@@ -781,10 +795,11 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
* NOT_RUNNING is clear. This means that we're bound to and * NOT_RUNNING is clear. This means that we're bound to and
* running on the local cpu w/ rq lock held and preemption * running on the local cpu w/ rq lock held and preemption
* disabled, which in turn means that none else could be * disabled, which in turn means that none else could be
* manipulating idle_list, so dereferencing idle_list without gcwq * manipulating idle_list, so dereferencing idle_list without pool
* lock is safe. * lock is safe.
*/ */
if (atomic_dec_and_test(nr_running) && !list_empty(&pool->worklist)) if (atomic_dec_and_test(&pool->nr_running) &&
!list_empty(&pool->worklist))
to_wakeup = first_worker(pool); to_wakeup = first_worker(pool);
return to_wakeup ? to_wakeup->task : NULL; return to_wakeup ? to_wakeup->task : NULL;
} }
...@@ -800,7 +815,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, ...@@ -800,7 +815,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task,
* woken up. * woken up.
* *
* CONTEXT: * CONTEXT:
* spin_lock_irq(gcwq->lock) * spin_lock_irq(pool->lock)
*/ */
static inline void worker_set_flags(struct worker *worker, unsigned int flags, static inline void worker_set_flags(struct worker *worker, unsigned int flags,
bool wakeup) bool wakeup)
...@@ -816,14 +831,12 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags, ...@@ -816,14 +831,12 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags,
*/ */
if ((flags & WORKER_NOT_RUNNING) && if ((flags & WORKER_NOT_RUNNING) &&
!(worker->flags & WORKER_NOT_RUNNING)) { !(worker->flags & WORKER_NOT_RUNNING)) {
atomic_t *nr_running = get_pool_nr_running(pool);
if (wakeup) { if (wakeup) {
if (atomic_dec_and_test(nr_running) && if (atomic_dec_and_test(&pool->nr_running) &&
!list_empty(&pool->worklist)) !list_empty(&pool->worklist))
wake_up_worker(pool); wake_up_worker(pool);
} else } else
atomic_dec(nr_running); atomic_dec(&pool->nr_running);
} }
worker->flags |= flags; worker->flags |= flags;
...@@ -837,7 +850,7 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags, ...@@ -837,7 +850,7 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags,
* Clear @flags in @worker->flags and adjust nr_running accordingly. * Clear @flags in @worker->flags and adjust nr_running accordingly.
* *
* CONTEXT: * CONTEXT:
* spin_lock_irq(gcwq->lock) * spin_lock_irq(pool->lock)
*/ */
static inline void worker_clr_flags(struct worker *worker, unsigned int flags) static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
{ {
...@@ -855,87 +868,56 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) ...@@ -855,87 +868,56 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
*/ */
if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
if (!(worker->flags & WORKER_NOT_RUNNING)) if (!(worker->flags & WORKER_NOT_RUNNING))
atomic_inc(get_pool_nr_running(pool)); atomic_inc(&pool->nr_running);
} }
/** /**
* busy_worker_head - return the busy hash head for a work * find_worker_executing_work - find worker which is executing a work
* @gcwq: gcwq of interest * @pool: pool of interest
* @work: work to be hashed
*
* Return hash head of @gcwq for @work.
*
* CONTEXT:
* spin_lock_irq(gcwq->lock).
*
* RETURNS:
* Pointer to the hash head.
*/
static struct hlist_head *busy_worker_head(struct global_cwq *gcwq,
struct work_struct *work)
{
const int base_shift = ilog2(sizeof(struct work_struct));
unsigned long v = (unsigned long)work;
/* simple shift and fold hash, do we need something better? */
v >>= base_shift;
v += v >> BUSY_WORKER_HASH_ORDER;
v &= BUSY_WORKER_HASH_MASK;
return &gcwq->busy_hash[v];
}
/**
* __find_worker_executing_work - find worker which is executing a work
* @gcwq: gcwq of interest
* @bwh: hash head as returned by busy_worker_head()
* @work: work to find worker for * @work: work to find worker for
* *
* Find a worker which is executing @work on @gcwq. @bwh should be * Find a worker which is executing @work on @pool by searching
* the hash head obtained by calling busy_worker_head() with the same * @pool->busy_hash which is keyed by the address of @work. For a worker
* work. * to match, its current execution should match the address of @work and
* its work function. This is to avoid unwanted dependency between
* unrelated work executions through a work item being recycled while still
* being executed.
*
* This is a bit tricky. A work item may be freed once its execution
* starts and nothing prevents the freed area from being recycled for
* another work item. If the same work item address ends up being reused
* before the original execution finishes, workqueue will identify the
* recycled work item as currently executing and make it wait until the
* current execution finishes, introducing an unwanted dependency.
*
* This function checks the work item address, work function and workqueue
* to avoid false positives. Note that this isn't complete as one may
* construct a work function which can introduce dependency onto itself
* through a recycled work item. Well, if somebody wants to shoot oneself
* in the foot that badly, there's only so much we can do, and if such
* deadlock actually occurs, it should be easy to locate the culprit work
* function.
* *
* CONTEXT: * CONTEXT:
* spin_lock_irq(gcwq->lock). * spin_lock_irq(pool->lock).
* *
* RETURNS: * RETURNS:
* Pointer to worker which is executing @work if found, NULL * Pointer to worker which is executing @work if found, NULL
* otherwise. * otherwise.
*/ */
static struct worker *__find_worker_executing_work(struct global_cwq *gcwq, static struct worker *find_worker_executing_work(struct worker_pool *pool,
struct hlist_head *bwh,
struct work_struct *work) struct work_struct *work)
{ {
struct worker *worker; struct worker *worker;
struct hlist_node *tmp; struct hlist_node *tmp;
hlist_for_each_entry(worker, tmp, bwh, hentry) hash_for_each_possible(pool->busy_hash, worker, tmp, hentry,
if (worker->current_work == work) (unsigned long)work)
if (worker->current_work == work &&
worker->current_func == work->func)
return worker; return worker;
return NULL;
}
/** return NULL;
* find_worker_executing_work - find worker which is executing a work
* @gcwq: gcwq of interest
* @work: work to find worker for
*
* Find a worker which is executing @work on @gcwq. This function is
* identical to __find_worker_executing_work() except that this
* function calculates @bwh itself.
*
* CONTEXT:
* spin_lock_irq(gcwq->lock).
*
* RETURNS:
* Pointer to worker which is executing @work if found, NULL
* otherwise.
*/
static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
struct work_struct *work)
{
return __find_worker_executing_work(gcwq, busy_worker_head(gcwq, work),
work);
} }
/** /**
...@@ -953,7 +935,7 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq, ...@@ -953,7 +935,7 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq,
* nested inside outer list_for_each_entry_safe(). * nested inside outer list_for_each_entry_safe().
* *
* CONTEXT: * CONTEXT:
* spin_lock_irq(gcwq->lock). * spin_lock_irq(pool->lock).
*/ */
static void move_linked_works(struct work_struct *work, struct list_head *head, static void move_linked_works(struct work_struct *work, struct list_head *head,
struct work_struct **nextp) struct work_struct **nextp)
...@@ -979,67 +961,67 @@ static void move_linked_works(struct work_struct *work, struct list_head *head, ...@@ -979,67 +961,67 @@ static void move_linked_works(struct work_struct *work, struct list_head *head,
*nextp = n; *nextp = n;
} }
static void cwq_activate_delayed_work(struct work_struct *work) static void pwq_activate_delayed_work(struct work_struct *work)
{ {
struct cpu_workqueue_struct *cwq = get_work_cwq(work); struct pool_workqueue *pwq = get_work_pwq(work);
trace_workqueue_activate_work(work); trace_workqueue_activate_work(work);
move_linked_works(work, &cwq->pool->worklist, NULL); move_linked_works(work, &pwq->pool->worklist, NULL);
__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
cwq->nr_active++; pwq->nr_active++;
} }
static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
{ {
struct work_struct *work = list_first_entry(&cwq->delayed_works, struct work_struct *work = list_first_entry(&pwq->delayed_works,
struct work_struct, entry); struct work_struct, entry);
cwq_activate_delayed_work(work); pwq_activate_delayed_work(work);
} }
/** /**
* cwq_dec_nr_in_flight - decrement cwq's nr_in_flight * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
* @cwq: cwq of interest * @pwq: pwq of interest
* @color: color of work which left the queue * @color: color of work which left the queue
* *
* A work either has completed or is removed from pending queue, * A work either has completed or is removed from pending queue,
* decrement nr_in_flight of its cwq and handle workqueue flushing. * decrement nr_in_flight of its pwq and handle workqueue flushing.
* *
* CONTEXT: * CONTEXT:
* spin_lock_irq(gcwq->lock). * spin_lock_irq(pool->lock).
*/ */
static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
{ {
/* ignore uncolored works */ /* ignore uncolored works */
if (color == WORK_NO_COLOR) if (color == WORK_NO_COLOR)
return; return;
cwq->nr_in_flight[color]--; pwq->nr_in_flight[color]--;
cwq->nr_active--; pwq->nr_active--;
if (!list_empty(&cwq->delayed_works)) { if (!list_empty(&pwq->delayed_works)) {
/* one down, submit a delayed one */ /* one down, submit a delayed one */
if (cwq->nr_active < cwq->max_active) if (pwq->nr_active < pwq->max_active)
cwq_activate_first_delayed(cwq); pwq_activate_first_delayed(pwq);
} }
/* is flush in progress and are we at the flushing tip? */ /* is flush in progress and are we at the flushing tip? */
if (likely(cwq->flush_color != color)) if (likely(pwq->flush_color != color))
return; return;
/* are there still in-flight works? */ /* are there still in-flight works? */
if (cwq->nr_in_flight[color]) if (pwq->nr_in_flight[color])
return; return;
/* this cwq is done, clear flush_color */ /* this pwq is done, clear flush_color */
cwq->flush_color = -1; pwq->flush_color = -1;
/* /*
* If this was the last cwq, wake up the first flusher. It * If this was the last pwq, wake up the first flusher. It
* will handle the rest. * will handle the rest.
*/ */
if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush)) if (atomic_dec_and_test(&pwq->wq->nr_pwqs_to_flush))
complete(&cwq->wq->first_flusher->done); complete(&pwq->wq->first_flusher->done);
} }
/** /**
...@@ -1070,7 +1052,8 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) ...@@ -1070,7 +1052,8 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
static int try_to_grab_pending(struct work_struct *work, bool is_dwork, static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
unsigned long *flags) unsigned long *flags)
{ {
struct global_cwq *gcwq; struct worker_pool *pool;
struct pool_workqueue *pwq;
local_irq_save(*flags); local_irq_save(*flags);
...@@ -1095,41 +1078,43 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, ...@@ -1095,41 +1078,43 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
* The queueing is in progress, or it is already queued. Try to * The queueing is in progress, or it is already queued. Try to
* steal it from ->worklist without clearing WORK_STRUCT_PENDING. * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
*/ */
gcwq = get_work_gcwq(work); pool = get_work_pool(work);
if (!gcwq) if (!pool)
goto fail; goto fail;
spin_lock(&gcwq->lock); spin_lock(&pool->lock);
if (!list_empty(&work->entry)) {
/* /*
* This work is queued, but perhaps we locked the wrong gcwq. * work->data is guaranteed to point to pwq only while the work
* In that case we must see the new value after rmb(), see * item is queued on pwq->wq, and both updating work->data to point
* insert_work()->wmb(). * to pwq on queueing and to pool on dequeueing are done under
* pwq->pool->lock. This in turn guarantees that, if work->data
* points to pwq which is associated with a locked pool, the work
* item is currently queued on that pool.
*/ */
smp_rmb(); pwq = get_work_pwq(work);
if (gcwq == get_work_gcwq(work)) { if (pwq && pwq->pool == pool) {
debug_work_deactivate(work); debug_work_deactivate(work);
/* /*
* A delayed work item cannot be grabbed directly * A delayed work item cannot be grabbed directly because
* because it might have linked NO_COLOR work items * it might have linked NO_COLOR work items which, if left
* which, if left on the delayed_list, will confuse * on the delayed_list, will confuse pwq->nr_active
* cwq->nr_active management later on and cause * management later on and cause stall. Make sure the work
* stall. Make sure the work item is activated * item is activated before grabbing.
* before grabbing.
*/ */
if (*work_data_bits(work) & WORK_STRUCT_DELAYED) if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
cwq_activate_delayed_work(work); pwq_activate_delayed_work(work);
list_del_init(&work->entry); list_del_init(&work->entry);
cwq_dec_nr_in_flight(get_work_cwq(work), pwq_dec_nr_in_flight(get_work_pwq(work), get_work_color(work));
get_work_color(work));
spin_unlock(&gcwq->lock); /* work->data points to pwq iff queued, point to pool */
set_work_pool_and_keep_pending(work, pool->id);
spin_unlock(&pool->lock);
return 1; return 1;
} }
} spin_unlock(&pool->lock);
spin_unlock(&gcwq->lock);
fail: fail:
local_irq_restore(*flags); local_irq_restore(*flags);
if (work_is_canceling(work)) if (work_is_canceling(work))
...@@ -1139,33 +1124,25 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork, ...@@ -1139,33 +1124,25 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
} }
/** /**
* insert_work - insert a work into gcwq * insert_work - insert a work into a pool
* @cwq: cwq @work belongs to * @pwq: pwq @work belongs to
* @work: work to insert * @work: work to insert
* @head: insertion point * @head: insertion point
* @extra_flags: extra WORK_STRUCT_* flags to set * @extra_flags: extra WORK_STRUCT_* flags to set
* *
* Insert @work which belongs to @cwq into @gcwq after @head. * Insert @work which belongs to @pwq after @head. @extra_flags is or'd to
* @extra_flags is or'd to work_struct flags. * work_struct flags.
* *
* CONTEXT: * CONTEXT:
* spin_lock_irq(gcwq->lock). * spin_lock_irq(pool->lock).
*/ */
static void insert_work(struct cpu_workqueue_struct *cwq, static void insert_work(struct pool_workqueue *pwq, struct work_struct *work,
struct work_struct *work, struct list_head *head, struct list_head *head, unsigned int extra_flags)
unsigned int extra_flags)
{ {
struct worker_pool *pool = cwq->pool; struct worker_pool *pool = pwq->pool;
/* we own @work, set data and link */ /* we own @work, set data and link */
set_work_cwq(work, cwq, extra_flags); set_work_pwq(work, pwq, extra_flags);
/*
* Ensure that we get the right work->data if we see the
* result of list_add() below, see try_to_grab_pending().
*/
smp_wmb();
list_add_tail(&work->entry, head); list_add_tail(&work->entry, head);
/* /*
...@@ -1181,41 +1158,24 @@ static void insert_work(struct cpu_workqueue_struct *cwq, ...@@ -1181,41 +1158,24 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
/* /*
* Test whether @work is being queued from another work executing on the * Test whether @work is being queued from another work executing on the
* same workqueue. This is rather expensive and should only be used from * same workqueue.
* cold paths.
*/ */
static bool is_chained_work(struct workqueue_struct *wq) static bool is_chained_work(struct workqueue_struct *wq)
{ {
unsigned long flags;
unsigned int cpu;
for_each_gcwq_cpu(cpu) {
struct global_cwq *gcwq = get_gcwq(cpu);
struct worker *worker; struct worker *worker;
struct hlist_node *pos;
int i;
spin_lock_irqsave(&gcwq->lock, flags); worker = current_wq_worker();
for_each_busy_worker(worker, i, pos, gcwq) {
if (worker->task != current)
continue;
spin_unlock_irqrestore(&gcwq->lock, flags);
/* /*
* I'm @worker, no locking necessary. See if @work * Return %true iff I'm a worker execuing a work item on @wq. If
* is headed to the same workqueue. * I'm @worker, it's safe to dereference it without locking.
*/ */
return worker->current_cwq->wq == wq; return worker && worker->current_pwq->wq == wq;
}
spin_unlock_irqrestore(&gcwq->lock, flags);
}
return false;
} }
static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
struct work_struct *work) struct work_struct *work)
{ {
struct global_cwq *gcwq; struct pool_workqueue *pwq;
struct cpu_workqueue_struct *cwq;
struct list_head *worklist; struct list_head *worklist;
unsigned int work_flags; unsigned int work_flags;
unsigned int req_cpu = cpu; unsigned int req_cpu = cpu;
...@@ -1235,9 +1195,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, ...@@ -1235,9 +1195,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
WARN_ON_ONCE(!is_chained_work(wq))) WARN_ON_ONCE(!is_chained_work(wq)))
return; return;
/* determine gcwq to use */ /* determine the pwq to use */
if (!(wq->flags & WQ_UNBOUND)) { if (!(wq->flags & WQ_UNBOUND)) {
struct global_cwq *last_gcwq; struct worker_pool *last_pool;
if (cpu == WORK_CPU_UNBOUND) if (cpu == WORK_CPU_UNBOUND)
cpu = raw_smp_processor_id(); cpu = raw_smp_processor_id();
...@@ -1248,55 +1208,54 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, ...@@ -1248,55 +1208,54 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
* work needs to be queued on that cpu to guarantee * work needs to be queued on that cpu to guarantee
* non-reentrancy. * non-reentrancy.
*/ */
gcwq = get_gcwq(cpu); pwq = get_pwq(cpu, wq);
last_gcwq = get_work_gcwq(work); last_pool = get_work_pool(work);
if (last_gcwq && last_gcwq != gcwq) { if (last_pool && last_pool != pwq->pool) {
struct worker *worker; struct worker *worker;
spin_lock(&last_gcwq->lock); spin_lock(&last_pool->lock);
worker = find_worker_executing_work(last_gcwq, work); worker = find_worker_executing_work(last_pool, work);
if (worker && worker->current_cwq->wq == wq) if (worker && worker->current_pwq->wq == wq) {
gcwq = last_gcwq; pwq = get_pwq(last_pool->cpu, wq);
else { } else {
/* meh... not running there, queue here */ /* meh... not running there, queue here */
spin_unlock(&last_gcwq->lock); spin_unlock(&last_pool->lock);
spin_lock(&gcwq->lock); spin_lock(&pwq->pool->lock);
} }
} else { } else {
spin_lock(&gcwq->lock); spin_lock(&pwq->pool->lock);
} }
} else { } else {
gcwq = get_gcwq(WORK_CPU_UNBOUND); pwq = get_pwq(WORK_CPU_UNBOUND, wq);
spin_lock(&gcwq->lock); spin_lock(&pwq->pool->lock);
} }
/* gcwq determined, get cwq and queue */ /* pwq determined, queue */
cwq = get_cwq(gcwq->cpu, wq); trace_workqueue_queue_work(req_cpu, pwq, work);
trace_workqueue_queue_work(req_cpu, cwq, work);
if (WARN_ON(!list_empty(&work->entry))) { if (WARN_ON(!list_empty(&work->entry))) {
spin_unlock(&gcwq->lock); spin_unlock(&pwq->pool->lock);
return; return;
} }
cwq->nr_in_flight[cwq->work_color]++; pwq->nr_in_flight[pwq->work_color]++;
work_flags = work_color_to_flags(cwq->work_color); work_flags = work_color_to_flags(pwq->work_color);
if (likely(cwq->nr_active < cwq->max_active)) { if (likely(pwq->nr_active < pwq->max_active)) {
trace_workqueue_activate_work(work); trace_workqueue_activate_work(work);
cwq->nr_active++; pwq->nr_active++;
worklist = &cwq->pool->worklist; worklist = &pwq->pool->worklist;
} else { } else {
work_flags |= WORK_STRUCT_DELAYED; work_flags |= WORK_STRUCT_DELAYED;
worklist = &cwq->delayed_works; worklist = &pwq->delayed_works;
} }
insert_work(cwq, work, worklist, work_flags); insert_work(pwq, work, worklist, work_flags);
spin_unlock(&gcwq->lock); spin_unlock(&pwq->pool->lock);
} }
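For reference, a hedged usage sketch of the queueing entry points that funnel into __queue_work(); example_work, example_fn and both workqueue pointers are hypothetical.

#include <linux/workqueue.h>

static void example_fn(struct work_struct *work) { }
static DECLARE_WORK(example_work, example_fn);

static void example_queueing(struct workqueue_struct *bound_wq,
			     struct workqueue_struct *unbound_wq)
{
	/* bound wq: __queue_work() picks the pwq of the local CPU */
	queue_work(bound_wq, &example_work);

	/* explicit CPU: the pwq of CPU 1 is used, subject to the
	 * non-reentrancy check against the work's last pool */
	queue_work_on(1, bound_wq, &example_work);

	/* unbound wq: always the WORK_CPU_UNBOUND pwq */
	queue_work(unbound_wq, &example_work);

	/* only the first call above actually queues; the later ones return
	 * %false while WORK_STRUCT_PENDING is still set on @example_work */
}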
/** /**
...@@ -1347,19 +1306,17 @@ EXPORT_SYMBOL_GPL(queue_work); ...@@ -1347,19 +1306,17 @@ EXPORT_SYMBOL_GPL(queue_work);
void delayed_work_timer_fn(unsigned long __data) void delayed_work_timer_fn(unsigned long __data)
{ {
struct delayed_work *dwork = (struct delayed_work *)__data; struct delayed_work *dwork = (struct delayed_work *)__data;
struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
/* should have been called from irqsafe timer with irq already off */ /* should have been called from irqsafe timer with irq already off */
__queue_work(dwork->cpu, cwq->wq, &dwork->work); __queue_work(dwork->cpu, dwork->wq, &dwork->work);
} }
EXPORT_SYMBOL_GPL(delayed_work_timer_fn); EXPORT_SYMBOL(delayed_work_timer_fn);
static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
struct delayed_work *dwork, unsigned long delay) struct delayed_work *dwork, unsigned long delay)
{ {
struct timer_list *timer = &dwork->timer; struct timer_list *timer = &dwork->timer;
struct work_struct *work = &dwork->work; struct work_struct *work = &dwork->work;
unsigned int lcpu;
WARN_ON_ONCE(timer->function != delayed_work_timer_fn || WARN_ON_ONCE(timer->function != delayed_work_timer_fn ||
timer->data != (unsigned long)dwork); timer->data != (unsigned long)dwork);
...@@ -1379,30 +1336,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, ...@@ -1379,30 +1336,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
timer_stats_timer_set_start_info(&dwork->timer); timer_stats_timer_set_start_info(&dwork->timer);
/* dwork->wq = wq;
* This stores cwq for the moment, for the timer_fn. Note that the
* work's gcwq is preserved to allow reentrance detection for
* delayed works.
*/
if (!(wq->flags & WQ_UNBOUND)) {
struct global_cwq *gcwq = get_work_gcwq(work);
/*
* If we cannot get the last gcwq from @work directly,
* select the last CPU such that it avoids unnecessarily
* triggering non-reentrancy check in __queue_work().
*/
lcpu = cpu;
if (gcwq)
lcpu = gcwq->cpu;
if (lcpu == WORK_CPU_UNBOUND)
lcpu = raw_smp_processor_id();
} else {
lcpu = WORK_CPU_UNBOUND;
}
set_work_cwq(work, get_cwq(lcpu, wq), 0);
dwork->cpu = cpu; dwork->cpu = cpu;
timer->expires = jiffies + delay; timer->expires = jiffies + delay;
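A short sketch of the delayed-work side, with hypothetical names: after this change the target workqueue is simply recorded in dwork->wq for delayed_work_timer_fn() to use, instead of being encoded via a cwq in work->data.

#include <linux/workqueue.h>
#include <linux/jiffies.h>

static void example_dwork_fn(struct work_struct *work) { }
static DECLARE_DELAYED_WORK(example_dwork, example_dwork_fn);

static void example_delayed(struct workqueue_struct *wq)
{
	/* arms dwork->timer; on expiry delayed_work_timer_fn() calls
	 * __queue_work(dwork->cpu, dwork->wq, &dwork->work) */
	queue_delayed_work(wq, &example_dwork, msecs_to_jiffies(100));

	/* postpone it; the stored dwork->wq is reused by the timer path */
	mod_delayed_work(wq, &example_dwork, msecs_to_jiffies(500));
}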
...@@ -1519,12 +1453,11 @@ EXPORT_SYMBOL_GPL(mod_delayed_work); ...@@ -1519,12 +1453,11 @@ EXPORT_SYMBOL_GPL(mod_delayed_work);
* necessary. * necessary.
* *
* LOCKING: * LOCKING:
* spin_lock_irq(gcwq->lock). * spin_lock_irq(pool->lock).
*/ */
static void worker_enter_idle(struct worker *worker) static void worker_enter_idle(struct worker *worker)
{ {
struct worker_pool *pool = worker->pool; struct worker_pool *pool = worker->pool;
struct global_cwq *gcwq = pool->gcwq;
BUG_ON(worker->flags & WORKER_IDLE); BUG_ON(worker->flags & WORKER_IDLE);
BUG_ON(!list_empty(&worker->entry) && BUG_ON(!list_empty(&worker->entry) &&
...@@ -1542,14 +1475,14 @@ static void worker_enter_idle(struct worker *worker) ...@@ -1542,14 +1475,14 @@ static void worker_enter_idle(struct worker *worker)
mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
/* /*
* Sanity check nr_running. Because gcwq_unbind_fn() releases * Sanity check nr_running. Because wq_unbind_fn() releases
* gcwq->lock between setting %WORKER_UNBOUND and zapping * pool->lock between setting %WORKER_UNBOUND and zapping
* nr_running, the warning may trigger spuriously. Check iff * nr_running, the warning may trigger spuriously. Check iff
* unbind is not in progress. * unbind is not in progress.
*/ */
WARN_ON_ONCE(!(gcwq->flags & GCWQ_DISASSOCIATED) && WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
pool->nr_workers == pool->nr_idle && pool->nr_workers == pool->nr_idle &&
atomic_read(get_pool_nr_running(pool))); atomic_read(&pool->nr_running));
} }
/** /**
...@@ -1559,7 +1492,7 @@ static void worker_enter_idle(struct worker *worker) ...@@ -1559,7 +1492,7 @@ static void worker_enter_idle(struct worker *worker)
* @worker is leaving idle state. Update stats. * @worker is leaving idle state. Update stats.
* *
* LOCKING: * LOCKING:
* spin_lock_irq(gcwq->lock). * spin_lock_irq(pool->lock).
*/ */
static void worker_leave_idle(struct worker *worker) static void worker_leave_idle(struct worker *worker)
{ {
...@@ -1572,7 +1505,7 @@ static void worker_leave_idle(struct worker *worker) ...@@ -1572,7 +1505,7 @@ static void worker_leave_idle(struct worker *worker)
} }
/** /**
* worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock gcwq * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock pool
* @worker: self * @worker: self
* *
* Works which are scheduled while the cpu is online must at least be * Works which are scheduled while the cpu is online must at least be
...@@ -1584,27 +1517,27 @@ static void worker_leave_idle(struct worker *worker) ...@@ -1584,27 +1517,27 @@ static void worker_leave_idle(struct worker *worker)
* themselves to the target cpu and may race with cpu going down or * themselves to the target cpu and may race with cpu going down or
* coming online. kthread_bind() can't be used because it may put the * coming online. kthread_bind() can't be used because it may put the
 * worker on an already dead cpu and set_cpus_allowed_ptr() can't be used * worker on an already dead cpu and set_cpus_allowed_ptr() can't be used
* verbatim as it's best effort and blocking and gcwq may be * verbatim as it's best effort and blocking and pool may be
* [dis]associated in the meantime. * [dis]associated in the meantime.
* *
* This function tries set_cpus_allowed() and locks gcwq and verifies the * This function tries set_cpus_allowed() and locks pool and verifies the
* binding against %GCWQ_DISASSOCIATED which is set during * binding against %POOL_DISASSOCIATED which is set during
* %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker * %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker
* enters idle state or fetches works without dropping lock, it can * enters idle state or fetches works without dropping lock, it can
* guarantee the scheduling requirement described in the first paragraph. * guarantee the scheduling requirement described in the first paragraph.
* *
* CONTEXT: * CONTEXT:
* Might sleep. Called without any lock but returns with gcwq->lock * Might sleep. Called without any lock but returns with pool->lock
* held. * held.
* *
* RETURNS: * RETURNS:
* %true if the associated gcwq is online (@worker is successfully * %true if the associated pool is online (@worker is successfully
* bound), %false if offline. * bound), %false if offline.
*/ */
static bool worker_maybe_bind_and_lock(struct worker *worker) static bool worker_maybe_bind_and_lock(struct worker *worker)
__acquires(&gcwq->lock) __acquires(&pool->lock)
{ {
struct global_cwq *gcwq = worker->pool->gcwq; struct worker_pool *pool = worker->pool;
struct task_struct *task = worker->task; struct task_struct *task = worker->task;
while (true) { while (true) {
...@@ -1612,19 +1545,19 @@ __acquires(&gcwq->lock) ...@@ -1612,19 +1545,19 @@ __acquires(&gcwq->lock)
* The following call may fail, succeed or succeed * The following call may fail, succeed or succeed
* without actually migrating the task to the cpu if * without actually migrating the task to the cpu if
* it races with cpu hotunplug operation. Verify * it races with cpu hotunplug operation. Verify
* against GCWQ_DISASSOCIATED. * against POOL_DISASSOCIATED.
*/ */
if (!(gcwq->flags & GCWQ_DISASSOCIATED)) if (!(pool->flags & POOL_DISASSOCIATED))
set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu)); set_cpus_allowed_ptr(task, get_cpu_mask(pool->cpu));
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
if (gcwq->flags & GCWQ_DISASSOCIATED) if (pool->flags & POOL_DISASSOCIATED)
return false; return false;
if (task_cpu(task) == gcwq->cpu && if (task_cpu(task) == pool->cpu &&
cpumask_equal(&current->cpus_allowed, cpumask_equal(&current->cpus_allowed,
get_cpu_mask(gcwq->cpu))) get_cpu_mask(pool->cpu)))
return true; return true;
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
/* /*
* We've raced with CPU hot[un]plug. Give it a breather * We've raced with CPU hot[un]plug. Give it a breather
...@@ -1643,15 +1576,13 @@ __acquires(&gcwq->lock) ...@@ -1643,15 +1576,13 @@ __acquires(&gcwq->lock)
*/ */
static void idle_worker_rebind(struct worker *worker) static void idle_worker_rebind(struct worker *worker)
{ {
struct global_cwq *gcwq = worker->pool->gcwq;
 /* CPU may go down again in between, clear UNBOUND only on success */ /* CPU may go down again in between, clear UNBOUND only on success */
if (worker_maybe_bind_and_lock(worker)) if (worker_maybe_bind_and_lock(worker))
worker_clr_flags(worker, WORKER_UNBOUND); worker_clr_flags(worker, WORKER_UNBOUND);
/* rebind complete, become available again */ /* rebind complete, become available again */
list_add(&worker->entry, &worker->pool->idle_list); list_add(&worker->entry, &worker->pool->idle_list);
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&worker->pool->lock);
} }
/* /*
...@@ -1663,19 +1594,18 @@ static void idle_worker_rebind(struct worker *worker) ...@@ -1663,19 +1594,18 @@ static void idle_worker_rebind(struct worker *worker)
static void busy_worker_rebind_fn(struct work_struct *work) static void busy_worker_rebind_fn(struct work_struct *work)
{ {
struct worker *worker = container_of(work, struct worker, rebind_work); struct worker *worker = container_of(work, struct worker, rebind_work);
struct global_cwq *gcwq = worker->pool->gcwq;
if (worker_maybe_bind_and_lock(worker)) if (worker_maybe_bind_and_lock(worker))
worker_clr_flags(worker, WORKER_UNBOUND); worker_clr_flags(worker, WORKER_UNBOUND);
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&worker->pool->lock);
} }
/** /**
* rebind_workers - rebind all workers of a gcwq to the associated CPU * rebind_workers - rebind all workers of a pool to the associated CPU
* @gcwq: gcwq of interest * @pool: pool of interest
* *
* @gcwq->cpu is coming online. Rebind all workers to the CPU. Rebinding * @pool->cpu is coming online. Rebind all workers to the CPU. Rebinding
* is different for idle and busy ones. * is different for idle and busy ones.
* *
* Idle ones will be removed from the idle_list and woken up. They will * Idle ones will be removed from the idle_list and woken up. They will
...@@ -1693,38 +1623,32 @@ static void busy_worker_rebind_fn(struct work_struct *work) ...@@ -1693,38 +1623,32 @@ static void busy_worker_rebind_fn(struct work_struct *work)
* including the manager will not appear on @idle_list until rebind is * including the manager will not appear on @idle_list until rebind is
* complete, making local wake-ups safe. * complete, making local wake-ups safe.
*/ */
static void rebind_workers(struct global_cwq *gcwq) static void rebind_workers(struct worker_pool *pool)
{ {
struct worker_pool *pool;
struct worker *worker, *n; struct worker *worker, *n;
struct hlist_node *pos; struct hlist_node *pos;
int i; int i;
lockdep_assert_held(&gcwq->lock);
for_each_worker_pool(pool, gcwq)
lockdep_assert_held(&pool->assoc_mutex); lockdep_assert_held(&pool->assoc_mutex);
lockdep_assert_held(&pool->lock);
/* dequeue and kick idle ones */ /* dequeue and kick idle ones */
for_each_worker_pool(pool, gcwq) {
list_for_each_entry_safe(worker, n, &pool->idle_list, entry) { list_for_each_entry_safe(worker, n, &pool->idle_list, entry) {
/* /*
* idle workers should be off @pool->idle_list * idle workers should be off @pool->idle_list until rebind
* until rebind is complete to avoid receiving * is complete to avoid receiving premature local wake-ups.
* premature local wake-ups.
*/ */
list_del_init(&worker->entry); list_del_init(&worker->entry);
/* /*
* worker_thread() will see the above dequeuing * worker_thread() will see the above dequeuing and call
* and call idle_worker_rebind(). * idle_worker_rebind().
*/ */
wake_up_process(worker->task); wake_up_process(worker->task);
} }
}
/* rebind busy workers */ /* rebind busy workers */
for_each_busy_worker(worker, i, pos, gcwq) { for_each_busy_worker(worker, i, pos, pool) {
struct work_struct *rebind_work = &worker->rebind_work; struct work_struct *rebind_work = &worker->rebind_work;
struct workqueue_struct *wq; struct workqueue_struct *wq;
...@@ -1736,14 +1660,14 @@ static void rebind_workers(struct global_cwq *gcwq) ...@@ -1736,14 +1660,14 @@ static void rebind_workers(struct global_cwq *gcwq)
/* /*
* wq doesn't really matter but let's keep @worker->pool * wq doesn't really matter but let's keep @worker->pool
* and @cwq->pool consistent for sanity. * and @pwq->pool consistent for sanity.
*/ */
if (worker_pool_pri(worker->pool)) if (std_worker_pool_pri(worker->pool))
wq = system_highpri_wq; wq = system_highpri_wq;
else else
wq = system_wq; wq = system_wq;
insert_work(get_cwq(gcwq->cpu, wq), rebind_work, insert_work(get_pwq(pool->cpu, wq), rebind_work,
worker->scheduled.next, worker->scheduled.next,
work_color_to_flags(WORK_NO_COLOR)); work_color_to_flags(WORK_NO_COLOR));
} }
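As a side note, a hedged sketch of the two standard system workqueues the rebind path leans on here; example_fn and both work items are hypothetical.

#include <linux/workqueue.h>

static void example_fn(struct work_struct *work) { }
static DECLARE_WORK(example_normal, example_fn);
static DECLARE_WORK(example_high, example_fn);

static void example_std_pools(void)
{
	/* executed by the normal-priority standard pool of some CPU */
	queue_work(system_wq, &example_normal);

	/* executed by the HIGHPRI standard pool (workers run at
	 * HIGHPRI_NICE_LEVEL), matching std_worker_pool_pri() above */
	queue_work(system_highpri_wq, &example_high);
}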
...@@ -1780,19 +1704,18 @@ static struct worker *alloc_worker(void) ...@@ -1780,19 +1704,18 @@ static struct worker *alloc_worker(void)
*/ */
static struct worker *create_worker(struct worker_pool *pool) static struct worker *create_worker(struct worker_pool *pool)
{ {
struct global_cwq *gcwq = pool->gcwq; const char *pri = std_worker_pool_pri(pool) ? "H" : "";
const char *pri = worker_pool_pri(pool) ? "H" : "";
struct worker *worker = NULL; struct worker *worker = NULL;
int id = -1; int id = -1;
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
while (ida_get_new(&pool->worker_ida, &id)) { while (ida_get_new(&pool->worker_ida, &id)) {
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
if (!ida_pre_get(&pool->worker_ida, GFP_KERNEL)) if (!ida_pre_get(&pool->worker_ida, GFP_KERNEL))
goto fail; goto fail;
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
} }
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
worker = alloc_worker(); worker = alloc_worker();
if (!worker) if (!worker)
...@@ -1801,30 +1724,30 @@ static struct worker *create_worker(struct worker_pool *pool) ...@@ -1801,30 +1724,30 @@ static struct worker *create_worker(struct worker_pool *pool)
worker->pool = pool; worker->pool = pool;
worker->id = id; worker->id = id;
if (gcwq->cpu != WORK_CPU_UNBOUND) if (pool->cpu != WORK_CPU_UNBOUND)
worker->task = kthread_create_on_node(worker_thread, worker->task = kthread_create_on_node(worker_thread,
worker, cpu_to_node(gcwq->cpu), worker, cpu_to_node(pool->cpu),
"kworker/%u:%d%s", gcwq->cpu, id, pri); "kworker/%u:%d%s", pool->cpu, id, pri);
else else
worker->task = kthread_create(worker_thread, worker, worker->task = kthread_create(worker_thread, worker,
"kworker/u:%d%s", id, pri); "kworker/u:%d%s", id, pri);
if (IS_ERR(worker->task)) if (IS_ERR(worker->task))
goto fail; goto fail;
if (worker_pool_pri(pool)) if (std_worker_pool_pri(pool))
set_user_nice(worker->task, HIGHPRI_NICE_LEVEL); set_user_nice(worker->task, HIGHPRI_NICE_LEVEL);
/* /*
* Determine CPU binding of the new worker depending on * Determine CPU binding of the new worker depending on
* %GCWQ_DISASSOCIATED. The caller is responsible for ensuring the * %POOL_DISASSOCIATED. The caller is responsible for ensuring the
* flag remains stable across this function. See the comments * flag remains stable across this function. See the comments
* above the flag definition for details. * above the flag definition for details.
* *
* As an unbound worker may later become a regular one if CPU comes * As an unbound worker may later become a regular one if CPU comes
* online, make sure every worker has %PF_THREAD_BOUND set. * online, make sure every worker has %PF_THREAD_BOUND set.
*/ */
if (!(gcwq->flags & GCWQ_DISASSOCIATED)) { if (!(pool->flags & POOL_DISASSOCIATED)) {
kthread_bind(worker->task, gcwq->cpu); kthread_bind(worker->task, pool->cpu);
} else { } else {
worker->task->flags |= PF_THREAD_BOUND; worker->task->flags |= PF_THREAD_BOUND;
worker->flags |= WORKER_UNBOUND; worker->flags |= WORKER_UNBOUND;
...@@ -1833,9 +1756,9 @@ static struct worker *create_worker(struct worker_pool *pool) ...@@ -1833,9 +1756,9 @@ static struct worker *create_worker(struct worker_pool *pool)
return worker; return worker;
fail: fail:
if (id >= 0) { if (id >= 0) {
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
ida_remove(&pool->worker_ida, id); ida_remove(&pool->worker_ida, id);
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
} }
kfree(worker); kfree(worker);
return NULL; return NULL;
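A rough sketch of the same node-affine kthread pattern create_worker() uses for bound workers, in case the reader wants it outside workqueue code; all names are hypothetical and this is not part of the patch.

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/topology.h>
#include <linux/err.h>

static int example_threadfn(void *data)
{
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

static struct task_struct *example_spawn_bound(int cpu)
{
	struct task_struct *task;

	/* allocate the task_struct/stack on @cpu's node, as create_worker()
	 * does for bound workers */
	task = kthread_create_on_node(example_threadfn, NULL,
				      cpu_to_node(cpu), "example/%d", cpu);
	if (IS_ERR(task))
		return NULL;
	kthread_bind(task, cpu);	/* analogous to the !POOL_DISASSOCIATED path */
	wake_up_process(task);
	return task;
}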
...@@ -1845,10 +1768,10 @@ static struct worker *create_worker(struct worker_pool *pool) ...@@ -1845,10 +1768,10 @@ static struct worker *create_worker(struct worker_pool *pool)
* start_worker - start a newly created worker * start_worker - start a newly created worker
* @worker: worker to start * @worker: worker to start
* *
* Make the gcwq aware of @worker and start it. * Make the pool aware of @worker and start it.
* *
* CONTEXT: * CONTEXT:
* spin_lock_irq(gcwq->lock). * spin_lock_irq(pool->lock).
*/ */
static void start_worker(struct worker *worker) static void start_worker(struct worker *worker)
{ {
...@@ -1862,15 +1785,14 @@ static void start_worker(struct worker *worker) ...@@ -1862,15 +1785,14 @@ static void start_worker(struct worker *worker)
* destroy_worker - destroy a workqueue worker * destroy_worker - destroy a workqueue worker
* @worker: worker to be destroyed * @worker: worker to be destroyed
* *
* Destroy @worker and adjust @gcwq stats accordingly. * Destroy @worker and adjust @pool stats accordingly.
* *
* CONTEXT: * CONTEXT:
* spin_lock_irq(gcwq->lock) which is released and regrabbed. * spin_lock_irq(pool->lock) which is released and regrabbed.
*/ */
static void destroy_worker(struct worker *worker) static void destroy_worker(struct worker *worker)
{ {
struct worker_pool *pool = worker->pool; struct worker_pool *pool = worker->pool;
struct global_cwq *gcwq = pool->gcwq;
int id = worker->id; int id = worker->id;
/* sanity check frenzy */ /* sanity check frenzy */
...@@ -1885,21 +1807,20 @@ static void destroy_worker(struct worker *worker) ...@@ -1885,21 +1807,20 @@ static void destroy_worker(struct worker *worker)
list_del_init(&worker->entry); list_del_init(&worker->entry);
worker->flags |= WORKER_DIE; worker->flags |= WORKER_DIE;
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
kthread_stop(worker->task); kthread_stop(worker->task);
kfree(worker); kfree(worker);
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
ida_remove(&pool->worker_ida, id); ida_remove(&pool->worker_ida, id);
} }
static void idle_worker_timeout(unsigned long __pool) static void idle_worker_timeout(unsigned long __pool)
{ {
struct worker_pool *pool = (void *)__pool; struct worker_pool *pool = (void *)__pool;
struct global_cwq *gcwq = pool->gcwq;
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
if (too_many_workers(pool)) { if (too_many_workers(pool)) {
struct worker *worker; struct worker *worker;
...@@ -1918,20 +1839,20 @@ static void idle_worker_timeout(unsigned long __pool) ...@@ -1918,20 +1839,20 @@ static void idle_worker_timeout(unsigned long __pool)
} }
} }
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
} }
static bool send_mayday(struct work_struct *work) static bool send_mayday(struct work_struct *work)
{ {
struct cpu_workqueue_struct *cwq = get_work_cwq(work); struct pool_workqueue *pwq = get_work_pwq(work);
struct workqueue_struct *wq = cwq->wq; struct workqueue_struct *wq = pwq->wq;
unsigned int cpu; unsigned int cpu;
if (!(wq->flags & WQ_RESCUER)) if (!(wq->flags & WQ_RESCUER))
return false; return false;
/* mayday mayday mayday */ /* mayday mayday mayday */
cpu = cwq->pool->gcwq->cpu; cpu = pwq->pool->cpu;
/* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */ /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */
if (cpu == WORK_CPU_UNBOUND) if (cpu == WORK_CPU_UNBOUND)
cpu = 0; cpu = 0;
...@@ -1940,13 +1861,12 @@ static bool send_mayday(struct work_struct *work) ...@@ -1940,13 +1861,12 @@ static bool send_mayday(struct work_struct *work)
return true; return true;
} }
static void gcwq_mayday_timeout(unsigned long __pool) static void pool_mayday_timeout(unsigned long __pool)
{ {
struct worker_pool *pool = (void *)__pool; struct worker_pool *pool = (void *)__pool;
struct global_cwq *gcwq = pool->gcwq;
struct work_struct *work; struct work_struct *work;
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
if (need_to_create_worker(pool)) { if (need_to_create_worker(pool)) {
/* /*
...@@ -1959,7 +1879,7 @@ static void gcwq_mayday_timeout(unsigned long __pool) ...@@ -1959,7 +1879,7 @@ static void gcwq_mayday_timeout(unsigned long __pool)
send_mayday(work); send_mayday(work);
} }
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL); mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL);
} }
...@@ -1978,24 +1898,22 @@ static void gcwq_mayday_timeout(unsigned long __pool) ...@@ -1978,24 +1898,22 @@ static void gcwq_mayday_timeout(unsigned long __pool)
* may_start_working() true. * may_start_working() true.
* *
* LOCKING: * LOCKING:
* spin_lock_irq(gcwq->lock) which may be released and regrabbed * spin_lock_irq(pool->lock) which may be released and regrabbed
* multiple times. Does GFP_KERNEL allocations. Called only from * multiple times. Does GFP_KERNEL allocations. Called only from
* manager. * manager.
* *
* RETURNS: * RETURNS:
* false if no action was taken and gcwq->lock stayed locked, true * false if no action was taken and pool->lock stayed locked, true
* otherwise. * otherwise.
*/ */
static bool maybe_create_worker(struct worker_pool *pool) static bool maybe_create_worker(struct worker_pool *pool)
__releases(&gcwq->lock) __releases(&pool->lock)
__acquires(&gcwq->lock) __acquires(&pool->lock)
{ {
struct global_cwq *gcwq = pool->gcwq;
if (!need_to_create_worker(pool)) if (!need_to_create_worker(pool))
return false; return false;
restart: restart:
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
/* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */ /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */
mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
...@@ -2006,7 +1924,7 @@ __acquires(&gcwq->lock) ...@@ -2006,7 +1924,7 @@ __acquires(&gcwq->lock)
worker = create_worker(pool); worker = create_worker(pool);
if (worker) { if (worker) {
del_timer_sync(&pool->mayday_timer); del_timer_sync(&pool->mayday_timer);
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
start_worker(worker); start_worker(worker);
BUG_ON(need_to_create_worker(pool)); BUG_ON(need_to_create_worker(pool));
return true; return true;
...@@ -2023,7 +1941,7 @@ __acquires(&gcwq->lock) ...@@ -2023,7 +1941,7 @@ __acquires(&gcwq->lock)
} }
del_timer_sync(&pool->mayday_timer); del_timer_sync(&pool->mayday_timer);
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
if (need_to_create_worker(pool)) if (need_to_create_worker(pool))
goto restart; goto restart;
return true; return true;
...@@ -2037,11 +1955,11 @@ __acquires(&gcwq->lock) ...@@ -2037,11 +1955,11 @@ __acquires(&gcwq->lock)
* IDLE_WORKER_TIMEOUT. * IDLE_WORKER_TIMEOUT.
* *
* LOCKING: * LOCKING:
* spin_lock_irq(gcwq->lock) which may be released and regrabbed * spin_lock_irq(pool->lock) which may be released and regrabbed
* multiple times. Called only from manager. * multiple times. Called only from manager.
* *
* RETURNS: * RETURNS:
* false if no action was taken and gcwq->lock stayed locked, true * false if no action was taken and pool->lock stayed locked, true
* otherwise. * otherwise.
*/ */
static bool maybe_destroy_workers(struct worker_pool *pool) static bool maybe_destroy_workers(struct worker_pool *pool)
...@@ -2071,21 +1989,21 @@ static bool maybe_destroy_workers(struct worker_pool *pool) ...@@ -2071,21 +1989,21 @@ static bool maybe_destroy_workers(struct worker_pool *pool)
* manage_workers - manage worker pool * manage_workers - manage worker pool
* @worker: self * @worker: self
* *
* Assume the manager role and manage gcwq worker pool @worker belongs * Assume the manager role and manage the worker pool @worker belongs
* to. At any given time, there can be only zero or one manager per * to. At any given time, there can be only zero or one manager per
* gcwq. The exclusion is handled automatically by this function. * pool. The exclusion is handled automatically by this function.
* *
* The caller can safely start processing works on false return. On * The caller can safely start processing works on false return. On
* true return, it's guaranteed that need_to_create_worker() is false * true return, it's guaranteed that need_to_create_worker() is false
* and may_start_working() is true. * and may_start_working() is true.
* *
* CONTEXT: * CONTEXT:
* spin_lock_irq(gcwq->lock) which may be released and regrabbed * spin_lock_irq(pool->lock) which may be released and regrabbed
* multiple times. Does GFP_KERNEL allocations. * multiple times. Does GFP_KERNEL allocations.
* *
* RETURNS: * RETURNS:
 * false if no action was taken and gcwq->lock stayed locked, true if * false if no action was taken and pool->lock stayed locked, true if
 * some action was taken. * some action was taken.
*/ */
static bool manage_workers(struct worker *worker) static bool manage_workers(struct worker *worker)
{ {
...@@ -2107,20 +2025,20 @@ static bool manage_workers(struct worker *worker) ...@@ -2107,20 +2025,20 @@ static bool manage_workers(struct worker *worker)
* manager against CPU hotplug. * manager against CPU hotplug.
* *
* assoc_mutex would always be free unless CPU hotplug is in * assoc_mutex would always be free unless CPU hotplug is in
* progress. trylock first without dropping @gcwq->lock. * progress. trylock first without dropping @pool->lock.
*/ */
if (unlikely(!mutex_trylock(&pool->assoc_mutex))) { if (unlikely(!mutex_trylock(&pool->assoc_mutex))) {
spin_unlock_irq(&pool->gcwq->lock); spin_unlock_irq(&pool->lock);
mutex_lock(&pool->assoc_mutex); mutex_lock(&pool->assoc_mutex);
/* /*
* CPU hotplug could have happened while we were waiting * CPU hotplug could have happened while we were waiting
* for assoc_mutex. Hotplug itself can't handle us * for assoc_mutex. Hotplug itself can't handle us
 * because manager isn't either on idle or busy list, and * because the manager is on neither the idle nor the busy list, and
* @gcwq's state and ours could have deviated. * @pool's state and ours could have deviated.
* *
* As hotplug is now excluded via assoc_mutex, we can * As hotplug is now excluded via assoc_mutex, we can
* simply try to bind. It will succeed or fail depending * simply try to bind. It will succeed or fail depending
* on @gcwq's current state. Try it and adjust * on @pool's current state. Try it and adjust
* %WORKER_UNBOUND accordingly. * %WORKER_UNBOUND accordingly.
*/ */
if (worker_maybe_bind_and_lock(worker)) if (worker_maybe_bind_and_lock(worker))
...@@ -2157,18 +2075,15 @@ static bool manage_workers(struct worker *worker) ...@@ -2157,18 +2075,15 @@ static bool manage_workers(struct worker *worker)
* call this function to process a work. * call this function to process a work.
* *
* CONTEXT: * CONTEXT:
* spin_lock_irq(gcwq->lock) which is released and regrabbed. * spin_lock_irq(pool->lock) which is released and regrabbed.
*/ */
static void process_one_work(struct worker *worker, struct work_struct *work) static void process_one_work(struct worker *worker, struct work_struct *work)
__releases(&gcwq->lock) __releases(&pool->lock)
__acquires(&gcwq->lock) __acquires(&pool->lock)
{ {
struct cpu_workqueue_struct *cwq = get_work_cwq(work); struct pool_workqueue *pwq = get_work_pwq(work);
struct worker_pool *pool = worker->pool; struct worker_pool *pool = worker->pool;
struct global_cwq *gcwq = pool->gcwq; bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
struct hlist_head *bwh = busy_worker_head(gcwq, work);
bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE;
work_func_t f = work->func;
int work_color; int work_color;
struct worker *collision; struct worker *collision;
#ifdef CONFIG_LOCKDEP #ifdef CONFIG_LOCKDEP
...@@ -2186,11 +2101,11 @@ __acquires(&gcwq->lock) ...@@ -2186,11 +2101,11 @@ __acquires(&gcwq->lock)
/* /*
* Ensure we're on the correct CPU. DISASSOCIATED test is * Ensure we're on the correct CPU. DISASSOCIATED test is
* necessary to avoid spurious warnings from rescuers servicing the * necessary to avoid spurious warnings from rescuers servicing the
* unbound or a disassociated gcwq. * unbound or a disassociated pool.
*/ */
WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) && WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) &&
!(gcwq->flags & GCWQ_DISASSOCIATED) && !(pool->flags & POOL_DISASSOCIATED) &&
raw_smp_processor_id() != gcwq->cpu); raw_smp_processor_id() != pool->cpu);
/* /*
* A single work shouldn't be executed concurrently by * A single work shouldn't be executed concurrently by
...@@ -2198,7 +2113,7 @@ __acquires(&gcwq->lock) ...@@ -2198,7 +2113,7 @@ __acquires(&gcwq->lock)
* already processing the work. If so, defer the work to the * already processing the work. If so, defer the work to the
* currently executing one. * currently executing one.
*/ */
collision = __find_worker_executing_work(gcwq, bwh, work); collision = find_worker_executing_work(pool, work);
if (unlikely(collision)) { if (unlikely(collision)) {
move_linked_works(work, &collision->scheduled, NULL); move_linked_works(work, &collision->scheduled, NULL);
return; return;
...@@ -2206,9 +2121,10 @@ __acquires(&gcwq->lock) ...@@ -2206,9 +2121,10 @@ __acquires(&gcwq->lock)
/* claim and dequeue */ /* claim and dequeue */
debug_work_deactivate(work); debug_work_deactivate(work);
hlist_add_head(&worker->hentry, bwh); hash_add(pool->busy_hash, &worker->hentry, (unsigned long)work);
worker->current_work = work; worker->current_work = work;
worker->current_cwq = cwq; worker->current_func = work->func;
worker->current_pwq = pwq;
work_color = get_work_color(work); work_color = get_work_color(work);
list_del_init(&work->entry); list_del_init(&work->entry);
...@@ -2221,53 +2137,55 @@ __acquires(&gcwq->lock) ...@@ -2221,53 +2137,55 @@ __acquires(&gcwq->lock)
worker_set_flags(worker, WORKER_CPU_INTENSIVE, true); worker_set_flags(worker, WORKER_CPU_INTENSIVE, true);
/* /*
* Unbound gcwq isn't concurrency managed and work items should be * Unbound pool isn't concurrency managed and work items should be
* executed ASAP. Wake up another worker if necessary. * executed ASAP. Wake up another worker if necessary.
*/ */
if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool)) if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool))
wake_up_worker(pool); wake_up_worker(pool);
/* /*
* Record the last CPU and clear PENDING which should be the last * Record the last pool and clear PENDING which should be the last
* update to @work. Also, do this inside @gcwq->lock so that * update to @work. Also, do this inside @pool->lock so that
* PENDING and queued state changes happen together while IRQ is * PENDING and queued state changes happen together while IRQ is
* disabled. * disabled.
*/ */
set_work_cpu_and_clear_pending(work, gcwq->cpu); set_work_pool_and_clear_pending(work, pool->id);
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
lock_map_acquire_read(&cwq->wq->lockdep_map); lock_map_acquire_read(&pwq->wq->lockdep_map);
lock_map_acquire(&lockdep_map); lock_map_acquire(&lockdep_map);
trace_workqueue_execute_start(work); trace_workqueue_execute_start(work);
f(work); worker->current_func(work);
/* /*
* While we must be careful to not use "work" after this, the trace * While we must be careful to not use "work" after this, the trace
* point will only record its address. * point will only record its address.
*/ */
trace_workqueue_execute_end(work); trace_workqueue_execute_end(work);
lock_map_release(&lockdep_map); lock_map_release(&lockdep_map);
lock_map_release(&cwq->wq->lockdep_map); lock_map_release(&pwq->wq->lockdep_map);
if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n" pr_err("BUG: workqueue leaked lock or atomic: %s/0x%08x/%d\n"
" last function: %pf\n", " last function: %pf\n",
current->comm, preempt_count(), task_pid_nr(current), f); current->comm, preempt_count(), task_pid_nr(current),
worker->current_func);
debug_show_held_locks(current); debug_show_held_locks(current);
dump_stack(); dump_stack();
} }
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
/* clear cpu intensive status */ /* clear cpu intensive status */
if (unlikely(cpu_intensive)) if (unlikely(cpu_intensive))
worker_clr_flags(worker, WORKER_CPU_INTENSIVE); worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
/* we're done with it, release */ /* we're done with it, release */
hlist_del_init(&worker->hentry); hash_del(&worker->hentry);
worker->current_work = NULL; worker->current_work = NULL;
worker->current_cwq = NULL; worker->current_func = NULL;
cwq_dec_nr_in_flight(cwq, work_color); worker->current_pwq = NULL;
pwq_dec_nr_in_flight(pwq, work_color);
} }
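For orientation, a typical work item as seen from the other side of process_one_work(); struct example_req and its helpers are hypothetical.

#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/kernel.h>

struct example_req {
	int payload;
	struct work_struct work;
};

static void example_req_fn(struct work_struct *work)
{
	/* invoked above as worker->current_func(work) */
	struct example_req *req = container_of(work, struct example_req, work);

	pr_info("handling payload %d\n", req->payload);
	kfree(req);	/* freeing the work item here is fine; the tracepoint
			 * after the callback only records its address */
}

static int example_submit(struct workqueue_struct *wq, int payload)
{
	struct example_req *req = kzalloc(sizeof(*req), GFP_KERNEL);

	if (!req)
		return -ENOMEM;
	req->payload = payload;
	INIT_WORK(&req->work, example_req_fn);
	queue_work(wq, &req->work);
	return 0;
}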
/** /**
...@@ -2279,7 +2197,7 @@ __acquires(&gcwq->lock) ...@@ -2279,7 +2197,7 @@ __acquires(&gcwq->lock)
* fetches a work from the top and executes it. * fetches a work from the top and executes it.
* *
* CONTEXT: * CONTEXT:
* spin_lock_irq(gcwq->lock) which may be released and regrabbed * spin_lock_irq(pool->lock) which may be released and regrabbed
* multiple times. * multiple times.
*/ */
static void process_scheduled_works(struct worker *worker) static void process_scheduled_works(struct worker *worker)
...@@ -2295,8 +2213,8 @@ static void process_scheduled_works(struct worker *worker) ...@@ -2295,8 +2213,8 @@ static void process_scheduled_works(struct worker *worker)
* worker_thread - the worker thread function * worker_thread - the worker thread function
* @__worker: self * @__worker: self
* *
* The gcwq worker thread function. There's a single dynamic pool of * The worker thread function. There are NR_CPU_WORKER_POOLS dynamic pools
 * these per each cpu. These workers process all works regardless of * of these per CPU. These workers process all works regardless of
* their specific target workqueue. The only exception is works which * their specific target workqueue. The only exception is works which
* belong to workqueues with a rescuer which will be explained in * belong to workqueues with a rescuer which will be explained in
* rescuer_thread(). * rescuer_thread().
...@@ -2305,16 +2223,15 @@ static int worker_thread(void *__worker) ...@@ -2305,16 +2223,15 @@ static int worker_thread(void *__worker)
{ {
struct worker *worker = __worker; struct worker *worker = __worker;
struct worker_pool *pool = worker->pool; struct worker_pool *pool = worker->pool;
struct global_cwq *gcwq = pool->gcwq;
/* tell the scheduler that this is a workqueue worker */ /* tell the scheduler that this is a workqueue worker */
worker->task->flags |= PF_WQ_WORKER; worker->task->flags |= PF_WQ_WORKER;
woke_up: woke_up:
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
/* we are off idle list if destruction or rebind is requested */ /* we are off idle list if destruction or rebind is requested */
if (unlikely(list_empty(&worker->entry))) { if (unlikely(list_empty(&worker->entry))) {
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
/* if DIE is set, destruction is requested */ /* if DIE is set, destruction is requested */
if (worker->flags & WORKER_DIE) { if (worker->flags & WORKER_DIE) {
...@@ -2373,52 +2290,59 @@ static int worker_thread(void *__worker) ...@@ -2373,52 +2290,59 @@ static int worker_thread(void *__worker)
goto recheck; goto recheck;
/* /*
* gcwq->lock is held and there's no work to process and no * pool->lock is held and there's no work to process and no need to
* need to manage, sleep. Workers are woken up only while * manage, sleep. Workers are woken up only while holding
* holding gcwq->lock or from local cpu, so setting the * pool->lock or from local cpu, so setting the current state
* current state before releasing gcwq->lock is enough to * before releasing pool->lock is enough to prevent losing any
* prevent losing any event. * event.
*/ */
worker_enter_idle(worker); worker_enter_idle(worker);
__set_current_state(TASK_INTERRUPTIBLE); __set_current_state(TASK_INTERRUPTIBLE);
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
schedule(); schedule();
goto woke_up; goto woke_up;
} }
/** /**
* rescuer_thread - the rescuer thread function * rescuer_thread - the rescuer thread function
* @__wq: the associated workqueue * @__rescuer: self
* *
* Workqueue rescuer thread function. There's one rescuer for each * Workqueue rescuer thread function. There's one rescuer for each
* workqueue which has WQ_RESCUER set. * workqueue which has WQ_RESCUER set.
* *
* Regular work processing on a gcwq may block trying to create a new * Regular work processing on a pool may block trying to create a new
 * worker which uses GFP_KERNEL allocation which has a slight chance of * worker which uses GFP_KERNEL allocation which has a slight chance of
* developing into deadlock if some works currently on the same queue * developing into deadlock if some works currently on the same queue
* need to be processed to satisfy the GFP_KERNEL allocation. This is * need to be processed to satisfy the GFP_KERNEL allocation. This is
* the problem rescuer solves. * the problem rescuer solves.
* *
* When such condition is possible, the gcwq summons rescuers of all * When such condition is possible, the pool summons rescuers of all
* workqueues which have works queued on the gcwq and let them process * workqueues which have works queued on the pool and let them process
* those works so that forward progress can be guaranteed. * those works so that forward progress can be guaranteed.
* *
* This should happen rarely. * This should happen rarely.
*/ */
static int rescuer_thread(void *__wq) static int rescuer_thread(void *__rescuer)
{ {
struct workqueue_struct *wq = __wq; struct worker *rescuer = __rescuer;
struct worker *rescuer = wq->rescuer; struct workqueue_struct *wq = rescuer->rescue_wq;
struct list_head *scheduled = &rescuer->scheduled; struct list_head *scheduled = &rescuer->scheduled;
bool is_unbound = wq->flags & WQ_UNBOUND; bool is_unbound = wq->flags & WQ_UNBOUND;
unsigned int cpu; unsigned int cpu;
set_user_nice(current, RESCUER_NICE_LEVEL); set_user_nice(current, RESCUER_NICE_LEVEL);
/*
* Mark rescuer as worker too. As WORKER_PREP is never cleared, it
* doesn't participate in concurrency management.
*/
rescuer->task->flags |= PF_WQ_WORKER;
repeat: repeat:
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
if (kthread_should_stop()) { if (kthread_should_stop()) {
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
rescuer->task->flags &= ~PF_WQ_WORKER;
return 0; return 0;
} }
...@@ -2428,9 +2352,8 @@ static int rescuer_thread(void *__wq) ...@@ -2428,9 +2352,8 @@ static int rescuer_thread(void *__wq)
*/ */
for_each_mayday_cpu(cpu, wq->mayday_mask) { for_each_mayday_cpu(cpu, wq->mayday_mask) {
unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu; unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu;
struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq); struct pool_workqueue *pwq = get_pwq(tcpu, wq);
struct worker_pool *pool = cwq->pool; struct worker_pool *pool = pwq->pool;
struct global_cwq *gcwq = pool->gcwq;
struct work_struct *work, *n; struct work_struct *work, *n;
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
...@@ -2446,22 +2369,24 @@ static int rescuer_thread(void *__wq) ...@@ -2446,22 +2369,24 @@ static int rescuer_thread(void *__wq)
*/ */
BUG_ON(!list_empty(&rescuer->scheduled)); BUG_ON(!list_empty(&rescuer->scheduled));
list_for_each_entry_safe(work, n, &pool->worklist, entry) list_for_each_entry_safe(work, n, &pool->worklist, entry)
if (get_work_cwq(work) == cwq) if (get_work_pwq(work) == pwq)
move_linked_works(work, scheduled, &n); move_linked_works(work, scheduled, &n);
process_scheduled_works(rescuer); process_scheduled_works(rescuer);
/* /*
* Leave this gcwq. If keep_working() is %true, notify a * Leave this pool. If keep_working() is %true, notify a
* regular worker; otherwise, we end up with 0 concurrency * regular worker; otherwise, we end up with 0 concurrency
 * and stall the execution. * and stall the execution.
*/ */
if (keep_working(pool)) if (keep_working(pool))
wake_up_worker(pool); wake_up_worker(pool);
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
} }
/* rescuers should never participate in concurrency management */
WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING));
schedule(); schedule();
goto repeat; goto repeat;
} }
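A hedged allocation sketch for a workqueue that gets such a rescuer; the name example_reclaim_wq is hypothetical.

#include <linux/workqueue.h>

static struct workqueue_struct *example_reclaim_wq;

static int example_init(void)
{
	/*
	 * WQ_MEM_RECLAIM makes __alloc_workqueue_key() attach a rescuer
	 * (WQ_RESCUER), so work items on the memory-reclaim path can make
	 * progress even when creating a new worker would need GFP_KERNEL.
	 */
	example_reclaim_wq = alloc_workqueue("example_reclaim",
					     WQ_MEM_RECLAIM, 1);
	return example_reclaim_wq ? 0 : -ENOMEM;
}

static void example_exit(void)
{
	destroy_workqueue(example_reclaim_wq);
}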
...@@ -2479,7 +2404,7 @@ static void wq_barrier_func(struct work_struct *work) ...@@ -2479,7 +2404,7 @@ static void wq_barrier_func(struct work_struct *work)
/** /**
* insert_wq_barrier - insert a barrier work * insert_wq_barrier - insert a barrier work
* @cwq: cwq to insert barrier into * @pwq: pwq to insert barrier into
* @barr: wq_barrier to insert * @barr: wq_barrier to insert
* @target: target work to attach @barr to * @target: target work to attach @barr to
* @worker: worker currently executing @target, NULL if @target is not executing * @worker: worker currently executing @target, NULL if @target is not executing
...@@ -2496,12 +2421,12 @@ static void wq_barrier_func(struct work_struct *work) ...@@ -2496,12 +2421,12 @@ static void wq_barrier_func(struct work_struct *work)
* after a work with LINKED flag set. * after a work with LINKED flag set.
* *
* Note that when @worker is non-NULL, @target may be modified * Note that when @worker is non-NULL, @target may be modified
* underneath us, so we can't reliably determine cwq from @target. * underneath us, so we can't reliably determine pwq from @target.
* *
* CONTEXT: * CONTEXT:
* spin_lock_irq(gcwq->lock). * spin_lock_irq(pool->lock).
*/ */
static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, static void insert_wq_barrier(struct pool_workqueue *pwq,
struct wq_barrier *barr, struct wq_barrier *barr,
struct work_struct *target, struct worker *worker) struct work_struct *target, struct worker *worker)
{ {
...@@ -2509,7 +2434,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, ...@@ -2509,7 +2434,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
unsigned int linked = 0; unsigned int linked = 0;
/* /*
* debugobject calls are safe here even with gcwq->lock locked * debugobject calls are safe here even with pool->lock locked
* as we know for sure that this will not trigger any of the * as we know for sure that this will not trigger any of the
* checks and call back into the fixup functions where we * checks and call back into the fixup functions where we
* might deadlock. * might deadlock.
...@@ -2534,23 +2459,23 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, ...@@ -2534,23 +2459,23 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
} }
debug_work_activate(&barr->work); debug_work_activate(&barr->work);
insert_work(cwq, &barr->work, head, insert_work(pwq, &barr->work, head,
work_color_to_flags(WORK_NO_COLOR) | linked); work_color_to_flags(WORK_NO_COLOR) | linked);
} }
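The barrier machinery above is what a plain flush_work() call rides on; a minimal sketch with hypothetical names.

#include <linux/workqueue.h>

static void example_fn(struct work_struct *work) { }
static DECLARE_WORK(example_work, example_fn);

static void example_wait_for_one(struct workqueue_struct *wq)
{
	queue_work(wq, &example_work);

	/*
	 * start_flush_work() inserts a wq_barrier either right after
	 * @example_work on its pwq or onto the executing worker's
	 * ->scheduled list, then waits for the barrier to complete.
	 */
	flush_work(&example_work);
}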
/** /**
* flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing * flush_workqueue_prep_pwqs - prepare pwqs for workqueue flushing
* @wq: workqueue being flushed * @wq: workqueue being flushed
* @flush_color: new flush color, < 0 for no-op * @flush_color: new flush color, < 0 for no-op
* @work_color: new work color, < 0 for no-op * @work_color: new work color, < 0 for no-op
* *
* Prepare cwqs for workqueue flushing. * Prepare pwqs for workqueue flushing.
* *
* If @flush_color is non-negative, flush_color on all cwqs should be * If @flush_color is non-negative, flush_color on all pwqs should be
* -1. If no cwq has in-flight commands at the specified color, all * -1. If no pwq has in-flight commands at the specified color, all
* cwq->flush_color's stay at -1 and %false is returned. If any cwq * pwq->flush_color's stay at -1 and %false is returned. If any pwq
* has in flight commands, its cwq->flush_color is set to * has in flight commands, its pwq->flush_color is set to
* @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq * @flush_color, @wq->nr_pwqs_to_flush is updated accordingly, pwq
* wakeup logic is armed and %true is returned. * wakeup logic is armed and %true is returned.
* *
* The caller should have initialized @wq->first_flusher prior to * The caller should have initialized @wq->first_flusher prior to
...@@ -2558,7 +2483,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, ...@@ -2558,7 +2483,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
* @flush_color is negative, no flush color update is done and %false * @flush_color is negative, no flush color update is done and %false
* is returned. * is returned.
* *
* If @work_color is non-negative, all cwqs should have the same * If @work_color is non-negative, all pwqs should have the same
* work_color which is previous to @work_color and all will be * work_color which is previous to @work_color and all will be
* advanced to @work_color. * advanced to @work_color.
* *
...@@ -2569,42 +2494,42 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, ...@@ -2569,42 +2494,42 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
* %true if @flush_color >= 0 and there's something to flush. %false * %true if @flush_color >= 0 and there's something to flush. %false
* otherwise. * otherwise.
*/ */
static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq, static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
int flush_color, int work_color) int flush_color, int work_color)
{ {
bool wait = false; bool wait = false;
unsigned int cpu; unsigned int cpu;
if (flush_color >= 0) { if (flush_color >= 0) {
BUG_ON(atomic_read(&wq->nr_cwqs_to_flush)); BUG_ON(atomic_read(&wq->nr_pwqs_to_flush));
atomic_set(&wq->nr_cwqs_to_flush, 1); atomic_set(&wq->nr_pwqs_to_flush, 1);
} }
for_each_cwq_cpu(cpu, wq) { for_each_pwq_cpu(cpu, wq) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct pool_workqueue *pwq = get_pwq(cpu, wq);
struct global_cwq *gcwq = cwq->pool->gcwq; struct worker_pool *pool = pwq->pool;
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
if (flush_color >= 0) { if (flush_color >= 0) {
BUG_ON(cwq->flush_color != -1); BUG_ON(pwq->flush_color != -1);
if (cwq->nr_in_flight[flush_color]) { if (pwq->nr_in_flight[flush_color]) {
cwq->flush_color = flush_color; pwq->flush_color = flush_color;
atomic_inc(&wq->nr_cwqs_to_flush); atomic_inc(&wq->nr_pwqs_to_flush);
wait = true; wait = true;
} }
} }
if (work_color >= 0) { if (work_color >= 0) {
BUG_ON(work_color != work_next_color(cwq->work_color)); BUG_ON(work_color != work_next_color(pwq->work_color));
cwq->work_color = work_color; pwq->work_color = work_color;
} }
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
} }
if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush)) if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
complete(&wq->first_flusher->done); complete(&wq->first_flusher->done);
return wait; return wait;
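From the caller's side all of this is hidden behind flush_workqueue(); a one-call sketch, with @wq assumed to be any live workqueue.

#include <linux/workqueue.h>

static void example_flush_point(struct workqueue_struct *wq)
{
	/*
	 * Waits for every work item queued on @wq before this call; the
	 * implementation advances wq->work_color and uses
	 * flush_workqueue_prep_pwqs() to wait until each pwq has retired
	 * the previous flush color.
	 */
	flush_workqueue(wq);
}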
...@@ -2655,7 +2580,7 @@ void flush_workqueue(struct workqueue_struct *wq) ...@@ -2655,7 +2580,7 @@ void flush_workqueue(struct workqueue_struct *wq)
wq->first_flusher = &this_flusher; wq->first_flusher = &this_flusher;
if (!flush_workqueue_prep_cwqs(wq, wq->flush_color, if (!flush_workqueue_prep_pwqs(wq, wq->flush_color,
wq->work_color)) { wq->work_color)) {
/* nothing to flush, done */ /* nothing to flush, done */
wq->flush_color = next_color; wq->flush_color = next_color;
...@@ -2666,7 +2591,7 @@ void flush_workqueue(struct workqueue_struct *wq) ...@@ -2666,7 +2591,7 @@ void flush_workqueue(struct workqueue_struct *wq)
/* wait in queue */ /* wait in queue */
BUG_ON(wq->flush_color == this_flusher.flush_color); BUG_ON(wq->flush_color == this_flusher.flush_color);
list_add_tail(&this_flusher.list, &wq->flusher_queue); list_add_tail(&this_flusher.list, &wq->flusher_queue);
flush_workqueue_prep_cwqs(wq, -1, wq->work_color); flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
} }
} else { } else {
/* /*
...@@ -2733,7 +2658,7 @@ void flush_workqueue(struct workqueue_struct *wq) ...@@ -2733,7 +2658,7 @@ void flush_workqueue(struct workqueue_struct *wq)
list_splice_tail_init(&wq->flusher_overflow, list_splice_tail_init(&wq->flusher_overflow,
&wq->flusher_queue); &wq->flusher_queue);
flush_workqueue_prep_cwqs(wq, -1, wq->work_color); flush_workqueue_prep_pwqs(wq, -1, wq->work_color);
} }
if (list_empty(&wq->flusher_queue)) { if (list_empty(&wq->flusher_queue)) {
...@@ -2743,7 +2668,7 @@ void flush_workqueue(struct workqueue_struct *wq) ...@@ -2743,7 +2668,7 @@ void flush_workqueue(struct workqueue_struct *wq)
/* /*
* Need to flush more colors. Make the next flusher * Need to flush more colors. Make the next flusher
* the new first flusher and arm cwqs. * the new first flusher and arm pwqs.
*/ */
BUG_ON(wq->flush_color == wq->work_color); BUG_ON(wq->flush_color == wq->work_color);
BUG_ON(wq->flush_color != next->flush_color); BUG_ON(wq->flush_color != next->flush_color);
...@@ -2751,7 +2676,7 @@ void flush_workqueue(struct workqueue_struct *wq) ...@@ -2751,7 +2676,7 @@ void flush_workqueue(struct workqueue_struct *wq)
list_del_init(&next->list); list_del_init(&next->list);
wq->first_flusher = next; wq->first_flusher = next;
if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1)) if (flush_workqueue_prep_pwqs(wq, wq->flush_color, -1))
break; break;
/* /*
...@@ -2794,13 +2719,13 @@ void drain_workqueue(struct workqueue_struct *wq) ...@@ -2794,13 +2719,13 @@ void drain_workqueue(struct workqueue_struct *wq)
reflush: reflush:
flush_workqueue(wq); flush_workqueue(wq);
for_each_cwq_cpu(cpu, wq) { for_each_pwq_cpu(cpu, wq) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct pool_workqueue *pwq = get_pwq(cpu, wq);
bool drained; bool drained;
spin_lock_irq(&cwq->pool->gcwq->lock); spin_lock_irq(&pwq->pool->lock);
drained = !cwq->nr_active && list_empty(&cwq->delayed_works); drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
spin_unlock_irq(&cwq->pool->gcwq->lock); spin_unlock_irq(&pwq->pool->lock);
if (drained) if (drained)
continue; continue;
...@@ -2822,34 +2747,29 @@ EXPORT_SYMBOL_GPL(drain_workqueue); ...@@ -2822,34 +2747,29 @@ EXPORT_SYMBOL_GPL(drain_workqueue);
static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
{ {
struct worker *worker = NULL; struct worker *worker = NULL;
struct global_cwq *gcwq; struct worker_pool *pool;
struct cpu_workqueue_struct *cwq; struct pool_workqueue *pwq;
might_sleep(); might_sleep();
gcwq = get_work_gcwq(work); pool = get_work_pool(work);
if (!gcwq) if (!pool)
return false; return false;
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
if (!list_empty(&work->entry)) { /* see the comment in try_to_grab_pending() with the same code */
/* pwq = get_work_pwq(work);
* See the comment near try_to_grab_pending()->smp_rmb(). if (pwq) {
* If it was re-queued to a different gcwq under us, we if (unlikely(pwq->pool != pool))
* are not going to wait.
*/
smp_rmb();
cwq = get_work_cwq(work);
if (unlikely(!cwq || gcwq != cwq->pool->gcwq))
goto already_gone; goto already_gone;
} else { } else {
worker = find_worker_executing_work(gcwq, work); worker = find_worker_executing_work(pool, work);
if (!worker) if (!worker)
goto already_gone; goto already_gone;
cwq = worker->current_cwq; pwq = worker->current_pwq;
} }
insert_wq_barrier(cwq, barr, work, worker); insert_wq_barrier(pwq, barr, work, worker);
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
/* /*
* If @max_active is 1 or rescuer is in use, flushing another work * If @max_active is 1 or rescuer is in use, flushing another work
...@@ -2857,15 +2777,15 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr) ...@@ -2857,15 +2777,15 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
* flusher is not running on the same workqueue by verifying write * flusher is not running on the same workqueue by verifying write
* access. * access.
*/ */
if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER) if (pwq->wq->saved_max_active == 1 || pwq->wq->flags & WQ_RESCUER)
lock_map_acquire(&cwq->wq->lockdep_map); lock_map_acquire(&pwq->wq->lockdep_map);
else else
lock_map_acquire_read(&cwq->wq->lockdep_map); lock_map_acquire_read(&pwq->wq->lockdep_map);
lock_map_release(&cwq->wq->lockdep_map); lock_map_release(&pwq->wq->lockdep_map);
return true; return true;
already_gone: already_gone:
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
return false; return false;
} }
...@@ -2961,8 +2881,7 @@ bool flush_delayed_work(struct delayed_work *dwork) ...@@ -2961,8 +2881,7 @@ bool flush_delayed_work(struct delayed_work *dwork)
{ {
local_irq_disable(); local_irq_disable();
if (del_timer_sync(&dwork->timer)) if (del_timer_sync(&dwork->timer))
__queue_work(dwork->cpu, __queue_work(dwork->cpu, dwork->wq, &dwork->work);
get_work_cwq(&dwork->work)->wq, &dwork->work);
local_irq_enable(); local_irq_enable();
return flush_work(&dwork->work); return flush_work(&dwork->work);
} }
...@@ -2992,7 +2911,8 @@ bool cancel_delayed_work(struct delayed_work *dwork) ...@@ -2992,7 +2911,8 @@ bool cancel_delayed_work(struct delayed_work *dwork)
if (unlikely(ret < 0)) if (unlikely(ret < 0))
return false; return false;
set_work_cpu_and_clear_pending(&dwork->work, work_cpu(&dwork->work)); set_work_pool_and_clear_pending(&dwork->work,
get_work_pool_id(&dwork->work));
local_irq_restore(flags); local_irq_restore(flags);
return ret; return ret;
} }
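A small usage sketch contrasting the two cancel flavours; example_dwork and its callback are hypothetical.

#include <linux/workqueue.h>

static void example_dwork_fn(struct work_struct *work) { }
static DECLARE_DELAYED_WORK(example_dwork, example_dwork_fn);

static void example_cancel_atomic(void)
{
	/* usable from atomic context: kills the timer or dequeues the work,
	 * but does not wait for a callback that is already running */
	cancel_delayed_work(&example_dwork);
}

static void example_cancel_sync(void)
{
	/* may sleep: additionally waits until a running callback finishes */
	cancel_delayed_work_sync(&example_dwork);
}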
...@@ -3171,46 +3091,46 @@ int keventd_up(void) ...@@ -3171,46 +3091,46 @@ int keventd_up(void)
return system_wq != NULL; return system_wq != NULL;
} }
static int alloc_cwqs(struct workqueue_struct *wq) static int alloc_pwqs(struct workqueue_struct *wq)
{ {
/* /*
* cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS. * pwqs are forced aligned according to WORK_STRUCT_FLAG_BITS.
* Make sure that the alignment isn't lower than that of * Make sure that the alignment isn't lower than that of
* unsigned long long. * unsigned long long.
*/ */
const size_t size = sizeof(struct cpu_workqueue_struct); const size_t size = sizeof(struct pool_workqueue);
const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS, const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
__alignof__(unsigned long long)); __alignof__(unsigned long long));
if (!(wq->flags & WQ_UNBOUND)) if (!(wq->flags & WQ_UNBOUND))
wq->cpu_wq.pcpu = __alloc_percpu(size, align); wq->pool_wq.pcpu = __alloc_percpu(size, align);
else { else {
void *ptr; void *ptr;
/* /*
* Allocate enough room to align cwq and put an extra * Allocate enough room to align pwq and put an extra
* pointer at the end pointing back to the originally * pointer at the end pointing back to the originally
* allocated pointer which will be used for free. * allocated pointer which will be used for free.
*/ */
ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL); ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL);
if (ptr) { if (ptr) {
wq->cpu_wq.single = PTR_ALIGN(ptr, align); wq->pool_wq.single = PTR_ALIGN(ptr, align);
*(void **)(wq->cpu_wq.single + 1) = ptr; *(void **)(wq->pool_wq.single + 1) = ptr;
} }
} }
/* just in case, make sure it's actually aligned */ /* just in case, make sure it's actually aligned */
BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); BUG_ON(!IS_ALIGNED(wq->pool_wq.v, align));
return wq->cpu_wq.v ? 0 : -ENOMEM; return wq->pool_wq.v ? 0 : -ENOMEM;
} }
static void free_cwqs(struct workqueue_struct *wq) static void free_pwqs(struct workqueue_struct *wq)
{ {
if (!(wq->flags & WQ_UNBOUND)) if (!(wq->flags & WQ_UNBOUND))
free_percpu(wq->cpu_wq.pcpu); free_percpu(wq->pool_wq.pcpu);
else if (wq->cpu_wq.single) { else if (wq->pool_wq.single) {
/* the pointer to free is stored right after the cwq */ /* the pointer to free is stored right after the pwq */
kfree(*(void **)(wq->cpu_wq.single + 1)); kfree(*(void **)(wq->pool_wq.single + 1));
} }
} }
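The unbound branch of alloc_pwqs() over-allocates, hands out an aligned pointer, and stashes the original allocation right behind the object so free_pwqs() can find it. A plain-C sketch of that same trick outside the kernel, assuming align is a power of two (calloc/free stand in for kzalloc/kfree):

#include <stdint.h>
#include <stdlib.h>

static void *alloc_aligned(size_t size, size_t align)
{
	void *raw = calloc(1, size + align + sizeof(void *));
	char *obj;

	if (!raw)
		return NULL;
	/* round up to the requested alignment (align must be a power of two) */
	obj = (char *)(((uintptr_t)raw + align - 1) & ~(uintptr_t)(align - 1));
	/* stash the original allocation right behind the aligned object */
	*(void **)(obj + size) = raw;
	return obj;
}

static void free_aligned(void *obj, size_t size)
{
	if (obj)
		free(*(void **)((char *)obj + size));
}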
@@ -3264,27 +3184,25 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
wq->flags = flags; wq->flags = flags;
wq->saved_max_active = max_active; wq->saved_max_active = max_active;
mutex_init(&wq->flush_mutex); mutex_init(&wq->flush_mutex);
atomic_set(&wq->nr_cwqs_to_flush, 0); atomic_set(&wq->nr_pwqs_to_flush, 0);
INIT_LIST_HEAD(&wq->flusher_queue); INIT_LIST_HEAD(&wq->flusher_queue);
INIT_LIST_HEAD(&wq->flusher_overflow); INIT_LIST_HEAD(&wq->flusher_overflow);
lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
INIT_LIST_HEAD(&wq->list); INIT_LIST_HEAD(&wq->list);
if (alloc_cwqs(wq) < 0) if (alloc_pwqs(wq) < 0)
goto err; goto err;
for_each_cwq_cpu(cpu, wq) { for_each_pwq_cpu(cpu, wq) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct pool_workqueue *pwq = get_pwq(cpu, wq);
struct global_cwq *gcwq = get_gcwq(cpu);
int pool_idx = (bool)(flags & WQ_HIGHPRI);
BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK); BUG_ON((unsigned long)pwq & WORK_STRUCT_FLAG_MASK);
cwq->pool = &gcwq->pools[pool_idx]; pwq->pool = get_std_worker_pool(cpu, flags & WQ_HIGHPRI);
cwq->wq = wq; pwq->wq = wq;
cwq->flush_color = -1; pwq->flush_color = -1;
cwq->max_active = max_active; pwq->max_active = max_active;
INIT_LIST_HEAD(&cwq->delayed_works); INIT_LIST_HEAD(&pwq->delayed_works);
} }
if (flags & WQ_RESCUER) { if (flags & WQ_RESCUER) {
@@ -3297,7 +3215,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
if (!rescuer) if (!rescuer)
goto err; goto err;
rescuer->task = kthread_create(rescuer_thread, wq, "%s", rescuer->rescue_wq = wq;
rescuer->task = kthread_create(rescuer_thread, rescuer, "%s",
wq->name); wq->name);
if (IS_ERR(rescuer->task)) if (IS_ERR(rescuer->task))
goto err; goto err;
@@ -3314,8 +3233,8 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
spin_lock(&workqueue_lock); spin_lock(&workqueue_lock);
if (workqueue_freezing && wq->flags & WQ_FREEZABLE) if (workqueue_freezing && wq->flags & WQ_FREEZABLE)
for_each_cwq_cpu(cpu, wq) for_each_pwq_cpu(cpu, wq)
get_cwq(cpu, wq)->max_active = 0; get_pwq(cpu, wq)->max_active = 0;
list_add(&wq->list, &workqueues); list_add(&wq->list, &workqueues);
@@ -3324,7 +3243,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
return wq; return wq;
err: err:
if (wq) { if (wq) {
free_cwqs(wq); free_pwqs(wq);
free_mayday_mask(wq->mayday_mask); free_mayday_mask(wq->mayday_mask);
kfree(wq->rescuer); kfree(wq->rescuer);
kfree(wq); kfree(wq);
@@ -3355,14 +3274,14 @@ void destroy_workqueue(struct workqueue_struct *wq)
spin_unlock(&workqueue_lock); spin_unlock(&workqueue_lock);
/* sanity check */ /* sanity check */
for_each_cwq_cpu(cpu, wq) { for_each_pwq_cpu(cpu, wq) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct pool_workqueue *pwq = get_pwq(cpu, wq);
int i; int i;
for (i = 0; i < WORK_NR_COLORS; i++) for (i = 0; i < WORK_NR_COLORS; i++)
BUG_ON(cwq->nr_in_flight[i]); BUG_ON(pwq->nr_in_flight[i]);
BUG_ON(cwq->nr_active); BUG_ON(pwq->nr_active);
BUG_ON(!list_empty(&cwq->delayed_works)); BUG_ON(!list_empty(&pwq->delayed_works));
} }
if (wq->flags & WQ_RESCUER) { if (wq->flags & WQ_RESCUER) {
@@ -3371,29 +3290,29 @@ void destroy_workqueue(struct workqueue_struct *wq)
kfree(wq->rescuer); kfree(wq->rescuer);
} }
free_cwqs(wq); free_pwqs(wq);
kfree(wq); kfree(wq);
} }
EXPORT_SYMBOL_GPL(destroy_workqueue); EXPORT_SYMBOL_GPL(destroy_workqueue);
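For reference, the caller-side lifecycle that __alloc_workqueue_key() and destroy_workqueue() implement, as a hypothetical driver sketch (io_wq and io_work_fn are illustrative; WQ_MEM_RECLAIM is what gives the queue a rescuer):

#include <linux/workqueue.h>

static struct workqueue_struct *io_wq;
static struct work_struct io_work;

static void io_work_fn(struct work_struct *work)
{
	/* ... submit I/O ... */
}

static int io_setup(void)
{
	io_wq = alloc_workqueue("example_io", WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
	if (!io_wq)
		return -ENOMEM;
	INIT_WORK(&io_work, io_work_fn);
	queue_work(io_wq, &io_work);
	return 0;
}

static void io_teardown(void)
{
	/* drains pending work; the nr_in_flight/nr_active checks above then hold */
	destroy_workqueue(io_wq);
}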
/** /**
* cwq_set_max_active - adjust max_active of a cwq * pwq_set_max_active - adjust max_active of a pwq
* @cwq: target cpu_workqueue_struct * @pwq: target pool_workqueue
* @max_active: new max_active value. * @max_active: new max_active value.
* *
* Set @cwq->max_active to @max_active and activate delayed works if * Set @pwq->max_active to @max_active and activate delayed works if
* increased. * increased.
* *
* CONTEXT: * CONTEXT:
* spin_lock_irq(gcwq->lock). * spin_lock_irq(pool->lock).
*/ */
static void cwq_set_max_active(struct cpu_workqueue_struct *cwq, int max_active) static void pwq_set_max_active(struct pool_workqueue *pwq, int max_active)
{ {
cwq->max_active = max_active; pwq->max_active = max_active;
while (!list_empty(&cwq->delayed_works) && while (!list_empty(&pwq->delayed_works) &&
cwq->nr_active < cwq->max_active) pwq->nr_active < pwq->max_active)
cwq_activate_first_delayed(cwq); pwq_activate_first_delayed(pwq);
} }
/** /**
@@ -3416,16 +3335,17 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
wq->saved_max_active = max_active; wq->saved_max_active = max_active;
for_each_cwq_cpu(cpu, wq) { for_each_pwq_cpu(cpu, wq) {
struct global_cwq *gcwq = get_gcwq(cpu); struct pool_workqueue *pwq = get_pwq(cpu, wq);
struct worker_pool *pool = pwq->pool;
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
if (!(wq->flags & WQ_FREEZABLE) || if (!(wq->flags & WQ_FREEZABLE) ||
!(gcwq->flags & GCWQ_FREEZING)) !(pool->flags & POOL_FREEZING))
cwq_set_max_active(get_cwq(gcwq->cpu, wq), max_active); pwq_set_max_active(pwq, max_active);
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
} }
spin_unlock(&workqueue_lock); spin_unlock(&workqueue_lock);
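A hypothetical runtime-tuning call into workqueue_set_max_active() (nr_channels and the helper name are illustrative; assumes the usual <linux/workqueue.h> and <linux/kernel.h> includes). Note that frozen freezable workqueues keep max_active at 0 until thawed:

static void example_tune_concurrency(struct workqueue_struct *wq,
				     int nr_channels)
{
	/* allow up to one in-flight work item per hardware channel, per CPU */
	workqueue_set_max_active(wq, clamp_t(int, nr_channels, 1, WQ_MAX_ACTIVE));
}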
@@ -3446,27 +3366,12 @@ EXPORT_SYMBOL_GPL(workqueue_set_max_active);
*/ */
bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq) bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq)
{ {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct pool_workqueue *pwq = get_pwq(cpu, wq);
return !list_empty(&cwq->delayed_works); return !list_empty(&pwq->delayed_works);
} }
EXPORT_SYMBOL_GPL(workqueue_congested); EXPORT_SYMBOL_GPL(workqueue_congested);
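workqueue_congested() is only a hint; a hypothetical producer that backs off when a CPU's delayed_works list is non-empty might look like this (example_try_queue_on is illustrative; assumes <linux/workqueue.h>):

static bool example_try_queue_on(int cpu, struct workqueue_struct *wq,
				 struct work_struct *work)
{
	/* advisory only: the congestion state may change immediately after */
	if (workqueue_congested(cpu, wq))
		return false;
	return queue_work_on(cpu, wq, work);
}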
/**
* work_cpu - return the last known associated cpu for @work
* @work: the work of interest
*
* RETURNS:
* CPU number if @work was ever queued. WORK_CPU_NONE otherwise.
*/
unsigned int work_cpu(struct work_struct *work)
{
struct global_cwq *gcwq = get_work_gcwq(work);
return gcwq ? gcwq->cpu : WORK_CPU_NONE;
}
EXPORT_SYMBOL_GPL(work_cpu);
/** /**
* work_busy - test whether a work is currently pending or running * work_busy - test whether a work is currently pending or running
* @work: the work to be tested * @work: the work to be tested
@@ -3474,29 +3379,25 @@ EXPORT_SYMBOL_GPL(work_cpu);
* Test whether @work is currently pending or running. There is no * Test whether @work is currently pending or running. There is no
* synchronization around this function and the test result is * synchronization around this function and the test result is
* unreliable and only useful as advisory hints or for debugging. * unreliable and only useful as advisory hints or for debugging.
* Especially for reentrant wqs, the pending state might hide the
* running state.
* *
* RETURNS: * RETURNS:
* OR'd bitmask of WORK_BUSY_* bits. * OR'd bitmask of WORK_BUSY_* bits.
*/ */
unsigned int work_busy(struct work_struct *work) unsigned int work_busy(struct work_struct *work)
{ {
struct global_cwq *gcwq = get_work_gcwq(work); struct worker_pool *pool = get_work_pool(work);
unsigned long flags; unsigned long flags;
unsigned int ret = 0; unsigned int ret = 0;
if (!gcwq)
return 0;
spin_lock_irqsave(&gcwq->lock, flags);
if (work_pending(work)) if (work_pending(work))
ret |= WORK_BUSY_PENDING; ret |= WORK_BUSY_PENDING;
if (find_worker_executing_work(gcwq, work))
ret |= WORK_BUSY_RUNNING;
spin_unlock_irqrestore(&gcwq->lock, flags); if (pool) {
spin_lock_irqsave(&pool->lock, flags);
if (find_worker_executing_work(pool, work))
ret |= WORK_BUSY_RUNNING;
spin_unlock_irqrestore(&pool->lock, flags);
}
return ret; return ret;
} }
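Matching the RETURNS note above, a hypothetical debug helper that decodes the advisory bitmask (example_report is illustrative; assumes <linux/workqueue.h> and <linux/printk.h>):

static void example_report(struct work_struct *work)
{
	unsigned int busy = work_busy(work);

	pr_debug("work %p:%s%s\n", work,
		 (busy & WORK_BUSY_PENDING) ? " pending" : "",
		 (busy & WORK_BUSY_RUNNING) ? " running" : "");
}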
@@ -3506,65 +3407,49 @@ EXPORT_SYMBOL_GPL(work_busy);
* CPU hotplug. * CPU hotplug.
* *
* There are two challenges in supporting CPU hotplug. Firstly, there * There are two challenges in supporting CPU hotplug. Firstly, there
* are a lot of assumptions on strong associations among work, cwq and * are a lot of assumptions on strong associations among work, pwq and
* gcwq which make migrating pending and scheduled works very * pool which make migrating pending and scheduled works very
* difficult to implement without impacting hot paths. Secondly, * difficult to implement without impacting hot paths. Secondly,
* gcwqs serve mix of short, long and very long running works making * worker pools serve mix of short, long and very long running works making
* blocked draining impractical. * blocked draining impractical.
* *
* This is solved by allowing a gcwq to be disassociated from the CPU * This is solved by allowing the pools to be disassociated from the CPU
* running as an unbound one and allowing it to be reattached later if the * running as an unbound one and allowing it to be reattached later if the
* cpu comes back online. * cpu comes back online.
*/ */
/* claim manager positions of all pools */ static void wq_unbind_fn(struct work_struct *work)
static void gcwq_claim_assoc_and_lock(struct global_cwq *gcwq)
{
struct worker_pool *pool;
for_each_worker_pool(pool, gcwq)
mutex_lock_nested(&pool->assoc_mutex, pool - gcwq->pools);
spin_lock_irq(&gcwq->lock);
}
/* release manager positions */
static void gcwq_release_assoc_and_unlock(struct global_cwq *gcwq)
{ {
struct worker_pool *pool; int cpu = smp_processor_id();
spin_unlock_irq(&gcwq->lock);
for_each_worker_pool(pool, gcwq)
mutex_unlock(&pool->assoc_mutex);
}
static void gcwq_unbind_fn(struct work_struct *work)
{
struct global_cwq *gcwq = get_gcwq(smp_processor_id());
struct worker_pool *pool; struct worker_pool *pool;
struct worker *worker; struct worker *worker;
struct hlist_node *pos; struct hlist_node *pos;
int i; int i;
BUG_ON(gcwq->cpu != smp_processor_id()); for_each_std_worker_pool(pool, cpu) {
BUG_ON(cpu != smp_processor_id());
gcwq_claim_assoc_and_lock(gcwq); mutex_lock(&pool->assoc_mutex);
spin_lock_irq(&pool->lock);
/* /*
* We've claimed all manager positions. Make all workers unbound * We've claimed all manager positions. Make all workers
* and set DISASSOCIATED. Before this, all workers except for the * unbound and set DISASSOCIATED. Before this, all workers
* ones which are still executing works from before the last CPU * except for the ones which are still executing works from
* down must be on the cpu. After this, they may become diasporas. * before the last CPU down must be on the cpu. After
* this, they may become diasporas.
*/ */
for_each_worker_pool(pool, gcwq)
list_for_each_entry(worker, &pool->idle_list, entry) list_for_each_entry(worker, &pool->idle_list, entry)
worker->flags |= WORKER_UNBOUND; worker->flags |= WORKER_UNBOUND;
for_each_busy_worker(worker, i, pos, gcwq) for_each_busy_worker(worker, i, pos, pool)
worker->flags |= WORKER_UNBOUND; worker->flags |= WORKER_UNBOUND;
gcwq->flags |= GCWQ_DISASSOCIATED; pool->flags |= POOL_DISASSOCIATED;
gcwq_release_assoc_and_unlock(gcwq); spin_unlock_irq(&pool->lock);
mutex_unlock(&pool->assoc_mutex);
}
/* /*
* Call schedule() so that we cross rq->lock and thus can guarantee * Call schedule() so that we cross rq->lock and thus can guarantee
@@ -3576,16 +3461,16 @@ static void gcwq_unbind_fn(struct work_struct *work)
/* /*
* Sched callbacks are disabled now. Zap nr_running. After this, * Sched callbacks are disabled now. Zap nr_running. After this,
* nr_running stays zero and need_more_worker() and keep_working() * nr_running stays zero and need_more_worker() and keep_working()
* are always true as long as the worklist is not empty. @gcwq now * are always true as long as the worklist is not empty. Pools on
* behaves as unbound (in terms of concurrency management) gcwq * @cpu now behave as unbound (in terms of concurrency management)
* which is served by workers tied to the CPU. * pools which are served by workers tied to the CPU.
* *
* On return from this function, the current worker would trigger * On return from this function, the current worker would trigger
* unbound chain execution of pending work items if other workers * unbound chain execution of pending work items if other workers
* didn't already. * didn't already.
*/ */
for_each_worker_pool(pool, gcwq) for_each_std_worker_pool(pool, cpu)
atomic_set(get_pool_nr_running(pool), 0); atomic_set(&pool->nr_running, 0);
} }
/* /*
@@ -3597,12 +3482,11 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
void *hcpu) void *hcpu)
{ {
unsigned int cpu = (unsigned long)hcpu; unsigned int cpu = (unsigned long)hcpu;
struct global_cwq *gcwq = get_gcwq(cpu);
struct worker_pool *pool; struct worker_pool *pool;
switch (action & ~CPU_TASKS_FROZEN) { switch (action & ~CPU_TASKS_FROZEN) {
case CPU_UP_PREPARE: case CPU_UP_PREPARE:
for_each_worker_pool(pool, gcwq) { for_each_std_worker_pool(pool, cpu) {
struct worker *worker; struct worker *worker;
if (pool->nr_workers) if (pool->nr_workers)
@@ -3612,18 +3496,24 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
if (!worker) if (!worker)
return NOTIFY_BAD; return NOTIFY_BAD;
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
start_worker(worker); start_worker(worker);
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
} }
break; break;
case CPU_DOWN_FAILED: case CPU_DOWN_FAILED:
case CPU_ONLINE: case CPU_ONLINE:
gcwq_claim_assoc_and_lock(gcwq); for_each_std_worker_pool(pool, cpu) {
gcwq->flags &= ~GCWQ_DISASSOCIATED; mutex_lock(&pool->assoc_mutex);
rebind_workers(gcwq); spin_lock_irq(&pool->lock);
gcwq_release_assoc_and_unlock(gcwq);
pool->flags &= ~POOL_DISASSOCIATED;
rebind_workers(pool);
spin_unlock_irq(&pool->lock);
mutex_unlock(&pool->assoc_mutex);
}
break; break;
} }
return NOTIFY_OK; return NOTIFY_OK;
@@ -3643,7 +3533,7 @@ static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb,
switch (action & ~CPU_TASKS_FROZEN) { switch (action & ~CPU_TASKS_FROZEN) {
case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE:
/* unbinding should happen on the local CPU */ /* unbinding should happen on the local CPU */
INIT_WORK_ONSTACK(&unbind_work, gcwq_unbind_fn); INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn);
queue_work_on(cpu, system_highpri_wq, &unbind_work); queue_work_on(cpu, system_highpri_wq, &unbind_work);
flush_work(&unbind_work); flush_work(&unbind_work);
break; break;
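The CPU_DOWN_PREPARE case above uses the standard on-stack-work-queued-to-a-CPU pattern. A hypothetical standalone version of the same pattern (cpu_ping and cpu_ping_fn are illustrative; assumes <linux/workqueue.h>, <linux/smp.h> and <linux/printk.h>):

struct cpu_ping {
	struct work_struct work;
	int wanted_cpu;
};

static void cpu_ping_fn(struct work_struct *work)
{
	struct cpu_ping *p = container_of(work, struct cpu_ping, work);

	pr_info("asked for CPU %d, running on CPU %d\n",
		p->wanted_cpu, raw_smp_processor_id());
}

static void cpu_ping(int cpu)
{
	struct cpu_ping p = { .wanted_cpu = cpu };

	INIT_WORK_ONSTACK(&p.work, cpu_ping_fn);
	queue_work_on(cpu, system_highpri_wq, &p.work);
	flush_work(&p.work);	/* must wait: the work item lives on our stack */
	destroy_work_on_stack(&p.work);
}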
@@ -3696,10 +3586,10 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
* *
* Start freezing workqueues. After this function returns, all freezable * Start freezing workqueues. After this function returns, all freezable
* workqueues will queue new works to their frozen_works list instead of * workqueues will queue new works to their frozen_works list instead of
* gcwq->worklist. * pool->worklist.
* *
* CONTEXT: * CONTEXT:
* Grabs and releases workqueue_lock and gcwq->lock's. * Grabs and releases workqueue_lock and pool->lock's.
*/ */
void freeze_workqueues_begin(void) void freeze_workqueues_begin(void)
{ {
@@ -3710,23 +3600,26 @@ void freeze_workqueues_begin(void)
BUG_ON(workqueue_freezing); BUG_ON(workqueue_freezing);
workqueue_freezing = true; workqueue_freezing = true;
for_each_gcwq_cpu(cpu) { for_each_wq_cpu(cpu) {
struct global_cwq *gcwq = get_gcwq(cpu); struct worker_pool *pool;
struct workqueue_struct *wq; struct workqueue_struct *wq;
spin_lock_irq(&gcwq->lock); for_each_std_worker_pool(pool, cpu) {
spin_lock_irq(&pool->lock);
BUG_ON(gcwq->flags & GCWQ_FREEZING); WARN_ON_ONCE(pool->flags & POOL_FREEZING);
gcwq->flags |= GCWQ_FREEZING; pool->flags |= POOL_FREEZING;
list_for_each_entry(wq, &workqueues, list) { list_for_each_entry(wq, &workqueues, list) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct pool_workqueue *pwq = get_pwq(cpu, wq);
if (cwq && wq->flags & WQ_FREEZABLE) if (pwq && pwq->pool == pool &&
cwq->max_active = 0; (wq->flags & WQ_FREEZABLE))
pwq->max_active = 0;
} }
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
}
} }
spin_unlock(&workqueue_lock); spin_unlock(&workqueue_lock);
@@ -3754,20 +3647,20 @@ bool freeze_workqueues_busy(void)
BUG_ON(!workqueue_freezing); BUG_ON(!workqueue_freezing);
for_each_gcwq_cpu(cpu) { for_each_wq_cpu(cpu) {
struct workqueue_struct *wq; struct workqueue_struct *wq;
/* /*
* nr_active is monotonically decreasing. It's safe * nr_active is monotonically decreasing. It's safe
* to peek without lock. * to peek without lock.
*/ */
list_for_each_entry(wq, &workqueues, list) { list_for_each_entry(wq, &workqueues, list) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct pool_workqueue *pwq = get_pwq(cpu, wq);
if (!cwq || !(wq->flags & WQ_FREEZABLE)) if (!pwq || !(wq->flags & WQ_FREEZABLE))
continue; continue;
BUG_ON(cwq->nr_active < 0); BUG_ON(pwq->nr_active < 0);
if (cwq->nr_active) { if (pwq->nr_active) {
busy = true; busy = true;
goto out_unlock; goto out_unlock;
} }
@@ -3782,10 +3675,10 @@ bool freeze_workqueues_busy(void)
* thaw_workqueues - thaw workqueues * thaw_workqueues - thaw workqueues
* *
* Thaw workqueues. Normal queueing is restored and all collected * Thaw workqueues. Normal queueing is restored and all collected
* frozen works are transferred to their respective gcwq worklists. * frozen works are transferred to their respective pool worklists.
* *
* CONTEXT: * CONTEXT:
* Grabs and releases workqueue_lock and gcwq->lock's. * Grabs and releases workqueue_lock and pool->lock's.
*/ */
void thaw_workqueues(void) void thaw_workqueues(void)
{ {
@@ -3796,30 +3689,31 @@ void thaw_workqueues(void)
if (!workqueue_freezing) if (!workqueue_freezing)
goto out_unlock; goto out_unlock;
for_each_gcwq_cpu(cpu) { for_each_wq_cpu(cpu) {
struct global_cwq *gcwq = get_gcwq(cpu);
struct worker_pool *pool; struct worker_pool *pool;
struct workqueue_struct *wq; struct workqueue_struct *wq;
spin_lock_irq(&gcwq->lock); for_each_std_worker_pool(pool, cpu) {
spin_lock_irq(&pool->lock);
BUG_ON(!(gcwq->flags & GCWQ_FREEZING)); WARN_ON_ONCE(!(pool->flags & POOL_FREEZING));
gcwq->flags &= ~GCWQ_FREEZING; pool->flags &= ~POOL_FREEZING;
list_for_each_entry(wq, &workqueues, list) { list_for_each_entry(wq, &workqueues, list) {
struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct pool_workqueue *pwq = get_pwq(cpu, wq);
if (!cwq || !(wq->flags & WQ_FREEZABLE)) if (!pwq || pwq->pool != pool ||
!(wq->flags & WQ_FREEZABLE))
continue; continue;
/* restore max_active and repopulate worklist */ /* restore max_active and repopulate worklist */
cwq_set_max_active(cwq, wq->saved_max_active); pwq_set_max_active(pwq, wq->saved_max_active);
} }
for_each_worker_pool(pool, gcwq)
wake_up_worker(pool); wake_up_worker(pool);
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
}
} }
workqueue_freezing = false; workqueue_freezing = false;
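Taken together, freeze_workqueues_begin(), freeze_workqueues_busy() and thaw_workqueues() are driven by the suspend/hibernate path. A rough, simplified sketch of that sequence (retry count and sleep interval are illustrative; the real loop lives in kernel/power/process.c):

static int example_freeze_workqueues(void)
{
	int retries = 100;

	freeze_workqueues_begin();		/* max_active -> 0, new work held back */
	while (freeze_workqueues_busy()) {	/* in-flight items still draining */
		if (!--retries) {
			thaw_workqueues();	/* give up and restore queueing */
			return -EBUSY;
		}
		msleep(10);
	}
	return 0;				/* safe to snapshot/suspend */
}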
@@ -3831,60 +3725,56 @@ void thaw_workqueues(void)
static int __init init_workqueues(void) static int __init init_workqueues(void)
{ {
unsigned int cpu; unsigned int cpu;
int i;
/* make sure we have enough bits for OFFQ CPU number */ /* make sure we have enough bits for OFFQ pool ID */
BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_CPU_SHIFT)) < BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) <
WORK_CPU_LAST); WORK_CPU_END * NR_STD_WORKER_POOLS);
cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);
hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); hotcpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN);
/* initialize gcwqs */ /* initialize CPU pools */
for_each_gcwq_cpu(cpu) { for_each_wq_cpu(cpu) {
struct global_cwq *gcwq = get_gcwq(cpu);
struct worker_pool *pool; struct worker_pool *pool;
spin_lock_init(&gcwq->lock); for_each_std_worker_pool(pool, cpu) {
gcwq->cpu = cpu; spin_lock_init(&pool->lock);
gcwq->flags |= GCWQ_DISASSOCIATED; pool->cpu = cpu;
pool->flags |= POOL_DISASSOCIATED;
for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++)
INIT_HLIST_HEAD(&gcwq->busy_hash[i]);
for_each_worker_pool(pool, gcwq) {
pool->gcwq = gcwq;
INIT_LIST_HEAD(&pool->worklist); INIT_LIST_HEAD(&pool->worklist);
INIT_LIST_HEAD(&pool->idle_list); INIT_LIST_HEAD(&pool->idle_list);
hash_init(pool->busy_hash);
init_timer_deferrable(&pool->idle_timer); init_timer_deferrable(&pool->idle_timer);
pool->idle_timer.function = idle_worker_timeout; pool->idle_timer.function = idle_worker_timeout;
pool->idle_timer.data = (unsigned long)pool; pool->idle_timer.data = (unsigned long)pool;
setup_timer(&pool->mayday_timer, gcwq_mayday_timeout, setup_timer(&pool->mayday_timer, pool_mayday_timeout,
(unsigned long)pool); (unsigned long)pool);
mutex_init(&pool->assoc_mutex); mutex_init(&pool->assoc_mutex);
ida_init(&pool->worker_ida); ida_init(&pool->worker_ida);
/* alloc pool ID */
BUG_ON(worker_pool_assign_id(pool));
} }
} }
/* create the initial worker */ /* create the initial worker */
for_each_online_gcwq_cpu(cpu) { for_each_online_wq_cpu(cpu) {
struct global_cwq *gcwq = get_gcwq(cpu);
struct worker_pool *pool; struct worker_pool *pool;
if (cpu != WORK_CPU_UNBOUND) for_each_std_worker_pool(pool, cpu) {
gcwq->flags &= ~GCWQ_DISASSOCIATED;
for_each_worker_pool(pool, gcwq) {
struct worker *worker; struct worker *worker;
if (cpu != WORK_CPU_UNBOUND)
pool->flags &= ~POOL_DISASSOCIATED;
worker = create_worker(pool); worker = create_worker(pool);
BUG_ON(!worker); BUG_ON(!worker);
spin_lock_irq(&gcwq->lock); spin_lock_irq(&pool->lock);
start_worker(worker); start_worker(worker);
spin_unlock_irq(&gcwq->lock); spin_unlock_irq(&pool->lock);
} }
} }
......
/*
* kernel/workqueue_internal.h
*
* Workqueue internal header file. Only to be included by workqueue and
* core kernel subsystems.
*/
#ifndef _KERNEL_WORKQUEUE_INTERNAL_H
#define _KERNEL_WORKQUEUE_INTERNAL_H
#include <linux/workqueue.h>
#include <linux/kthread.h>
struct worker_pool;
/*
* The poor guys doing the actual heavy lifting. All on-duty workers are
* either serving the manager role, on idle list or on busy hash. For
* details on the locking annotation (L, I, X...), refer to workqueue.c.
*
* Only to be used in workqueue and async.
*/
struct worker {
/* on idle list while idle, on busy hash table while busy */
union {
struct list_head entry; /* L: while idle */
struct hlist_node hentry; /* L: while busy */
};
struct work_struct *current_work; /* L: work being processed */
work_func_t current_func; /* L: current_work's fn */
struct pool_workqueue *current_pwq; /* L: current_work's pwq */
struct list_head scheduled; /* L: scheduled works */
struct task_struct *task; /* I: worker task */
struct worker_pool *pool; /* I: the associated pool */
/* 64 bytes boundary on 64bit, 32 on 32bit */
unsigned long last_active; /* L: last active timestamp */
unsigned int flags; /* X: flags */
int id; /* I: worker id */
/* for rebinding worker to CPU */
struct work_struct rebind_work; /* L: for busy worker */
/* used only by rescuers to point to the target workqueue */
struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */
};
/**
* current_wq_worker - return struct worker if %current is a workqueue worker
*/
static inline struct worker *current_wq_worker(void)
{
if (current->flags & PF_WQ_WORKER)
return kthread_data(current);
return NULL;
}
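A sketch of the kind of test current_wq_worker() enables inside workqueue.c; this mirrors how is_chained_work() checks whether %current is already executing on a given workqueue. It dereferences struct pool_workqueue, so it only compiles where that type is visible (i.e. in workqueue.c itself):

static bool running_on_wq(struct workqueue_struct *wq)
{
	struct worker *worker = current_wq_worker();

	/* %current is the worker itself, so no locking is needed here */
	return worker && worker->current_pwq->wq == wq;
}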
/*
* Scheduler hooks for concurrency managed workqueue. Only to be used from
* sched.c and workqueue.c.
*/
void wq_worker_waking_up(struct task_struct *task, unsigned int cpu);
struct task_struct *wq_worker_sleeping(struct task_struct *task,
unsigned int cpu);
#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
/*
* kernel/workqueue_sched.h
*
* Scheduler hooks for concurrency managed workqueue. Only to be
* included from sched.c and workqueue.c.
*/
void wq_worker_waking_up(struct task_struct *task, unsigned int cpu);
struct task_struct *wq_worker_sleeping(struct task_struct *task,
unsigned int cpu);