Commit cd546fa3 authored by Linus Torvalds

Merge tag 'wq-for-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue updates from Tejun Heo:
 "Mostly changes from Petr to improve warning and error reporting.

  Workqueue now reports more of the relevant failures with better
  context which should help debugging"

* tag 'wq-for-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: Introduce show_freezable_workqueues
  workqueue: Print backtraces from CPUs with hung CPU bound workqueues
  workqueue: Warn when a rescuer could not be created
  workqueue: Interrupted create_worker() is not a repeated event
  workqueue: Warn when a new worker could not be created
  workqueue: Fix hung time report of worker pools
  workqueue: Simplify a pr_warn() call in wq_select_unbound_cpu()
  MAINTAINERS: Add workqueue_internal.h to the WORKQUEUE entry
parents 89d77f71 704bc669
...@@ -22743,6 +22743,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq.git ...@@ -22743,6 +22743,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq.git
F: Documentation/core-api/workqueue.rst F: Documentation/core-api/workqueue.rst
F: include/linux/workqueue.h F: include/linux/workqueue.h
F: kernel/workqueue.c F: kernel/workqueue.c
F: kernel/workqueue_internal.h
WWAN DRIVERS WWAN DRIVERS
M: Loic Poulain <loic.poulain@linaro.org> M: Loic Poulain <loic.poulain@linaro.org>
......
...@@ -472,6 +472,7 @@ extern unsigned int work_busy(struct work_struct *work); ...@@ -472,6 +472,7 @@ extern unsigned int work_busy(struct work_struct *work);
extern __printf(1, 2) void set_worker_desc(const char *fmt, ...); extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
extern void print_worker_info(const char *log_lvl, struct task_struct *task); extern void print_worker_info(const char *log_lvl, struct task_struct *task);
extern void show_all_workqueues(void); extern void show_all_workqueues(void);
extern void show_freezable_workqueues(void);
extern void show_one_workqueue(struct workqueue_struct *wq); extern void show_one_workqueue(struct workqueue_struct *wq);
extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task); extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task);
......
...@@ -93,7 +93,7 @@ static int try_to_freeze_tasks(bool user_only) ...@@ -93,7 +93,7 @@ static int try_to_freeze_tasks(bool user_only)
todo - wq_busy, wq_busy); todo - wq_busy, wq_busy);
if (wq_busy) if (wq_busy)
show_all_workqueues(); show_freezable_workqueues();
if (!wakeup || pm_debug_messages_on) { if (!wakeup || pm_debug_messages_on) {
read_lock(&tasklist_lock); read_lock(&tasklist_lock);
......
...@@ -49,6 +49,7 @@ ...@@ -49,6 +49,7 @@
#include <linux/moduleparam.h> #include <linux/moduleparam.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/sched/isolation.h> #include <linux/sched/isolation.h>
#include <linux/sched/debug.h>
#include <linux/nmi.h> #include <linux/nmi.h>
#include <linux/kvm_para.h> #include <linux/kvm_para.h>
...@@ -141,6 +142,8 @@ enum { ...@@ -141,6 +142,8 @@ enum {
* WR: wq->mutex protected for writes. RCU protected for reads. * WR: wq->mutex protected for writes. RCU protected for reads.
* *
* MD: wq_mayday_lock protected. * MD: wq_mayday_lock protected.
*
* WD: Used internally by the watchdog.
*/ */
/* struct worker is defined in workqueue_internal.h */ /* struct worker is defined in workqueue_internal.h */
...@@ -153,6 +156,7 @@ struct worker_pool { ...@@ -153,6 +156,7 @@ struct worker_pool {
unsigned int flags; /* X: flags */ unsigned int flags; /* X: flags */
unsigned long watchdog_ts; /* L: watchdog timestamp */ unsigned long watchdog_ts; /* L: watchdog timestamp */
bool cpu_stall; /* WD: stalled cpu bound pool */
/* /*
* The counter is incremented in a process context on the associated CPU * The counter is incremented in a process context on the associated CPU
...@@ -1392,15 +1396,13 @@ static bool is_chained_work(struct workqueue_struct *wq) ...@@ -1392,15 +1396,13 @@ static bool is_chained_work(struct workqueue_struct *wq)
*/ */
static int wq_select_unbound_cpu(int cpu) static int wq_select_unbound_cpu(int cpu)
{ {
static bool printed_dbg_warning;
int new_cpu; int new_cpu;
if (likely(!wq_debug_force_rr_cpu)) { if (likely(!wq_debug_force_rr_cpu)) {
if (cpumask_test_cpu(cpu, wq_unbound_cpumask)) if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
return cpu; return cpu;
} else if (!printed_dbg_warning) { } else {
pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n"); pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n");
printed_dbg_warning = true;
} }
if (cpumask_empty(wq_unbound_cpumask)) if (cpumask_empty(wq_unbound_cpumask))
...@@ -1938,12 +1940,17 @@ static struct worker *create_worker(struct worker_pool *pool) ...@@ -1938,12 +1940,17 @@ static struct worker *create_worker(struct worker_pool *pool)
/* ID is needed to determine kthread name */ /* ID is needed to determine kthread name */
id = ida_alloc(&pool->worker_ida, GFP_KERNEL); id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
if (id < 0) if (id < 0) {
pr_err_once("workqueue: Failed to allocate a worker ID: %pe\n",
ERR_PTR(id));
return NULL; return NULL;
}
worker = alloc_worker(pool->node); worker = alloc_worker(pool->node);
if (!worker) if (!worker) {
pr_err_once("workqueue: Failed to allocate a worker\n");
goto fail; goto fail;
}
worker->id = id; worker->id = id;
...@@ -1955,8 +1962,16 @@ static struct worker *create_worker(struct worker_pool *pool) ...@@ -1955,8 +1962,16 @@ static struct worker *create_worker(struct worker_pool *pool)
worker->task = kthread_create_on_node(worker_thread, worker, pool->node, worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
"kworker/%s", id_buf); "kworker/%s", id_buf);
if (IS_ERR(worker->task)) if (IS_ERR(worker->task)) {
if (PTR_ERR(worker->task) == -EINTR) {
pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n",
id_buf);
} else {
pr_err_once("workqueue: Failed to create a worker thread: %pe",
worker->task);
}
goto fail; goto fail;
}
set_user_nice(worker->task, pool->attrs->nice); set_user_nice(worker->task, pool->attrs->nice);
kthread_bind_mask(worker->task, pool->attrs->cpumask); kthread_bind_mask(worker->task, pool->attrs->cpumask);
...@@ -4380,13 +4395,18 @@ static int init_rescuer(struct workqueue_struct *wq) ...@@ -4380,13 +4395,18 @@ static int init_rescuer(struct workqueue_struct *wq)
return 0; return 0;
rescuer = alloc_worker(NUMA_NO_NODE); rescuer = alloc_worker(NUMA_NO_NODE);
if (!rescuer) if (!rescuer) {
pr_err("workqueue: Failed to allocate a rescuer for wq \"%s\"\n",
wq->name);
return -ENOMEM; return -ENOMEM;
}
rescuer->rescue_wq = wq; rescuer->rescue_wq = wq;
rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name); rescuer->task = kthread_create(rescuer_thread, rescuer, "%s", wq->name);
if (IS_ERR(rescuer->task)) { if (IS_ERR(rescuer->task)) {
ret = PTR_ERR(rescuer->task); ret = PTR_ERR(rescuer->task);
pr_err("workqueue: Failed to create a rescuer kthread for wq \"%s\": %pe",
wq->name, ERR_PTR(ret));
kfree(rescuer); kfree(rescuer);
return ret; return ret;
} }
...@@ -5002,10 +5022,16 @@ static void show_one_worker_pool(struct worker_pool *pool) ...@@ -5002,10 +5022,16 @@ static void show_one_worker_pool(struct worker_pool *pool)
struct worker *worker; struct worker *worker;
bool first = true; bool first = true;
unsigned long flags; unsigned long flags;
unsigned long hung = 0;
raw_spin_lock_irqsave(&pool->lock, flags); raw_spin_lock_irqsave(&pool->lock, flags);
if (pool->nr_workers == pool->nr_idle) if (pool->nr_workers == pool->nr_idle)
goto next_pool; goto next_pool;
/* How long the first pending work is waiting for a worker. */
if (!list_empty(&pool->worklist))
hung = jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000;
/* /*
* Defer printing to avoid deadlocks in console drivers that * Defer printing to avoid deadlocks in console drivers that
* queue work while holding locks also taken in their write * queue work while holding locks also taken in their write
...@@ -5014,9 +5040,7 @@ static void show_one_worker_pool(struct worker_pool *pool) ...@@ -5014,9 +5040,7 @@ static void show_one_worker_pool(struct worker_pool *pool)
printk_deferred_enter(); printk_deferred_enter();
pr_info("pool %d:", pool->id); pr_info("pool %d:", pool->id);
pr_cont_pool_info(pool); pr_cont_pool_info(pool);
pr_cont(" hung=%us workers=%d", pr_cont(" hung=%lus workers=%d", hung, pool->nr_workers);
jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
pool->nr_workers);
if (pool->manager) if (pool->manager)
pr_cont(" manager: %d", pr_cont(" manager: %d",
task_pid_nr(pool->manager->task)); task_pid_nr(pool->manager->task));
...@@ -5041,8 +5065,7 @@ static void show_one_worker_pool(struct worker_pool *pool) ...@@ -5041,8 +5065,7 @@ static void show_one_worker_pool(struct worker_pool *pool)
/** /**
* show_all_workqueues - dump workqueue state * show_all_workqueues - dump workqueue state
* *
* Called from a sysrq handler or try_to_freeze_tasks() and prints out * Called from a sysrq handler and prints out all busy workqueues and pools.
* all busy workqueues and pools.
*/ */
void show_all_workqueues(void) void show_all_workqueues(void)
{ {
...@@ -5063,6 +5086,29 @@ void show_all_workqueues(void) ...@@ -5063,6 +5086,29 @@ void show_all_workqueues(void)
rcu_read_unlock(); rcu_read_unlock();
} }
/**
 * show_freezable_workqueues - dump freezable workqueue state
 *
 * Walks the global workqueue list inside an RCU read-side critical section
 * and hands every workqueue that has %WQ_FREEZABLE set to
 * show_one_workqueue(). Workqueues without that flag are skipped.
 *
 * Called from try_to_freeze_tasks() to report freezable workqueues that are
 * still busy.
 */
void show_freezable_workqueues(void)
{
	struct workqueue_struct *cur;

	rcu_read_lock();

	pr_info("Showing freezable workqueues that are still busy:\n");

	list_for_each_entry_rcu(cur, &workqueues, list) {
		/* Only freezable workqueues are of interest here. */
		if (cur->flags & WQ_FREEZABLE)
			show_one_workqueue(cur);
	}

	rcu_read_unlock();
}
/* used to show worker information through /proc/PID/{comm,stat,status} */ /* used to show worker information through /proc/PID/{comm,stat,status} */
void wq_worker_comm(char *buf, size_t size, struct task_struct *task) void wq_worker_comm(char *buf, size_t size, struct task_struct *task)
{ {
...@@ -5962,6 +6008,57 @@ static struct timer_list wq_watchdog_timer; ...@@ -5962,6 +6008,57 @@ static struct timer_list wq_watchdog_timer;
static unsigned long wq_watchdog_touched = INITIAL_JIFFIES; static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES; static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
/*
 * Print a backtrace of every busy worker in @pool whose task is in the
 * running state.
 *
 * Such workers are the only candidates that might prevent the processing
 * of pending work items on a CPU-bound pool; in all other situations the
 * pending items should be picked up by another idle worker.
 *
 * The busy hash is walked with pool->lock held and interrupts disabled.
 */
static void show_cpu_pool_hog(struct worker_pool *pool)
{
	unsigned long irq_flags;
	struct worker *busy;
	int bucket;

	raw_spin_lock_irqsave(&pool->lock, irq_flags);

	hash_for_each(pool->busy_hash, bucket, busy, hentry) {
		/* Workers that are not running cannot be hogging the CPU. */
		if (!task_is_running(busy->task))
			continue;

		/*
		 * Defer printing to avoid deadlocks in console
		 * drivers that queue work while holding locks
		 * also taken in their write paths.
		 */
		printk_deferred_enter();

		pr_info("pool %d:\n", pool->id);
		sched_show_task(busy->task);

		printk_deferred_exit();
	}

	raw_spin_unlock_irqrestore(&pool->lock, irq_flags);
}
/*
 * Print a header line, then call show_cpu_pool_hog() for every worker pool
 * whose ->cpu_stall flag is set. The pools are iterated inside an RCU
 * read-side critical section.
 */
static void show_cpu_pools_hogs(void)
{
	struct worker_pool *cur;
	int idx;

	pr_info("Showing backtraces of running workers in stalled CPU-bound worker pools:\n");

	rcu_read_lock();

	for_each_pool(cur, idx) {
		/* Pools not marked as stalled are skipped. */
		if (cur->cpu_stall) {
			show_cpu_pool_hog(cur);
		}
	}

	rcu_read_unlock();
}
static void wq_watchdog_reset_touched(void) static void wq_watchdog_reset_touched(void)
{ {
int cpu; int cpu;
...@@ -5975,6 +6072,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) ...@@ -5975,6 +6072,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
{ {
unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ; unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
bool lockup_detected = false; bool lockup_detected = false;
bool cpu_pool_stall = false;
unsigned long now = jiffies; unsigned long now = jiffies;
struct worker_pool *pool; struct worker_pool *pool;
int pi; int pi;
...@@ -5987,6 +6085,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) ...@@ -5987,6 +6085,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
for_each_pool(pool, pi) { for_each_pool(pool, pi) {
unsigned long pool_ts, touched, ts; unsigned long pool_ts, touched, ts;
pool->cpu_stall = false;
if (list_empty(&pool->worklist)) if (list_empty(&pool->worklist))
continue; continue;
...@@ -6011,11 +6110,17 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) ...@@ -6011,11 +6110,17 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
/* did we stall? */ /* did we stall? */
if (time_after(now, ts + thresh)) { if (time_after(now, ts + thresh)) {
lockup_detected = true; lockup_detected = true;
if (pool->cpu >= 0) {
pool->cpu_stall = true;
cpu_pool_stall = true;
}
pr_emerg("BUG: workqueue lockup - pool"); pr_emerg("BUG: workqueue lockup - pool");
pr_cont_pool_info(pool); pr_cont_pool_info(pool);
pr_cont(" stuck for %us!\n", pr_cont(" stuck for %us!\n",
jiffies_to_msecs(now - pool_ts) / 1000); jiffies_to_msecs(now - pool_ts) / 1000);
} }
} }
rcu_read_unlock(); rcu_read_unlock();
...@@ -6023,6 +6128,9 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) ...@@ -6023,6 +6128,9 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
if (lockup_detected) if (lockup_detected)
show_all_workqueues(); show_all_workqueues();
if (cpu_pool_stall)
show_cpu_pools_hogs();
wq_watchdog_reset_touched(); wq_watchdog_reset_touched();
mod_timer(&wq_watchdog_timer, jiffies + thresh); mod_timer(&wq_watchdog_timer, jiffies + thresh);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment