Commit 1551260d authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'core/softlockup' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'core/softlockup' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  softlockup: make DETECT_HUNG_TASK default depend on DETECT_SOFTLOCKUP
  softlockup: move 'one' to the softlockup section in sysctl.c
  softlockup: ensure the task has been switched out once
  softlockup: remove timestamp checking from hung_task
  softlockup: convert read_lock in hung_task to rcu_read_lock
  softlockup: check all tasks in hung_task
  softlockup: remove unused definition for spawn_softlockup_task
  softlockup: fix potential race in hung_task when resetting timeout
  softlockup: fix to allow compiling with !DETECT_HUNG_TASK
  softlockup: decouple hung tasks check from softlockup detection
parents c93f216b 5e344378
...@@ -300,17 +300,11 @@ extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, ...@@ -300,17 +300,11 @@ extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
struct file *filp, void __user *buffer, struct file *filp, void __user *buffer,
size_t *lenp, loff_t *ppos); size_t *lenp, loff_t *ppos);
extern unsigned int softlockup_panic; extern unsigned int softlockup_panic;
extern unsigned long sysctl_hung_task_check_count;
extern unsigned long sysctl_hung_task_timeout_secs;
extern unsigned long sysctl_hung_task_warnings;
extern int softlockup_thresh; extern int softlockup_thresh;
#else #else
static inline void softlockup_tick(void) static inline void softlockup_tick(void)
{ {
} }
static inline void spawn_softlockup_task(void)
{
}
static inline void touch_softlockup_watchdog(void) static inline void touch_softlockup_watchdog(void)
{ {
} }
...@@ -319,6 +313,15 @@ static inline void touch_all_softlockup_watchdogs(void) ...@@ -319,6 +313,15 @@ static inline void touch_all_softlockup_watchdogs(void)
} }
#endif #endif
#ifdef CONFIG_DETECT_HUNG_TASK
extern unsigned int sysctl_hung_task_panic;
extern unsigned long sysctl_hung_task_check_count;
extern unsigned long sysctl_hung_task_timeout_secs;
extern unsigned long sysctl_hung_task_warnings;
extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
struct file *filp, void __user *buffer,
size_t *lenp, loff_t *ppos);
#endif
/* Attach to any functions which should be ignored in wchan output. */ /* Attach to any functions which should be ignored in wchan output. */
#define __sched __attribute__((__section__(".sched.text"))) #define __sched __attribute__((__section__(".sched.text")))
...@@ -1255,9 +1258,8 @@ struct task_struct { ...@@ -1255,9 +1258,8 @@ struct task_struct {
/* ipc stuff */ /* ipc stuff */
struct sysv_sem sysvsem; struct sysv_sem sysvsem;
#endif #endif
#ifdef CONFIG_DETECT_SOFTLOCKUP #ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */ /* hung task detection */
unsigned long last_switch_timestamp;
unsigned long last_switch_count; unsigned long last_switch_count;
#endif #endif
/* CPU-specific state of this task */ /* CPU-specific state of this task */
......
...@@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o ...@@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
......
...@@ -645,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) ...@@ -645,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
tsk->min_flt = tsk->maj_flt = 0; tsk->min_flt = tsk->maj_flt = 0;
tsk->nvcsw = tsk->nivcsw = 0; tsk->nvcsw = tsk->nivcsw = 0;
#ifdef CONFIG_DETECT_HUNG_TASK
tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
#endif
tsk->mm = NULL; tsk->mm = NULL;
tsk->active_mm = NULL; tsk->active_mm = NULL;
...@@ -1032,11 +1035,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, ...@@ -1032,11 +1035,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->default_timer_slack_ns = current->timer_slack_ns; p->default_timer_slack_ns = current->timer_slack_ns;
#ifdef CONFIG_DETECT_SOFTLOCKUP
p->last_switch_count = 0;
p->last_switch_timestamp = 0;
#endif
task_io_accounting_init(&p->ioac); task_io_accounting_init(&p->ioac);
acct_clear_integrals(p); acct_clear_integrals(p);
......
/*
* Detect Hung Task
*
* kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
*
*/
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/module.h>
#include <linux/sysctl.h>
/*
* The number of tasks checked:
*/
unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
/*
* Limit number of tasks checked in a batch.
*
* This value controls the preemptibility of khungtaskd since preemption
* is disabled during the critical section. It also controls the size of
* the RCU grace period. So it needs to be upper-bound.
*/
#define HUNG_TASK_BATCHING 1024
/*
* Zero means infinite timeout - no checking done:
*/
unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
unsigned long __read_mostly sysctl_hung_task_warnings = 10;
static int __read_mostly did_panic;
static struct task_struct *watchdog_task;
/*
* Should we panic (and reboot, if panic_timeout= is set) when a
* hung task is detected:
*/
unsigned int __read_mostly sysctl_hung_task_panic =
CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
static int __init hung_task_panic_setup(char *str)
{
sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
return 1;
}
__setup("hung_task_panic=", hung_task_panic_setup);
static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
did_panic = 1;
return NOTIFY_DONE;
}
static struct notifier_block panic_block = {
.notifier_call = hung_task_panic,
};
static void check_hung_task(struct task_struct *t, unsigned long timeout)
{
unsigned long switch_count = t->nvcsw + t->nivcsw;
/*
* Ensure the task is not frozen.
* Also, when a freshly created task is scheduled once, changes
* its state to TASK_UNINTERRUPTIBLE without having ever been
* switched out once, it musn't be checked.
*/
if (unlikely(t->flags & PF_FROZEN || !switch_count))
return;
if (switch_count != t->last_switch_count) {
t->last_switch_count = switch_count;
return;
}
if (!sysctl_hung_task_warnings)
return;
sysctl_hung_task_warnings--;
/*
* Ok, the task did not get scheduled for more than 2 minutes,
* complain:
*/
printk(KERN_ERR "INFO: task %s:%d blocked for more than "
"%ld seconds.\n", t->comm, t->pid, timeout);
printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
" disables this message.\n");
sched_show_task(t);
__debug_show_held_locks(t);
touch_nmi_watchdog();
if (sysctl_hung_task_panic)
panic("hung_task: blocked tasks");
}
/*
* To avoid extending the RCU grace period for an unbounded amount of time,
* periodically exit the critical section and enter a new one.
*
* For preemptible RCU it is sufficient to call rcu_read_unlock in order
* exit the grace period. For classic RCU, a reschedule is required.
*/
static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
{
get_task_struct(g);
get_task_struct(t);
rcu_read_unlock();
cond_resched();
rcu_read_lock();
put_task_struct(t);
put_task_struct(g);
}
/*
* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
* a really long time (120 seconds). If that happens, print out
* a warning.
*/
static void check_hung_uninterruptible_tasks(unsigned long timeout)
{
int max_count = sysctl_hung_task_check_count;
int batch_count = HUNG_TASK_BATCHING;
struct task_struct *g, *t;
/*
* If the system crashed already then all bets are off,
* do not report extra hung tasks:
*/
if (test_taint(TAINT_DIE) || did_panic)
return;
rcu_read_lock();
do_each_thread(g, t) {
if (!--max_count)
goto unlock;
if (!--batch_count) {
batch_count = HUNG_TASK_BATCHING;
rcu_lock_break(g, t);
/* Exit if t or g was unhashed during refresh. */
if (t->state == TASK_DEAD || g->state == TASK_DEAD)
goto unlock;
}
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
if (t->state == TASK_UNINTERRUPTIBLE)
check_hung_task(t, timeout);
} while_each_thread(g, t);
unlock:
rcu_read_unlock();
}
static unsigned long timeout_jiffies(unsigned long timeout)
{
/* timeout of 0 will disable the watchdog */
return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
}
/*
* Process updating of timeout sysctl
*/
int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
struct file *filp, void __user *buffer,
size_t *lenp, loff_t *ppos)
{
int ret;
ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
if (ret || !write)
goto out;
wake_up_process(watchdog_task);
out:
return ret;
}
/*
* kthread which checks for tasks stuck in D state
*/
static int watchdog(void *dummy)
{
set_user_nice(current, 0);
for ( ; ; ) {
unsigned long timeout = sysctl_hung_task_timeout_secs;
while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
timeout = sysctl_hung_task_timeout_secs;
check_hung_uninterruptible_tasks(timeout);
}
return 0;
}
static int __init hung_task_init(void)
{
atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
return 0;
}
module_init(hung_task_init);
...@@ -165,98 +165,12 @@ void softlockup_tick(void) ...@@ -165,98 +165,12 @@ void softlockup_tick(void)
panic("softlockup: hung tasks"); panic("softlockup: hung tasks");
} }
/*
* Have a reasonable limit on the number of tasks checked:
*/
unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
/*
* Zero means infinite timeout - no checking done:
*/
unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
unsigned long __read_mostly sysctl_hung_task_warnings = 10;
/*
* Only do the hung-tasks check on one CPU:
*/
static int check_cpu __read_mostly = -1;
static void check_hung_task(struct task_struct *t, unsigned long now)
{
unsigned long switch_count = t->nvcsw + t->nivcsw;
if (t->flags & PF_FROZEN)
return;
if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
t->last_switch_count = switch_count;
t->last_switch_timestamp = now;
return;
}
if ((long)(now - t->last_switch_timestamp) <
sysctl_hung_task_timeout_secs)
return;
if (!sysctl_hung_task_warnings)
return;
sysctl_hung_task_warnings--;
/*
* Ok, the task did not get scheduled for more than 2 minutes,
* complain:
*/
printk(KERN_ERR "INFO: task %s:%d blocked for more than "
"%ld seconds.\n", t->comm, t->pid,
sysctl_hung_task_timeout_secs);
printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
" disables this message.\n");
sched_show_task(t);
__debug_show_held_locks(t);
t->last_switch_timestamp = now;
touch_nmi_watchdog();
if (softlockup_panic)
panic("softlockup: blocked tasks");
}
/*
* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
* a really long time (120 seconds). If that happens, print out
* a warning.
*/
static void check_hung_uninterruptible_tasks(int this_cpu)
{
int max_count = sysctl_hung_task_check_count;
unsigned long now = get_timestamp(this_cpu);
struct task_struct *g, *t;
/*
* If the system crashed already then all bets are off,
* do not report extra hung tasks:
*/
if (test_taint(TAINT_DIE) || did_panic)
return;
read_lock(&tasklist_lock);
do_each_thread(g, t) {
if (!--max_count)
goto unlock;
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
if (t->state == TASK_UNINTERRUPTIBLE)
check_hung_task(t, now);
} while_each_thread(g, t);
unlock:
read_unlock(&tasklist_lock);
}
/* /*
* The watchdog thread - runs every second and touches the timestamp. * The watchdog thread - runs every second and touches the timestamp.
*/ */
static int watchdog(void *__bind_cpu) static int watchdog(void *__bind_cpu)
{ {
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
int this_cpu = (long)__bind_cpu;
sched_setscheduler(current, SCHED_FIFO, &param); sched_setscheduler(current, SCHED_FIFO, &param);
...@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu) ...@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu)
if (kthread_should_stop()) if (kthread_should_stop())
break; break;
if (this_cpu == check_cpu) {
if (sysctl_hung_task_timeout_secs)
check_hung_uninterruptible_tasks(this_cpu);
}
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
} }
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
...@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) ...@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
break; break;
case CPU_ONLINE: case CPU_ONLINE:
case CPU_ONLINE_FROZEN: case CPU_ONLINE_FROZEN:
check_cpu = cpumask_any(cpu_online_mask);
wake_up_process(per_cpu(watchdog_task, hotcpu)); wake_up_process(per_cpu(watchdog_task, hotcpu));
break; break;
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
if (hotcpu == check_cpu) {
/* Pick any other online cpu. */
check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
}
break;
case CPU_UP_CANCELED: case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN: case CPU_UP_CANCELED_FROZEN:
if (!per_cpu(watchdog_task, hotcpu)) if (!per_cpu(watchdog_task, hotcpu))
......
...@@ -814,6 +814,19 @@ static struct ctl_table kern_table[] = { ...@@ -814,6 +814,19 @@ static struct ctl_table kern_table[] = {
.extra1 = &neg_one, .extra1 = &neg_one,
.extra2 = &sixty, .extra2 = &sixty,
}, },
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
{
.ctl_name = CTL_UNNUMBERED,
.procname = "hung_task_panic",
.data = &sysctl_hung_task_panic,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &zero,
.extra2 = &one,
},
{ {
.ctl_name = CTL_UNNUMBERED, .ctl_name = CTL_UNNUMBERED,
.procname = "hung_task_check_count", .procname = "hung_task_check_count",
...@@ -829,7 +842,7 @@ static struct ctl_table kern_table[] = { ...@@ -829,7 +842,7 @@ static struct ctl_table kern_table[] = {
.data = &sysctl_hung_task_timeout_secs, .data = &sysctl_hung_task_timeout_secs,
.maxlen = sizeof(unsigned long), .maxlen = sizeof(unsigned long),
.mode = 0644, .mode = 0644,
.proc_handler = &proc_doulongvec_minmax, .proc_handler = &proc_dohung_task_timeout_secs,
.strategy = &sysctl_intvec, .strategy = &sysctl_intvec,
}, },
{ {
......
...@@ -186,6 +186,44 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE ...@@ -186,6 +186,44 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
default 1 if BOOTPARAM_SOFTLOCKUP_PANIC default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
config DETECT_HUNG_TASK
bool "Detect Hung Tasks"
depends on DEBUG_KERNEL
default DETECT_SOFTLOCKUP
help
Say Y here to enable the kernel to detect "hung tasks",
which are bugs that cause the task to be stuck in
uninterruptible "D" state indefinitiley.
When a hung task is detected, the kernel will print the
current stack trace (which you should report), but the
task will stay in uninterruptible state. If lockdep is
enabled then all held locks will also be reported. This
feature has negligible overhead.
config BOOTPARAM_HUNG_TASK_PANIC
bool "Panic (Reboot) On Hung Tasks"
depends on DETECT_HUNG_TASK
help
Say Y here to enable the kernel to panic on "hung tasks",
which are bugs that cause the kernel to leave a task stuck
in uninterruptible "D" state.
The panic can be used in combination with panic_timeout,
to cause the system to reboot automatically after a
hung task has been detected. This feature is useful for
high-availability systems that have uptime guarantees and
where a hung tasks must be resolved ASAP.
Say N if unsure.
config BOOTPARAM_HUNG_TASK_PANIC_VALUE
int
depends on DETECT_HUNG_TASK
range 0 1
default 0 if !BOOTPARAM_HUNG_TASK_PANIC
default 1 if BOOTPARAM_HUNG_TASK_PANIC
config SCHED_DEBUG config SCHED_DEBUG
bool "Collect scheduler debugging info" bool "Collect scheduler debugging info"
depends on DEBUG_KERNEL && PROC_FS depends on DEBUG_KERNEL && PROC_FS
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment