Commit 27efed3e authored by Linus Torvalds

Merge branch 'core-watchdog-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull watchdog clean-up and fixes from Thomas Gleixner:
 "The watchdog (hard/softlockup detector) code is pretty much broken in
  its current state. The patch series addresses this by removing all
  duct tape and refactoring it into a workable state.

  The reasons why I ask for inclusion that late in the cycle are:

   1) The code causes lockdep splats vs. hotplug locking which get
      reported over and over. Unfortunately there is no easy fix.

   2) The risk of breakage is minimal because it's already broken

   3) As 4.14 is a long term stable kernel, I prefer to have working
      watchdog code in it and the lockdep issues resolved. I wouldn't
      ask you to pull if 4.14 weren't an LTS kernel or if the
      solution were easy to backport.

   4) The series was around before the merge window opened, but then got
      delayed due to the UP failure caused by the for_each_cpu()
      surprise which we discussed recently.

  Changes vs. V1:

   - Addressed your review points

   - Addressed the warning in the powerpc code which was discovered late

   - Changed two function names which made sense up to a certain point
     in the series. Now they match what they do in the end.

   - Fixed an 'unused variable' warning which was not detected by the
     Intel robot. I triggered it when trying all possible related config
     combinations manually. Randconfig testing seems not to be random
     enough.

  The changes have been tested and reviewed by Don Zickus, and tested
  and acked by Michael Ellerman for powerpc"

* 'core-watchdog-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (36 commits)
  watchdog/core: Put softlockup_threads_initialized under ifdef guard
  watchdog/core: Rename some softlockup_* functions
  powerpc/watchdog: Make use of watchdog_nmi_probe()
  watchdog/core, powerpc: Lock cpus across reconfiguration
  watchdog/core, powerpc: Replace watchdog_nmi_reconfigure()
  watchdog/hardlockup/perf: Fix spelling mistake: "permanetely" -> "permanently"
  watchdog/hardlockup/perf: Cure UP damage
  watchdog/hardlockup: Clean up hotplug locking mess
  watchdog/hardlockup/perf: Simplify deferred event destroy
  watchdog/hardlockup/perf: Use new perf CPU enable mechanism
  watchdog/hardlockup/perf: Implement CPU enable replacement
  watchdog/hardlockup/perf: Implement init time detection of perf
  watchdog/hardlockup/perf: Implement init time perf validation
  watchdog/core: Get rid of the racy update loop
  watchdog/core, powerpc: Make watchdog_nmi_reconfigure() two stage
  watchdog/sysctl: Clean up sysctl variable name space
  watchdog/sysctl: Get rid of the #ifdeffery
  watchdog/core: Clean up header mess
  watchdog/core: Further simplify sysctl handling
  watchdog/core: Get rid of the thread teardown/setup dance
  ...
parents 7a92616c 0b62bf86
@@ -146,7 +146,7 @@ void machine_power_off(void)
 	/* prevent soft lockup/stalled CPU messages for endless loop. */
 	rcu_sysrq_start();
-	lockup_detector_suspend();
+	lockup_detector_soft_poweroff();
 	for (;;);
 }
...
@@ -310,9 +310,6 @@ static int start_wd_on_cpu(unsigned int cpu)
 	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
 		return 0;

-	if (watchdog_suspended)
-		return 0;
-
 	if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
 		return 0;

@@ -358,36 +355,39 @@ static void watchdog_calc_timeouts(void)
 	wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
 }

-void watchdog_nmi_reconfigure(void)
+void watchdog_nmi_stop(void)
 {
 	int cpu;

-	watchdog_calc_timeouts();
-
 	for_each_cpu(cpu, &wd_cpus_enabled)
 		stop_wd_on_cpu(cpu);
+}

+void watchdog_nmi_start(void)
+{
+	int cpu;
+
+	watchdog_calc_timeouts();
 	for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
 		start_wd_on_cpu(cpu);
 }

 /*
- * This runs after lockup_detector_init() which sets up watchdog_cpumask.
+ * Invoked from core watchdog init.
  */
-static int __init powerpc_watchdog_init(void)
+int __init watchdog_nmi_probe(void)
 {
 	int err;

-	watchdog_calc_timeouts();
-
-	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online",
-				start_wd_on_cpu, stop_wd_on_cpu);
-	if (err < 0)
+	err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+					"powerpc/watchdog:online",
+					start_wd_on_cpu, stop_wd_on_cpu);
+	if (err < 0) {
 		pr_warn("Watchdog could not be initialized");
-
+		return err;
+	}
 	return 0;
 }
-arch_initcall(powerpc_watchdog_init);

 static void handle_backtrace_ipi(struct pt_regs *regs)
 {
...
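The powerpc hunk above is one half of an override pattern: the core code (see the kernel/watchdog.c hunks below) declares watchdog_nmi_stop(), watchdog_nmi_start() and watchdog_nmi_probe() as __weak no-op defaults, and an architecture with its own NMI watchdog supplies strong definitions. A minimal standalone sketch of the link-time mechanism, assuming the GCC/Clang weak-symbol semantics that the kernel's __weak macro expands to:

	/* core file: weak default, used when no arch override is linked */
	__attribute__((weak)) void watchdog_nmi_stop(void)
	{
		/* nothing to quiesce by default */
	}

	/* arch file (e.g. powerpc): a strong definition of the same symbol
	 * replaces the weak default at link time, no #ifdef required */
	void watchdog_nmi_stop(void)
	{
		/* stop the arch-specific watchdog timers here */
	}

When both objects are linked, the strong definition wins silently; when the arch file is absent, the weak default keeps the core code building and working.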
@@ -4409,10 +4409,9 @@ static __init int fixup_ht_bug(void)
 		return 0;
 	}

-	if (lockup_detector_suspend() != 0) {
-		pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n");
-		return 0;
-	}
+	cpus_read_lock();
+
+	hardlockup_detector_perf_stop();

 	x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);

@@ -4420,9 +4419,7 @@ static __init int fixup_ht_bug(void)
 	x86_pmu.commit_scheduling = NULL;
 	x86_pmu.stop_scheduling = NULL;

-	lockup_detector_resume();
-
-	cpus_read_lock();
+	hardlockup_detector_perf_restart();

 	for_each_online_cpu(c)
 		free_excl_cntrs(c);
...
@@ -12,11 +12,31 @@
 #ifdef CONFIG_LOCKUP_DETECTOR
 void lockup_detector_init(void);
+void lockup_detector_soft_poweroff(void);
+void lockup_detector_cleanup(void);
+bool is_hardlockup(void);
+
+extern int watchdog_user_enabled;
+extern int nmi_watchdog_user_enabled;
+extern int soft_watchdog_user_enabled;
+extern int watchdog_thresh;
+extern unsigned long watchdog_enabled;
+
+extern struct cpumask watchdog_cpumask;
+extern unsigned long *watchdog_cpumask_bits;
+
+#ifdef CONFIG_SMP
+extern int sysctl_softlockup_all_cpu_backtrace;
+extern int sysctl_hardlockup_all_cpu_backtrace;
 #else
-static inline void lockup_detector_init(void)
-{
-}
-#endif
+#define sysctl_softlockup_all_cpu_backtrace 0
+#define sysctl_hardlockup_all_cpu_backtrace 0
+#endif /* !CONFIG_SMP */
+
+#else /* CONFIG_LOCKUP_DETECTOR */
+static inline void lockup_detector_init(void) { }
+static inline void lockup_detector_soft_poweroff(void) { }
+static inline void lockup_detector_cleanup(void) { }
+#endif /* !CONFIG_LOCKUP_DETECTOR */

 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
 extern void touch_softlockup_watchdog_sched(void);
@@ -24,29 +44,17 @@ extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern unsigned int softlockup_panic;
-extern int soft_watchdog_enabled;
-extern atomic_t watchdog_park_in_progress;
 #else
-static inline void touch_softlockup_watchdog_sched(void)
-{
-}
-static inline void touch_softlockup_watchdog(void)
-{
-}
-static inline void touch_softlockup_watchdog_sync(void)
-{
-}
-static inline void touch_all_softlockup_watchdogs(void)
-{
-}
+static inline void touch_softlockup_watchdog_sched(void) { }
+static inline void touch_softlockup_watchdog(void) { }
+static inline void touch_softlockup_watchdog_sync(void) { }
+static inline void touch_all_softlockup_watchdogs(void) { }
 #endif

 #ifdef CONFIG_DETECT_HUNG_TASK
 void reset_hung_task_detector(void);
 #else
-static inline void reset_hung_task_detector(void)
-{
-}
+static inline void reset_hung_task_detector(void) { }
 #endif

 /*
@@ -54,12 +62,12 @@ static inline void reset_hung_task_detector(void)
  * 'watchdog_enabled' variable. Each lockup detector has its dedicated bit -
  * bit 0 for the hard lockup detector and bit 1 for the soft lockup detector.
  *
- * 'watchdog_user_enabled', 'nmi_watchdog_enabled' and 'soft_watchdog_enabled'
- * are variables that are only used as an 'interface' between the parameters
- * in /proc/sys/kernel and the internal state bits in 'watchdog_enabled'. The
- * 'watchdog_thresh' variable is handled differently because its value is not
- * boolean, and the lockup detectors are 'suspended' while 'watchdog_thresh'
- * is equal zero.
+ * 'watchdog_user_enabled', 'nmi_watchdog_user_enabled' and
+ * 'soft_watchdog_user_enabled' are variables that are only used as an
+ * 'interface' between the parameters in /proc/sys/kernel and the internal
+ * state bits in 'watchdog_enabled'. The 'watchdog_thresh' variable is
+ * handled differently because its value is not boolean, and the lockup
+ * detectors are 'suspended' while 'watchdog_thresh' is equal zero.
  */
 #define NMI_WATCHDOG_ENABLED_BIT	0
 #define SOFT_WATCHDOG_ENABLED_BIT	1
@@ -73,17 +81,41 @@ extern unsigned int hardlockup_panic;
 static inline void hardlockup_detector_disable(void) {}
 #endif

+#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
+# define NMI_WATCHDOG_SYSCTL_PERM	0644
+#else
+# define NMI_WATCHDOG_SYSCTL_PERM	0444
+#endif
+
 #if defined(CONFIG_HARDLOCKUP_DETECTOR_PERF)
 extern void arch_touch_nmi_watchdog(void);
+extern void hardlockup_detector_perf_stop(void);
+extern void hardlockup_detector_perf_restart(void);
+extern void hardlockup_detector_perf_disable(void);
+extern void hardlockup_detector_perf_enable(void);
+extern void hardlockup_detector_perf_cleanup(void);
+extern int hardlockup_detector_perf_init(void);
 #else
-#if !defined(CONFIG_HAVE_NMI_WATCHDOG)
+static inline void hardlockup_detector_perf_stop(void) { }
+static inline void hardlockup_detector_perf_restart(void) { }
+static inline void hardlockup_detector_perf_disable(void) { }
+static inline void hardlockup_detector_perf_enable(void) { }
+static inline void hardlockup_detector_perf_cleanup(void) { }
+# if !defined(CONFIG_HAVE_NMI_WATCHDOG)
+static inline int hardlockup_detector_perf_init(void) { return -ENODEV; }
 static inline void arch_touch_nmi_watchdog(void) {}
+# else
+static inline int hardlockup_detector_perf_init(void) { return 0; }
+# endif
 #endif
-#endif
+
+void watchdog_nmi_stop(void);
+void watchdog_nmi_start(void);
+int watchdog_nmi_probe(void);

 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
  *
  * If the architecture supports the NMI watchdog, touch_nmi_watchdog()
  * may be used to reset the timeout - for code which intentionally
  * disables interrupts for a long time. This call is stateless.
@@ -153,22 +185,6 @@ static inline bool trigger_single_cpu_backtrace(int cpu)
 u64 hw_nmi_get_sample_period(int watchdog_thresh);
 #endif

-#ifdef CONFIG_LOCKUP_DETECTOR
-extern int nmi_watchdog_enabled;
-extern int watchdog_user_enabled;
-extern int watchdog_thresh;
-extern unsigned long watchdog_enabled;
-extern struct cpumask watchdog_cpumask;
-extern unsigned long *watchdog_cpumask_bits;
-extern int __read_mostly watchdog_suspended;
-#ifdef CONFIG_SMP
-extern int sysctl_softlockup_all_cpu_backtrace;
-extern int sysctl_hardlockup_all_cpu_backtrace;
-#else
-#define sysctl_softlockup_all_cpu_backtrace 0
-#define sysctl_hardlockup_all_cpu_backtrace 0
-#endif
-
 #if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
     defined(CONFIG_HARDLOCKUP_DETECTOR)
 void watchdog_update_hrtimer_threshold(u64 period);
@@ -176,7 +192,6 @@ void watchdog_update_hrtimer_threshold(u64 period);
 static inline void watchdog_update_hrtimer_threshold(u64 period) { }
 #endif

-extern bool is_hardlockup(void);
 struct ctl_table;
 extern int proc_watchdog(struct ctl_table *, int ,
 			 void __user *, size_t *, loff_t *);
@@ -188,18 +203,6 @@ extern int proc_watchdog_thresh(struct ctl_table *, int ,
 				void __user *, size_t *, loff_t *);
 extern int proc_watchdog_cpumask(struct ctl_table *, int,
 				 void __user *, size_t *, loff_t *);
-extern int lockup_detector_suspend(void);
-extern void lockup_detector_resume(void);
-#else
-static inline int lockup_detector_suspend(void)
-{
-	return 0;
-}
-
-static inline void lockup_detector_resume(void)
-{
-}
-#endif

 #ifdef CONFIG_HAVE_ACPI_APEI_NMI
 #include <asm/nmi.h>
...
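The hardlockup_detector_perf_stop()/restart() pair declared above has exactly one user, the x86 HT-bug workaround shown earlier. A condensed sketch of the calling convention (illustrative, not a complete kernel function; the middle section stands in for arbitrary PMU reprogramming):

	cpus_read_lock();			/* both calls assert the hotplug lock is held */
	hardlockup_detector_perf_stop();	/* disable the event on every online CPU */

	/* ... reprogram PMU state the watchdog counters depend on ... */

	hardlockup_detector_perf_restart();	/* re-enable, but only if NMI_WATCHDOG_ENABLED */
	cpus_read_unlock();

Unlike the lockup_detector_suspend()/resume() pair it replaces, neither call can fail, which is why fixup_ht_bug() loses its error path in the hunk above.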
@@ -55,7 +55,7 @@ smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
 }

 void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread);
-int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
+void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
 					 const struct cpumask *);
 #endif
...
@@ -24,6 +24,7 @@
 #include <linux/lockdep.h>
 #include <linux/tick.h>
 #include <linux/irq.h>
+#include <linux/nmi.h>
 #include <linux/smpboot.h>
 #include <linux/relay.h>
 #include <linux/slab.h>
@@ -897,6 +898,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 out:
 	cpus_write_unlock();
+
+	/*
+	 * Do post unplug cleanup. This is still protected against
+	 * concurrent CPU hotplug via cpu_add_remove_lock.
+	 */
+	lockup_detector_cleanup();
 	return ret;
 }
...
@@ -344,39 +344,30 @@ EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
  * by the client, but only by calling this function.
  * This function can only be called on a registered smp_hotplug_thread.
  */
-int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
-					 const struct cpumask *new)
+void smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
+					  const struct cpumask *new)
 {
 	struct cpumask *old = plug_thread->cpumask;
-	cpumask_var_t tmp;
+	static struct cpumask tmp;
 	unsigned int cpu;

-	if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
-		return -ENOMEM;
-
-	get_online_cpus();
+	lockdep_assert_cpus_held();
 	mutex_lock(&smpboot_threads_lock);

 	/* Park threads that were exclusively enabled on the old mask. */
-	cpumask_andnot(tmp, old, new);
-	for_each_cpu_and(cpu, tmp, cpu_online_mask)
+	cpumask_andnot(&tmp, old, new);
+	for_each_cpu_and(cpu, &tmp, cpu_online_mask)
 		smpboot_park_thread(plug_thread, cpu);

 	/* Unpark threads that are exclusively enabled on the new mask. */
-	cpumask_andnot(tmp, new, old);
-	for_each_cpu_and(cpu, tmp, cpu_online_mask)
+	cpumask_andnot(&tmp, new, old);
+	for_each_cpu_and(cpu, &tmp, cpu_online_mask)
 		smpboot_unpark_thread(plug_thread, cpu);

 	cpumask_copy(old, new);
 	mutex_unlock(&smpboot_threads_lock);
-	put_online_cpus();
-	free_cpumask_var(tmp);
-
-	return 0;
 }
-EXPORT_SYMBOL_GPL(smpboot_update_cpumask_percpu_thread);

 static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
...
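Note the changed contract: smpboot_update_cpumask_percpu_thread() now returns void, uses a static cpumask instead of allocating one (so the -ENOMEM failure mode disappears), and lockdep-asserts that the caller already holds the CPU hotplug lock. A hypothetical caller under the new convention, modeled on softlockup_update_smpboot_threads() in the kernel/watchdog.c hunks below:

	/* Illustrative only: 'my_thread' stands for any registered
	 * smp_hotplug_thread. The hotplug lock moves into the caller. */
	static void update_my_thread_mask(const struct cpumask *new)
	{
		cpus_read_lock();
		smpboot_update_cpumask_percpu_thread(&my_thread, new);
		cpus_read_unlock();
	}

The static cpumask is safe because the body of the function is serialized by smpboot_threads_lock, so concurrent callers cannot observe each other's temporary mask.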
@@ -872,9 +872,9 @@ static struct ctl_table kern_table[] = {
 #if defined(CONFIG_LOCKUP_DETECTOR)
 	{
 		.procname	= "watchdog",
 		.data		= &watchdog_user_enabled,
-		.maxlen		= sizeof (int),
+		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_watchdog,
 		.extra1		= &zero,
 		.extra2		= &one,
@@ -890,16 +890,12 @@ static struct ctl_table kern_table[] = {
 	},
 	{
 		.procname	= "nmi_watchdog",
-		.data		= &nmi_watchdog_enabled,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
+		.data		= &nmi_watchdog_user_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= NMI_WATCHDOG_SYSCTL_PERM,
 		.proc_handler	= proc_nmi_watchdog,
 		.extra1		= &zero,
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
 		.extra2		= &one,
-#else
-		.extra2		= &zero,
-#endif
 	},
 	{
 		.procname	= "watchdog_cpumask",
@@ -911,9 +907,9 @@ static struct ctl_table kern_table[] = {
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
 	{
 		.procname	= "soft_watchdog",
-		.data		= &soft_watchdog_enabled,
-		.maxlen		= sizeof (int),
+		.data		= &soft_watchdog_user_enabled,
+		.maxlen		= sizeof(int),
 		.mode		= 0644,
 		.proc_handler	= proc_soft_watchdog,
 		.extra1		= &zero,
 		.extra2		= &one,
...
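The visible effect of NMI_WATCHDOG_SYSCTL_PERM is that /proc/sys/kernel/nmi_watchdog becomes mode 0444 on kernels with neither CONFIG_HARDLOCKUP_DETECTOR nor CONFIG_HAVE_NMI_WATCHDOG, replacing the old trick of clamping the writable range to 0..0 via extra2. A small userspace sketch of what a writer now sees on such a kernel (illustrative; proc sysctl permission checks test the mode bits directly, so this typically fails even for root):

	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		/* Without hardlockup detector support the file is read-only,
		 * so the open for writing itself is refused. */
		int fd = open("/proc/sys/kernel/nmi_watchdog", O_WRONLY);

		if (fd < 0)
			printf("open for write failed: %s\n", strerror(errno));
		return 0;
	}

Runtime unavailability is handled separately: proc_nmi_watchdog() (further down) returns -ENOTSUPP for writes when init-time probing found no usable perf event.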
@@ -29,20 +29,29 @@
 #include <linux/kvm_para.h>
 #include <linux/kthread.h>

-/* Watchdog configuration */
-static DEFINE_MUTEX(watchdog_proc_mutex);
-
-int __read_mostly nmi_watchdog_enabled;
+static DEFINE_MUTEX(watchdog_mutex);

 #if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
-unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED |
-						NMI_WATCHDOG_ENABLED;
+# define WATCHDOG_DEFAULT	(SOFT_WATCHDOG_ENABLED | NMI_WATCHDOG_ENABLED)
+# define NMI_WATCHDOG_DEFAULT	1
 #else
-unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
+# define WATCHDOG_DEFAULT	(SOFT_WATCHDOG_ENABLED)
+# define NMI_WATCHDOG_DEFAULT	0
 #endif

+unsigned long __read_mostly watchdog_enabled;
+int __read_mostly watchdog_user_enabled = 1;
+int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT;
+int __read_mostly soft_watchdog_user_enabled = 1;
+int __read_mostly watchdog_thresh = 10;
+int __read_mostly nmi_watchdog_available;
+
+struct cpumask watchdog_allowed_mask __read_mostly;
+
+struct cpumask watchdog_cpumask __read_mostly;
+unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
+
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
-/* boot commands */
 /*
  * Should we panic when a soft-lockup or hard-lockup occurs:
  */
@@ -56,9 +65,9 @@ unsigned int __read_mostly hardlockup_panic =
  * kernel command line parameters are parsed, because otherwise it is not
  * possible to override this in hardlockup_panic_setup().
  */
-void hardlockup_detector_disable(void)
+void __init hardlockup_detector_disable(void)
 {
-	watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+	nmi_watchdog_user_enabled = 0;
 }

 static int __init hardlockup_panic_setup(char *str)
@@ -68,48 +77,24 @@ static int __init hardlockup_panic_setup(char *str)
 	else if (!strncmp(str, "nopanic", 7))
 		hardlockup_panic = 0;
 	else if (!strncmp(str, "0", 1))
-		watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+		nmi_watchdog_user_enabled = 0;
 	else if (!strncmp(str, "1", 1))
-		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+		nmi_watchdog_user_enabled = 1;
 	return 1;
 }
 __setup("nmi_watchdog=", hardlockup_panic_setup);
-#endif
-
-#ifdef CONFIG_SOFTLOCKUP_DETECTOR
-int __read_mostly soft_watchdog_enabled;
-#endif

-int __read_mostly watchdog_user_enabled;
-int __read_mostly watchdog_thresh = 10;
-
-#ifdef CONFIG_SMP
-int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+# ifdef CONFIG_SMP
 int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
-#endif

-struct cpumask watchdog_cpumask __read_mostly;
-unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
-
-/*
- * The 'watchdog_running' variable is set to 1 when the watchdog threads
- * are registered/started and is set to 0 when the watchdog threads are
- * unregistered/stopped, so it is an indicator whether the threads exist.
- */
-static int __read_mostly watchdog_running;
-
-/*
- * If a subsystem has a need to deactivate the watchdog temporarily, it
- * can use the suspend/resume interface to achieve this. The content of
- * the 'watchdog_suspended' variable reflects this state. Existing threads
- * are parked/unparked by the lockup_detector_{suspend|resume} functions
- * (see comment blocks pertaining to those functions for further details).
- *
- * 'watchdog_suspended' also prevents threads from being registered/started
- * or unregistered/stopped via parameters in /proc/sys/kernel, so the state
- * of 'watchdog_running' cannot change while the watchdog is deactivated
- * temporarily (see related code in 'proc' handlers).
- */
-int __read_mostly watchdog_suspended;
+static int __init hardlockup_all_cpu_backtrace_setup(char *str)
+{
+	sysctl_hardlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
+	return 1;
+}
+__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
+# endif /* CONFIG_SMP */
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */

 /*
  * These functions can be overridden if an architecture implements its
@@ -121,36 +106,68 @@ int __read_mostly watchdog_suspended;
  */
 int __weak watchdog_nmi_enable(unsigned int cpu)
 {
+	hardlockup_detector_perf_enable();
 	return 0;
 }
+
 void __weak watchdog_nmi_disable(unsigned int cpu)
 {
+	hardlockup_detector_perf_disable();
 }

-/*
- * watchdog_nmi_reconfigure can be implemented to be notified after any
- * watchdog configuration change. The arch hardlockup watchdog should
- * respond to the following variables:
- * - nmi_watchdog_enabled
+/* Return 0, if a NMI watchdog is available. Error code otherwise */
+int __weak __init watchdog_nmi_probe(void)
+{
+	return hardlockup_detector_perf_init();
+}
+
+/**
+ * watchdog_nmi_stop - Stop the watchdog for reconfiguration
+ *
+ * The reconfiguration steps are:
+ * watchdog_nmi_stop();
+ * update_variables();
+ * watchdog_nmi_start();
+ */
+void __weak watchdog_nmi_stop(void) { }
+
+/**
+ * watchdog_nmi_start - Start the watchdog after reconfiguration
+ *
+ * Counterpart to watchdog_nmi_stop().
+ *
+ * The following variables have been updated in update_variables() and
+ * contain the currently valid configuration:
+ * - watchdog_enabled
  * - watchdog_thresh
  * - watchdog_cpumask
- * - sysctl_hardlockup_all_cpu_backtrace
- * - hardlockup_panic
- * - watchdog_suspended
  */
-void __weak watchdog_nmi_reconfigure(void)
+void __weak watchdog_nmi_start(void) { }
+
+/**
+ * lockup_detector_update_enable - Update the sysctl enable bit
+ *
+ * Caller needs to make sure that the NMI/perf watchdogs are off, so this
+ * can't race with watchdog_nmi_disable().
+ */
+static void lockup_detector_update_enable(void)
 {
+	watchdog_enabled = 0;
+	if (!watchdog_user_enabled)
+		return;
+	if (nmi_watchdog_available && nmi_watchdog_user_enabled)
+		watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+	if (soft_watchdog_user_enabled)
+		watchdog_enabled |= SOFT_WATCHDOG_ENABLED;
 }

 #ifdef CONFIG_SOFTLOCKUP_DETECTOR

-/* Helper for online, unparked cpus. */
-#define for_each_watchdog_cpu(cpu) \
-	for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
-
-atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
+/* Global variables, exported for sysctl */
+unsigned int __read_mostly softlockup_panic =
+			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
+
+static bool softlockup_threads_initialized __read_mostly;

 static u64 __read_mostly sample_period;

 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
@@ -164,50 +181,40 @@ static DEFINE_PER_CPU(struct task_struct *, softlockup_task_ptr_saved);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static unsigned long soft_lockup_nmi_warn;

-unsigned int __read_mostly softlockup_panic =
-			CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
-
 static int __init softlockup_panic_setup(char *str)
 {
 	softlockup_panic = simple_strtoul(str, NULL, 0);
 	return 1;
 }
 __setup("softlockup_panic=", softlockup_panic_setup);

 static int __init nowatchdog_setup(char *str)
 {
-	watchdog_enabled = 0;
+	watchdog_user_enabled = 0;
 	return 1;
 }
 __setup("nowatchdog", nowatchdog_setup);

 static int __init nosoftlockup_setup(char *str)
 {
-	watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED;
+	soft_watchdog_user_enabled = 0;
 	return 1;
 }
 __setup("nosoftlockup", nosoftlockup_setup);

 #ifdef CONFIG_SMP
+int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+
 static int __init softlockup_all_cpu_backtrace_setup(char *str)
 {
-	sysctl_softlockup_all_cpu_backtrace =
-		!!simple_strtol(str, NULL, 0);
+	sysctl_softlockup_all_cpu_backtrace = !!simple_strtol(str, NULL, 0);
 	return 1;
 }
 __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-static int __init hardlockup_all_cpu_backtrace_setup(char *str)
-{
-	sysctl_hardlockup_all_cpu_backtrace =
-		!!simple_strtol(str, NULL, 0);
-	return 1;
-}
-__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
-#endif
 #endif

+static void __lockup_detector_cleanup(void);
+
 /*
  * Hard-lockup warnings should be triggered after just a few seconds. Soft-
  * lockups can have false positives under extreme conditions. So we generally
@@ -278,11 +285,15 @@ void touch_all_softlockup_watchdogs(void)
 	int cpu;

 	/*
-	 * this is done lockless
-	 * do we care if a 0 races with a timestamp?
-	 * all it means is the softlock check starts one cycle later
+	 * watchdog_mutex cannpt be taken here, as this might be called
+	 * from (soft)interrupt context, so the access to
+	 * watchdog_allowed_cpumask might race with a concurrent update.
+	 *
+	 * The watchdog time stamp can race against a concurrent real
+	 * update as well, the only side effect might be a cycle delay for
+	 * the softlockup check.
 	 */
-	for_each_watchdog_cpu(cpu)
+	for_each_cpu(cpu, &watchdog_allowed_mask)
 		per_cpu(watchdog_touch_ts, cpu) = 0;
 	wq_watchdog_touch(-1);
 }
@@ -322,9 +333,6 @@ static void watchdog_interrupt_count(void)
 	__this_cpu_inc(hrtimer_interrupts);
 }

-static int watchdog_enable_all_cpus(void);
-static void watchdog_disable_all_cpus(void);
-
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -333,7 +341,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	int duration;
 	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;

-	if (atomic_read(&watchdog_park_in_progress) != 0)
+	if (!watchdog_enabled)
 		return HRTIMER_NORESTART;

 	/* kick the hardlockup detector */
@@ -447,32 +455,38 @@ static void watchdog_set_prio(unsigned int policy, unsigned int prio)

 static void watchdog_enable(unsigned int cpu)
 {
-	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
+	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);

-	/* kick off the timer for the hardlockup detector */
+	/*
+	 * Start the timer first to prevent the NMI watchdog triggering
+	 * before the timer has a chance to fire.
+	 */
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer->function = watchdog_timer_fn;
-
-	/* Enable the perf event */
-	watchdog_nmi_enable(cpu);
-
-	/* done here because hrtimer_start can only pin to smp_processor_id() */
 	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
 		      HRTIMER_MODE_REL_PINNED);

-	/* initialize timestamp */
-	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
+	/* Initialize timestamp */
 	__touch_watchdog();
+	/* Enable the perf event */
+	if (watchdog_enabled & NMI_WATCHDOG_ENABLED)
+		watchdog_nmi_enable(cpu);
+
+	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
 }

 static void watchdog_disable(unsigned int cpu)
 {
-	struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);
+	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);

 	watchdog_set_prio(SCHED_NORMAL, 0);
-	hrtimer_cancel(hrtimer);
-	/* disable the perf event */
+	/*
+	 * Disable the perf event first. That prevents that a large delay
+	 * between disabling the timer and disabling the perf event causes
+	 * the perf NMI to detect a false positive.
+	 */
 	watchdog_nmi_disable(cpu);
+	hrtimer_cancel(hrtimer);
 }

 static void watchdog_cleanup(unsigned int cpu, bool online)
@@ -499,21 +513,6 @@ static void watchdog(unsigned int cpu)
 	__this_cpu_write(soft_lockup_hrtimer_cnt,
 			 __this_cpu_read(hrtimer_interrupts));
 	__touch_watchdog();
-
-	/*
-	 * watchdog_nmi_enable() clears the NMI_WATCHDOG_ENABLED bit in the
-	 * failure path. Check for failures that can occur asynchronously -
-	 * for example, when CPUs are on-lined - and shut down the hardware
-	 * perf event on each CPU accordingly.
-	 *
-	 * The only non-obvious place this bit can be cleared is through
-	 * watchdog_nmi_enable(), so a pr_info() is placed there. Placing a
-	 * pr_info here would be too noisy as it would result in a message
-	 * every few seconds if the hardlockup was disabled but the softlockup
-	 * enabled.
-	 */
-	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
-		watchdog_nmi_disable(cpu);
 }

 static struct smp_hotplug_thread watchdog_threads = {
@@ -527,295 +526,174 @@ static struct smp_hotplug_thread watchdog_threads = {
 	.unpark			= watchdog_enable,
 };

-/*
- * park all watchdog threads that are specified in 'watchdog_cpumask'
- *
- * This function returns an error if kthread_park() of a watchdog thread
- * fails. In this situation, the watchdog threads of some CPUs can already
- * be parked and the watchdog threads of other CPUs can still be runnable.
- * Callers are expected to handle this special condition as appropriate in
- * their context.
- *
- * This function may only be called in a context that is protected against
- * races with CPU hotplug - for example, via get_online_cpus().
- */
-static int watchdog_park_threads(void)
+static void softlockup_update_smpboot_threads(void)
 {
-	int cpu, ret = 0;
+	lockdep_assert_held(&watchdog_mutex);

-	atomic_set(&watchdog_park_in_progress, 1);
+	if (!softlockup_threads_initialized)
+		return;

-	for_each_watchdog_cpu(cpu) {
-		ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
-		if (ret)
-			break;
-	}
-
-	atomic_set(&watchdog_park_in_progress, 0);
-
-	return ret;
+	smpboot_update_cpumask_percpu_thread(&watchdog_threads,
+					     &watchdog_allowed_mask);
 }

-/*
- * unpark all watchdog threads that are specified in 'watchdog_cpumask'
- *
- * This function may only be called in a context that is protected against
- * races with CPU hotplug - for example, via get_online_cpus().
- */
-static void watchdog_unpark_threads(void)
+/* Temporarily park all watchdog threads */
+static void softlockup_park_all_threads(void)
 {
-	int cpu;
-
-	for_each_watchdog_cpu(cpu)
-		kthread_unpark(per_cpu(softlockup_watchdog, cpu));
+	cpumask_clear(&watchdog_allowed_mask);
+	softlockup_update_smpboot_threads();
 }

-static int update_watchdog_all_cpus(void)
+/* Unpark enabled threads */
+static void softlockup_unpark_threads(void)
 {
-	int ret;
-
-	ret = watchdog_park_threads();
-	if (ret)
-		return ret;
-
-	watchdog_unpark_threads();
-
-	return 0;
+	cpumask_copy(&watchdog_allowed_mask, &watchdog_cpumask);
+	softlockup_update_smpboot_threads();
 }

-static int watchdog_enable_all_cpus(void)
+static void lockup_detector_reconfigure(void)
 {
-	int err = 0;
-
-	if (!watchdog_running) {
-		err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
-							     &watchdog_cpumask);
-		if (err)
-			pr_err("Failed to create watchdog threads, disabled\n");
-		else
-			watchdog_running = 1;
-	} else {
-		/*
-		 * Enable/disable the lockup detectors or
-		 * change the sample period 'on the fly'.
-		 */
-		err = update_watchdog_all_cpus();
-
-		if (err) {
-			watchdog_disable_all_cpus();
-			pr_err("Failed to update lockup detectors, disabled\n");
-		}
-	}
-
-	if (err)
-		watchdog_enabled = 0;
-
-	return err;
+	cpus_read_lock();
+	watchdog_nmi_stop();
+	softlockup_park_all_threads();
+	set_sample_period();
+	lockup_detector_update_enable();
+	if (watchdog_enabled && watchdog_thresh)
+		softlockup_unpark_threads();
+	watchdog_nmi_start();
+	cpus_read_unlock();
+	/*
+	 * Must be called outside the cpus locked section to prevent
+	 * recursive locking in the perf code.
+	 */
+	__lockup_detector_cleanup();
 }

-static void watchdog_disable_all_cpus(void)
+/*
+ * Create the watchdog thread infrastructure and configure the detector(s).
+ *
+ * The threads are not unparked as watchdog_allowed_mask is empty. When
+ * the threads are sucessfully initialized, take the proper locks and
+ * unpark the threads in the watchdog_cpumask if the watchdog is enabled.
+ */
+static __init void lockup_detector_setup(void)
 {
-	if (watchdog_running) {
-		watchdog_running = 0;
-		smpboot_unregister_percpu_thread(&watchdog_threads);
-	}
-}
+	int ret;

-#ifdef CONFIG_SYSCTL
-static int watchdog_update_cpus(void)
-{
-	return smpboot_update_cpumask_percpu_thread(
-		    &watchdog_threads, &watchdog_cpumask);
-}
-#endif
+	/*
+	 * If sysctl is off and watchdog got disabled on the command line,
+	 * nothing to do here.
+	 */
+	lockup_detector_update_enable();

-#else /* SOFTLOCKUP */
-static int watchdog_park_threads(void)
-{
-	return 0;
-}
+	if (!IS_ENABLED(CONFIG_SYSCTL) &&
+	    !(watchdog_enabled && watchdog_thresh))
+		return;

-static void watchdog_unpark_threads(void)
-{
-}
+	ret = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
+						     &watchdog_allowed_mask);
+	if (ret) {
+		pr_err("Failed to initialize soft lockup detector threads\n");
+		return;
+	}

-static int watchdog_enable_all_cpus(void)
-{
-	return 0;
+	mutex_lock(&watchdog_mutex);
+	softlockup_threads_initialized = true;
+	lockup_detector_reconfigure();
+	mutex_unlock(&watchdog_mutex);
 }

-static void watchdog_disable_all_cpus(void)
+#else /* CONFIG_SOFTLOCKUP_DETECTOR */
+static inline int watchdog_park_threads(void) { return 0; }
+static inline void watchdog_unpark_threads(void) { }
+static inline int watchdog_enable_all_cpus(void) { return 0; }
+static inline void watchdog_disable_all_cpus(void) { }
+static void lockup_detector_reconfigure(void)
 {
+	cpus_read_lock();
+	watchdog_nmi_stop();
+	lockup_detector_update_enable();
+	watchdog_nmi_start();
+	cpus_read_unlock();
 }
-
-#ifdef CONFIG_SYSCTL
-static int watchdog_update_cpus(void)
+static inline void lockup_detector_setup(void)
 {
-	return 0;
+	lockup_detector_reconfigure();
 }
-#endif
+#endif /* !CONFIG_SOFTLOCKUP_DETECTOR */

-static void set_sample_period(void)
+static void __lockup_detector_cleanup(void)
 {
+	lockdep_assert_held(&watchdog_mutex);
+	hardlockup_detector_perf_cleanup();
 }
-#endif /* SOFTLOCKUP */

-/*
- * Suspend the hard and soft lockup detector by parking the watchdog threads.
+/**
+ * lockup_detector_cleanup - Cleanup after cpu hotplug or sysctl changes
+ *
+ * Caller must not hold the cpu hotplug rwsem.
  */
-int lockup_detector_suspend(void)
+void lockup_detector_cleanup(void)
 {
-	int ret = 0;
-
-	get_online_cpus();
-	mutex_lock(&watchdog_proc_mutex);
-	/*
-	 * Multiple suspend requests can be active in parallel (counted by
-	 * the 'watchdog_suspended' variable). If the watchdog threads are
-	 * running, the first caller takes care that they will be parked.
-	 * The state of 'watchdog_running' cannot change while a suspend
-	 * request is active (see related code in 'proc' handlers).
-	 */
-	if (watchdog_running && !watchdog_suspended)
-		ret = watchdog_park_threads();
-
-	if (ret == 0)
-		watchdog_suspended++;
-	else {
-		watchdog_disable_all_cpus();
-		pr_err("Failed to suspend lockup detectors, disabled\n");
-		watchdog_enabled = 0;
-	}
-
-	watchdog_nmi_reconfigure();
-
-	mutex_unlock(&watchdog_proc_mutex);
-
-	return ret;
+	mutex_lock(&watchdog_mutex);
+	__lockup_detector_cleanup();
+	mutex_unlock(&watchdog_mutex);
 }

-/*
- * Resume the hard and soft lockup detector by unparking the watchdog threads.
+/**
+ * lockup_detector_soft_poweroff - Interface to stop lockup detector(s)
+ *
+ * Special interface for parisc. It prevents lockup detector warnings from
+ * the default pm_poweroff() function which busy loops forever.
  */
-void lockup_detector_resume(void)
+void lockup_detector_soft_poweroff(void)
 {
-	mutex_lock(&watchdog_proc_mutex);
-
-	watchdog_suspended--;
-	/*
-	 * The watchdog threads are unparked if they were previously running
-	 * and if there is no more active suspend request.
-	 */
-	if (watchdog_running && !watchdog_suspended)
-		watchdog_unpark_threads();
-
-	watchdog_nmi_reconfigure();
-
-	mutex_unlock(&watchdog_proc_mutex);
-	put_online_cpus();
+	watchdog_enabled = 0;
 }

 #ifdef CONFIG_SYSCTL

-/*
- * Update the run state of the lockup detectors.
- */
-static int proc_watchdog_update(void)
+/* Propagate any changes to the watchdog threads */
+static void proc_watchdog_update(void)
 {
-	int err = 0;
-
-	/*
-	 * Watchdog threads won't be started if they are already active.
-	 * The 'watchdog_running' variable in watchdog_*_all_cpus() takes
-	 * care of this. If those threads are already active, the sample
-	 * period will be updated and the lockup detectors will be enabled
-	 * or disabled 'on the fly'.
-	 */
-	if (watchdog_enabled && watchdog_thresh)
-		err = watchdog_enable_all_cpus();
-	else
-		watchdog_disable_all_cpus();
-
-	watchdog_nmi_reconfigure();
-
-	return err;
+	/* Remove impossible cpus to keep sysctl output clean. */
+	cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
+	lockup_detector_reconfigure();
 }

 /*
  * common function for watchdog, nmi_watchdog and soft_watchdog parameter
  *
- * caller             | table->data points to | 'which' contains the flag(s)
- * -------------------|-----------------------|-----------------------------
- * proc_watchdog      | watchdog_user_enabled | NMI_WATCHDOG_ENABLED or'ed
- *                    |                       | with SOFT_WATCHDOG_ENABLED
- * -------------------|-----------------------|-----------------------------
- * proc_nmi_watchdog  | nmi_watchdog_enabled  | NMI_WATCHDOG_ENABLED
- * -------------------|-----------------------|-----------------------------
- * proc_soft_watchdog | soft_watchdog_enabled | SOFT_WATCHDOG_ENABLED
+ * caller             | table->data points to      | 'which'
+ * -------------------|----------------------------|--------------------------
+ * proc_watchdog      | watchdog_user_enabled      | NMI_WATCHDOG_ENABLED |
+ *                    |                            | SOFT_WATCHDOG_ENABLED
+ * -------------------|----------------------------|--------------------------
+ * proc_nmi_watchdog  | nmi_watchdog_user_enabled  | NMI_WATCHDOG_ENABLED
+ * -------------------|----------------------------|--------------------------
+ * proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED
  */
 static int proc_watchdog_common(int which, struct ctl_table *table, int write,
 				void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	int err, old, new;
-	int *watchdog_param = (int *)table->data;
+	int err, old, *param = table->data;

-	get_online_cpus();
-	mutex_lock(&watchdog_proc_mutex);
+	mutex_lock(&watchdog_mutex);

-	if (watchdog_suspended) {
-		/* no parameter changes allowed while watchdog is suspended */
-		err = -EAGAIN;
-		goto out;
-	}
-
-	/*
-	 * If the parameter is being read return the state of the corresponding
-	 * bit(s) in 'watchdog_enabled', else update 'watchdog_enabled' and the
-	 * run state of the lockup detectors.
-	 */
 	if (!write) {
-		*watchdog_param = (watchdog_enabled & which) != 0;
+		/*
+		 * On read synchronize the userspace interface. This is a
+		 * racy snapshot.
+		 */
+		*param = (watchdog_enabled & which) != 0;
 		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	} else {
+		old = READ_ONCE(*param);
 		err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-		if (err)
-			goto out;
-
-		/*
-		 * There is a race window between fetching the current value
-		 * from 'watchdog_enabled' and storing the new value. During
-		 * this race window, watchdog_nmi_enable() can sneak in and
-		 * clear the NMI_WATCHDOG_ENABLED bit in 'watchdog_enabled'.
-		 * The 'cmpxchg' detects this race and the loop retries.
-		 */
-		do {
-			old = watchdog_enabled;
-			/*
-			 * If the parameter value is not zero set the
-			 * corresponding bit(s), else clear it(them).
-			 */
-			if (*watchdog_param)
-				new = old | which;
-			else
-				new = old & ~which;
-		} while (cmpxchg(&watchdog_enabled, old, new) != old);
-
-		/*
-		 * Update the run state of the lockup detectors. There is _no_
-		 * need to check the value returned by proc_watchdog_update()
-		 * and to restore the previous value of 'watchdog_enabled' as
-		 * both lockup detectors are disabled if proc_watchdog_update()
-		 * returns an error.
-		 */
-		if (old == new)
-			goto out;
-
-		err = proc_watchdog_update();
+		if (!err && old != READ_ONCE(*param))
+			proc_watchdog_update();
 	}
-out:
-	mutex_unlock(&watchdog_proc_mutex);
-	put_online_cpus();
+	mutex_unlock(&watchdog_mutex);
 	return err;
 }
@@ -835,6 +713,8 @@ int proc_watchdog(struct ctl_table *table, int write,
 int proc_nmi_watchdog(struct ctl_table *table, int write,
 		      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
+	if (!nmi_watchdog_available && write)
+		return -ENOTSUPP;
 	return proc_watchdog_common(NMI_WATCHDOG_ENABLED,
 				    table, write, buffer, lenp, ppos);
 }
@@ -855,39 +735,17 @@ int proc_soft_watchdog(struct ctl_table *table, int write,
 int proc_watchdog_thresh(struct ctl_table *table, int write,
 			 void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-	int err, old, new;
-
-	get_online_cpus();
-	mutex_lock(&watchdog_proc_mutex);
+	int err, old;

-	if (watchdog_suspended) {
-		/* no parameter changes allowed while watchdog is suspended */
-		err = -EAGAIN;
-		goto out;
-	}
+	mutex_lock(&watchdog_mutex);

-	old = ACCESS_ONCE(watchdog_thresh);
+	old = READ_ONCE(watchdog_thresh);
 	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

-	if (err || !write)
-		goto out;
-
-	/*
-	 * Update the sample period. Restore on failure.
-	 */
-	new = ACCESS_ONCE(watchdog_thresh);
-	if (old == new)
-		goto out;
+	if (!err && write && old != READ_ONCE(watchdog_thresh))
+		proc_watchdog_update();

-	set_sample_period();
-	err = proc_watchdog_update();
-	if (err) {
-		watchdog_thresh = old;
-		set_sample_period();
-	}
-out:
-	mutex_unlock(&watchdog_proc_mutex);
-	put_online_cpus();
+	mutex_unlock(&watchdog_mutex);
 	return err;
 }
@@ -902,45 +760,19 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
 {
 	int err;

-	get_online_cpus();
-	mutex_lock(&watchdog_proc_mutex);
-
-	if (watchdog_suspended) {
-		/* no parameter changes allowed while watchdog is suspended */
-		err = -EAGAIN;
-		goto out;
-	}
+	mutex_lock(&watchdog_mutex);

 	err = proc_do_large_bitmap(table, write, buffer, lenp, ppos);
-	if (!err && write) {
-		/* Remove impossible cpus to keep sysctl output cleaner. */
-		cpumask_and(&watchdog_cpumask, &watchdog_cpumask,
-			    cpu_possible_mask);
-
-		if (watchdog_running) {
-			/*
-			 * Failure would be due to being unable to allocate
-			 * a temporary cpumask, so we are likely not in a
-			 * position to do much else to make things better.
-			 */
-			if (watchdog_update_cpus() != 0)
-				pr_err("cpumask update failed\n");
-		}
+	if (!err && write)
+		proc_watchdog_update();

-		watchdog_nmi_reconfigure();
-	}
-out:
-	mutex_unlock(&watchdog_proc_mutex);
-	put_online_cpus();
+	mutex_unlock(&watchdog_mutex);
 	return err;
 }

 #endif /* CONFIG_SYSCTL */

 void __init lockup_detector_init(void)
 {
-	set_sample_period();
-
 #ifdef CONFIG_NO_HZ_FULL
 	if (tick_nohz_full_enabled()) {
 		pr_info("Disabling watchdog on nohz_full cores by default\n");
@@ -951,6 +783,7 @@ void __init lockup_detector_init(void)
 	cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
 #endif

-	if (watchdog_enabled)
-		watchdog_enable_all_cpus();
+	if (!watchdog_nmi_probe())
+		nmi_watchdog_available = true;
+	lockup_detector_setup();
 }
...
@@ -21,8 +21,10 @@
 static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
+static struct cpumask dead_events_mask;

 static unsigned long hardlockup_allcpu_dumped;
+static unsigned int watchdog_cpus;

 void arch_touch_nmi_watchdog(void)
 {
@@ -103,15 +105,12 @@ static struct perf_event_attr wd_hw_attr = {
 /* Callback function for perf event subsystem */
 static void watchdog_overflow_callback(struct perf_event *event,
 				       struct perf_sample_data *data,
 				       struct pt_regs *regs)
 {
 	/* Ensure the watchdog never gets throttled */
 	event->hw.interrupts = 0;

-	if (atomic_read(&watchdog_park_in_progress) != 0)
-		return;
-
 	if (__this_cpu_read(watchdog_nmi_touch) == true) {
 		__this_cpu_write(watchdog_nmi_touch, false);
 		return;
@@ -160,104 +159,131 @@ static void watchdog_overflow_callback(struct perf_event *event,
 	return;
 }

-/*
- * People like the simple clean cpu node info on boot.
- * Reduce the watchdog noise by only printing messages
- * that are different from what cpu0 displayed.
- */
-static unsigned long firstcpu_err;
-static atomic_t watchdog_cpus;
-
-int watchdog_nmi_enable(unsigned int cpu)
+static int hardlockup_detector_event_create(void)
 {
+	unsigned int cpu = smp_processor_id();
 	struct perf_event_attr *wd_attr;
-	struct perf_event *event = per_cpu(watchdog_ev, cpu);
-	int firstcpu = 0;
-
-	/* nothing to do if the hard lockup detector is disabled */
-	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
-		goto out;
-
-	/* is it already setup and enabled? */
-	if (event && event->state > PERF_EVENT_STATE_OFF)
-		goto out;
-
-	/* it is setup but not enabled */
-	if (event != NULL)
-		goto out_enable;
-
-	if (atomic_inc_return(&watchdog_cpus) == 1)
-		firstcpu = 1;
+	struct perf_event *evt;

 	wd_attr = &wd_hw_attr;
 	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);

 	/* Try to register using hardware perf events */
-	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
+	evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL,
+					       watchdog_overflow_callback, NULL);
+	if (IS_ERR(evt)) {
+		pr_info("Perf event create on CPU %d failed with %ld\n", cpu,
+			PTR_ERR(evt));
+		return PTR_ERR(evt);
+	}
+	this_cpu_write(watchdog_ev, evt);
+	return 0;
+}

-	/* save the first cpu's error for future comparision */
-	if (firstcpu && IS_ERR(event))
-		firstcpu_err = PTR_ERR(event);
+/**
+ * hardlockup_detector_perf_enable - Enable the local event
+ */
+void hardlockup_detector_perf_enable(void)
+{
+	if (hardlockup_detector_event_create())
+		return;

-	if (!IS_ERR(event)) {
-		/* only print for the first cpu initialized */
-		if (firstcpu || firstcpu_err)
-			pr_info("enabled on all CPUs, permanently consumes one hw-PMU counter.\n");
-		goto out_save;
-	}
+	if (!watchdog_cpus++)
+		pr_info("Enabled. Permanently consumes one hw-PMU counter.\n");

-	/*
-	 * Disable the hard lockup detector if _any_ CPU fails to set up
-	 * set up the hardware perf event. The watchdog() function checks
-	 * the NMI_WATCHDOG_ENABLED bit periodically.
-	 *
-	 * The barriers are for syncing up watchdog_enabled across all the
-	 * cpus, as clear_bit() does not use barriers.
-	 */
-	smp_mb__before_atomic();
-	clear_bit(NMI_WATCHDOG_ENABLED_BIT, &watchdog_enabled);
-	smp_mb__after_atomic();
-
-	/* skip displaying the same error again */
-	if (!firstcpu && (PTR_ERR(event) == firstcpu_err))
-		return PTR_ERR(event);
-
-	/* vary the KERN level based on the returned errno */
-	if (PTR_ERR(event) == -EOPNOTSUPP)
-		pr_info("disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
-	else if (PTR_ERR(event) == -ENOENT)
-		pr_warn("disabled (cpu%i): hardware events not enabled\n",
-			 cpu);
-	else
-		pr_err("disabled (cpu%i): unable to create perf event: %ld\n",
-			cpu, PTR_ERR(event));
-
-	pr_info("Shutting down hard lockup detector on all cpus\n");
-
-	return PTR_ERR(event);
-
-	/* success path */
-out_save:
-	per_cpu(watchdog_ev, cpu) = event;
-out_enable:
-	perf_event_enable(per_cpu(watchdog_ev, cpu));
-out:
-	return 0;
+	perf_event_enable(this_cpu_read(watchdog_ev));
 }

-void watchdog_nmi_disable(unsigned int cpu)
+/**
+ * hardlockup_detector_perf_disable - Disable the local event
+ */
+void hardlockup_detector_perf_disable(void)
 {
-	struct perf_event *event = per_cpu(watchdog_ev, cpu);
+	struct perf_event *event = this_cpu_read(watchdog_ev);

 	if (event) {
 		perf_event_disable(event);
+		cpumask_set_cpu(smp_processor_id(), &dead_events_mask);
+		watchdog_cpus--;
+	}
+}
+
+/**
+ * hardlockup_detector_perf_cleanup - Cleanup disabled events and destroy them
+ *
+ * Called from lockup_detector_cleanup(). Serialized by the caller.
+ */
+void hardlockup_detector_perf_cleanup(void)
+{
+	int cpu;
+
+	for_each_cpu(cpu, &dead_events_mask) {
+		struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+		/*
+		 * Required because for_each_cpu() reports unconditionally
+		 * CPU0 as set on UP kernels. Sigh.
+		 */
+		if (event)
+			perf_event_release_kernel(event);
 		per_cpu(watchdog_ev, cpu) = NULL;
+	}
+	cpumask_clear(&dead_events_mask);
+}

-		/* should be in cleanup, but blocks oprofile */
-		perf_event_release_kernel(event);
+/**
+ * hardlockup_detector_perf_stop - Globally stop watchdog events
+ *
+ * Special interface for x86 to handle the perf HT bug.
+ */
+void __init hardlockup_detector_perf_stop(void)
+{
+	int cpu;
+
+	lockdep_assert_cpus_held();
+
+	for_each_online_cpu(cpu) {
+		struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+		if (event)
+			perf_event_disable(event);
+	}
+}

-		/* watchdog_nmi_enable() expects this to be zero initially. */
-		if (atomic_dec_and_test(&watchdog_cpus))
-			firstcpu_err = 0;
+/**
+ * hardlockup_detector_perf_restart - Globally restart watchdog events
+ *
+ * Special interface for x86 to handle the perf HT bug.
+ */
+void __init hardlockup_detector_perf_restart(void)
+{
+	int cpu;
+
+	lockdep_assert_cpus_held();
+
+	if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+		return;
+
+	for_each_online_cpu(cpu) {
+		struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+		if (event)
+			perf_event_enable(event);
+	}
+}
+
+/**
+ * hardlockup_detector_perf_init - Probe whether NMI event is available at all
+ */
+int __init hardlockup_detector_perf_init(void)
+{
+	int ret = hardlockup_detector_event_create();
+
+	if (ret) {
+		pr_info("Perf NMI watchdog permanently disabled\n");
+	} else {
+		perf_event_release_kernel(this_cpu_read(watchdog_ev));
+		this_cpu_write(watchdog_ev, NULL);
 	}
+	return ret;
 }
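The odd-looking NULL check in hardlockup_detector_perf_cleanup() above, the "Cure UP damage" commit in the shortlog, and the "for_each_cpu() surprise" in the pull request text are all the same issue: on UP kernels (!CONFIG_SMP) the iterator ignores its mask argument and always visits CPU 0 once. A standalone sketch, using a paraphrase of the UP definition from <linux/cpumask.h> of that era:

	#include <stdio.h>

	/* Paraphrased UP (!CONFIG_SMP) definition: the mask is evaluated
	 * only to avoid an unused-variable warning, and the loop body
	 * always runs exactly once with cpu == 0. */
	#define for_each_cpu(cpu, mask) \
		for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)(mask))

	int main(void)
	{
		unsigned long empty_mask = 0;	/* "no CPUs set" */
		int cpu;

		for_each_cpu(cpu, &empty_mask)
			printf("visited CPU %d despite the empty mask\n", cpu);
		return 0;
	}

With dead_events_mask empty, the cleanup loop body therefore still runs for CPU 0 on UP, and without the if (event) check it would call perf_event_release_kernel() on a NULL pointer.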