Commit 70fea60d authored by Benoit Goby's avatar Benoit Goby Committed by Rafael J. Wysocki

PM / Sleep: Detect device suspend/resume lockup and log event

Rather than hard-lock the kernel, dump the suspend/resume thread stack
and panic() to capture a message in pstore when a driver takes too long
to suspend/resume. Default suspend/resume watchdog timeout is set to 12
seconds to be longer than the usbhid 10 second timeout, but could be
changed at compile time.

Exclude from the watchdog the time spent waiting for children that
are resumed asynchronously and time every device, whether or not they
resumed synchronously.

This patch is targeted for mobile devices where a suspend/resume lockup
could cause a system reboot. Information about failing device can be
retrieved in subsequent boot session by mounting pstore and inspecting
the log. Laptops with EFI-enabled pstore could also benefit from
this feature.

The hardware watchdog timer is likely suspended during this time and
couldn't be relied upon. The soft-lockup detector would eventually tell
that tasks are not scheduled, but would provide little context as to why.
The patch hence uses system timer and assumes it is still active while the
devices are suspended/resumed.

This feature can be enabled/disabled during kernel configuration.

This change is based on earlier work by San Mehat.
Signed-off-by: default avatarBenoit Goby <benoit@android.com>
Signed-off-by: default avatarZoran Markovic <zoran.markovic@linaro.org>
Acked-by: default avatarUlf Hansson <ulf.hansson@linaro.org>
Signed-off-by: default avatarRafael J. Wysocki <rafael.j.wysocki@intel.com>
parent 61e6cfa8
...@@ -30,6 +30,8 @@ ...@@ -30,6 +30,8 @@
#include <linux/suspend.h> #include <linux/suspend.h>
#include <trace/events/power.h> #include <trace/events/power.h>
#include <linux/cpuidle.h> #include <linux/cpuidle.h>
#include <linux/timer.h>
#include "../base.h" #include "../base.h"
#include "power.h" #include "power.h"
...@@ -390,6 +392,71 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev, ...@@ -390,6 +392,71 @@ static int dpm_run_callback(pm_callback_t cb, struct device *dev,
return error; return error;
} }
#ifdef CONFIG_DPM_WATCHDOG
struct dpm_watchdog {
struct device *dev;
struct task_struct *tsk;
struct timer_list timer;
};
#define DECLARE_DPM_WATCHDOG_ON_STACK(wd) \
struct dpm_watchdog wd
/**
* dpm_watchdog_handler - Driver suspend / resume watchdog handler.
* @data: Watchdog object address.
*
* Called when a driver has timed out suspending or resuming.
* There's not much we can do here to recover so panic() to
* capture a crash-dump in pstore.
*/
static void dpm_watchdog_handler(unsigned long data)
{
struct dpm_watchdog *wd = (void *)data;
dev_emerg(wd->dev, "**** DPM device timeout ****\n");
show_stack(wd->tsk, NULL);
panic("%s %s: unrecoverable failure\n",
dev_driver_string(wd->dev), dev_name(wd->dev));
}
/**
* dpm_watchdog_set - Enable pm watchdog for given device.
* @wd: Watchdog. Must be allocated on the stack.
* @dev: Device to handle.
*/
static void dpm_watchdog_set(struct dpm_watchdog *wd, struct device *dev)
{
struct timer_list *timer = &wd->timer;
wd->dev = dev;
wd->tsk = current;
init_timer_on_stack(timer);
/* use same timeout value for both suspend and resume */
timer->expires = jiffies + HZ * CONFIG_DPM_WATCHDOG_TIMEOUT;
timer->function = dpm_watchdog_handler;
timer->data = (unsigned long)wd;
add_timer(timer);
}
/**
* dpm_watchdog_clear - Disable suspend/resume watchdog.
* @wd: Watchdog to disable.
*/
static void dpm_watchdog_clear(struct dpm_watchdog *wd)
{
struct timer_list *timer = &wd->timer;
del_timer_sync(timer);
destroy_timer_on_stack(timer);
}
#else
#define DECLARE_DPM_WATCHDOG_ON_STACK(wd)
#define dpm_watchdog_set(x, y)
#define dpm_watchdog_clear(x)
#endif
/*------------------------- Resume routines -------------------------*/ /*------------------------- Resume routines -------------------------*/
/** /**
...@@ -576,6 +643,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) ...@@ -576,6 +643,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
pm_callback_t callback = NULL; pm_callback_t callback = NULL;
char *info = NULL; char *info = NULL;
int error = 0; int error = 0;
DECLARE_DPM_WATCHDOG_ON_STACK(wd);
TRACE_DEVICE(dev); TRACE_DEVICE(dev);
TRACE_RESUME(0); TRACE_RESUME(0);
...@@ -584,6 +652,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) ...@@ -584,6 +652,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
goto Complete; goto Complete;
dpm_wait(dev->parent, async); dpm_wait(dev->parent, async);
dpm_watchdog_set(&wd, dev);
device_lock(dev); device_lock(dev);
/* /*
...@@ -642,6 +711,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) ...@@ -642,6 +711,7 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
Unlock: Unlock:
device_unlock(dev); device_unlock(dev);
dpm_watchdog_clear(&wd);
Complete: Complete:
complete_all(&dev->power.completion); complete_all(&dev->power.completion);
...@@ -1060,6 +1130,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) ...@@ -1060,6 +1130,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
pm_callback_t callback = NULL; pm_callback_t callback = NULL;
char *info = NULL; char *info = NULL;
int error = 0; int error = 0;
DECLARE_DPM_WATCHDOG_ON_STACK(wd);
dpm_wait_for_children(dev, async); dpm_wait_for_children(dev, async);
...@@ -1083,6 +1154,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) ...@@ -1083,6 +1154,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
if (dev->power.syscore) if (dev->power.syscore)
goto Complete; goto Complete;
dpm_watchdog_set(&wd, dev);
device_lock(dev); device_lock(dev);
if (dev->pm_domain) { if (dev->pm_domain) {
...@@ -1139,6 +1211,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) ...@@ -1139,6 +1211,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
} }
device_unlock(dev); device_unlock(dev);
dpm_watchdog_clear(&wd);
Complete: Complete:
complete_all(&dev->power.completion); complete_all(&dev->power.completion);
......
...@@ -178,6 +178,22 @@ config PM_SLEEP_DEBUG ...@@ -178,6 +178,22 @@ config PM_SLEEP_DEBUG
def_bool y def_bool y
depends on PM_DEBUG && PM_SLEEP depends on PM_DEBUG && PM_SLEEP
config DPM_WATCHDOG
bool "Device suspend/resume watchdog"
depends on PM_DEBUG && PSTORE
---help---
Sets up a watchdog timer to capture drivers that are
locked up attempting to suspend/resume a device.
A detected lockup causes system panic with message
captured in pstore device for inspection in subsequent
boot session.
config DPM_WATCHDOG_TIMEOUT
int "Watchdog timeout in seconds"
range 1 120
default 12
depends on DPM_WATCHDOG
config PM_TRACE config PM_TRACE
bool bool
help help
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment