Commit 1fcaa5db authored by Linus Torvalds

Merge tag 'thermal-6.11-rc1-3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull thermal control fix from Rafael Wysocki:
 "Prevent the thermal core from flooding the kernel log with useless
  messages if thermal zone temperature can never be determined (or its
  sensor has failed permanently) and make it finally give up and disable
  defective thermal zones (Rafael Wysocki)"

* tag 'thermal-6.11-rc1-3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  thermal: core: Back off when polling thermal zones on errors
  thermal: trip: Split thermal_zone_device_set_mode()
parents 7b0acd91 f7c1b0e4
......@@ -272,6 +272,44 @@ static int __init thermal_register_governors(void)
return ret;
}
/*
 * Switch @tz to @mode.
 *
 * If the zone provides a ->change_mode() callback, it is invoked first and
 * any nonzero value it returns is passed back to the caller with tz->mode
 * left untouched.  Otherwise tz->mode is updated and 0 is returned.
 */
static int __thermal_zone_device_set_mode(struct thermal_zone_device *tz,
					  enum thermal_device_mode mode)
{
	int err = 0;

	if (tz->ops.change_mode)
		err = tz->ops.change_mode(tz, mode);

	/* Record the new mode only once the callback (if any) has accepted it. */
	if (!err)
		tz->mode = mode;

	return err;
}
/*
 * Give up on a thermal zone whose temperature cannot be read: log an error,
 * switch the zone to THERMAL_DEVICE_DISABLED and send the "zone disabled"
 * notification.  If the zone has a critical trip with a valid temperature,
 * an additional critical-level message is logged (once).
 */
static void thermal_zone_broken_disable(struct thermal_zone_device *tz)
{
	struct thermal_trip_desc *desc;

	dev_err(&tz->device, "Unable to get temperature, disabling!\n");

	/*
	 * Only enabled thermal zones get here, so the current mode does not
	 * need to be examined first.  Note that the result of the mode change
	 * is not checked.
	 */
	__thermal_zone_device_set_mode(tz, THERMAL_DEVICE_DISABLED);
	thermal_notify_tz_disable(tz);

	for_each_trip_desc(tz, desc) {
		if (desc->trip.type == THERMAL_TRIP_CRITICAL) {
			if (desc->trip.temperature > THERMAL_TEMP_INVALID) {
				/* One warning is enough; stop at the first match. */
				dev_crit(&tz->device,
					 "Disabled thermal zone with critical trip point\n");
				return;
			}
		}
	}
}
/*
* Zone update section: main control loop applied to each zone while monitoring
* in polling mode. The monitoring is done using a workqueue.
......@@ -292,6 +330,34 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz,
cancel_delayed_work(&tz->poll_queue);
}
/*
 * Schedule another temperature check after a failed one.
 *
 * @error is the error code of the failed check.  -EAGAIN simply retries
 * after THERMAL_RECHECK_DELAY.  Any other error retries after the zone's
 * current recheck delay, which then grows by half of its value (at least one
 * jiffy).  Once the delay exceeds THERMAL_MAX_RECHECK_DELAY, the zone is
 * disabled as broken.
 */
static void thermal_zone_recheck(struct thermal_zone_device *tz, int error)
{
	if (error == -EAGAIN) {
		thermal_zone_device_set_polling(tz, THERMAL_RECHECK_DELAY);
		return;
	}

	/*
	 * To keep the log quiet, report the failure only while the delay still
	 * has its initial value.  A second message follows if the temperature
	 * still cannot be read after multiple attempts.
	 */
	if (THERMAL_RECHECK_DELAY == tz->recheck_delay_jiffies)
		dev_info(&tz->device, "Temperature check failed (%d)\n", error);

	thermal_zone_device_set_polling(tz, tz->recheck_delay_jiffies);

	/* Back off: the next delay is 1.5 times the current one. */
	tz->recheck_delay_jiffies += max(tz->recheck_delay_jiffies >> 1, 1ULL);
	if (tz->recheck_delay_jiffies <= THERMAL_MAX_RECHECK_DELAY)
		return;

	thermal_zone_broken_disable(tz);

	/*
	 * Start over from the initial recheck delay so that the thermal zone
	 * gets a chance to recover when user space re-enables it.
	 */
	tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY;
}
static void monitor_thermal_zone(struct thermal_zone_device *tz)
{
if (tz->mode != THERMAL_DEVICE_ENABLED)
......@@ -491,10 +557,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
ret = __thermal_zone_get_temp(tz, &temp);
if (ret) {
if (ret != -EAGAIN)
dev_info(&tz->device, "Temperature check failed (%d)\n", ret);
thermal_zone_device_set_polling(tz, msecs_to_jiffies(THERMAL_RECHECK_DELAY_MS));
thermal_zone_recheck(tz, ret);
return;
} else if (temp <= THERMAL_TEMP_INVALID) {
/*
......@@ -506,6 +569,8 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
goto monitor;
}
tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY;
tz->last_temperature = tz->temperature;
tz->temperature = temp;
......@@ -540,7 +605,7 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
static int thermal_zone_device_set_mode(struct thermal_zone_device *tz,
enum thermal_device_mode mode)
{
int ret = 0;
int ret;
mutex_lock(&tz->lock);
......@@ -548,14 +613,15 @@ static int thermal_zone_device_set_mode(struct thermal_zone_device *tz,
if (mode == tz->mode) {
mutex_unlock(&tz->lock);
return ret;
return 0;
}
if (tz->ops.change_mode)
ret = tz->ops.change_mode(tz, mode);
ret = __thermal_zone_device_set_mode(tz, mode);
if (ret) {
mutex_unlock(&tz->lock);
if (!ret)
tz->mode = mode;
return ret;
}
__thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
......@@ -566,7 +632,7 @@ static int thermal_zone_device_set_mode(struct thermal_zone_device *tz,
else
thermal_notify_tz_disable(tz);
return ret;
return 0;
}
int thermal_zone_device_enable(struct thermal_zone_device *tz)
......@@ -1445,6 +1511,7 @@ thermal_zone_device_register_with_trips(const char *type,
thermal_set_delay_jiffies(&tz->passive_delay_jiffies, passive_delay);
thermal_set_delay_jiffies(&tz->polling_delay_jiffies, polling_delay);
tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY;
/* sys I/F */
/* Add nodes that are always present via .groups */
......
......@@ -67,6 +67,8 @@ struct thermal_governor {
* @polling_delay_jiffies: number of jiffies to wait between polls when
* checking whether trip points have been crossed (0 for
* interrupt driven systems)
* @recheck_delay_jiffies: delay after a failed attempt to determine the zone
* temperature before trying again
* @temperature: current temperature. This is only for core code,
* drivers should use thermal_zone_get_temp() to get the
* current temperature
......@@ -108,6 +110,7 @@ struct thermal_zone_device {
int num_trips;
unsigned long passive_delay_jiffies;
unsigned long polling_delay_jiffies;
unsigned long recheck_delay_jiffies;
int temperature;
int last_temperature;
int emul_temperature;
......@@ -137,10 +140,11 @@ struct thermal_zone_device {
#define THERMAL_TEMP_INIT INT_MIN
/*
* Default delay after a failing thermal zone temperature check before
* attempting to check it again.
* Default and maximum delay after a failed thermal zone temperature check
* before attempting to check it again (in jiffies).
*/
#define THERMAL_RECHECK_DELAY_MS 250
#define THERMAL_RECHECK_DELAY msecs_to_jiffies(250)
#define THERMAL_MAX_RECHECK_DELAY (120 * HZ)
/* Default Thermal Governor */
#if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment