Commit faf9b616 authored by Hans de Goede's avatar Hans de Goede Committed by Greg Kroah-Hartman

[PATCH] hwmon: abituguru timeout fixes

This patch contains 2 sets of fixes for the abituguru:
 1) Much improved timeout handling, drasticly reducing the amount of
    timeout errors on some motherboards
 2) Fix the exit paths in the bank1 sensor type detect code to always
    restore the original settings even on an error. Without this our
    special test settings could remain seriously confusing the system
    BIOS's setup menu.

Both are very much related and are must haves, to avoid messing up the
uguru CMOS settings.

Detailed changes:
- Much improved timeout / wait for status handling. Many thanks to Sunil
  Kumar, for all his testing, ideas and patches! The code now first busy
  waits, polling the uguru for the expected status as this usually
  succeeds pretty quickly (within 90 reads). To avoid unnecessary CPU burn
  in timeout conditions, the amount of busy waiting has been halved from
  previous versions (120 tries instead of 250). This is not a problem,
  because this version goes to sleep after 120 attemps for 1 jiffy and
  then tries again, it does this sleep and try again 5 times before
  finally giving up. This (almost?) completly removes the timeout errors
  some people have seen regulary. Apparently some older uguru versions
  sometimes are distracted for a (relatively) long time. This solves this.
- These timeout errors not only occur in the sending address part of
  reading the uguru but also in the wait for read state, so errors in
  this state are now handled as retryable just like send address state
  errors and are only logged and reported to userspace if 3 executive
  tries fail.
- Fix a very nasty bug in the bank1 sensor type detection code, where it
  would not restore the original settings in any of the error paths!
- Since not successfully restoring the original settings can seriously
  confuse the system BIOS (hang when entering the relevant setup menu),
  we now try restoring them 3 times before giving up.
Signed-off-by: default avatarHans de Goede <j.w.r.degoede@hhs.nl>
Signed-off-by: default avatarJean Delvare <khali@linux-fr.org>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@suse.de>
parent 4801bc25
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <linux/jiffies.h> #include <linux/jiffies.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/err.h> #include <linux/err.h>
#include <linux/delay.h>
#include <linux/platform_device.h> #include <linux/platform_device.h>
#include <linux/hwmon.h> #include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h> #include <linux/hwmon-sysfs.h>
...@@ -64,17 +65,17 @@ ...@@ -64,17 +65,17 @@
#define ABIT_UGURU_IN_SENSOR 0 #define ABIT_UGURU_IN_SENSOR 0
#define ABIT_UGURU_TEMP_SENSOR 1 #define ABIT_UGURU_TEMP_SENSOR 1
#define ABIT_UGURU_NC 2 #define ABIT_UGURU_NC 2
/* Timeouts / Retries, if these turn out to need a lot of fiddling we could /* In many cases we need to wait for the uGuru to reach a certain status, most
convert them to params. */ of the time it will reach this status within 30 - 90 ISA reads, and thus we
/* 250 was determined by trial and error, 200 works most of the time, but not can best busy wait. This define gives the total amount of reads to try. */
always. I assume this is cpu-speed independent, since the ISA-bus and not #define ABIT_UGURU_WAIT_TIMEOUT 125
the CPU should be the bottleneck. Note that 250 sometimes is still not /* However sometimes older versions of the uGuru seem to be distracted and they
enough (only reported on AN7 mb) this is handled by a higher layer. */ do not respond for a long time. To handle this we sleep before each of the
#define ABIT_UGURU_WAIT_TIMEOUT 250 last ABIT_UGURU_WAIT_TIMEOUT_SLEEP tries. */
#define ABIT_UGURU_WAIT_TIMEOUT_SLEEP 5
/* Normally all expected status in abituguru_ready, are reported after the /* Normally all expected status in abituguru_ready, are reported after the
first read, but sometimes not and we need to poll, 5 polls was not enough first read, but sometimes not and we need to poll. */
50 sofar is. */ #define ABIT_UGURU_READY_TIMEOUT 5
#define ABIT_UGURU_READY_TIMEOUT 50
/* Maximum 3 retries on timedout reads/writes, delay 200 ms before retrying */ /* Maximum 3 retries on timedout reads/writes, delay 200 ms before retrying */
#define ABIT_UGURU_MAX_RETRIES 3 #define ABIT_UGURU_MAX_RETRIES 3
#define ABIT_UGURU_RETRY_DELAY (HZ/5) #define ABIT_UGURU_RETRY_DELAY (HZ/5)
...@@ -226,6 +227,10 @@ static int abituguru_wait(struct abituguru_data *data, u8 state) ...@@ -226,6 +227,10 @@ static int abituguru_wait(struct abituguru_data *data, u8 state)
timeout--; timeout--;
if (timeout == 0) if (timeout == 0)
return -EBUSY; return -EBUSY;
/* sleep a bit before our last few tries, see the comment on
this where ABIT_UGURU_WAIT_TIMEOUT_SLEEP is defined. */
if (timeout <= ABIT_UGURU_WAIT_TIMEOUT_SLEEP)
msleep(0);
} }
return 0; return 0;
} }
...@@ -256,6 +261,7 @@ static int abituguru_ready(struct abituguru_data *data) ...@@ -256,6 +261,7 @@ static int abituguru_ready(struct abituguru_data *data)
"CMD reg does not hold 0xAC after ready command\n"); "CMD reg does not hold 0xAC after ready command\n");
return -EIO; return -EIO;
} }
msleep(0);
} }
/* After this the ABIT_UGURU_DATA port should contain /* After this the ABIT_UGURU_DATA port should contain
...@@ -268,6 +274,7 @@ static int abituguru_ready(struct abituguru_data *data) ...@@ -268,6 +274,7 @@ static int abituguru_ready(struct abituguru_data *data)
"state != more input after ready command\n"); "state != more input after ready command\n");
return -EIO; return -EIO;
} }
msleep(0);
} }
data->uguru_ready = 1; data->uguru_ready = 1;
...@@ -331,7 +338,8 @@ static int abituguru_read(struct abituguru_data *data, ...@@ -331,7 +338,8 @@ static int abituguru_read(struct abituguru_data *data,
/* And read the data */ /* And read the data */
for (i = 0; i < count; i++) { for (i = 0; i < count; i++) {
if (abituguru_wait(data, ABIT_UGURU_STATUS_READ)) { if (abituguru_wait(data, ABIT_UGURU_STATUS_READ)) {
ABIT_UGURU_DEBUG(1, "timeout exceeded waiting for " ABIT_UGURU_DEBUG(retries ? 1 : 3,
"timeout exceeded waiting for "
"read state (bank: %d, sensor: %d)\n", "read state (bank: %d, sensor: %d)\n",
(int)bank_addr, (int)sensor_addr); (int)bank_addr, (int)sensor_addr);
break; break;
...@@ -350,7 +358,9 @@ static int abituguru_read(struct abituguru_data *data, ...@@ -350,7 +358,9 @@ static int abituguru_read(struct abituguru_data *data,
static int abituguru_write(struct abituguru_data *data, static int abituguru_write(struct abituguru_data *data,
u8 bank_addr, u8 sensor_addr, u8 *buf, int count) u8 bank_addr, u8 sensor_addr, u8 *buf, int count)
{ {
int i; /* We use the ready timeout as we have to wait for 0xAC just like the
ready function */
int i, timeout = ABIT_UGURU_READY_TIMEOUT;
/* Send the address */ /* Send the address */
i = abituguru_send_address(data, bank_addr, sensor_addr, i = abituguru_send_address(data, bank_addr, sensor_addr,
...@@ -370,7 +380,8 @@ static int abituguru_write(struct abituguru_data *data, ...@@ -370,7 +380,8 @@ static int abituguru_write(struct abituguru_data *data,
} }
/* Now we need to wait till the chip is ready to be read again, /* Now we need to wait till the chip is ready to be read again,
don't ask why */ so that we can read 0xAC as confirmation that our write has
succeeded. */
if (abituguru_wait(data, ABIT_UGURU_STATUS_READ)) { if (abituguru_wait(data, ABIT_UGURU_STATUS_READ)) {
ABIT_UGURU_DEBUG(1, "timeout exceeded waiting for read state " ABIT_UGURU_DEBUG(1, "timeout exceeded waiting for read state "
"after write (bank: %d, sensor: %d)\n", (int)bank_addr, "after write (bank: %d, sensor: %d)\n", (int)bank_addr,
...@@ -379,12 +390,16 @@ static int abituguru_write(struct abituguru_data *data, ...@@ -379,12 +390,16 @@ static int abituguru_write(struct abituguru_data *data,
} }
/* Cmd port MUST be read now and should contain 0xAC */ /* Cmd port MUST be read now and should contain 0xAC */
if (inb_p(data->addr + ABIT_UGURU_CMD) != 0xAC) { while (inb_p(data->addr + ABIT_UGURU_CMD) != 0xAC) {
ABIT_UGURU_DEBUG(1, "CMD reg does not hold 0xAC after write " timeout--;
"(bank: %d, sensor: %d)\n", (int)bank_addr, if (timeout == 0) {
(int)sensor_addr); ABIT_UGURU_DEBUG(1, "CMD reg does not hold 0xAC after "
"write (bank: %d, sensor: %d)\n",
(int)bank_addr, (int)sensor_addr);
return -EIO; return -EIO;
} }
msleep(0);
}
/* Last put the chip back in ready state */ /* Last put the chip back in ready state */
abituguru_ready(data); abituguru_ready(data);
...@@ -403,7 +418,7 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data, ...@@ -403,7 +418,7 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data,
u8 sensor_addr) u8 sensor_addr)
{ {
u8 val, buf[3]; u8 val, buf[3];
int ret = ABIT_UGURU_NC; int i, ret = -ENODEV; /* error is the most common used retval :| */
/* If overriden by the user return the user selected type */ /* If overriden by the user return the user selected type */
if (bank1_types[sensor_addr] >= ABIT_UGURU_IN_SENSOR && if (bank1_types[sensor_addr] >= ABIT_UGURU_IN_SENSOR &&
...@@ -439,7 +454,7 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data, ...@@ -439,7 +454,7 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data,
buf[2] = 250; buf[2] = 250;
if (abituguru_write(data, ABIT_UGURU_SENSOR_BANK1 + 2, sensor_addr, if (abituguru_write(data, ABIT_UGURU_SENSOR_BANK1 + 2, sensor_addr,
buf, 3) != 3) buf, 3) != 3)
return -ENODEV; goto abituguru_detect_bank1_sensor_type_exit;
/* Now we need 20 ms to give the uguru time to read the sensors /* Now we need 20 ms to give the uguru time to read the sensors
and raise a voltage alarm */ and raise a voltage alarm */
set_current_state(TASK_UNINTERRUPTIBLE); set_current_state(TASK_UNINTERRUPTIBLE);
...@@ -447,21 +462,16 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data, ...@@ -447,21 +462,16 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data,
/* Check for alarm and check the alarm is a volt low alarm. */ /* Check for alarm and check the alarm is a volt low alarm. */
if (abituguru_read(data, ABIT_UGURU_ALARM_BANK, 0, buf, 3, if (abituguru_read(data, ABIT_UGURU_ALARM_BANK, 0, buf, 3,
ABIT_UGURU_MAX_RETRIES) != 3) ABIT_UGURU_MAX_RETRIES) != 3)
return -ENODEV; goto abituguru_detect_bank1_sensor_type_exit;
if (buf[sensor_addr/8] & (0x01 << (sensor_addr % 8))) { if (buf[sensor_addr/8] & (0x01 << (sensor_addr % 8))) {
if (abituguru_read(data, ABIT_UGURU_SENSOR_BANK1 + 1, if (abituguru_read(data, ABIT_UGURU_SENSOR_BANK1 + 1,
sensor_addr, buf, 3, sensor_addr, buf, 3,
ABIT_UGURU_MAX_RETRIES) != 3) ABIT_UGURU_MAX_RETRIES) != 3)
return -ENODEV; goto abituguru_detect_bank1_sensor_type_exit;
if (buf[0] & ABIT_UGURU_VOLT_LOW_ALARM_FLAG) { if (buf[0] & ABIT_UGURU_VOLT_LOW_ALARM_FLAG) {
/* Restore original settings */
if (abituguru_write(data, ABIT_UGURU_SENSOR_BANK1 + 2,
sensor_addr,
data->bank1_settings[sensor_addr],
3) != 3)
return -ENODEV;
ABIT_UGURU_DEBUG(2, " found volt sensor\n"); ABIT_UGURU_DEBUG(2, " found volt sensor\n");
return ABIT_UGURU_IN_SENSOR; ret = ABIT_UGURU_IN_SENSOR;
goto abituguru_detect_bank1_sensor_type_exit;
} else } else
ABIT_UGURU_DEBUG(2, " alarm raised during volt " ABIT_UGURU_DEBUG(2, " alarm raised during volt "
"sensor test, but volt low flag not set\n"); "sensor test, but volt low flag not set\n");
...@@ -477,7 +487,7 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data, ...@@ -477,7 +487,7 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data,
buf[2] = 10; buf[2] = 10;
if (abituguru_write(data, ABIT_UGURU_SENSOR_BANK1 + 2, sensor_addr, if (abituguru_write(data, ABIT_UGURU_SENSOR_BANK1 + 2, sensor_addr,
buf, 3) != 3) buf, 3) != 3)
return -ENODEV; goto abituguru_detect_bank1_sensor_type_exit;
/* Now we need 50 ms to give the uguru time to read the sensors /* Now we need 50 ms to give the uguru time to read the sensors
and raise a temp alarm */ and raise a temp alarm */
set_current_state(TASK_UNINTERRUPTIBLE); set_current_state(TASK_UNINTERRUPTIBLE);
...@@ -485,15 +495,16 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data, ...@@ -485,15 +495,16 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data,
/* Check for alarm and check the alarm is a temp high alarm. */ /* Check for alarm and check the alarm is a temp high alarm. */
if (abituguru_read(data, ABIT_UGURU_ALARM_BANK, 0, buf, 3, if (abituguru_read(data, ABIT_UGURU_ALARM_BANK, 0, buf, 3,
ABIT_UGURU_MAX_RETRIES) != 3) ABIT_UGURU_MAX_RETRIES) != 3)
return -ENODEV; goto abituguru_detect_bank1_sensor_type_exit;
if (buf[sensor_addr/8] & (0x01 << (sensor_addr % 8))) { if (buf[sensor_addr/8] & (0x01 << (sensor_addr % 8))) {
if (abituguru_read(data, ABIT_UGURU_SENSOR_BANK1 + 1, if (abituguru_read(data, ABIT_UGURU_SENSOR_BANK1 + 1,
sensor_addr, buf, 3, sensor_addr, buf, 3,
ABIT_UGURU_MAX_RETRIES) != 3) ABIT_UGURU_MAX_RETRIES) != 3)
return -ENODEV; goto abituguru_detect_bank1_sensor_type_exit;
if (buf[0] & ABIT_UGURU_TEMP_HIGH_ALARM_FLAG) { if (buf[0] & ABIT_UGURU_TEMP_HIGH_ALARM_FLAG) {
ret = ABIT_UGURU_TEMP_SENSOR;
ABIT_UGURU_DEBUG(2, " found temp sensor\n"); ABIT_UGURU_DEBUG(2, " found temp sensor\n");
ret = ABIT_UGURU_TEMP_SENSOR;
goto abituguru_detect_bank1_sensor_type_exit;
} else } else
ABIT_UGURU_DEBUG(2, " alarm raised during temp " ABIT_UGURU_DEBUG(2, " alarm raised during temp "
"sensor test, but temp high flag not set\n"); "sensor test, but temp high flag not set\n");
...@@ -501,11 +512,23 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data, ...@@ -501,11 +512,23 @@ abituguru_detect_bank1_sensor_type(struct abituguru_data *data,
ABIT_UGURU_DEBUG(2, " alarm not raised during temp sensor " ABIT_UGURU_DEBUG(2, " alarm not raised during temp sensor "
"test\n"); "test\n");
/* Restore original settings */ ret = ABIT_UGURU_NC;
if (abituguru_write(data, ABIT_UGURU_SENSOR_BANK1 + 2, sensor_addr, abituguru_detect_bank1_sensor_type_exit:
data->bank1_settings[sensor_addr], 3) != 3) /* Restore original settings, failing here is really BAD, it has been
reported that some BIOS-es hang when entering the uGuru menu with
invalid settings present in the uGuru, so we try this 3 times. */
for (i = 0; i < 3; i++)
if (abituguru_write(data, ABIT_UGURU_SENSOR_BANK1 + 2,
sensor_addr, data->bank1_settings[sensor_addr],
3) == 3)
break;
if (i == 3) {
printk(KERN_ERR ABIT_UGURU_NAME
": Fatal error could not restore original settings. "
"This should never happen please report this to the "
"abituguru maintainer (see MAINTAINERS)\n");
return -ENODEV; return -ENODEV;
}
return ret; return ret;
} }
...@@ -1305,7 +1328,7 @@ static struct abituguru_data *abituguru_update_device(struct device *dev) ...@@ -1305,7 +1328,7 @@ static struct abituguru_data *abituguru_update_device(struct device *dev)
data->update_timeouts = 0; data->update_timeouts = 0;
LEAVE_UPDATE: LEAVE_UPDATE:
/* handle timeout condition */ /* handle timeout condition */
if (err == -EBUSY) { if (!success && (err == -EBUSY || err >= 0)) {
/* No overflow please */ /* No overflow please */
if (data->update_timeouts < 255u) if (data->update_timeouts < 255u)
data->update_timeouts++; data->update_timeouts++;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment