Commit b2b9a8d7, authored by Shannon Nelson, committed by David S. Miller

ionic: avoid races in ionic_heartbeat_check

Rework the heartbeat checks to be sure that we're getting an
atomic operation.  Through testing we found occasions where a
separate thread could clash with this check and cause erroneous
heartbeat check results.
Signed-off-by: Allen Hubbe <allenbh@pensando.io>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 230efff4
...@@ -24,6 +24,9 @@ static void ionic_watchdog_cb(struct timer_list *t) ...@@ -24,6 +24,9 @@ static void ionic_watchdog_cb(struct timer_list *t)
return; return;
hb = ionic_heartbeat_check(ionic); hb = ionic_heartbeat_check(ionic);
dev_dbg(ionic->dev, "%s: hb %d running %d UP %d\n",
__func__, hb, netif_running(lif->netdev),
test_bit(IONIC_LIF_F_UP, lif->state));
if (hb >= 0 && if (hb >= 0 &&
!test_bit(IONIC_LIF_F_FW_RESET, lif->state)) !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
...@@ -91,9 +94,17 @@ int ionic_dev_setup(struct ionic *ionic) ...@@ -91,9 +94,17 @@ int ionic_dev_setup(struct ionic *ionic)
return -EFAULT; return -EFAULT;
} }
idev->last_fw_status = 0xff;
timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0); timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ; ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;
/* set times to ensure the first check will proceed */
atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ);
idev->last_hb_time = jiffies - 2 * ionic->watchdog_period;
/* init as ready, so no transition if the first check succeeds */
idev->last_fw_hb = 0;
idev->fw_hb_ready = true;
idev->fw_status_ready = true;
mod_timer(&ionic->watchdog_timer, mod_timer(&ionic->watchdog_timer,
round_jiffies(jiffies + ionic->watchdog_period)); round_jiffies(jiffies + ionic->watchdog_period));
...@@ -107,29 +118,38 @@ int ionic_dev_setup(struct ionic *ionic) ...@@ -107,29 +118,38 @@ int ionic_dev_setup(struct ionic *ionic)
int ionic_heartbeat_check(struct ionic *ionic) int ionic_heartbeat_check(struct ionic *ionic)
{ {
struct ionic_dev *idev = &ionic->idev; struct ionic_dev *idev = &ionic->idev;
unsigned long hb_time; unsigned long check_time, last_check_time;
bool fw_status_ready, fw_hb_ready;
u8 fw_status; u8 fw_status;
u32 hb; u32 fw_hb;
/* wait a little more than one second before testing again */ /* wait at least one second before testing again */
hb_time = jiffies; check_time = jiffies;
if (time_before(hb_time, (idev->last_hb_time + ionic->watchdog_period))) last_check_time = atomic_long_read(&idev->last_check_time);
do_check_time:
if (time_before(check_time, last_check_time + HZ))
return 0; return 0;
if (!atomic_long_try_cmpxchg_relaxed(&idev->last_check_time,
&last_check_time, check_time)) {
/* if called concurrently, only the first should proceed. */
dev_dbg(ionic->dev, "%s: do_check_time again\n", __func__);
goto do_check_time;
}
/* firmware is useful only if the running bit is set and /* firmware is useful only if the running bit is set and
* fw_status != 0xff (bad PCI read) * fw_status != 0xff (bad PCI read)
*/ */
fw_status = ioread8(&idev->dev_info_regs->fw_status); fw_status = ioread8(&idev->dev_info_regs->fw_status);
if (fw_status != 0xff) fw_status_ready = (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING);
fw_status &= IONIC_FW_STS_F_RUNNING; /* use only the run bit */
/* is this a transition? */ /* is this a transition? */
if (fw_status != idev->last_fw_status && if (fw_status_ready != idev->fw_status_ready) {
idev->last_fw_status != 0xff) {
struct ionic_lif *lif = ionic->lif; struct ionic_lif *lif = ionic->lif;
bool trigger = false; bool trigger = false;
if (!fw_status || fw_status == 0xff) { idev->fw_status_ready = fw_status_ready;
if (!fw_status_ready) {
dev_info(ionic->dev, "FW stopped %u\n", fw_status); dev_info(ionic->dev, "FW stopped %u\n", fw_status);
if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state)) if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
trigger = true; trigger = true;
...@@ -143,44 +163,47 @@ int ionic_heartbeat_check(struct ionic *ionic) ...@@ -143,44 +163,47 @@ int ionic_heartbeat_check(struct ionic *ionic)
struct ionic_deferred_work *work; struct ionic_deferred_work *work;
work = kzalloc(sizeof(*work), GFP_ATOMIC); work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work) { if (work) {
dev_err(ionic->dev, "LIF reset trigger dropped\n");
} else {
work->type = IONIC_DW_TYPE_LIF_RESET; work->type = IONIC_DW_TYPE_LIF_RESET;
if (fw_status & IONIC_FW_STS_F_RUNNING && work->fw_status = fw_status_ready;
fw_status != 0xff)
work->fw_status = 1;
ionic_lif_deferred_enqueue(&lif->deferred, work); ionic_lif_deferred_enqueue(&lif->deferred, work);
} }
} }
} }
idev->last_fw_status = fw_status;
if (!fw_status || fw_status == 0xff) if (!fw_status_ready)
return -ENXIO; return -ENXIO;
/* early FW has no heartbeat, else FW will return non-zero */ /* wait at least one watchdog period since the last heartbeat */
hb = ioread32(&idev->dev_info_regs->fw_heartbeat); last_check_time = idev->last_hb_time;
if (!hb) if (time_before(check_time, last_check_time + ionic->watchdog_period))
return 0; return 0;
/* are we stalled? */ fw_hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
if (hb == idev->last_hb) { fw_hb_ready = fw_hb != idev->last_fw_hb;
/* only complain once for each stall seen */
if (idev->last_hb_time != 1) {
dev_info(ionic->dev, "FW heartbeat stalled at %d\n",
idev->last_hb);
idev->last_hb_time = 1;
}
return -ENXIO; /* early FW version had no heartbeat, so fake it */
if (!fw_hb_ready && !fw_hb)
fw_hb_ready = true;
dev_dbg(ionic->dev, "%s: fw_hb %u last_fw_hb %u ready %u\n",
__func__, fw_hb, idev->last_fw_hb, fw_hb_ready);
idev->last_fw_hb = fw_hb;
/* log a transition */
if (fw_hb_ready != idev->fw_hb_ready) {
idev->fw_hb_ready = fw_hb_ready;
if (!fw_hb_ready)
dev_info(ionic->dev, "FW heartbeat stalled at %d\n", fw_hb);
else
dev_info(ionic->dev, "FW heartbeat restored at %d\n", fw_hb);
} }
if (idev->last_hb_time == 1) if (!fw_hb_ready)
dev_info(ionic->dev, "FW heartbeat restored at %d\n", hb); return -ENXIO;
idev->last_hb = hb; idev->last_hb_time = check_time;
idev->last_hb_time = hb_time;
return 0; return 0;
} }
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#ifndef _IONIC_DEV_H_ #ifndef _IONIC_DEV_H_
#define _IONIC_DEV_H_ #define _IONIC_DEV_H_
#include <linux/atomic.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
...@@ -135,9 +136,11 @@ struct ionic_dev { ...@@ -135,9 +136,11 @@ struct ionic_dev {
union ionic_dev_info_regs __iomem *dev_info_regs; union ionic_dev_info_regs __iomem *dev_info_regs;
union ionic_dev_cmd_regs __iomem *dev_cmd_regs; union ionic_dev_cmd_regs __iomem *dev_cmd_regs;
atomic_long_t last_check_time;
unsigned long last_hb_time; unsigned long last_hb_time;
u32 last_hb; u32 last_fw_hb;
u8 last_fw_status; bool fw_hb_ready;
bool fw_status_ready;
u64 __iomem *db_pages; u64 __iomem *db_pages;
dma_addr_t phy_db_pages; dma_addr_t phy_db_pages;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment