Commit ef8693eb authored by Mitch Williams, committed by David S. Miller

i40evf: refactor reset handling

Respond better to a VF reset event. When a reset is signaled by the
PF, or detected by the watchdog task, prevent the watchdog from
processing admin queue requests, and schedule the reset task.

In the reset task, wait first for the reset to start, then for it to
complete, then reinit the driver.

If the reset never appears to complete after a long, long time (>10
seconds is possible depending on what's going on with the PF driver),
then set a flag to indicate that PF communications have failed.

If this flag is set, check for the reset to complete in the watchdog,
and attempt to do a full reinitialization of the driver from scratch.

With these changes the VF driver correctly handles a PF reset event
while running on bare metal, or in a VM.

Also update copyrights.

Change-ID: I93513efd0b50523a8345e7f6a33a5e4f8a2a5996
Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Sibai Li <sibai.li@intel.com>
Signed-off-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e8106ebe
...@@ -164,15 +164,14 @@ struct i40evf_vlan_filter { ...@@ -164,15 +164,14 @@ struct i40evf_vlan_filter {
/* Driver state. The order of these is important! */ /* Driver state. The order of these is important! */
enum i40evf_state_t { enum i40evf_state_t {
__I40EVF_STARTUP, /* driver loaded, probe complete */ __I40EVF_STARTUP, /* driver loaded, probe complete */
__I40EVF_FAILED, /* PF communication failed. Fatal. */
__I40EVF_REMOVE, /* driver is being unloaded */ __I40EVF_REMOVE, /* driver is being unloaded */
__I40EVF_INIT_VERSION_CHECK, /* aq msg sent, awaiting reply */ __I40EVF_INIT_VERSION_CHECK, /* aq msg sent, awaiting reply */
__I40EVF_INIT_GET_RESOURCES, /* aq msg sent, awaiting reply */ __I40EVF_INIT_GET_RESOURCES, /* aq msg sent, awaiting reply */
__I40EVF_INIT_SW, /* got resources, setting up structs */ __I40EVF_INIT_SW, /* got resources, setting up structs */
__I40EVF_RESETTING, /* in reset */
/* Below here, watchdog is running */ /* Below here, watchdog is running */
__I40EVF_DOWN, /* ready, can be opened */ __I40EVF_DOWN, /* ready, can be opened */
__I40EVF_TESTING, /* in ethtool self-test */ __I40EVF_TESTING, /* in ethtool self-test */
__I40EVF_RESETTING, /* in reset */
__I40EVF_RUNNING, /* opened, working */ __I40EVF_RUNNING, /* opened, working */
}; };
...@@ -214,6 +213,8 @@ struct i40evf_adapter { ...@@ -214,6 +213,8 @@ struct i40evf_adapter {
#define I40EVF_FLAG_IMIR_ENABLED (u32)(1 << 5) #define I40EVF_FLAG_IMIR_ENABLED (u32)(1 << 5)
#define I40EVF_FLAG_MQ_CAPABLE (u32)(1 << 6) #define I40EVF_FLAG_MQ_CAPABLE (u32)(1 << 6)
#define I40EVF_FLAG_NEED_LINK_UPDATE (u32)(1 << 7) #define I40EVF_FLAG_NEED_LINK_UPDATE (u32)(1 << 7)
#define I40EVF_FLAG_PF_COMMS_FAILED (u32)(1 << 8)
#define I40EVF_FLAG_RESET_PENDING (u32)(1 << 9)
/* duplicates for common code */ /* duplicates for common code */
#define I40E_FLAG_FDIR_ATR_ENABLED 0 #define I40E_FLAG_FDIR_ATR_ENABLED 0
#define I40E_FLAG_DCB_ENABLED 0 #define I40E_FLAG_DCB_ENABLED 0
...@@ -231,6 +232,7 @@ struct i40evf_adapter { ...@@ -231,6 +232,7 @@ struct i40evf_adapter {
#define I40EVF_FLAG_AQ_CONFIGURE_QUEUES (u32)(1 << 6) #define I40EVF_FLAG_AQ_CONFIGURE_QUEUES (u32)(1 << 6)
#define I40EVF_FLAG_AQ_MAP_VECTORS (u32)(1 << 7) #define I40EVF_FLAG_AQ_MAP_VECTORS (u32)(1 << 7)
#define I40EVF_FLAG_AQ_HANDLE_RESET (u32)(1 << 8) #define I40EVF_FLAG_AQ_HANDLE_RESET (u32)(1 << 8)
/* OS defined structs */ /* OS defined structs */
struct net_device *netdev; struct net_device *netdev;
struct pci_dev *pdev; struct pci_dev *pdev;
......
...@@ -964,16 +964,18 @@ void i40evf_down(struct i40evf_adapter *adapter) ...@@ -964,16 +964,18 @@ void i40evf_down(struct i40evf_adapter *adapter)
struct net_device *netdev = adapter->netdev; struct net_device *netdev = adapter->netdev;
struct i40evf_mac_filter *f; struct i40evf_mac_filter *f;
/* remove all MAC filters from the VSI */ /* remove all MAC filters */
list_for_each_entry(f, &adapter->mac_filter_list, list) { list_for_each_entry(f, &adapter->mac_filter_list, list) {
f->remove = true; f->remove = true;
} }
adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER; if (!(adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) &&
/* disable receives */ adapter->state != __I40EVF_RESETTING) {
adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES; adapter->aq_required |= I40EVF_FLAG_AQ_DEL_MAC_FILTER;
mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); /* disable receives */
msleep(20); adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_QUEUES;
mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
msleep(20);
}
netif_tx_disable(netdev); netif_tx_disable(netdev);
netif_tx_stop_all_queues(netdev); netif_tx_stop_all_queues(netdev);
...@@ -1292,19 +1294,47 @@ static void i40evf_watchdog_task(struct work_struct *work) ...@@ -1292,19 +1294,47 @@ static void i40evf_watchdog_task(struct work_struct *work)
watchdog_task); watchdog_task);
struct i40e_hw *hw = &adapter->hw; struct i40e_hw *hw = &adapter->hw;
if (adapter->state < __I40EVF_DOWN) if (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section))
goto restart_watchdog;
if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) {
dev_info(&adapter->pdev->dev, "Checking for redemption\n");
if ((rd32(hw, I40E_VFGEN_RSTAT) & 0x3) == I40E_VFR_VFACTIVE) {
/* A chance for redemption! */
dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n");
adapter->state = __I40EVF_STARTUP;
adapter->flags &= ~I40EVF_FLAG_PF_COMMS_FAILED;
schedule_delayed_work(&adapter->init_task, 10);
clear_bit(__I40EVF_IN_CRITICAL_TASK,
&adapter->crit_section);
/* Don't reschedule the watchdog, since we've restarted
* the init task. When init_task contacts the PF and
* gets everything set up again, it'll restart the
* watchdog for us. Down, boy. Sit. Stay. Woof.
*/
return;
}
adapter->aq_pending = 0;
adapter->aq_required = 0;
adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
goto watchdog_done; goto watchdog_done;
}
if (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section)) if ((adapter->state < __I40EVF_DOWN) ||
(adapter->flags & I40EVF_FLAG_RESET_PENDING))
goto watchdog_done; goto watchdog_done;
/* check for unannounced reset */ /* check for reset */
if ((adapter->state != __I40EVF_RESETTING) && if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING) &&
(rd32(hw, I40E_VFGEN_RSTAT) & 0x3) != I40E_VFR_VFACTIVE) { (rd32(hw, I40E_VFGEN_RSTAT) & 0x3) != I40E_VFR_VFACTIVE) {
adapter->state = __I40EVF_RESETTING; adapter->state = __I40EVF_RESETTING;
adapter->flags |= I40EVF_FLAG_RESET_PENDING;
dev_err(&adapter->pdev->dev, "Hardware reset detected.\n");
dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
schedule_work(&adapter->reset_task); schedule_work(&adapter->reset_task);
dev_info(&adapter->pdev->dev, "%s: hardware reset detected\n", adapter->aq_pending = 0;
__func__); adapter->aq_required = 0;
adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
goto watchdog_done; goto watchdog_done;
} }
...@@ -1359,13 +1389,15 @@ static void i40evf_watchdog_task(struct work_struct *work) ...@@ -1359,13 +1389,15 @@ static void i40evf_watchdog_task(struct work_struct *work)
i40evf_irq_enable(adapter, true); i40evf_irq_enable(adapter, true);
i40evf_fire_sw_int(adapter, 0xFF); i40evf_fire_sw_int(adapter, 0xFF);
watchdog_done: watchdog_done:
clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
restart_watchdog:
if (adapter->aq_required) if (adapter->aq_required)
mod_timer(&adapter->watchdog_timer, mod_timer(&adapter->watchdog_timer,
jiffies + msecs_to_jiffies(20)); jiffies + msecs_to_jiffies(20));
else else
mod_timer(&adapter->watchdog_timer, jiffies + (HZ * 2)); mod_timer(&adapter->watchdog_timer, jiffies + (HZ * 2));
clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
schedule_work(&adapter->adminq_task); schedule_work(&adapter->adminq_task);
} }
...@@ -1412,6 +1444,8 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter) ...@@ -1412,6 +1444,8 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter)
i40e_flush(hw); i40e_flush(hw);
} }
#define I40EVF_RESET_WAIT_MS 100
#define I40EVF_RESET_WAIT_COUNT 200
/** /**
* i40evf_reset_task - Call-back task to handle hardware reset * i40evf_reset_task - Call-back task to handle hardware reset
* @work: pointer to work_struct * @work: pointer to work_struct
...@@ -1422,8 +1456,9 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter) ...@@ -1422,8 +1456,9 @@ static void i40evf_configure_rss(struct i40evf_adapter *adapter)
**/ **/
static void i40evf_reset_task(struct work_struct *work) static void i40evf_reset_task(struct work_struct *work)
{ {
struct i40evf_adapter *adapter = struct i40evf_adapter *adapter = container_of(work,
container_of(work, struct i40evf_adapter, reset_task); struct i40evf_adapter,
reset_task);
struct i40e_hw *hw = &adapter->hw; struct i40e_hw *hw = &adapter->hw;
int i = 0, err; int i = 0, err;
uint32_t rstat_val; uint32_t rstat_val;
...@@ -1431,22 +1466,56 @@ static void i40evf_reset_task(struct work_struct *work) ...@@ -1431,22 +1466,56 @@ static void i40evf_reset_task(struct work_struct *work)
while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK, while (test_and_set_bit(__I40EVF_IN_CRITICAL_TASK,
&adapter->crit_section)) &adapter->crit_section))
udelay(500); udelay(500);
/* poll until we see the reset actually happen */
for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
rstat_val = rd32(hw, I40E_VFGEN_RSTAT) &
I40E_VFGEN_RSTAT_VFR_STATE_MASK;
if (rstat_val != I40E_VFR_VFACTIVE) {
dev_info(&adapter->pdev->dev, "Reset now occurring\n");
break;
} else {
msleep(I40EVF_RESET_WAIT_MS);
}
}
if (i == I40EVF_RESET_WAIT_COUNT) {
dev_err(&adapter->pdev->dev, "Reset was not detected\n");
adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
goto continue_reset; /* act like the reset happened */
}
/* wait until the reset is complete */ /* wait until the reset is complete and the PF is responding to us */
for (i = 0; i < 20; i++) { for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
rstat_val = rd32(hw, I40E_VFGEN_RSTAT) & rstat_val = rd32(hw, I40E_VFGEN_RSTAT) &
I40E_VFGEN_RSTAT_VFR_STATE_MASK; I40E_VFGEN_RSTAT_VFR_STATE_MASK;
if (rstat_val == I40E_VFR_COMPLETED) if (rstat_val == I40E_VFR_VFACTIVE) {
dev_info(&adapter->pdev->dev, "Reset is complete. Reinitializing.\n");
break; break;
else } else {
mdelay(100); msleep(I40EVF_RESET_WAIT_MS);
}
} }
if (i == 20) { if (i == I40EVF_RESET_WAIT_COUNT) {
/* reset never finished */ /* reset never finished */
dev_info(&adapter->pdev->dev, "%s: reset never finished: %x\n", dev_err(&adapter->pdev->dev, "Reset never finished (%x). PF driver is dead, and so am I.\n",
__func__, rstat_val); rstat_val);
/* carry on anyway */ adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
if (netif_running(adapter->netdev))
i40evf_close(adapter->netdev);
i40evf_free_misc_irq(adapter);
i40evf_reset_interrupt_capability(adapter);
i40evf_free_queues(adapter);
kfree(adapter->vf_res);
i40evf_shutdown_adminq(hw);
adapter->netdev->flags &= ~IFF_UP;
clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
return; /* Do not attempt to reinit. It's dead, Jim. */
} }
continue_reset:
adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
i40evf_down(adapter); i40evf_down(adapter);
adapter->state = __I40EVF_RESETTING; adapter->state = __I40EVF_RESETTING;
...@@ -1506,6 +1575,9 @@ static void i40evf_adminq_task(struct work_struct *work) ...@@ -1506,6 +1575,9 @@ static void i40evf_adminq_task(struct work_struct *work)
i40e_status ret; i40e_status ret;
u16 pending; u16 pending;
if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED)
return;
event.msg_size = I40EVF_MAX_AQ_BUF_SIZE; event.msg_size = I40EVF_MAX_AQ_BUF_SIZE;
event.msg_buf = kzalloc(event.msg_size, GFP_KERNEL); event.msg_buf = kzalloc(event.msg_size, GFP_KERNEL);
if (!event.msg_buf) { if (!event.msg_buf) {
...@@ -1637,6 +1709,10 @@ static int i40evf_open(struct net_device *netdev) ...@@ -1637,6 +1709,10 @@ static int i40evf_open(struct net_device *netdev)
struct i40evf_adapter *adapter = netdev_priv(netdev); struct i40evf_adapter *adapter = netdev_priv(netdev);
int err; int err;
if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED) {
dev_err(&adapter->pdev->dev, "Unable to open device due to PF driver failure.\n");
return -EIO;
}
if (adapter->state != __I40EVF_DOWN) if (adapter->state != __I40EVF_DOWN)
return -EBUSY; return -EBUSY;
...@@ -1691,8 +1767,12 @@ static int i40evf_close(struct net_device *netdev) ...@@ -1691,8 +1767,12 @@ static int i40evf_close(struct net_device *netdev)
{ {
struct i40evf_adapter *adapter = netdev_priv(netdev); struct i40evf_adapter *adapter = netdev_priv(netdev);
if (adapter->state <= __I40EVF_DOWN)
return 0;
/* signal that we are down to the interrupt handler */ /* signal that we are down to the interrupt handler */
adapter->state = __I40EVF_DOWN; adapter->state = __I40EVF_DOWN;
set_bit(__I40E_DOWN, &adapter->vsi.state); set_bit(__I40E_DOWN, &adapter->vsi.state);
i40evf_down(adapter); i40evf_down(adapter);
...@@ -1843,6 +1923,8 @@ static void i40evf_init_task(struct work_struct *work) ...@@ -1843,6 +1923,8 @@ static void i40evf_init_task(struct work_struct *work)
switch (adapter->state) { switch (adapter->state) {
case __I40EVF_STARTUP: case __I40EVF_STARTUP:
/* driver loaded, probe complete */ /* driver loaded, probe complete */
adapter->flags &= ~I40EVF_FLAG_PF_COMMS_FAILED;
adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
err = i40e_set_mac_type(hw); err = i40e_set_mac_type(hw);
if (err) { if (err) {
dev_info(&pdev->dev, "%s: set_mac_type failed: %d\n", dev_info(&pdev->dev, "%s: set_mac_type failed: %d\n",
...@@ -2006,9 +2088,11 @@ static void i40evf_init_task(struct work_struct *work) ...@@ -2006,9 +2088,11 @@ static void i40evf_init_task(struct work_struct *work)
adapter->vsi.tx_itr_setting = I40E_ITR_DYNAMIC; adapter->vsi.tx_itr_setting = I40E_ITR_DYNAMIC;
adapter->vsi.netdev = adapter->netdev; adapter->vsi.netdev = adapter->netdev;
err = register_netdev(netdev); if (!adapter->netdev_registered) {
if (err) err = register_netdev(netdev);
goto err_register; if (err)
goto err_register;
}
adapter->netdev_registered = true; adapter->netdev_registered = true;
...@@ -2032,17 +2116,16 @@ static void i40evf_init_task(struct work_struct *work) ...@@ -2032,17 +2116,16 @@ static void i40evf_init_task(struct work_struct *work)
i40evf_free_misc_irq(adapter); i40evf_free_misc_irq(adapter);
err_sw_init: err_sw_init:
i40evf_reset_interrupt_capability(adapter); i40evf_reset_interrupt_capability(adapter);
adapter->state = __I40EVF_FAILED;
err_alloc: err_alloc:
kfree(adapter->vf_res); kfree(adapter->vf_res);
adapter->vf_res = NULL; adapter->vf_res = NULL;
err: err:
if (hw->aq.asq.count)
i40evf_shutdown_adminq(hw); /* ignore error */
/* Things went into the weeds, so try again later */ /* Things went into the weeds, so try again later */
if (++adapter->aq_wait_count > I40EVF_AQ_MAX_ERR) { if (++adapter->aq_wait_count > I40EVF_AQ_MAX_ERR) {
dev_err(&pdev->dev, "Failed to communicate with PF; giving up.\n"); dev_err(&pdev->dev, "Failed to communicate with PF; giving up.\n");
if (hw->aq.asq.count) adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
i40evf_shutdown_adminq(hw); /* ignore error */
adapter->state = __I40EVF_FAILED;
return; /* do not reschedule */ return; /* do not reschedule */
} }
schedule_delayed_work(&adapter->init_task, HZ * 3); schedule_delayed_work(&adapter->init_task, HZ * 3);
...@@ -2272,6 +2355,7 @@ static void i40evf_remove(struct pci_dev *pdev) ...@@ -2272,6 +2355,7 @@ static void i40evf_remove(struct pci_dev *pdev)
struct i40e_hw *hw = &adapter->hw; struct i40e_hw *hw = &adapter->hw;
cancel_delayed_work_sync(&adapter->init_task); cancel_delayed_work_sync(&adapter->init_task);
cancel_work_sync(&adapter->reset_task);
if (adapter->netdev_registered) { if (adapter->netdev_registered) {
unregister_netdev(netdev); unregister_netdev(netdev);
......
/******************************************************************************* /*******************************************************************************
* *
* Intel Ethernet Controller XL710 Family Linux Virtual Function Driver * Intel Ethernet Controller XL710 Family Linux Virtual Function Driver
* Copyright(c) 2013 Intel Corporation. * Copyright(c) 2013 - 2014 Intel Corporation.
* *
* This program is free software; you can redistribute it and/or modify it * This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License, * under the terms and conditions of the GNU General Public License,
...@@ -43,6 +43,9 @@ static int i40evf_send_pf_msg(struct i40evf_adapter *adapter, ...@@ -43,6 +43,9 @@ static int i40evf_send_pf_msg(struct i40evf_adapter *adapter,
struct i40e_hw *hw = &adapter->hw; struct i40e_hw *hw = &adapter->hw;
i40e_status err; i40e_status err;
if (adapter->flags & I40EVF_FLAG_PF_COMMS_FAILED)
return 0; /* nothing to see here, move along */
err = i40e_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL); err = i40e_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL);
if (err) if (err)
dev_err(&adapter->pdev->dev, "Unable to send opcode %d to PF, error %d, aq status %d\n", dev_err(&adapter->pdev->dev, "Unable to send opcode %d to PF, error %d, aq status %d\n",
...@@ -689,10 +692,12 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, ...@@ -689,10 +692,12 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
} }
break; break;
case I40E_VIRTCHNL_EVENT_RESET_IMPENDING: case I40E_VIRTCHNL_EVENT_RESET_IMPENDING:
adapter->state = __I40EVF_RESETTING; dev_info(&adapter->pdev->dev, "PF reset warning received\n");
schedule_work(&adapter->reset_task); if (!(adapter->flags & I40EVF_FLAG_RESET_PENDING)) {
dev_info(&adapter->pdev->dev, adapter->flags |= I40EVF_FLAG_RESET_PENDING;
"%s: hardware reset pending\n", __func__); dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
schedule_work(&adapter->reset_task);
}
break; break;
default: default:
dev_err(&adapter->pdev->dev, dev_err(&adapter->pdev->dev,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment