Commit 6ae4e733, authored by Shiju Jose, committed by David S. Miller

net: hns3: Add PCIe AER error recovery

This patch adds the error recovery for the HNS hw errors.
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Signed-off-by: Salil Mehta <salil.mehta@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 5a9f0eac
...@@ -402,7 +402,7 @@ struct hnae3_ae_ops { ...@@ -402,7 +402,7 @@ struct hnae3_ae_ops {
int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid, int (*set_vf_vlan_filter)(struct hnae3_handle *handle, int vfid,
u16 vlan, u8 qos, __be16 proto); u16 vlan, u8 qos, __be16 proto);
int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable); int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable);
void (*reset_event)(struct hnae3_handle *handle); void (*reset_event)(struct pci_dev *pdev, struct hnae3_handle *handle);
void (*get_channels)(struct hnae3_handle *handle, void (*get_channels)(struct hnae3_handle *handle,
struct ethtool_channels *ch); struct ethtool_channels *ch);
void (*get_tqps_and_rss_info)(struct hnae3_handle *h, void (*get_tqps_and_rss_info)(struct hnae3_handle *h,
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <linux/ipv6.h> #include <linux/ipv6.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/aer.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/sctp.h> #include <linux/sctp.h>
#include <linux/vermagic.h> #include <linux/vermagic.h>
...@@ -1613,7 +1614,7 @@ static void hns3_nic_net_timeout(struct net_device *ndev) ...@@ -1613,7 +1614,7 @@ static void hns3_nic_net_timeout(struct net_device *ndev)
/* request the reset */ /* request the reset */
if (h->ae_algo->ops->reset_event) if (h->ae_algo->ops->reset_event)
h->ae_algo->ops->reset_event(h); h->ae_algo->ops->reset_event(h->pdev, h);
} }
static const struct net_device_ops hns3_nic_netdev_ops = { static const struct net_device_ops hns3_nic_netdev_ops = {
...@@ -1796,8 +1797,25 @@ static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev, ...@@ -1796,8 +1797,25 @@ static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev,
return ret; return ret;
} }
/**
 * hns3_slot_reset - slot_reset callback of the driver's PCI error handlers
 * @pdev: PCI device the error was reported for
 *
 * Looks up the ae device stored as driver data on @pdev and asks it to
 * reset through its reset_event() op. No hnae3 handle is available on this
 * path, so NULL is passed and the op is expected to pick one itself.
 *
 * Return: PCI_ERS_RESULT_RECOVERED when a reset was requested,
 * PCI_ERS_RESULT_DISCONNECT when no reset could be requested (driver data
 * or ops table missing, or the ops table has no reset_event() op).
 */
static pci_ers_result_t hns3_slot_reset(struct pci_dev *pdev)
{
	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
	struct device *dev = &pdev->dev;

	/* Without driver data or an ops table there is nothing to call;
	 * report the same result as the "no reset_event op" case instead of
	 * dereferencing a NULL pointer.
	 * NOTE(review): presumably ops stays unset until a matching ae_algo
	 * is registered - confirm against the hnae3 registration code.
	 */
	if (!ae_dev || !ae_dev->ops)
		return PCI_ERS_RESULT_DISCONNECT;

	dev_info(dev, "requesting reset due to PCI error\n");

	/* request the reset */
	if (ae_dev->ops->reset_event) {
		ae_dev->ops->reset_event(pdev, NULL);
		return PCI_ERS_RESULT_RECOVERED;
	}

	return PCI_ERS_RESULT_DISCONNECT;
}
static const struct pci_error_handlers hns3_err_handler = { static const struct pci_error_handlers hns3_err_handler = {
.error_detected = hns3_error_detected, .error_detected = hns3_error_detected,
.slot_reset = hns3_slot_reset,
}; };
static struct pci_driver hns3_driver = { static struct pci_driver hns3_driver = {
......
...@@ -2489,12 +2489,18 @@ static void hclge_reset(struct hclge_dev *hdev) ...@@ -2489,12 +2489,18 @@ static void hclge_reset(struct hclge_dev *hdev)
ae_dev->reset_type = HNAE3_NONE_RESET; ae_dev->reset_type = HNAE3_NONE_RESET;
} }
static void hclge_reset_event(struct hnae3_handle *handle) static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle)
{ {
struct hclge_vport *vport = hclge_get_vport(handle); struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
struct hclge_dev *hdev = vport->back; struct hclge_dev *hdev = ae_dev->priv;
/* check if this is a new reset request and we are not here just because /* We might end up getting called broadly because of 2 below cases:
* 1. Recoverable error was conveyed through APEI and only way to bring
* normalcy is to reset.
* 2. A new reset request from the stack due to timeout
*
* For the first case,error event might not have ae handle available.
* check if this is a new reset request and we are not here just because
* last reset attempt did not succeed and watchdog hit us again. We will * last reset attempt did not succeed and watchdog hit us again. We will
* know this if last reset request did not occur very recently (watchdog * know this if last reset request did not occur very recently (watchdog
* timer = 5*HZ, let us check after sufficiently large time, say 4*5*Hz) * timer = 5*HZ, let us check after sufficiently large time, say 4*5*Hz)
...@@ -2503,6 +2509,9 @@ static void hclge_reset_event(struct hnae3_handle *handle) ...@@ -2503,6 +2509,9 @@ static void hclge_reset_event(struct hnae3_handle *handle)
* want to make sure we throttle the reset request. Therefore, we will * want to make sure we throttle the reset request. Therefore, we will
* not allow it again before 3*HZ times. * not allow it again before 3*HZ times.
*/ */
if (!handle)
handle = &hdev->vport[0].nic;
if (time_before(jiffies, (handle->last_reset_time + 3 * HZ))) if (time_before(jiffies, (handle->last_reset_time + 3 * HZ)))
return; return;
else if (time_after(jiffies, (handle->last_reset_time + 4 * 5 * HZ))) else if (time_after(jiffies, (handle->last_reset_time + 4 * 5 * HZ)))
......
...@@ -1214,7 +1214,8 @@ static int hclgevf_do_reset(struct hclgevf_dev *hdev) ...@@ -1214,7 +1214,8 @@ static int hclgevf_do_reset(struct hclgevf_dev *hdev)
return status; return status;
} }
static void hclgevf_reset_event(struct hnae3_handle *handle) static void hclgevf_reset_event(struct pci_dev *pdev,
struct hnae3_handle *handle)
{ {
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment