Commit 954f6825, authored by Venkata Duvvuru, committed by David S. Miller

be2net: Report a "link down" to the stack when a fatal error or fw reset happens.

When an error (related to HW or FW) is detected on a function, the driver
must pro-actively report a "link down" to the stack so that a possible
failover can be initiated. This is being done currently only for some
HW errors. This patch reports a "link down" even for fatal FW errors and
EEH errors.
Signed-off-by: Venkat Duvvuru <VenkatKumar.Duvvuru@Emulex.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 29e9122b
...@@ -522,6 +522,7 @@ struct be_adapter { ...@@ -522,6 +522,7 @@ struct be_adapter {
u16 work_counter; u16 work_counter;
struct delayed_work be_err_detection_work; struct delayed_work be_err_detection_work;
u8 err_flags;
u32 flags; u32 flags;
u32 cmd_privileges; u32 cmd_privileges;
/* Ethtool knobs and info */ /* Ethtool knobs and info */
...@@ -781,26 +782,36 @@ static inline bool is_ipv4_pkt(struct sk_buff *skb) ...@@ -781,26 +782,36 @@ static inline bool is_ipv4_pkt(struct sk_buff *skb)
return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4; return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4;
} }
static inline bool be_multi_rxq(const struct be_adapter *adapter) #define BE_ERROR_EEH 1
#define BE_ERROR_UE BIT(1)
#define BE_ERROR_FW BIT(2)
#define BE_ERROR_HW (BE_ERROR_EEH | BE_ERROR_UE)
#define BE_ERROR_ANY (BE_ERROR_EEH | BE_ERROR_UE | BE_ERROR_FW)
#define BE_CLEAR_ALL 0xFF
static inline u8 be_check_error(struct be_adapter *adapter, u32 err_type)
{ {
return adapter->num_rx_qs > 1; return (adapter->err_flags & err_type);
} }
static inline bool be_error(struct be_adapter *adapter) static inline void be_set_error(struct be_adapter *adapter, int err_type)
{ {
return adapter->eeh_error || adapter->hw_error || adapter->fw_timeout; struct net_device *netdev = adapter->netdev;
adapter->err_flags |= err_type;
netif_carrier_off(netdev);
dev_info(&adapter->pdev->dev, "%s: Link down\n", netdev->name);
} }
static inline bool be_hw_error(struct be_adapter *adapter) static inline void be_clear_error(struct be_adapter *adapter, int err_type)
{ {
return adapter->eeh_error || adapter->hw_error; adapter->err_flags &= ~err_type;
} }
static inline void be_clear_all_error(struct be_adapter *adapter) static inline bool be_multi_rxq(const struct be_adapter *adapter)
{ {
adapter->eeh_error = false; return adapter->num_rx_qs > 1;
adapter->hw_error = false;
adapter->fw_timeout = false;
} }
void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm,
......
...@@ -93,7 +93,7 @@ static void be_mcc_notify(struct be_adapter *adapter) ...@@ -93,7 +93,7 @@ static void be_mcc_notify(struct be_adapter *adapter)
struct be_queue_info *mccq = &adapter->mcc_obj.q; struct be_queue_info *mccq = &adapter->mcc_obj.q;
u32 val = 0; u32 val = 0;
if (be_error(adapter)) if (be_check_error(adapter, BE_ERROR_ANY))
return; return;
val |= mccq->id & DB_MCCQ_RING_ID_MASK; val |= mccq->id & DB_MCCQ_RING_ID_MASK;
...@@ -489,7 +489,7 @@ static int be_mcc_wait_compl(struct be_adapter *adapter) ...@@ -489,7 +489,7 @@ static int be_mcc_wait_compl(struct be_adapter *adapter)
struct be_mcc_obj *mcc_obj = &adapter->mcc_obj; struct be_mcc_obj *mcc_obj = &adapter->mcc_obj;
for (i = 0; i < mcc_timeout; i++) { for (i = 0; i < mcc_timeout; i++) {
if (be_error(adapter)) if (be_check_error(adapter, BE_ERROR_ANY))
return -EIO; return -EIO;
local_bh_disable(); local_bh_disable();
...@@ -502,7 +502,7 @@ static int be_mcc_wait_compl(struct be_adapter *adapter) ...@@ -502,7 +502,7 @@ static int be_mcc_wait_compl(struct be_adapter *adapter)
} }
if (i == mcc_timeout) { if (i == mcc_timeout) {
dev_err(&adapter->pdev->dev, "FW not responding\n"); dev_err(&adapter->pdev->dev, "FW not responding\n");
adapter->fw_timeout = true; be_set_error(adapter, BE_ERROR_FW);
return -EIO; return -EIO;
} }
return status; return status;
...@@ -541,7 +541,7 @@ static int be_mbox_db_ready_wait(struct be_adapter *adapter, void __iomem *db) ...@@ -541,7 +541,7 @@ static int be_mbox_db_ready_wait(struct be_adapter *adapter, void __iomem *db)
u32 ready; u32 ready;
do { do {
if (be_error(adapter)) if (be_check_error(adapter, BE_ERROR_ANY))
return -EIO; return -EIO;
ready = ioread32(db); ready = ioread32(db);
...@@ -554,7 +554,7 @@ static int be_mbox_db_ready_wait(struct be_adapter *adapter, void __iomem *db) ...@@ -554,7 +554,7 @@ static int be_mbox_db_ready_wait(struct be_adapter *adapter, void __iomem *db)
if (msecs > 4000) { if (msecs > 4000) {
dev_err(&adapter->pdev->dev, "FW not responding\n"); dev_err(&adapter->pdev->dev, "FW not responding\n");
adapter->fw_timeout = true; be_set_error(adapter, BE_ERROR_FW);
be_detect_error(adapter); be_detect_error(adapter);
return -1; return -1;
} }
......
...@@ -179,7 +179,7 @@ static void be_intr_set(struct be_adapter *adapter, bool enable) ...@@ -179,7 +179,7 @@ static void be_intr_set(struct be_adapter *adapter, bool enable)
if (lancer_chip(adapter)) if (lancer_chip(adapter))
return; return;
if (adapter->eeh_error) if (be_check_error(adapter, BE_ERROR_EEH))
return; return;
status = be_cmd_intr_set(adapter, enable); status = be_cmd_intr_set(adapter, enable);
...@@ -191,6 +191,9 @@ static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted) ...@@ -191,6 +191,9 @@ static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
{ {
u32 val = 0; u32 val = 0;
if (be_check_error(adapter, BE_ERROR_HW))
return;
val |= qid & DB_RQ_RING_ID_MASK; val |= qid & DB_RQ_RING_ID_MASK;
val |= posted << DB_RQ_NUM_POSTED_SHIFT; val |= posted << DB_RQ_NUM_POSTED_SHIFT;
...@@ -203,6 +206,9 @@ static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo, ...@@ -203,6 +206,9 @@ static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
{ {
u32 val = 0; u32 val = 0;
if (be_check_error(adapter, BE_ERROR_HW))
return;
val |= txo->q.id & DB_TXULP_RING_ID_MASK; val |= txo->q.id & DB_TXULP_RING_ID_MASK;
val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT; val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
...@@ -219,7 +225,7 @@ static void be_eq_notify(struct be_adapter *adapter, u16 qid, ...@@ -219,7 +225,7 @@ static void be_eq_notify(struct be_adapter *adapter, u16 qid,
val |= qid & DB_EQ_RING_ID_MASK; val |= qid & DB_EQ_RING_ID_MASK;
val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT); val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
if (adapter->eeh_error) if (be_check_error(adapter, BE_ERROR_HW))
return; return;
if (arm) if (arm)
...@@ -240,7 +246,7 @@ void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped) ...@@ -240,7 +246,7 @@ void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
val |= ((qid & DB_CQ_RING_ID_EXT_MASK) << val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
DB_CQ_RING_ID_EXT_MASK_SHIFT); DB_CQ_RING_ID_EXT_MASK_SHIFT);
if (adapter->eeh_error) if (be_check_error(adapter, BE_ERROR_HW))
return; return;
if (arm) if (arm)
...@@ -2324,7 +2330,9 @@ static void be_rx_cq_clean(struct be_rx_obj *rxo) ...@@ -2324,7 +2330,9 @@ static void be_rx_cq_clean(struct be_rx_obj *rxo)
if (lancer_chip(adapter)) if (lancer_chip(adapter))
break; break;
if (flush_wait++ > 10 || be_hw_error(adapter)) { if (flush_wait++ > 50 ||
be_check_error(adapter,
BE_ERROR_HW)) {
dev_warn(&adapter->pdev->dev, dev_warn(&adapter->pdev->dev,
"did not receive flush compl\n"); "did not receive flush compl\n");
break; break;
...@@ -2385,7 +2393,8 @@ static void be_tx_compl_clean(struct be_adapter *adapter) ...@@ -2385,7 +2393,8 @@ static void be_tx_compl_clean(struct be_adapter *adapter)
pending_txqs--; pending_txqs--;
} }
if (pending_txqs == 0 || ++timeo > 10 || be_hw_error(adapter)) if (pending_txqs == 0 || ++timeo > 10 ||
be_check_error(adapter, BE_ERROR_HW))
break; break;
mdelay(1); mdelay(1);
...@@ -2995,22 +3004,19 @@ void be_detect_error(struct be_adapter *adapter) ...@@ -2995,22 +3004,19 @@ void be_detect_error(struct be_adapter *adapter)
u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0; u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0; u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
u32 i; u32 i;
bool error_detected = false;
struct device *dev = &adapter->pdev->dev; struct device *dev = &adapter->pdev->dev;
struct net_device *netdev = adapter->netdev;
if (be_hw_error(adapter)) if (be_check_error(adapter, BE_ERROR_HW))
return; return;
if (lancer_chip(adapter)) { if (lancer_chip(adapter)) {
sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET); sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
if (sliport_status & SLIPORT_STATUS_ERR_MASK) { if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
be_set_error(adapter, BE_ERROR_UE);
sliport_err1 = ioread32(adapter->db + sliport_err1 = ioread32(adapter->db +
SLIPORT_ERROR1_OFFSET); SLIPORT_ERROR1_OFFSET);
sliport_err2 = ioread32(adapter->db + sliport_err2 = ioread32(adapter->db +
SLIPORT_ERROR2_OFFSET); SLIPORT_ERROR2_OFFSET);
adapter->hw_error = true;
error_detected = true;
/* Do not log error messages if its a FW reset */ /* Do not log error messages if its a FW reset */
if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 && if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
sliport_err2 == SLIPORT_ERROR_FW_RESET2) { sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
...@@ -3042,12 +3048,12 @@ void be_detect_error(struct be_adapter *adapter) ...@@ -3042,12 +3048,12 @@ void be_detect_error(struct be_adapter *adapter)
*/ */
if (ue_lo || ue_hi) { if (ue_lo || ue_hi) {
error_detected = true;
dev_err(dev, dev_err(dev,
"Unrecoverable Error detected in the adapter"); "Unrecoverable Error detected in the adapter");
dev_err(dev, "Please reboot server to recover"); dev_err(dev, "Please reboot server to recover");
if (skyhawk_chip(adapter)) if (skyhawk_chip(adapter))
adapter->hw_error = true; be_set_error(adapter, BE_ERROR_UE);
for (i = 0; ue_lo; ue_lo >>= 1, i++) { for (i = 0; ue_lo; ue_lo >>= 1, i++) {
if (ue_lo & 1) if (ue_lo & 1)
dev_err(dev, "UE: %s bit set\n", dev_err(dev, "UE: %s bit set\n",
...@@ -3060,8 +3066,6 @@ void be_detect_error(struct be_adapter *adapter) ...@@ -3060,8 +3066,6 @@ void be_detect_error(struct be_adapter *adapter)
} }
} }
} }
if (error_detected)
netif_carrier_off(netdev);
} }
static void be_msix_disable(struct be_adapter *adapter) static void be_msix_disable(struct be_adapter *adapter)
...@@ -4183,7 +4187,7 @@ static int be_func_init(struct be_adapter *adapter) ...@@ -4183,7 +4187,7 @@ static int be_func_init(struct be_adapter *adapter)
msleep(100); msleep(100);
/* We can clear all errors when function reset succeeds */ /* We can clear all errors when function reset succeeds */
be_clear_all_error(adapter); be_clear_error(adapter, BE_CLEAR_ALL);
} }
/* Tell FW we're ready to fire cmds */ /* Tell FW we're ready to fire cmds */
...@@ -5204,7 +5208,7 @@ static void be_err_detection_task(struct work_struct *work) ...@@ -5204,7 +5208,7 @@ static void be_err_detection_task(struct work_struct *work)
be_detect_error(adapter); be_detect_error(adapter);
if (adapter->hw_error) { if (be_check_error(adapter, BE_ERROR_HW)) {
be_cleanup(adapter); be_cleanup(adapter);
/* As of now error recovery support is in Lancer only */ /* As of now error recovery support is in Lancer only */
...@@ -5715,8 +5719,8 @@ static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev, ...@@ -5715,8 +5719,8 @@ static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
dev_err(&adapter->pdev->dev, "EEH error detected\n"); dev_err(&adapter->pdev->dev, "EEH error detected\n");
if (!adapter->eeh_error) { if (!be_check_error(adapter, BE_ERROR_EEH)) {
adapter->eeh_error = true; be_set_error(adapter, BE_ERROR_EEH);
be_cancel_err_detection(adapter); be_cancel_err_detection(adapter);
...@@ -5763,7 +5767,7 @@ static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev) ...@@ -5763,7 +5767,7 @@ static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
return PCI_ERS_RESULT_DISCONNECT; return PCI_ERS_RESULT_DISCONNECT;
pci_cleanup_aer_uncorrect_error_status(pdev); pci_cleanup_aer_uncorrect_error_status(pdev);
be_clear_all_error(adapter); be_clear_error(adapter, BE_CLEAR_ALL);
return PCI_ERS_RESULT_RECOVERED; return PCI_ERS_RESULT_RECOVERED;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment