Commit bf23ffc8 authored by Thinh Tran, committed by Jakub Kicinski

bnx2x: new flag to track HW resource allocation

While injecting PCIe errors to the upstream PCIe switch of
a BCM57810 NIC, system hangs/crashes were observed.

After several calls to bnx2x_tx_timeout() complete,
bnx2x_nic_unload() is called to free up HW resources
and bnx2x_napi_disable() is called to release NAPI objects.
Later, when the EEH driver calls bnx2x_io_slot_reset() to
complete the recovery process, bnx2x attempts to disable
NAPI again by calling bnx2x_napi_disable() and freeing
resources which have already been freed, resulting in a
hang or crash.

Introduce a new flag to track the HW resource and NAPI
allocation state, refactor duplicated code into a single
function, check page pool allocation status before freeing,
and reduce debug output when a TX timeout event occurs.
Reviewed-by: Manish Chopra <manishc@marvell.com>
Tested-by: Abdul Haleem <abdhalee@in.ibm.com>
Tested-by: David Christensen <drc@linux.vnet.ibm.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Tested-by: Venkata Sai Duggi <venkata.sai.duggi@ibm.com>
Signed-off-by: Thinh Tran <thinhtr@linux.vnet.ibm.com>
Link: https://lore.kernel.org/r/20230818161443.708785-2-thinhtr@linux.vnet.ibm.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 6dc5774d
...@@ -1508,6 +1508,8 @@ struct bnx2x { ...@@ -1508,6 +1508,8 @@ struct bnx2x {
bool cnic_loaded; bool cnic_loaded;
struct cnic_eth_dev *(*cnic_probe)(struct net_device *); struct cnic_eth_dev *(*cnic_probe)(struct net_device *);
bool nic_stopped;
/* Flag that indicates that we can start looking for FCoE L2 queue /* Flag that indicates that we can start looking for FCoE L2 queue
* completions in the default status block. * completions in the default status block.
*/ */
......
...@@ -2715,6 +2715,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) ...@@ -2715,6 +2715,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
bnx2x_add_all_napi(bp); bnx2x_add_all_napi(bp);
DP(NETIF_MSG_IFUP, "napi added\n"); DP(NETIF_MSG_IFUP, "napi added\n");
bnx2x_napi_enable(bp); bnx2x_napi_enable(bp);
bp->nic_stopped = false;
if (IS_PF(bp)) { if (IS_PF(bp)) {
/* set pf load just before approaching the MCP */ /* set pf load just before approaching the MCP */
...@@ -2960,6 +2961,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) ...@@ -2960,6 +2961,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
load_error1: load_error1:
bnx2x_napi_disable(bp); bnx2x_napi_disable(bp);
bnx2x_del_all_napi(bp); bnx2x_del_all_napi(bp);
bp->nic_stopped = true;
/* clear pf_load status, as it was already set */ /* clear pf_load status, as it was already set */
if (IS_PF(bp)) if (IS_PF(bp))
...@@ -3095,6 +3097,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) ...@@ -3095,6 +3097,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
if (!CHIP_IS_E1x(bp)) if (!CHIP_IS_E1x(bp))
bnx2x_pf_disable(bp); bnx2x_pf_disable(bp);
if (!bp->nic_stopped) {
/* Disable HW interrupts, NAPI */ /* Disable HW interrupts, NAPI */
bnx2x_netif_stop(bp, 1); bnx2x_netif_stop(bp, 1);
/* Delete all NAPI objects */ /* Delete all NAPI objects */
...@@ -3103,6 +3106,8 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link) ...@@ -3103,6 +3106,8 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
bnx2x_del_all_napi_cnic(bp); bnx2x_del_all_napi_cnic(bp);
/* Release IRQs */ /* Release IRQs */
bnx2x_free_irq(bp); bnx2x_free_irq(bp);
bp->nic_stopped = true;
}
/* Report UNLOAD_DONE to MCP */ /* Report UNLOAD_DONE to MCP */
bnx2x_send_unload_done(bp, false); bnx2x_send_unload_done(bp, false);
......
...@@ -9474,6 +9474,7 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link) ...@@ -9474,6 +9474,7 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link)
} }
} }
if (!bp->nic_stopped) {
/* Disable HW interrupts, NAPI */ /* Disable HW interrupts, NAPI */
bnx2x_netif_stop(bp, 1); bnx2x_netif_stop(bp, 1);
/* Delete all NAPI objects */ /* Delete all NAPI objects */
...@@ -9483,6 +9484,8 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link) ...@@ -9483,6 +9484,8 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link)
/* Release IRQs */ /* Release IRQs */
bnx2x_free_irq(bp); bnx2x_free_irq(bp);
bp->nic_stopped = true;
}
/* Reset the chip, unless PCI function is offline. If we reach this /* Reset the chip, unless PCI function is offline. If we reach this
* point following a PCI error handling, it means device is really * point following a PCI error handling, it means device is really
...@@ -14238,6 +14241,7 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev) ...@@ -14238,6 +14241,7 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
} }
bnx2x_drain_tx_queues(bp); bnx2x_drain_tx_queues(bp);
bnx2x_send_unload_req(bp, UNLOAD_RECOVERY); bnx2x_send_unload_req(bp, UNLOAD_RECOVERY);
if (!bp->nic_stopped) {
bnx2x_netif_stop(bp, 1); bnx2x_netif_stop(bp, 1);
bnx2x_del_all_napi(bp); bnx2x_del_all_napi(bp);
...@@ -14245,6 +14249,8 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev) ...@@ -14245,6 +14249,8 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
bnx2x_del_all_napi_cnic(bp); bnx2x_del_all_napi_cnic(bp);
bnx2x_free_irq(bp); bnx2x_free_irq(bp);
bp->nic_stopped = true;
}
/* Report UNLOAD_DONE to MCP */ /* Report UNLOAD_DONE to MCP */
bnx2x_send_unload_done(bp, true); bnx2x_send_unload_done(bp, true);
......
...@@ -529,6 +529,7 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp) ...@@ -529,6 +529,7 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp)
bnx2x_vfpf_finalize(bp, &req->first_tlv); bnx2x_vfpf_finalize(bp, &req->first_tlv);
free_irq: free_irq:
if (!bp->nic_stopped) {
/* Disable HW interrupts, NAPI */ /* Disable HW interrupts, NAPI */
bnx2x_netif_stop(bp, 0); bnx2x_netif_stop(bp, 0);
/* Delete all NAPI objects */ /* Delete all NAPI objects */
...@@ -536,6 +537,8 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp) ...@@ -536,6 +537,8 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp)
/* Release IRQs */ /* Release IRQs */
bnx2x_free_irq(bp); bnx2x_free_irq(bp);
bp->nic_stopped = true;
}
} }
static void bnx2x_leading_vfq_init(struct bnx2x *bp, struct bnx2x_virtf *vf, static void bnx2x_leading_vfq_init(struct bnx2x *bp, struct bnx2x_virtf *vf,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment