Commit 278396de, authored by Tomer Tayar and committed by David S. Miller

qede: Error recovery process

This patch adds the error recovery process in the qede driver.
The process includes a partial/customized driver unload and load, which
allows it to look like a short suspend period to the kernel while
preserving the net devices' state.
Signed-off-by: Tomer Tayar <tomer.tayar@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c75860e4
...@@ -162,6 +162,7 @@ struct qede_rdma_dev { ...@@ -162,6 +162,7 @@ struct qede_rdma_dev {
struct list_head entry; struct list_head entry;
struct list_head rdma_event_list; struct list_head rdma_event_list;
struct workqueue_struct *rdma_wq; struct workqueue_struct *rdma_wq;
bool exp_recovery;
}; };
struct qede_ptp; struct qede_ptp;
...@@ -264,6 +265,7 @@ struct qede_dev { ...@@ -264,6 +265,7 @@ struct qede_dev {
enum QEDE_STATE { enum QEDE_STATE {
QEDE_STATE_CLOSED, QEDE_STATE_CLOSED,
QEDE_STATE_OPEN, QEDE_STATE_OPEN,
QEDE_STATE_RECOVERY,
}; };
#define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo)) #define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo))
...@@ -462,6 +464,7 @@ struct qede_fastpath { ...@@ -462,6 +464,7 @@ struct qede_fastpath {
#define QEDE_CSUM_UNNECESSARY BIT(1) #define QEDE_CSUM_UNNECESSARY BIT(1)
#define QEDE_TUNN_CSUM_UNNECESSARY BIT(2) #define QEDE_TUNN_CSUM_UNNECESSARY BIT(2)
#define QEDE_SP_RECOVERY 0
#define QEDE_SP_RX_MODE 1 #define QEDE_SP_RX_MODE 1
#ifdef CONFIG_RFS_ACCEL #ifdef CONFIG_RFS_ACCEL
......
...@@ -133,23 +133,12 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id); ...@@ -133,23 +133,12 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id);
static void qede_remove(struct pci_dev *pdev); static void qede_remove(struct pci_dev *pdev);
static void qede_shutdown(struct pci_dev *pdev); static void qede_shutdown(struct pci_dev *pdev);
static void qede_link_update(void *dev, struct qed_link_output *link); static void qede_link_update(void *dev, struct qed_link_output *link);
static void qede_schedule_recovery_handler(void *dev);
static void qede_recovery_handler(struct qede_dev *edev);
static void qede_get_eth_tlv_data(void *edev, void *data); static void qede_get_eth_tlv_data(void *edev, void *data);
static void qede_get_generic_tlv_data(void *edev, static void qede_get_generic_tlv_data(void *edev,
struct qed_generic_tlvs *data); struct qed_generic_tlvs *data);
/* The qede lock is used to protect driver state change and driver flows that
 * are not reentrant.
 *
 * __qede_lock() acquires only the per-device mutex edev->qede_lock; it does
 * not touch any other lock. Release it with __qede_unlock().
 */
void __qede_lock(struct qede_dev *edev)
{
mutex_lock(&edev->qede_lock);
}
/* Release the per-device mutex edev->qede_lock taken by __qede_lock(). */
void __qede_unlock(struct qede_dev *edev)
{
mutex_unlock(&edev->qede_lock);
}
#ifdef CONFIG_QED_SRIOV #ifdef CONFIG_QED_SRIOV
static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos, static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
__be16 vlan_proto) __be16 vlan_proto)
...@@ -231,6 +220,7 @@ static struct qed_eth_cb_ops qede_ll_ops = { ...@@ -231,6 +220,7 @@ static struct qed_eth_cb_ops qede_ll_ops = {
.arfs_filter_op = qede_arfs_filter_op, .arfs_filter_op = qede_arfs_filter_op,
#endif #endif
.link_update = qede_link_update, .link_update = qede_link_update,
.schedule_recovery_handler = qede_schedule_recovery_handler,
.get_generic_tlv_data = qede_get_generic_tlv_data, .get_generic_tlv_data = qede_get_generic_tlv_data,
.get_protocol_tlv_data = qede_get_eth_tlv_data, .get_protocol_tlv_data = qede_get_eth_tlv_data,
}, },
...@@ -950,11 +940,57 @@ static int qede_alloc_fp_array(struct qede_dev *edev) ...@@ -950,11 +940,57 @@ static int qede_alloc_fp_array(struct qede_dev *edev)
return -ENOMEM; return -ENOMEM;
} }
/* The qede lock is used to protect driver state change and driver flows that
 * are not reentrant.
 *
 * __qede_lock() acquires only the per-device mutex edev->qede_lock; it does
 * not touch any other lock. Release it with __qede_unlock().
 */
void __qede_lock(struct qede_dev *edev)
{
mutex_lock(&edev->qede_lock);
}
/* Release the per-device mutex edev->qede_lock taken by __qede_lock(). */
void __qede_unlock(struct qede_dev *edev)
{
mutex_unlock(&edev->qede_lock);
}
/* This version of the lock should be used when acquiring the RTNL lock is also
 * needed in addition to the internal qede lock.
 *
 * The RTNL lock is taken first and the internal qede lock second.
 */
void qede_lock(struct qede_dev *edev)
{
rtnl_lock();
__qede_lock(edev);
}
/* Counterpart of qede_lock(): drops the internal qede lock first and the
 * RTNL lock second, i.e. the reverse of the order in which qede_lock()
 * acquires them.
 */
void qede_unlock(struct qede_dev *edev)
{
__qede_unlock(edev);
rtnl_unlock();
}
static void qede_sp_task(struct work_struct *work) static void qede_sp_task(struct work_struct *work)
{ {
struct qede_dev *edev = container_of(work, struct qede_dev, struct qede_dev *edev = container_of(work, struct qede_dev,
sp_task.work); sp_task.work);
/* The locking scheme depends on the specific flag:
* In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to
* ensure that ongoing flows are ended and new ones are not started.
* In other cases - only the internal qede lock should be acquired.
*/
if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) {
#ifdef CONFIG_QED_SRIOV
/* SRIOV must be disabled outside the lock to avoid a deadlock.
* The recovery of the active VFs is currently not supported.
*/
qede_sriov_configure(edev->pdev, 0);
#endif
qede_lock(edev);
qede_recovery_handler(edev);
qede_unlock(edev);
}
__qede_lock(edev); __qede_lock(edev);
if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags)) if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags))
...@@ -1031,8 +1067,13 @@ static void qede_log_probe(struct qede_dev *edev) ...@@ -1031,8 +1067,13 @@ static void qede_log_probe(struct qede_dev *edev)
enum qede_probe_mode { enum qede_probe_mode {
QEDE_PROBE_NORMAL, QEDE_PROBE_NORMAL,
QEDE_PROBE_RECOVERY,
}; };
/* Translate a qede probe mode into the matching qede_rdma probe mode:
 * QEDE_PROBE_NORMAL maps to QEDE_RDMA_PROBE_NORMAL, any other value maps to
 * QEDE_RDMA_PROBE_RECOVERY.
 */
#define QEDE_RDMA_PROBE_MODE(mode) \
	((mode) != QEDE_PROBE_NORMAL ? QEDE_RDMA_PROBE_RECOVERY \
				     : QEDE_RDMA_PROBE_NORMAL)
static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
bool is_vf, enum qede_probe_mode mode) bool is_vf, enum qede_probe_mode mode)
{ {
...@@ -1051,6 +1092,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, ...@@ -1051,6 +1092,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
probe_params.dp_module = dp_module; probe_params.dp_module = dp_module;
probe_params.dp_level = dp_level; probe_params.dp_level = dp_level;
probe_params.is_vf = is_vf; probe_params.is_vf = is_vf;
probe_params.recov_in_prog = (mode == QEDE_PROBE_RECOVERY);
cdev = qed_ops->common->probe(pdev, &probe_params); cdev = qed_ops->common->probe(pdev, &probe_params);
if (!cdev) { if (!cdev) {
rc = -ENODEV; rc = -ENODEV;
...@@ -1078,11 +1120,20 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, ...@@ -1078,11 +1120,20 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
if (rc) if (rc)
goto err2; goto err2;
edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module, if (mode != QEDE_PROBE_RECOVERY) {
dp_level); edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module,
if (!edev) { dp_level);
rc = -ENOMEM; if (!edev) {
goto err2; rc = -ENOMEM;
goto err2;
}
} else {
struct net_device *ndev = pci_get_drvdata(pdev);
edev = netdev_priv(ndev);
edev->cdev = cdev;
memset(&edev->stats, 0, sizeof(edev->stats));
memcpy(&edev->dev_info, &dev_info, sizeof(dev_info));
} }
if (is_vf) if (is_vf)
...@@ -1090,28 +1141,31 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, ...@@ -1090,28 +1141,31 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
qede_init_ndev(edev); qede_init_ndev(edev);
rc = qede_rdma_dev_add(edev); rc = qede_rdma_dev_add(edev, QEDE_RDMA_PROBE_MODE(mode));
if (rc) if (rc)
goto err3; goto err3;
/* Prepare the lock prior to the registration of the netdev, if (mode != QEDE_PROBE_RECOVERY) {
* as once it's registered we might reach flows requiring it /* Prepare the lock prior to the registration of the netdev,
* [it's even possible to reach a flow needing it directly * as once it's registered we might reach flows requiring it
* from there, although it's unlikely]. * [it's even possible to reach a flow needing it directly
*/ * from there, although it's unlikely].
INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task); */
mutex_init(&edev->qede_lock); INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
rc = register_netdev(edev->ndev); mutex_init(&edev->qede_lock);
if (rc) {
DP_NOTICE(edev, "Cannot register net-device\n"); rc = register_netdev(edev->ndev);
goto err4; if (rc) {
DP_NOTICE(edev, "Cannot register net-device\n");
goto err4;
}
} }
edev->ops->common->set_name(cdev, edev->ndev->name); edev->ops->common->set_name(cdev, edev->ndev->name);
/* PTP not supported on VFs */ /* PTP not supported on VFs */
if (!is_vf) if (!is_vf)
qede_ptp_enable(edev, true); qede_ptp_enable(edev, (mode == QEDE_PROBE_NORMAL));
edev->ops->register_ops(cdev, &qede_ll_ops, edev); edev->ops->register_ops(cdev, &qede_ll_ops, edev);
...@@ -1126,7 +1180,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, ...@@ -1126,7 +1180,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
return 0; return 0;
err4: err4:
qede_rdma_dev_remove(edev); qede_rdma_dev_remove(edev, QEDE_RDMA_PROBE_MODE(mode));
err3: err3:
free_netdev(edev->ndev); free_netdev(edev->ndev);
err2: err2:
...@@ -1162,8 +1216,13 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -1162,8 +1216,13 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id)
enum qede_remove_mode { enum qede_remove_mode {
QEDE_REMOVE_NORMAL, QEDE_REMOVE_NORMAL,
QEDE_REMOVE_RECOVERY,
}; };
/* Translate a qede remove mode into the matching qede_rdma remove mode:
 * QEDE_REMOVE_NORMAL maps to QEDE_RDMA_REMOVE_NORMAL, any other value maps to
 * QEDE_RDMA_REMOVE_RECOVERY.
 */
#define QEDE_RDMA_REMOVE_MODE(mode) \
	((mode) != QEDE_REMOVE_NORMAL ? QEDE_RDMA_REMOVE_RECOVERY \
				      : QEDE_RDMA_REMOVE_NORMAL)
static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
{ {
struct net_device *ndev = pci_get_drvdata(pdev); struct net_device *ndev = pci_get_drvdata(pdev);
...@@ -1172,15 +1231,19 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) ...@@ -1172,15 +1231,19 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
DP_INFO(edev, "Starting qede_remove\n"); DP_INFO(edev, "Starting qede_remove\n");
qede_rdma_dev_remove(edev); qede_rdma_dev_remove(edev, QEDE_RDMA_REMOVE_MODE(mode));
unregister_netdev(ndev);
cancel_delayed_work_sync(&edev->sp_task);
qede_ptp_disable(edev); if (mode != QEDE_REMOVE_RECOVERY) {
unregister_netdev(ndev);
edev->ops->common->set_power_state(cdev, PCI_D0); cancel_delayed_work_sync(&edev->sp_task);
pci_set_drvdata(pdev, NULL); edev->ops->common->set_power_state(cdev, PCI_D0);
pci_set_drvdata(pdev, NULL);
}
qede_ptp_disable(edev);
/* Use global ops since we've freed edev */ /* Use global ops since we've freed edev */
qed_ops->common->slowpath_stop(cdev); qed_ops->common->slowpath_stop(cdev);
...@@ -1194,7 +1257,8 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) ...@@ -1194,7 +1257,8 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
* [e.g., QED register callbacks] won't break anything when * [e.g., QED register callbacks] won't break anything when
* accessing the netdevice. * accessing the netdevice.
*/ */
free_netdev(ndev); if (mode != QEDE_REMOVE_RECOVERY)
free_netdev(ndev);
dev_info(&pdev->dev, "Ending qede_remove successfully\n"); dev_info(&pdev->dev, "Ending qede_remove successfully\n");
} }
...@@ -1539,6 +1603,58 @@ static int qede_alloc_mem_load(struct qede_dev *edev) ...@@ -1539,6 +1603,58 @@ static int qede_alloc_mem_load(struct qede_dev *edev)
return 0; return 0;
} }
/* Release every packet still pending on @txq.
 *
 * Walks the Tx chain until its consumer index catches up with its producer
 * index, freeing one packet per iteration through qede_free_tx_pkt() and
 * advancing the software consumer. The walk stops early if a packet cannot
 * be freed. The packets and bytes actually released are then reported via
 * netdev_tx_completed_queue().
 */
static void qede_empty_tx_queue(struct qede_dev *edev,
				struct qede_tx_queue *txq)
{
	struct netdev_queue *nq = netdev_get_tx_queue(edev->ndev,
						      txq->ndev_txq_id);
	unsigned int freed_pkts = 0, freed_bytes = 0;
	int pkt_len = 0;

	for (;;) {
		/* Chain is drained once consumer meets producer */
		if (qed_chain_get_cons_idx(&txq->tx_pbl) ==
		    qed_chain_get_prod_idx(&txq->tx_pbl))
			break;

		DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
			   "Freeing a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
			   txq->index, qed_chain_get_cons_idx(&txq->tx_pbl),
			   qed_chain_get_prod_idx(&txq->tx_pbl));

		if (qede_free_tx_pkt(edev, txq, &pkt_len)) {
			DP_NOTICE(edev,
				  "Failed to free a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n",
				  txq->index,
				  qed_chain_get_cons_idx(&txq->tx_pbl),
				  qed_chain_get_prod_idx(&txq->tx_pbl));
			break;
		}

		freed_bytes += pkt_len;
		freed_pkts++;
		txq->sw_tx_cons++;
	}

	netdev_tx_completed_queue(nq, freed_pkts, freed_bytes);
}
static void qede_empty_tx_queues(struct qede_dev *edev)
{
int i;
for_each_queue(i)
if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
int cos;
for_each_cos_in_txq(edev, cos) {
struct qede_fastpath *fp;
fp = &edev->fp_array[i];
qede_empty_tx_queue(edev,
&fp->txq[cos]);
}
}
}
/* This function inits fp content and resets the SB, RXQ and TXQ structures */ /* This function inits fp content and resets the SB, RXQ and TXQ structures */
static void qede_init_fp(struct qede_dev *edev) static void qede_init_fp(struct qede_dev *edev)
{ {
...@@ -2053,6 +2169,7 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats) ...@@ -2053,6 +2169,7 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
enum qede_unload_mode { enum qede_unload_mode {
QEDE_UNLOAD_NORMAL, QEDE_UNLOAD_NORMAL,
QEDE_UNLOAD_RECOVERY,
}; };
static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
...@@ -2068,7 +2185,8 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, ...@@ -2068,7 +2185,8 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
clear_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags); clear_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags);
edev->state = QEDE_STATE_CLOSED; if (mode != QEDE_UNLOAD_RECOVERY)
edev->state = QEDE_STATE_CLOSED;
qede_rdma_dev_event_close(edev); qede_rdma_dev_event_close(edev);
...@@ -2076,17 +2194,20 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, ...@@ -2076,17 +2194,20 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
netif_tx_disable(edev->ndev); netif_tx_disable(edev->ndev);
netif_carrier_off(edev->ndev); netif_carrier_off(edev->ndev);
/* Reset the link */ if (mode != QEDE_UNLOAD_RECOVERY) {
memset(&link_params, 0, sizeof(link_params)); /* Reset the link */
link_params.link_up = false; memset(&link_params, 0, sizeof(link_params));
edev->ops->common->set_link(edev->cdev, &link_params); link_params.link_up = false;
rc = qede_stop_queues(edev); edev->ops->common->set_link(edev->cdev, &link_params);
if (rc) {
qede_sync_free_irqs(edev);
goto out;
}
DP_INFO(edev, "Stopped Queues\n"); rc = qede_stop_queues(edev);
if (rc) {
qede_sync_free_irqs(edev);
goto out;
}
DP_INFO(edev, "Stopped Queues\n");
}
qede_vlan_mark_nonconfigured(edev); qede_vlan_mark_nonconfigured(edev);
edev->ops->fastpath_stop(edev->cdev); edev->ops->fastpath_stop(edev->cdev);
...@@ -2102,18 +2223,26 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, ...@@ -2102,18 +2223,26 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
qede_napi_disable_remove(edev); qede_napi_disable_remove(edev);
if (mode == QEDE_UNLOAD_RECOVERY)
qede_empty_tx_queues(edev);
qede_free_mem_load(edev); qede_free_mem_load(edev);
qede_free_fp_array(edev); qede_free_fp_array(edev);
out: out:
if (!is_locked) if (!is_locked)
__qede_unlock(edev); __qede_unlock(edev);
if (mode != QEDE_UNLOAD_RECOVERY)
DP_NOTICE(edev, "Link is down\n");
DP_INFO(edev, "Ending qede unload\n"); DP_INFO(edev, "Ending qede unload\n");
} }
enum qede_load_mode { enum qede_load_mode {
QEDE_LOAD_NORMAL, QEDE_LOAD_NORMAL,
QEDE_LOAD_RELOAD, QEDE_LOAD_RELOAD,
QEDE_LOAD_RECOVERY,
}; };
static int qede_load(struct qede_dev *edev, enum qede_load_mode mode, static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
...@@ -2293,6 +2422,77 @@ static void qede_link_update(void *dev, struct qed_link_output *link) ...@@ -2293,6 +2422,77 @@ static void qede_link_update(void *dev, struct qed_link_output *link)
} }
} }
/* Callback registered as .schedule_recovery_handler in qede_ll_ops.
 *
 * @dev is the qede device. Unless the device is already in
 * QEDE_STATE_RECOVERY, the QEDE_SP_RECOVERY flag is raised and the slowpath
 * task is queued with no delay; the recovery itself is carried out by
 * qede_sp_task(). A request that arrives while a recovery is in progress is
 * only logged.
 */
static void qede_schedule_recovery_handler(void *dev)
{
	struct qede_dev *edev = dev;

	if (edev->state != QEDE_STATE_RECOVERY) {
		set_bit(QEDE_SP_RECOVERY, &edev->sp_flags);
		schedule_delayed_work(&edev->sp_task, 0);

		DP_INFO(edev, "Scheduled a recovery handler\n");
	} else {
		DP_NOTICE(edev,
			  "Avoid scheduling a recovery handling since already in recovery state\n");
	}
}
/* Final step of a recovery attempt that could not be completed.
 *
 * Logs the failure, detaches the net device and, if a qed device is still
 * attached, asks it to move to PCI_D3hot.
 */
static void qede_recovery_failed(struct qede_dev *edev)
{
	netdev_err(edev->ndev, "Recovery handling has failed. Power cycle is needed.\n");

	netif_device_detach(edev->ndev);

	/* The recovery handler clears cdev when the re-probe fails; in that
	 * case there is no device to issue the power state change to.
	 */
	if (!edev->cdev)
		return;

	edev->ops->common->set_power_state(edev->cdev, PCI_D3hot);
}
static void qede_recovery_handler(struct qede_dev *edev)
{
u32 curr_state = edev->state;
int rc;
DP_NOTICE(edev, "Starting a recovery process\n");
/* No need to acquire first the qede_lock since is done by qede_sp_task
* before calling this function.
*/
edev->state = QEDE_STATE_RECOVERY;
edev->ops->common->recovery_prolog(edev->cdev);
if (curr_state == QEDE_STATE_OPEN)
qede_unload(edev, QEDE_UNLOAD_RECOVERY, true);
__qede_remove(edev->pdev, QEDE_REMOVE_RECOVERY);
rc = __qede_probe(edev->pdev, edev->dp_module, edev->dp_level,
IS_VF(edev), QEDE_PROBE_RECOVERY);
if (rc) {
edev->cdev = NULL;
goto err;
}
if (curr_state == QEDE_STATE_OPEN) {
rc = qede_load(edev, QEDE_LOAD_RECOVERY, true);
if (rc)
goto err;
qede_config_rx_mode(edev->ndev);
udp_tunnel_get_rx_info(edev->ndev);
}
edev->state = curr_state;
DP_NOTICE(edev, "Recovery handling is done\n");
return;
err:
qede_recovery_failed(edev);
}
static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq) static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
{ {
struct netdev_queue *netdev_txq; struct netdev_queue *netdev_txq;
......
...@@ -50,6 +50,8 @@ static void _qede_rdma_dev_add(struct qede_dev *edev) ...@@ -50,6 +50,8 @@ static void _qede_rdma_dev_add(struct qede_dev *edev)
if (!qedr_drv) if (!qedr_drv)
return; return;
/* Leftovers from previous error recovery */
edev->rdma_info.exp_recovery = false;
edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev, edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev,
edev->ndev); edev->ndev);
} }
...@@ -87,21 +89,26 @@ static void qede_rdma_destroy_wq(struct qede_dev *edev) ...@@ -87,21 +89,26 @@ static void qede_rdma_destroy_wq(struct qede_dev *edev)
destroy_workqueue(edev->rdma_info.rdma_wq); destroy_workqueue(edev->rdma_info.rdma_wq);
} }
int qede_rdma_dev_add(struct qede_dev *edev) int qede_rdma_dev_add(struct qede_dev *edev, enum qede_rdma_probe_mode mode)
{ {
int rc = 0; int rc;
if (qede_rdma_supported(edev)) { if (!qede_rdma_supported(edev))
rc = qede_rdma_create_wq(edev); return 0;
if (rc)
return rc;
INIT_LIST_HEAD(&edev->rdma_info.entry); /* Cannot start qedr while recovering since it wasn't fully stopped */
mutex_lock(&qedr_dev_list_lock); if (mode == QEDE_RDMA_PROBE_RECOVERY)
list_add_tail(&edev->rdma_info.entry, &qedr_dev_list); return 0;
_qede_rdma_dev_add(edev);
mutex_unlock(&qedr_dev_list_lock); rc = qede_rdma_create_wq(edev);
} if (rc)
return rc;
INIT_LIST_HEAD(&edev->rdma_info.entry);
mutex_lock(&qedr_dev_list_lock);
list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
_qede_rdma_dev_add(edev);
mutex_unlock(&qedr_dev_list_lock);
return rc; return rc;
} }
...@@ -110,19 +117,31 @@ static void _qede_rdma_dev_remove(struct qede_dev *edev) ...@@ -110,19 +117,31 @@ static void _qede_rdma_dev_remove(struct qede_dev *edev)
{ {
if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev) if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev)
qedr_drv->remove(edev->rdma_info.qedr_dev); qedr_drv->remove(edev->rdma_info.qedr_dev);
edev->rdma_info.qedr_dev = NULL;
} }
void qede_rdma_dev_remove(struct qede_dev *edev) void qede_rdma_dev_remove(struct qede_dev *edev,
enum qede_rdma_remove_mode mode)
{ {
if (!qede_rdma_supported(edev)) if (!qede_rdma_supported(edev))
return; return;
qede_rdma_destroy_wq(edev); /* Cannot remove qedr while recovering since it wasn't fully stopped */
mutex_lock(&qedr_dev_list_lock); if (mode == QEDE_RDMA_REMOVE_NORMAL) {
_qede_rdma_dev_remove(edev); qede_rdma_destroy_wq(edev);
list_del(&edev->rdma_info.entry); mutex_lock(&qedr_dev_list_lock);
mutex_unlock(&qedr_dev_list_lock); if (!edev->rdma_info.exp_recovery)
_qede_rdma_dev_remove(edev);
edev->rdma_info.qedr_dev = NULL;
list_del(&edev->rdma_info.entry);
mutex_unlock(&qedr_dev_list_lock);
} else {
if (!edev->rdma_info.exp_recovery) {
mutex_lock(&qedr_dev_list_lock);
_qede_rdma_dev_remove(edev);
mutex_unlock(&qedr_dev_list_lock);
}
edev->rdma_info.exp_recovery = true;
}
} }
static void _qede_rdma_dev_open(struct qede_dev *edev) static void _qede_rdma_dev_open(struct qede_dev *edev)
...@@ -204,7 +223,8 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv) ...@@ -204,7 +223,8 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv)
mutex_lock(&qedr_dev_list_lock); mutex_lock(&qedr_dev_list_lock);
list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) { list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
if (edev->rdma_info.qedr_dev) /* If device has experienced recovery it was already removed */
if (edev->rdma_info.qedr_dev && !edev->rdma_info.exp_recovery)
_qede_rdma_dev_remove(edev); _qede_rdma_dev_remove(edev);
} }
qedr_drv = NULL; qedr_drv = NULL;
...@@ -284,6 +304,10 @@ static void qede_rdma_add_event(struct qede_dev *edev, ...@@ -284,6 +304,10 @@ static void qede_rdma_add_event(struct qede_dev *edev,
{ {
struct qede_rdma_event_work *event_node; struct qede_rdma_event_work *event_node;
/* If a recovery was experienced avoid adding the event */
if (edev->rdma_info.exp_recovery)
return;
if (!edev->rdma_info.qedr_dev) if (!edev->rdma_info.qedr_dev)
return; return;
......
...@@ -55,6 +55,16 @@ struct qede_rdma_event_work { ...@@ -55,6 +55,16 @@ struct qede_rdma_event_work {
enum qede_rdma_event event; enum qede_rdma_event event;
}; };
/* Mode passed to qede_rdma_dev_add(). */
enum qede_rdma_probe_mode {
	/* Regular probe */
	QEDE_RDMA_PROBE_NORMAL = 0,
	/* Probe performed as part of error recovery; qede_rdma_dev_add()
	 * returns without starting qedr in this mode.
	 */
	QEDE_RDMA_PROBE_RECOVERY = 1,
};
/* Mode passed to qede_rdma_dev_remove(). */
enum qede_rdma_remove_mode {
	/* Regular removal: the RDMA workqueue is destroyed and the device is
	 * taken off the qedr device list.
	 */
	QEDE_RDMA_REMOVE_NORMAL = 0,
	/* Removal performed as part of error recovery: the device is flagged
	 * as having experienced recovery, while the workqueue and the list
	 * entry are left in place.
	 */
	QEDE_RDMA_REMOVE_RECOVERY = 1,
};
struct qedr_driver { struct qedr_driver {
unsigned char name[32]; unsigned char name[32];
...@@ -74,21 +84,24 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv); ...@@ -74,21 +84,24 @@ void qede_rdma_unregister_driver(struct qedr_driver *drv);
bool qede_rdma_supported(struct qede_dev *dev); bool qede_rdma_supported(struct qede_dev *dev);
#if IS_ENABLED(CONFIG_QED_RDMA) #if IS_ENABLED(CONFIG_QED_RDMA)
int qede_rdma_dev_add(struct qede_dev *dev); int qede_rdma_dev_add(struct qede_dev *dev, enum qede_rdma_probe_mode mode);
void qede_rdma_dev_event_open(struct qede_dev *dev); void qede_rdma_dev_event_open(struct qede_dev *dev);
void qede_rdma_dev_event_close(struct qede_dev *dev); void qede_rdma_dev_event_close(struct qede_dev *dev);
void qede_rdma_dev_remove(struct qede_dev *dev); void qede_rdma_dev_remove(struct qede_dev *dev,
enum qede_rdma_remove_mode mode);
void qede_rdma_event_changeaddr(struct qede_dev *edr); void qede_rdma_event_changeaddr(struct qede_dev *edr);
#else #else
static inline int qede_rdma_dev_add(struct qede_dev *dev) static inline int qede_rdma_dev_add(struct qede_dev *dev,
enum qede_rdma_probe_mode mode)
{ {
return 0; return 0;
} }
static inline void qede_rdma_dev_event_open(struct qede_dev *dev) {} static inline void qede_rdma_dev_event_open(struct qede_dev *dev) {}
static inline void qede_rdma_dev_event_close(struct qede_dev *dev) {} static inline void qede_rdma_dev_event_close(struct qede_dev *dev) {}
static inline void qede_rdma_dev_remove(struct qede_dev *dev) {} static inline void qede_rdma_dev_remove(struct qede_dev *dev,
enum qede_rdma_remove_mode mode) {}
static inline void qede_rdma_event_changeaddr(struct qede_dev *edr) {} static inline void qede_rdma_event_changeaddr(struct qede_dev *edr) {}
#endif #endif
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment