Commit 55ad3592 authored by Yishai Hadas, committed by David S. Miller

net/mlx4_core: Enable device recovery flow with SRIOV

In SRIOV, both the PF and the VF may attempt device recovery whenever they
assume that the device is not functioning.  When the PF driver resets the
device, the VF should detect this and attempt to reinitialize itself.

The VF must be able to reset itself under all circumstances, even
if the PF is not responsive.

The VF shall reset itself in the following cases:

1. Commands are not processed within a reasonable time over the communication channel.
This case is handled by considering the device state and returning the correct error
code for the command, as is done in native mode; it is implemented in the next patch.

2. The VF driver receives an internal error event reported by the PF on the
communication channel. This occurs when the PF driver resets the device or
when the VF is out of sync with the PF.

Add a 'VF reset' capability, which allows the VF to reinitialize itself even when the
PF is not responsive.

As the PF and VF may run their reset flows simultaneously, several cases
must be handled:
- Prevent freeing VF resources upon FLR when the PF is in its unloading stage.
- Prevent the PF from getting VF commands before it has finished initializing
  its resources.
- Upon VF startup, check that the comm channel is online before sending
  commands to the PF, to avoid timing out (a sketch of the reset handshake follows).
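
For illustration only, not part of the patch: below is a minimal user-space C sketch of
the request/ack toggle handshake that the VF drives over the comm-channel flags word
(mlx4_reset_slave() in this patch). The bit offsets mirror the patch; the comm_flags
variable and the simulate_fw_ack() helper are hypothetical stand-ins for the real MMIO
register and the firmware side of the exchange.

    #include <stdint.h>
    #include <stdio.h>

    #define COM_CHAN_RST_REQ_OFFSET 0x10  /* reset request toggle, owned by the VF */
    #define COM_CHAN_RST_ACK_OFFSET 0x08  /* reset ack toggle, owned by the firmware */

    static uint32_t comm_flags;  /* stand-in for the MMIO comm-channel flags word */

    /* Hypothetical helper: model the firmware echoing the request toggle. */
    static void simulate_fw_ack(void)
    {
            uint32_t req = (comm_flags >> COM_CHAN_RST_REQ_OFFSET) & 1;

            comm_flags &= ~(1u << COM_CHAN_RST_ACK_OFFSET);
            comm_flags |= req << COM_CHAN_RST_ACK_OFFSET;
    }

    int main(void)
    {
            uint32_t rst_req = (comm_flags >> COM_CHAN_RST_REQ_OFFSET) & 1;
            uint32_t rst_ack = (comm_flags >> COM_CHAN_RST_ACK_OFFSET) & 1;

            /* The channel is usable only while both toggles agree. */
            if (rst_req != rst_ack) {
                    fprintf(stderr, "channel out of sync, cannot request reset\n");
                    return 1;
            }

            /* The VF requests a reset by flipping the request toggle... */
            rst_req ^= 1;
            comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET;

            /* ...and polls until the firmware flips the ack toggle to match. */
            simulate_fw_ack();
            rst_ack = (comm_flags >> COM_CHAN_RST_ACK_OFFSET) & 1;
            printf("reset %s\n", rst_ack == rst_req ? "acked" : "pending");
            return 0;
    }

In the driver the same exchange runs against the comm-channel MMIO registers, with
mmiowb() ordering the write and a jiffies-based timeout bounding the poll, as shown in
the diff below.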
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 2ba5fbd6
@@ -45,8 +45,7 @@ enum {
 int mlx4_internal_err_reset = 1;
 module_param_named(internal_err_reset, mlx4_internal_err_reset, int, 0644);
 MODULE_PARM_DESC(internal_err_reset,
-                 "Reset device on internal errors if non-zero"
-                 " (default 1, in SRIOV mode default is 0)");
+                 "Reset device on internal errors if non-zero (default 1)");

 static int read_vendor_id(struct mlx4_dev *dev)
 {
@@ -71,6 +70,9 @@ static int mlx4_reset_master(struct mlx4_dev *dev)
 {
         int err = 0;

+        if (mlx4_is_master(dev))
+                mlx4_report_internal_err_comm_event(dev);
+
         if (!pci_channel_offline(dev->persist->pdev)) {
                 err = read_vendor_id(dev);
                 /* If PCI can't be accessed to read vendor ID we assume that its
@@ -87,6 +89,81 @@ static int mlx4_reset_master(struct mlx4_dev *dev)
         return err;
 }

+static int mlx4_reset_slave(struct mlx4_dev *dev)
+{
+#define COM_CHAN_RST_REQ_OFFSET 0x10
+#define COM_CHAN_RST_ACK_OFFSET 0x08
+        u32 comm_flags;
+        u32 rst_req;
+        u32 rst_ack;
+        unsigned long end;
+        struct mlx4_priv *priv = mlx4_priv(dev);
+
+        if (pci_channel_offline(dev->persist->pdev))
+                return 0;
+
+        comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
+                                  MLX4_COMM_CHAN_FLAGS));
+        if (comm_flags == 0xffffffff) {
+                mlx4_err(dev, "VF reset is not needed\n");
+                return 0;
+        }
+
+        if (!(dev->caps.vf_caps & MLX4_VF_CAP_FLAG_RESET)) {
+                mlx4_err(dev, "VF reset is not supported\n");
+                return -EOPNOTSUPP;
+        }
+
+        rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >>
+                COM_CHAN_RST_REQ_OFFSET;
+        rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >>
+                COM_CHAN_RST_ACK_OFFSET;
+        if (rst_req != rst_ack) {
+                mlx4_err(dev, "Communication channel isn't sync, fail to send reset\n");
+                return -EIO;
+        }
+
+        rst_req ^= 1;
+        mlx4_warn(dev, "VF is sending reset request to Firmware\n");
+        comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET;
+        __raw_writel((__force u32)cpu_to_be32(comm_flags),
+                     (__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS);
+        /* Make sure that our comm channel write doesn't
+         * get mixed in with writes from another CPU.
+         */
+        mmiowb();
+
+        end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies;
+        while (time_before(jiffies, end)) {
+                comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
+                                          MLX4_COMM_CHAN_FLAGS));
+                rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >>
+                        COM_CHAN_RST_ACK_OFFSET;
+
+                /* Reading rst_req again since the communication channel can
+                 * be reset at any time by the PF and all its bits will be
+                 * set to zero.
+                 */
+                rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >>
+                        COM_CHAN_RST_REQ_OFFSET;
+
+                if (rst_ack == rst_req) {
+                        mlx4_warn(dev, "VF Reset succeed\n");
+                        return 0;
+                }
+
+                cond_resched();
+        }
+        mlx4_err(dev, "Fail to send reset over the communication channel\n");
+        return -ETIMEDOUT;
+}
+
+static int mlx4_comm_internal_err(u32 slave_read)
+{
+        return (u32)COMM_CHAN_EVENT_INTERNAL_ERR ==
+                (slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 1 : 0;
+}
+
 void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
 {
         int err;
@@ -101,6 +178,9 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)

         dev = persist->dev;
         mlx4_err(dev, "device is going to be reset\n");
-        err = mlx4_reset_master(dev);
+        if (mlx4_is_slave(dev))
+                err = mlx4_reset_slave(dev);
+        else
+                err = mlx4_reset_master(dev);
         BUG_ON(err != 0);
@@ -148,8 +228,15 @@ static void poll_catas(unsigned long dev_ptr)
 {
         struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr;
         struct mlx4_priv *priv = mlx4_priv(dev);
+        u32 slave_read;

-        if (readl(priv->catas_err.map)) {
+        if (mlx4_is_slave(dev)) {
+                slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
+
+                if (mlx4_comm_internal_err(slave_read)) {
+                        mlx4_warn(dev, "Internal error detected on the communication channel\n");
+                        goto internal_err;
+                }
+        } else if (readl(priv->catas_err.map)) {
                 dump_err_buf(dev);
                 goto internal_err;
         }
@@ -182,23 +269,22 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
         struct mlx4_priv *priv = mlx4_priv(dev);
         phys_addr_t addr;

-        /*If we are in SRIOV the default of the module param must be 0*/
-        if (mlx4_is_mfunc(dev))
-                mlx4_internal_err_reset = 0;
-
         INIT_LIST_HEAD(&priv->catas_err.list);
         init_timer(&priv->catas_err.timer);
         priv->catas_err.map = NULL;

-        addr = pci_resource_start(dev->persist->pdev, priv->fw.catas_bar) +
-                priv->fw.catas_offset;
+        if (!mlx4_is_slave(dev)) {
+                addr = pci_resource_start(dev->persist->pdev,
+                                          priv->fw.catas_bar) +
+                                          priv->fw.catas_offset;

-        priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
-        if (!priv->catas_err.map) {
-                mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n",
-                          (unsigned long long) addr);
-                return;
+                priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
+                if (!priv->catas_err.map) {
+                        mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n",
+                                  (unsigned long long)addr);
+                        return;
+                }
         }

         priv->catas_err.timer.data = (unsigned long) dev;
         priv->catas_err.timer.function = poll_catas;
...
@@ -42,6 +42,7 @@
 #include <linux/mlx4/device.h>
 #include <linux/semaphore.h>
 #include <rdma/ib_smi.h>
+#include <linux/delay.h>

 #include <asm/io.h>
@@ -729,7 +730,7 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
 EXPORT_SYMBOL_GPL(__mlx4_cmd);

-static int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev)
+int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev)
 {
         return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL,
                         MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
@@ -1945,8 +1946,11 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
                 break;
         case MLX4_COMM_CMD_VHCR_POST:
                 if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) &&
-                    (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST))
+                    (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) {
+                        mlx4_warn(dev, "slave:%d is out of sync, cmd=0x%x, last command=0x%x, reset is needed\n",
+                                  slave, cmd, slave_state[slave].last_cmd);
                         goto reset_slave;
+                }

                 mutex_lock(&priv->cmd.slave_cmd_mutex);
                 if (mlx4_master_process_vhcr(dev, slave, NULL)) {
@@ -1980,7 +1984,18 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,

 reset_slave:
         /* cleanup any slave resources */
-        mlx4_delete_all_resources_for_slave(dev, slave);
+        if (dev->persist->interface_state & MLX4_INTERFACE_STATE_UP)
+                mlx4_delete_all_resources_for_slave(dev, slave);
+
+        if (cmd != MLX4_COMM_CMD_RESET) {
+                mlx4_warn(dev, "Turn on internal error to force reset, slave=%d, cmd=0x%x\n",
+                          slave, cmd);
+                /* Turn on internal error letting slave reset itself immeditaly,
+                 * otherwise it might take till timeout on command is passed
+                 */
+                reply |= ((u32)COMM_CHAN_EVENT_INTERNAL_ERR);
+        }
+
         spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
         if (!slave_state[slave].is_slave_going_down)
                 slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET;
@@ -2056,17 +2071,28 @@ void mlx4_master_comm_channel(struct work_struct *work)
 static int sync_toggles(struct mlx4_dev *dev)
 {
         struct mlx4_priv *priv = mlx4_priv(dev);
-        int wr_toggle;
-        int rd_toggle;
+        u32 wr_toggle;
+        u32 rd_toggle;
         unsigned long end;

-        wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31;
-        end = jiffies + msecs_to_jiffies(5000);
+        wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write));
+        if (wr_toggle == 0xffffffff)
+                end = jiffies + msecs_to_jiffies(30000);
+        else
+                end = jiffies + msecs_to_jiffies(5000);

         while (time_before(jiffies, end)) {
-                rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31;
-                if (rd_toggle == wr_toggle) {
-                        priv->cmd.comm_toggle = rd_toggle;
+                rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read));
+                if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) {
+                        /* PCI might be offline */
+                        msleep(100);
+                        wr_toggle = swab32(readl(&priv->mfunc.comm->
+                                           slave_write));
+                        continue;
+                }
+
+                if (rd_toggle >> 31 == wr_toggle >> 31) {
+                        priv->cmd.comm_toggle = rd_toggle >> 31;
                         return 0;
                 }
@@ -2172,13 +2198,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
                 if (mlx4_init_resource_tracker(dev))
                         goto err_thread;

-                err = mlx4_ARM_COMM_CHANNEL(dev);
-                if (err) {
-                        mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
-                                 err);
-                        goto err_resource;
-                }
-
         } else {
                 err = sync_toggles(dev);
                 if (err) {
@@ -2188,8 +2207,6 @@
         }
         return 0;

-err_resource:
-        mlx4_free_resource_tracker(dev, RES_TR_FREE_ALL);
 err_thread:
         flush_workqueue(priv->mfunc.master.comm_wq);
         destroy_workqueue(priv->mfunc.master.comm_wq);
@@ -2266,6 +2283,27 @@ int mlx4_cmd_init(struct mlx4_dev *dev)
         return -ENOMEM;
 }

+void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev)
+{
+        struct mlx4_priv *priv = mlx4_priv(dev);
+        int slave;
+        u32 slave_read;
+
+        /* Report an internal error event to all
+         * communication channels.
+         */
+        for (slave = 0; slave < dev->num_slaves; slave++) {
+                slave_read = swab32(readl(&priv->mfunc.comm[slave].slave_read));
+                slave_read |= (u32)COMM_CHAN_EVENT_INTERNAL_ERR;
+                __raw_writel((__force u32)cpu_to_be32(slave_read),
+                             &priv->mfunc.comm[slave].slave_read);
+                /* Make sure that our comm channel write doesn't
+                 * get mixed in with writes from another CPU.
+                 */
+                mmiowb();
+        }
+}
+
 void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
 {
         struct mlx4_priv *priv = mlx4_priv(dev);
@@ -2281,6 +2319,7 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
                 kfree(priv->mfunc.master.slave_state);
                 kfree(priv->mfunc.master.vf_admin);
                 kfree(priv->mfunc.master.vf_oper);
+                dev->num_slaves = 0;
         }

         iounmap(priv->mfunc.comm);
...
@@ -429,7 +429,13 @@ void mlx4_master_handle_slave_flr(struct work_struct *work)
                 if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) {
                         mlx4_dbg(dev, "mlx4_handle_slave_flr: clean slave: %d\n",
                                  i);
-                        mlx4_delete_all_resources_for_slave(dev, i);
+                        /* In case of 'Reset flow' FLR can be generated for
+                         * a slave before mlx4_load_one is done.
+                         * make sure interface is up before trying to delete
+                         * slave resources which weren't allocated yet.
+                         */
+                        if (dev->persist->interface_state &
+                            MLX4_INTERFACE_STATE_UP)
+                                mlx4_delete_all_resources_for_slave(dev, i);
                         /*return the slave to running mode*/
                         spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
...
@@ -144,7 +144,6 @@ int mlx4_register_device(struct mlx4_dev *dev)
                 mlx4_add_device(intf, priv);

         mutex_unlock(&intf_mutex);
-        if (!mlx4_is_slave(dev))
-                mlx4_start_catas_poll(dev);
+        mlx4_start_catas_poll(dev);

         return 0;
@@ -155,7 +154,6 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
         struct mlx4_priv *priv = mlx4_priv(dev);
         struct mlx4_interface *intf;

-        if (!mlx4_is_slave(dev))
-                mlx4_stop_catas_poll(dev);
+        mlx4_stop_catas_poll(dev);
         mutex_lock(&intf_mutex);
...
@@ -108,6 +108,8 @@ MODULE_PARM_DESC(enable_64b_cqe_eqe,
                                          MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
                                          MLX4_FUNC_CAP_DMFS_A0_STATIC)

+#define RESET_PERSIST_MASK_FLAGS (MLX4_FLAG_SRIOV)
+
 static char mlx4_version[] =
         DRV_NAME ": Mellanox ConnectX core driver v"
         DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -1579,6 +1581,50 @@ static void mlx4_close_fw(struct mlx4_dev *dev)
         }
 }

+static int mlx4_comm_check_offline(struct mlx4_dev *dev)
+{
+#define COMM_CHAN_OFFLINE_OFFSET 0x09
+
+        u32 comm_flags;
+        u32 offline_bit;
+        unsigned long end;
+        struct mlx4_priv *priv = mlx4_priv(dev);
+
+        end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies;
+        while (time_before(jiffies, end)) {
+                comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
+                                          MLX4_COMM_CHAN_FLAGS));
+                offline_bit = (comm_flags &
+                               (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
+                if (!offline_bit)
+                        return 0;
+                /* There are cases as part of AER/Reset flow that PF needs
+                 * around 100 msec to load. We therefore sleep for 100 msec
+                 * to allow other tasks to make use of that CPU during this
+                 * time interval.
+                 */
+                msleep(100);
+        }
+        mlx4_err(dev, "Communication channel is offline.\n");
+        return -EIO;
+}
+
+static void mlx4_reset_vf_support(struct mlx4_dev *dev)
+{
+#define COMM_CHAN_RST_OFFSET 0x1e
+
+        struct mlx4_priv *priv = mlx4_priv(dev);
+        u32 comm_rst;
+        u32 comm_caps;
+
+        comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm +
+                                 MLX4_COMM_CHAN_CAPS));
+        comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET));
+
+        if (comm_rst)
+                dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET;
+}
+
 static int mlx4_init_slave(struct mlx4_dev *dev)
 {
         struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1594,6 +1640,12 @@ static int mlx4_init_slave(struct mlx4_dev *dev)
         mutex_lock(&priv->cmd.slave_cmd_mutex);
         priv->cmd.max_cmds = 1;
+        if (mlx4_comm_check_offline(dev)) {
+                mlx4_err(dev, "PF is not responsive, skipping initialization\n");
+                goto err_offline;
+        }
+
+        mlx4_reset_vf_support(dev);
         mlx4_warn(dev, "Sending reset\n");
         ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
                                        MLX4_COMM_TIME);
@@ -1637,6 +1689,7 @@

 err:
         mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
+err_offline:
         mutex_unlock(&priv->cmd.slave_cmd_mutex);
         return -EIO;
 }
@@ -2494,11 +2547,19 @@ static void mlx4_free_ownership(struct mlx4_dev *dev)
                          !!((flags) & MLX4_FLAG_MASTER))

 static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
-                             u8 total_vfs, int existing_vfs)
+                             u8 total_vfs, int existing_vfs, int reset_flow)
 {
         u64 dev_flags = dev->flags;
         int err = 0;

+        if (reset_flow) {
+                dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs),
+                                       GFP_KERNEL);
+                if (!dev->dev_vfs)
+                        goto free_mem;
+                return dev_flags;
+        }
+
         atomic_inc(&pf_loading);
         if (dev->flags & MLX4_FLAG_SRIOV) {
                 if (existing_vfs != total_vfs) {
@@ -2533,6 +2594,7 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,

 disable_sriov:
         atomic_dec(&pf_loading);
+free_mem:
         dev->persist->num_vfs = 0;
         kfree(dev->dev_vfs);
         return dev_flags & ~MLX4_FLAG_MASTER;
@@ -2557,7 +2619,8 @@ static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap
 }

 static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
-                         int total_vfs, int *nvfs, struct mlx4_priv *priv)
+                         int total_vfs, int *nvfs, struct mlx4_priv *priv,
+                         int reset_flow)
 {
         struct mlx4_dev *dev;
         unsigned sum = 0;
@@ -2679,8 +2742,10 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
                         goto err_fw;

                 if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
-                        u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
-                                                          existing_vfs);
+                        u64 dev_flags = mlx4_enable_sriov(dev, pdev,
+                                                          total_vfs,
+                                                          existing_vfs,
+                                                          reset_flow);

                         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);
                         dev->flags = dev_flags;
@@ -2722,7 +2787,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
                         if (dev->flags & MLX4_FLAG_SRIOV) {
                                 if (!existing_vfs)
                                         pci_disable_sriov(pdev);
-                                if (mlx4_is_master(dev))
+                                if (mlx4_is_master(dev) && !reset_flow)
                                         atomic_dec(&pf_loading);
                                 dev->flags &= ~MLX4_FLAG_SRIOV;
                         }
@@ -2736,7 +2801,8 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
         }

         if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) {
-                u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs);
+                u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs,
+                                                  existing_vfs, reset_flow);

                 if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) {
                         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR);
@@ -2848,6 +2914,17 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
                 goto err_steer;

         mlx4_init_quotas(dev);
+        /* When PF resources are ready arm its comm channel to enable
+         * getting commands
+         */
+        if (mlx4_is_master(dev)) {
+                err = mlx4_ARM_COMM_CHANNEL(dev);
+                if (err) {
+                        mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
+                                 err);
+                        goto err_steer;
+                }
+        }

         for (port = 1; port <= dev->caps.num_ports; port++) {
                 err = mlx4_init_port_info(dev, port);
@@ -2866,7 +2943,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,

         priv->removed = 0;

-        if (mlx4_is_master(dev) && dev->persist->num_vfs)
+        if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
                 atomic_dec(&pf_loading);

         kfree(dev_cap);
@@ -2925,10 +3002,12 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data,
         mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL);

 err_sriov:
-        if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs)
+        if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) {
                 pci_disable_sriov(pdev);
+                dev->flags &= ~MLX4_FLAG_SRIOV;
+        }

-        if (mlx4_is_master(dev) && dev->persist->num_vfs)
+        if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow)
                 atomic_dec(&pf_loading);

         kfree(priv->dev.dev_vfs);
@@ -3073,7 +3152,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
         if (err)
                 goto err_release_regions;

-        err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
+        err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0);
         if (err)
                 goto err_catas;
@@ -3131,9 +3210,11 @@ static void mlx4_clean_dev(struct mlx4_dev *dev)
 {
         struct mlx4_dev_persistent *persist = dev->persist;
         struct mlx4_priv *priv = mlx4_priv(dev);
+        unsigned long flags = (dev->flags & RESET_PERSIST_MASK_FLAGS);

         memset(priv, 0, sizeof(*priv));
         priv->dev.persist = persist;
+        priv->dev.flags = flags;
 }

 static void mlx4_unload_one(struct pci_dev *pdev)
@@ -3143,7 +3224,6 @@ static void mlx4_unload_one(struct pci_dev *pdev)
         struct mlx4_priv *priv = mlx4_priv(dev);
         int pci_dev_data;
         int p, i;
-        int active_vfs = 0;

         if (priv->removed)
                 return;
@@ -3157,14 +3237,6 @@

         pci_dev_data = priv->pci_dev_data;

-        /* Disabling SR-IOV is not allowed while there are active vf's */
-        if (mlx4_is_master(dev)) {
-                active_vfs = mlx4_how_many_lives_vf(dev);
-                if (active_vfs) {
-                        pr_warn("Removing PF when there are active VF's !!\n");
-                        pr_warn("Will not disable SR-IOV.\n");
-                }
-        }
         mlx4_stop_sense(dev);
         mlx4_unregister_device(dev);
@@ -3208,12 +3280,6 @@

         if (dev->flags & MLX4_FLAG_MSI_X)
                 pci_disable_msix(pdev);
-        if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
-                mlx4_warn(dev, "Disabling SR-IOV\n");
-                pci_disable_sriov(pdev);
-                dev->flags &= ~MLX4_FLAG_SRIOV;
-                dev->persist->num_vfs = 0;
-        }

         if (!mlx4_is_slave(dev))
                 mlx4_free_ownership(dev);
@@ -3235,11 +3301,21 @@ static void mlx4_remove_one(struct pci_dev *pdev)
         struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);
         struct mlx4_dev *dev = persist->dev;
         struct mlx4_priv *priv = mlx4_priv(dev);
+        int active_vfs = 0;

         mutex_lock(&persist->interface_state_mutex);
         persist->interface_state |= MLX4_INTERFACE_STATE_DELETION;
         mutex_unlock(&persist->interface_state_mutex);

+        /* Disabling SR-IOV is not allowed while there are active vf's */
+        if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) {
+                active_vfs = mlx4_how_many_lives_vf(dev);
+                if (active_vfs) {
+                        pr_warn("Removing PF when there are active VF's !!\n");
+                        pr_warn("Will not disable SR-IOV.\n");
+                }
+        }
+
         /* device marked to be under deletion running now without the lock
          * letting other tasks to be terminated
          */
@@ -3248,6 +3324,11 @@
         else
                 mlx4_info(dev, "%s: interface is down\n", __func__);
         mlx4_catas_end(dev);
+        if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) {
+                mlx4_warn(dev, "Disabling SR-IOV\n");
+                pci_disable_sriov(pdev);
+        }
+
         pci_release_regions(pdev);
         pci_disable_device(pdev);
         kfree(dev->persist);
@@ -3287,7 +3368,7 @@ int mlx4_restart_one(struct pci_dev *pdev)
         memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs));

         mlx4_unload_one(pdev);
-        err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv);
+        err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1);
         if (err) {
                 mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n",
                          __func__, pci_name(pdev), err);
@@ -3397,7 +3478,7 @@ static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
         mutex_lock(&persist->interface_state_mutex);
         if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) {
                 ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs,
-                                    priv);
+                                    priv, 1);
                 if (ret) {
                         mlx4_err(dev, "%s: mlx4_load_one failed, ret=%d\n",
                                  __func__, ret);
...
@@ -85,7 +85,9 @@ enum {
         MLX4_CLR_INT_SIZE    = 0x00008,
         MLX4_SLAVE_COMM_BASE = 0x0,
         MLX4_COMM_PAGESIZE   = 0x1000,
-        MLX4_CLOCK_SIZE      = 0x00008
+        MLX4_CLOCK_SIZE      = 0x00008,
+        MLX4_COMM_CHAN_CAPS  = 0x8,
+        MLX4_COMM_CHAN_FLAGS = 0xc
 };

 enum {
@@ -120,6 +122,8 @@ enum mlx4_mpt_state {
 };

 #define MLX4_COMM_TIME 10000
+#define MLX4_COMM_OFFLINE_TIME_OUT 30000
+
 enum {
         MLX4_COMM_CMD_RESET,
         MLX4_COMM_CMD_VHCR0,
@@ -1162,6 +1166,7 @@ enum {
 int mlx4_cmd_init(struct mlx4_dev *dev);
 void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask);
 int mlx4_multi_func_init(struct mlx4_dev *dev);
+int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev);
 void mlx4_multi_func_cleanup(struct mlx4_dev *dev);
 void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
 int mlx4_cmd_use_events(struct mlx4_dev *dev);
...
@@ -280,6 +280,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat
 int mlx4_config_dev_retrieval(struct mlx4_dev *dev,
                               struct mlx4_config_dev_params *params);
 void mlx4_cmd_wake_completions(struct mlx4_dev *dev);
+void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev);
 /*
  * mlx4_get_slave_default_vlan -
  * return true if VST ( default vlan)
@@ -289,5 +290,6 @@ bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave,
                  u16 *vlan, u8 *qos);

 #define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8)
+#define COMM_CHAN_EVENT_INTERNAL_ERR (1 << 17)

 #endif /* MLX4_CMD_H */
@@ -208,6 +208,10 @@ enum {
         MLX4_QUERY_FUNC_FLAGS_A0_RES_QP = 1LL << 1
 };

+enum {
+        MLX4_VF_CAP_FLAG_RESET = 1 << 0
+};
+
 /* bit enums for an 8-bit flags field indicating special use
  * QPs which require special handling in qp_reserve_range.
  * Currently, this only includes QPs used by the ETH interface,
@@ -545,6 +549,7 @@ struct mlx4_caps {
         u8  alloc_res_qp_mask;
         u32 dmfs_high_rate_qpn_base;
         u32 dmfs_high_rate_qpn_range;
+        u32 vf_caps;
 };

 struct mlx4_buf_list {
...