Commit bc579ae5 authored by David S. Miller

Merge branch 'mlx4-next'

Or Gerlitz says:

====================
mlx4: Fix and enhance the device reset flow

This series from Yishai Hadas fixes the device reset flow and adds SRIOV support.

Reset flows are required whenever a device experiences errors, is unresponsive,
or is not in a deterministic state. In such cases, the driver is expected to
reset the HW and continue operation. When SRIOV is enabled, these requirements
apply to both PF and VF devices.

Currently, the mlx4 reset flow doesn't work properly: when a fatal error is
detected in the FW internal buffer, the chip is not reset and stays in its
bad state. There are other cases that should be treated as fatal, such as a
non-responsive FW or errors on closing commands, which are not handled today.

The AER mechanism should also be fixed:
- It should use mlx4_load_one instead of __mlx4_init_one, which is what is done
  upon HCA probing.
- It must be aligned with the concurrent catas flow: mark the device as being in
  an error state, reset the chip, etc.
- Port types should be restored to the values they had before the error occurred.
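
For orientation only (this sketch is not part of the series), an error_detected
callback aligned with the catas flow could look roughly as follows; the drvdata
layout is assumed and mlx4_unload_one is used as an illustrative teardown helper:

static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev);

	/* Same path as the catas flow: mark the device as being in an
	 * internal error state and reset the chip before anything else.
	 */
	mlx4_enter_error_state(persist);

	/* Tear the software stack down only if it was brought up. */
	mutex_lock(&persist->interface_state_mutex);
	if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
		mlx4_unload_one(pdev);	/* illustrative helper name */
	mutex_unlock(&persist->interface_state_mutex);

	return state == pci_channel_io_perm_failure ?
		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
}

The matching slot_reset callback would then bring the device back with
mlx4_load_one rather than __mlx4_init_one.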

In addition, the SRIOV use-case isn't supported.

In the above cases, when the device state becomes fatal we must act as follows:
1) Reset the chip and mark the HW device state as in fatal error.
2) Wake up any pending commands and prevent new ones from coming in.
3) Restart the software stack.
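
Condensed from mlx4_enter_error_state() in the catas.c changes below (locking
details and error checks trimmed), steps 1 and 2 look like this:

void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
{
	struct mlx4_dev *dev = persist->dev;

	mutex_lock(&persist->device_state_mutex);
	if (!(persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) {
		/* 1) reset the chip and mark the HW state as fatal */
		if (mlx4_is_slave(dev))
			mlx4_reset_slave(dev);
		else
			mlx4_reset_master(dev);
		persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR;
	}
	mutex_unlock(&persist->device_state_mutex);

	/* 2) wake pending commands; new ones now fail fast against the
	 *    internal error mark
	 */
	mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
	mlx4_cmd_wake_completions(dev);
}

Step 3 is handled asynchronously: the catas work calls mlx4_handle_error_state(),
which restarts the stack via mlx4_restart_one().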

We also address the SRIOV mode as follows: in case the PF detects a fatal error,
it lets the VFs know about it, and then both the PF and the VFs are restarted
asynchronously. However, in case only a VF encountered a fatal error or was
forced to be reset, only that VF's own state is reset before its software stack
is restarted.
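
The VF side of that handshake appears in mlx4_reset_slave() in catas.c below;
stripped to its core (a condensed sketch, not the full function), it is a
request/ack toggle over the comm channel flags word:

	/* Flip the reset-request bit and wait for the PF to mirror it
	 * in the ack bit.
	 */
	rst_req ^= 1;
	__raw_writel((__force u32)cpu_to_be32(rst_req << COM_CHAN_RST_REQ_OFFSET),
		     (__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS);
	mmiowb();

	end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies;
	while (time_before(jiffies, end)) {
		comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
					  MLX4_COMM_CHAN_FLAGS));
		rst_ack = (comm_flags >> COM_CHAN_RST_ACK_OFFSET) & 1;
		rst_req = (comm_flags >> COM_CHAN_RST_REQ_OFFSET) & 1;
		if (rst_ack == rst_req)
			return 0;	/* PF acknowledged, VF was reset */
		cond_resched();
	}
	return -ETIMEDOUT;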

Changes from V0:

There is no need to call pci_disable_device upon a permanent PCI error. This is
done as part of mlx4_remove_one, which is called later, once we return
PCI_ERS_RESULT_DISCONNECT from the PCI error handler.

The initial toggle value should use only the T bit and not the whole byte value.
Not doing so sometimes broke SRIOV, since the junk value was seen by the VF as a
non-ready comm channel.
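
For illustration, seeding the toggle from only the T bit could look like the
sketch below; the bit position (31) and the comm_toggle field are assumptions
based on how the comm channel command word is built elsewhere in cmd.c:

	/* Assumed: the toggle (T) bit is bit 31 of the slave_read word. */
	u32 slave_read = swab32(readl(&priv->mfunc.comm->slave_read));

	/* V0 seeded the toggle from the whole byte, which a VF could then
	 * see as a junk, non-ready comm channel; take only the T bit.
	 */
	priv->cmd.comm_toggle = (slave_read >> 31) & 1;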
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 7aee42c6 0cd93027
......@@ -154,7 +154,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
continue;
slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
if (slave_id >= dev->dev->num_vfs + 1)
if (slave_id >= dev->dev->persist->num_vfs + 1)
return;
tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
form_cache_ag = get_cached_alias_guid(dev, port_num,
......
......@@ -1951,7 +1951,8 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
ctx->ib_dev = &dev->ib_dev;
for (i = 0;
i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1));
i < min(dev->dev->caps.sqp_demux,
(u16)(dev->dev->persist->num_vfs + 1));
i++) {
struct mlx4_active_ports actv_ports =
mlx4_get_active_ports(dev->dev, i);
......
......@@ -198,7 +198,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
props->vendor_part_id = dev->dev->pdev->device;
props->vendor_part_id = dev->dev->persist->pdev->device;
props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
......@@ -1375,7 +1375,7 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "MT%d\n", dev->dev->pdev->device);
return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
}
static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
......@@ -1937,7 +1937,8 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev)
int i;
if (mlx4_is_master(ibdev->dev)) {
for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
for (slave = 0; slave <= ibdev->dev->persist->num_vfs;
++slave) {
for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
for (i = 0;
i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
......@@ -1994,7 +1995,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
for (j = 0; j < eq_per_port; j++) {
snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s",
i, j, dev->pdev->bus->name);
i, j, dev->persist->pdev->bus->name);
/* Set IRQ for specific name (per ring) */
if (mlx4_assign_eq(dev, name, NULL,
&ibdev->eq_table[eq])) {
......@@ -2058,7 +2059,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
if (!ibdev) {
dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
dev_err(&dev->persist->pdev->dev,
"Device struct alloc failed\n");
return NULL;
}
......@@ -2085,7 +2087,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->num_ports = num_ports;
ibdev->ib_dev.phys_port_cnt = ibdev->num_ports;
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dma_device = &dev->pdev->dev;
ibdev->ib_dev.dma_device = &dev->persist->pdev->dev;
if (dev->caps.userspace_caps)
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
......@@ -2236,7 +2238,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
sizeof(long),
GFP_KERNEL);
if (!ibdev->ib_uc_qpns_bitmap) {
dev_err(&dev->pdev->dev, "bit map alloc failed\n");
dev_err(&dev->persist->pdev->dev,
"bit map alloc failed\n");
goto err_steer_qp_release;
}
......
......@@ -401,7 +401,8 @@ struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device
if (!mfrpl->ibfrpl.page_list)
goto err_free;
mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist->
pdev->dev,
size, &mfrpl->map,
GFP_KERNEL);
if (!mfrpl->mapped_page_list)
......@@ -423,7 +424,8 @@ void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
int size = page_list->max_page_list_len * sizeof (u64);
dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list,
dma_free_coherent(&dev->dev->persist->pdev->dev, size,
mfrpl->mapped_page_list,
mfrpl->map);
kfree(mfrpl->ibfrpl.page_list);
kfree(mfrpl);
......
......@@ -375,7 +375,7 @@ static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
char base_name[9];
/* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
strlcpy(name, pci_name(dev->dev->pdev), max);
strlcpy(name, pci_name(dev->dev->persist->pdev), max);
strncpy(base_name, name, 8); /*till xxxx:yy:*/
base_name[8] = '\0';
/* with no ARI only 3 last bits are used so when the fn is higher than 8
......@@ -792,7 +792,7 @@ static int register_pkey_tree(struct mlx4_ib_dev *device)
if (!mlx4_is_master(device->dev))
return 0;
for (i = 0; i <= device->dev->num_vfs; ++i)
for (i = 0; i <= device->dev->persist->num_vfs; ++i)
register_one_pkey_tree(device, i);
return 0;
......@@ -807,7 +807,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
if (!mlx4_is_master(device->dev))
return;
for (slave = device->dev->num_vfs; slave >= 0; --slave) {
for (slave = device->dev->persist->num_vfs; slave >= 0; --slave) {
list_for_each_entry_safe(p, t,
&device->pkeys.pkey_port_list[slave],
entry) {
......
......@@ -592,7 +592,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
buf->nbufs = 1;
buf->npages = 1;
buf->page_shift = get_order(size) + PAGE_SHIFT;
buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev,
buf->direct.buf = dma_alloc_coherent(&dev->persist->pdev->dev,
size, &t, gfp);
if (!buf->direct.buf)
return -ENOMEM;
......@@ -619,7 +619,8 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
for (i = 0; i < buf->nbufs; ++i) {
buf->page_list[i].buf =
dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
dma_alloc_coherent(&dev->persist->pdev->dev,
PAGE_SIZE,
&t, gfp);
if (!buf->page_list[i].buf)
goto err_free;
......@@ -657,7 +658,8 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
int i;
if (buf->nbufs == 1)
dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf,
dma_free_coherent(&dev->persist->pdev->dev, size,
buf->direct.buf,
buf->direct.map);
else {
if (BITS_PER_LONG == 64 && buf->direct.buf)
......@@ -665,7 +667,8 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
for (i = 0; i < buf->nbufs; ++i)
if (buf->page_list[i].buf)
dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
dma_free_coherent(&dev->persist->pdev->dev,
PAGE_SIZE,
buf->page_list[i].buf,
buf->page_list[i].map);
kfree(buf->page_list);
......@@ -738,7 +741,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp
if (!mlx4_alloc_db_from_pgdir(pgdir, db, order))
goto out;
pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev), gfp);
pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev, gfp);
if (!pgdir) {
ret = -ENOMEM;
goto out;
......@@ -775,7 +778,7 @@ void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db)
set_bit(i, db->u.pgdir->bits[o]);
if (bitmap_full(db->u.pgdir->order1, MLX4_DB_PER_PAGE / 2)) {
dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
db->u.pgdir->db_page, db->u.pgdir->db_dma);
list_del(&db->u.pgdir->list);
kfree(db->u.pgdir);
......
......@@ -40,16 +40,177 @@ enum {
MLX4_CATAS_POLL_INTERVAL = 5 * HZ,
};
static DEFINE_SPINLOCK(catas_lock);
static LIST_HEAD(catas_list);
static struct work_struct catas_work;
static int internal_err_reset = 1;
module_param(internal_err_reset, int, 0644);
int mlx4_internal_err_reset = 1;
module_param_named(internal_err_reset, mlx4_internal_err_reset, int, 0644);
MODULE_PARM_DESC(internal_err_reset,
"Reset device on internal errors if non-zero"
" (default 1, in SRIOV mode default is 0)");
"Reset device on internal errors if non-zero (default 1)");
static int read_vendor_id(struct mlx4_dev *dev)
{
u16 vendor_id = 0;
int ret;
ret = pci_read_config_word(dev->persist->pdev, 0, &vendor_id);
if (ret) {
mlx4_err(dev, "Failed to read vendor ID, ret=%d\n", ret);
return ret;
}
if (vendor_id == 0xffff) {
mlx4_err(dev, "PCI can't be accessed to read vendor id\n");
return -EINVAL;
}
return 0;
}
static int mlx4_reset_master(struct mlx4_dev *dev)
{
int err = 0;
if (mlx4_is_master(dev))
mlx4_report_internal_err_comm_event(dev);
if (!pci_channel_offline(dev->persist->pdev)) {
err = read_vendor_id(dev);
/* If PCI can't be accessed to read vendor ID we assume that its
* link was disabled and chip was already reset.
*/
if (err)
return 0;
err = mlx4_reset(dev);
if (err)
mlx4_err(dev, "Fail to reset HCA\n");
}
return err;
}
static int mlx4_reset_slave(struct mlx4_dev *dev)
{
#define COM_CHAN_RST_REQ_OFFSET 0x10
#define COM_CHAN_RST_ACK_OFFSET 0x08
u32 comm_flags;
u32 rst_req;
u32 rst_ack;
unsigned long end;
struct mlx4_priv *priv = mlx4_priv(dev);
if (pci_channel_offline(dev->persist->pdev))
return 0;
comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
MLX4_COMM_CHAN_FLAGS));
if (comm_flags == 0xffffffff) {
mlx4_err(dev, "VF reset is not needed\n");
return 0;
}
if (!(dev->caps.vf_caps & MLX4_VF_CAP_FLAG_RESET)) {
mlx4_err(dev, "VF reset is not supported\n");
return -EOPNOTSUPP;
}
rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >>
COM_CHAN_RST_REQ_OFFSET;
rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >>
COM_CHAN_RST_ACK_OFFSET;
if (rst_req != rst_ack) {
mlx4_err(dev, "Communication channel isn't sync, fail to send reset\n");
return -EIO;
}
rst_req ^= 1;
mlx4_warn(dev, "VF is sending reset request to Firmware\n");
comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET;
__raw_writel((__force u32)cpu_to_be32(comm_flags),
(__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS);
/* Make sure that our comm channel write doesn't
* get mixed in with writes from another CPU.
*/
mmiowb();
end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies;
while (time_before(jiffies, end)) {
comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
MLX4_COMM_CHAN_FLAGS));
rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >>
COM_CHAN_RST_ACK_OFFSET;
/* Reading rst_req again since the communication channel can
* be reset at any time by the PF and all its bits will be
* set to zero.
*/
rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >>
COM_CHAN_RST_REQ_OFFSET;
if (rst_ack == rst_req) {
mlx4_warn(dev, "VF Reset succeed\n");
return 0;
}
cond_resched();
}
mlx4_err(dev, "Fail to send reset over the communication channel\n");
return -ETIMEDOUT;
}
static int mlx4_comm_internal_err(u32 slave_read)
{
return (u32)COMM_CHAN_EVENT_INTERNAL_ERR ==
(slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 1 : 0;
}
void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
{
int err;
struct mlx4_dev *dev;
if (!mlx4_internal_err_reset)
return;
mutex_lock(&persist->device_state_mutex);
if (persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
goto out;
dev = persist->dev;
mlx4_err(dev, "device is going to be reset\n");
if (mlx4_is_slave(dev))
err = mlx4_reset_slave(dev);
else
err = mlx4_reset_master(dev);
BUG_ON(err != 0);
dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR;
mlx4_err(dev, "device was reset successfully\n");
mutex_unlock(&persist->device_state_mutex);
/* At that step HW was already reset, now notify clients */
mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
mlx4_cmd_wake_completions(dev);
return;
out:
mutex_unlock(&persist->device_state_mutex);
}
static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist)
{
int err = 0;
mlx4_enter_error_state(persist);
mutex_lock(&persist->interface_state_mutex);
if (persist->interface_state & MLX4_INTERFACE_STATE_UP &&
!(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) {
err = mlx4_restart_one(persist->pdev);
mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n",
err);
}
mutex_unlock(&persist->interface_state_mutex);
}
static void dump_err_buf(struct mlx4_dev *dev)
{
......@@ -67,58 +228,40 @@ static void poll_catas(unsigned long dev_ptr)
{
struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr;
struct mlx4_priv *priv = mlx4_priv(dev);
u32 slave_read;
if (readl(priv->catas_err.map)) {
/* If the device is off-line, we cannot try to recover it */
if (pci_channel_offline(dev->pdev))
mod_timer(&priv->catas_err.timer,
round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
else {
dump_err_buf(dev);
mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0);
if (internal_err_reset) {
spin_lock(&catas_lock);
list_add(&priv->catas_err.list, &catas_list);
spin_unlock(&catas_lock);
queue_work(mlx4_wq, &catas_work);
}
if (mlx4_is_slave(dev)) {
slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
if (mlx4_comm_internal_err(slave_read)) {
mlx4_warn(dev, "Internal error detected on the communication channel\n");
goto internal_err;
}
} else
mod_timer(&priv->catas_err.timer,
round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
} else if (readl(priv->catas_err.map)) {
dump_err_buf(dev);
goto internal_err;
}
if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
mlx4_warn(dev, "Internal error mark was detected on device\n");
goto internal_err;
}
mod_timer(&priv->catas_err.timer,
round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
return;
internal_err:
if (mlx4_internal_err_reset)
queue_work(dev->persist->catas_wq, &dev->persist->catas_work);
}
static void catas_reset(struct work_struct *work)
{
struct mlx4_priv *priv, *tmppriv;
struct mlx4_dev *dev;
struct mlx4_dev_persistent *persist =
container_of(work, struct mlx4_dev_persistent,
catas_work);
LIST_HEAD(tlist);
int ret;
spin_lock_irq(&catas_lock);
list_splice_init(&catas_list, &tlist);
spin_unlock_irq(&catas_lock);
list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) {
struct pci_dev *pdev = priv->dev.pdev;
/* If the device is off-line, we cannot reset it */
if (pci_channel_offline(pdev))
continue;
ret = mlx4_restart_one(priv->dev.pdev);
/* 'priv' now is not valid */
if (ret)
pr_err("mlx4 %s: Reset failed (%d)\n",
pci_name(pdev), ret);
else {
dev = pci_get_drvdata(pdev);
mlx4_dbg(dev, "Reset succeeded\n");
}
}
mlx4_handle_error_state(persist);
}
void mlx4_start_catas_poll(struct mlx4_dev *dev)
......@@ -126,22 +269,21 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
phys_addr_t addr;
/*If we are in SRIOV the default of the module param must be 0*/
if (mlx4_is_mfunc(dev))
internal_err_reset = 0;
INIT_LIST_HEAD(&priv->catas_err.list);
init_timer(&priv->catas_err.timer);
priv->catas_err.map = NULL;
addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) +
priv->fw.catas_offset;
if (!mlx4_is_slave(dev)) {
addr = pci_resource_start(dev->persist->pdev,
priv->fw.catas_bar) +
priv->fw.catas_offset;
priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
if (!priv->catas_err.map) {
mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n",
(unsigned long long) addr);
return;
priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
if (!priv->catas_err.map) {
mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n",
(unsigned long long)addr);
return;
}
}
priv->catas_err.timer.data = (unsigned long) dev;
......@@ -157,15 +299,29 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev)
del_timer_sync(&priv->catas_err.timer);
if (priv->catas_err.map)
if (priv->catas_err.map) {
iounmap(priv->catas_err.map);
priv->catas_err.map = NULL;
}
spin_lock_irq(&catas_lock);
list_del(&priv->catas_err.list);
spin_unlock_irq(&catas_lock);
if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION)
flush_workqueue(dev->persist->catas_wq);
}
void __init mlx4_catas_init(void)
int mlx4_catas_init(struct mlx4_dev *dev)
{
INIT_WORK(&catas_work, catas_reset);
INIT_WORK(&dev->persist->catas_work, catas_reset);
dev->persist->catas_wq = create_singlethread_workqueue("mlx4_health");
if (!dev->persist->catas_wq)
return -ENOMEM;
return 0;
}
void mlx4_catas_end(struct mlx4_dev *dev)
{
if (dev->persist->catas_wq) {
destroy_workqueue(dev->persist->catas_wq);
dev->persist->catas_wq = NULL;
}
}
This diff is collapsed.
......@@ -70,10 +70,10 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
/* Allocate HW buffers on provided NUMA node.
* dev->numa_node is used in mtt range allocation flow.
*/
set_dev_node(&mdev->dev->pdev->dev, node);
set_dev_node(&mdev->dev->persist->pdev->dev, node);
err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres,
cq->buf_size, 2 * PAGE_SIZE);
set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
if (err)
goto err_cq;
......
......@@ -92,7 +92,7 @@ mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo)
(u16) (mdev->dev->caps.fw_ver >> 32),
(u16) ((mdev->dev->caps.fw_ver >> 16) & 0xffff),
(u16) (mdev->dev->caps.fw_ver & 0xffff));
strlcpy(drvinfo->bus_info, pci_name(mdev->dev->pdev),
strlcpy(drvinfo->bus_info, pci_name(mdev->dev->persist->pdev),
sizeof(drvinfo->bus_info));
drvinfo->n_stats = 0;
drvinfo->regdump_len = 0;
......
......@@ -241,8 +241,8 @@ static void *mlx4_en_add(struct mlx4_dev *dev)
spin_lock_init(&mdev->uar_lock);
mdev->dev = dev;
mdev->dma_device = &(dev->pdev->dev);
mdev->pdev = dev->pdev;
mdev->dma_device = &dev->persist->pdev->dev;
mdev->pdev = dev->persist->pdev;
mdev->device_up = false;
mdev->LSO_support = !!(dev->caps.flags & (1 << 15));
......
......@@ -2457,7 +2457,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
netif_set_real_num_tx_queues(dev, prof->tx_ring_num);
netif_set_real_num_rx_queues(dev, prof->rx_ring_num);
SET_NETDEV_DEV(dev, &mdev->dev->pdev->dev);
SET_NETDEV_DEV(dev, &mdev->dev->persist->pdev->dev);
dev->dev_port = port - 1;
/*
......
......@@ -387,10 +387,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
ring->rx_info, tmp);
/* Allocate HW buffers on provided NUMA node */
set_dev_node(&mdev->dev->pdev->dev, node);
set_dev_node(&mdev->dev->persist->pdev->dev, node);
err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
ring->buf_size, 2 * PAGE_SIZE);
set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
if (err)
goto err_info;
......
......@@ -91,10 +91,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);
/* Allocate HW buffers on provided NUMA node */
set_dev_node(&mdev->dev->pdev->dev, node);
set_dev_node(&mdev->dev->persist->pdev->dev, node);
err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
2 * PAGE_SIZE);
set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
if (err) {
en_err(priv, "Failed allocating hwq resources\n");
goto err_bounce;
......
......@@ -237,7 +237,7 @@ int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port)
struct mlx4_eqe eqe;
/*don't send if we don't have the that slave */
if (dev->num_vfs < slave)
if (dev->persist->num_vfs < slave)
return 0;
memset(&eqe, 0, sizeof eqe);
......@@ -255,7 +255,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port,
struct mlx4_eqe eqe;
/*don't send if we don't have the that slave */
if (dev->num_vfs < slave)
if (dev->persist->num_vfs < slave)
return 0;
memset(&eqe, 0, sizeof eqe);
......@@ -310,7 +310,7 @@ static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event)
struct mlx4_slaves_pport slaves_pport = mlx4_phys_to_slaves_pport(dev,
port);
for (i = 0; i < dev->num_vfs + 1; i++)
for (i = 0; i < dev->persist->num_vfs + 1; i++)
if (test_bit(i, slaves_pport.slaves))
set_and_calc_slave_port_state(dev, i, port,
event, &gen_event);
......@@ -429,8 +429,14 @@ void mlx4_master_handle_slave_flr(struct work_struct *work)
if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) {
mlx4_dbg(dev, "mlx4_handle_slave_flr: clean slave: %d\n",
i);
mlx4_delete_all_resources_for_slave(dev, i);
/* In case of 'Reset flow' FLR can be generated for
* a slave before mlx4_load_one is done.
* make sure interface is up before trying to delete
* slave resources which weren't allocated yet.
*/
if (dev->persist->interface_state &
MLX4_INTERFACE_STATE_UP)
mlx4_delete_all_resources_for_slave(dev, i);
/*return the slave to running mode*/
spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
slave_state[i].last_cmd = MLX4_COMM_CMD_RESET;
......@@ -560,7 +566,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
mlx4_priv(dev)->sense.do_sense_port[port] = 1;
if (!mlx4_is_master(dev))
break;
for (i = 0; i < dev->num_vfs + 1; i++) {
for (i = 0; i < dev->persist->num_vfs + 1;
i++) {
if (!test_bit(i, slaves_port.slaves))
continue;
if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
......@@ -596,7 +603,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
if (!mlx4_is_master(dev))
break;
if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
for (i = 0; i < dev->num_vfs + 1; i++) {
for (i = 0;
i < dev->persist->num_vfs + 1;
i++) {
if (!test_bit(i, slaves_port.slaves))
continue;
if (i == mlx4_master_func_num(dev))
......@@ -865,7 +874,7 @@ static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
if (!priv->eq_table.uar_map[index]) {
priv->eq_table.uar_map[index] =
ioremap(pci_resource_start(dev->pdev, 2) +
ioremap(pci_resource_start(dev->persist->pdev, 2) +
((eq->eqn / 4) << PAGE_SHIFT),
PAGE_SIZE);
if (!priv->eq_table.uar_map[index]) {
......@@ -928,8 +937,10 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
eq_context = mailbox->buf;
for (i = 0; i < npages; ++i) {
eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
PAGE_SIZE, &t, GFP_KERNEL);
eq->page_list[i].buf = dma_alloc_coherent(&dev->persist->
pdev->dev,
PAGE_SIZE, &t,
GFP_KERNEL);
if (!eq->page_list[i].buf)
goto err_out_free_pages;
......@@ -995,7 +1006,7 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
err_out_free_pages:
for (i = 0; i < npages; ++i)
if (eq->page_list[i].buf)
dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
eq->page_list[i].buf,
eq->page_list[i].map);
......@@ -1044,9 +1055,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
mlx4_mtt_cleanup(dev, &eq->mtt);
for (i = 0; i < npages; ++i)
dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
eq->page_list[i].buf,
eq->page_list[i].map);
dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
eq->page_list[i].buf,
eq->page_list[i].map);
kfree(eq->page_list);
mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR);
......@@ -1060,7 +1071,7 @@ static void mlx4_free_irqs(struct mlx4_dev *dev)
int i, vec;
if (eq_table->have_irq)
free_irq(dev->pdev->irq, dev);
free_irq(dev->persist->pdev->irq, dev);
for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
if (eq_table->eq[i].have_irq) {
......@@ -1089,7 +1100,8 @@ static int mlx4_map_clr_int(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) +
priv->clr_base = ioremap(pci_resource_start(dev->persist->pdev,
priv->fw.clr_int_bar) +
priv->fw.clr_int_base, MLX4_CLR_INT_SIZE);
if (!priv->clr_base) {
mlx4_err(dev, "Couldn't map interrupt clear register, aborting\n");
......@@ -1212,13 +1224,13 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
i * MLX4_IRQNAME_SIZE,
MLX4_IRQNAME_SIZE,
"mlx4-comp-%d@pci:%s", i,
pci_name(dev->pdev));
pci_name(dev->persist->pdev));
} else {
snprintf(priv->eq_table.irq_names +
i * MLX4_IRQNAME_SIZE,
MLX4_IRQNAME_SIZE,
"mlx4-async@pci:%s",
pci_name(dev->pdev));
pci_name(dev->persist->pdev));
}
eq_name = priv->eq_table.irq_names +
......@@ -1235,8 +1247,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev)
snprintf(priv->eq_table.irq_names,
MLX4_IRQNAME_SIZE,
DRV_NAME "@pci:%s",
pci_name(dev->pdev));
err = request_irq(dev->pdev->irq, mlx4_interrupt,
pci_name(dev->persist->pdev));
err = request_irq(dev->persist->pdev->irq, mlx4_interrupt,
IRQF_SHARED, priv->eq_table.irq_names, dev);
if (err)
goto err_out_async;
......
......@@ -56,7 +56,7 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu
int i;
if (chunk->nsg > 0)
pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
pci_unmap_sg(dev->persist->pdev, chunk->mem, chunk->npages,
PCI_DMA_BIDIRECTIONAL);
for (i = 0; i < chunk->npages; ++i)
......@@ -69,7 +69,8 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk *
int i;
for (i = 0; i < chunk->npages; ++i)
dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
dma_free_coherent(&dev->persist->pdev->dev,
chunk->mem[i].length,
lowmem_page_address(sg_page(&chunk->mem[i])),
sg_dma_address(&chunk->mem[i]));
}
......@@ -173,7 +174,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
--cur_order;
if (coherent)
ret = mlx4_alloc_icm_coherent(&dev->pdev->dev,
ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev,
&chunk->mem[chunk->npages],
cur_order, gfp_mask);
else
......@@ -193,7 +194,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
if (coherent)
++chunk->nsg;
else if (chunk->npages == MLX4_ICM_CHUNK_LEN) {
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
......@@ -208,7 +209,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
}
if (!coherent && chunk) {
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem,
chunk->npages,
PCI_DMA_BIDIRECTIONAL);
......
......@@ -138,13 +138,13 @@ int mlx4_register_device(struct mlx4_dev *dev)
mutex_lock(&intf_mutex);
dev->persist->interface_state |= MLX4_INTERFACE_STATE_UP;
list_add_tail(&priv->dev_list, &dev_list);
list_for_each_entry(intf, &intf_list, list)
mlx4_add_device(intf, priv);
mutex_unlock(&intf_mutex);
if (!mlx4_is_slave(dev))
mlx4_start_catas_poll(dev);
mlx4_start_catas_poll(dev);
return 0;
}
......@@ -154,14 +154,14 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_interface *intf;
if (!mlx4_is_slave(dev))
mlx4_stop_catas_poll(dev);
mlx4_stop_catas_poll(dev);
mutex_lock(&intf_mutex);
list_for_each_entry(intf, &intf_list, list)
mlx4_remove_device(intf, priv);
list_del(&priv->dev_list);
dev->persist->interface_state &= ~MLX4_INTERFACE_STATE_UP;
mutex_unlock(&intf_mutex);
}
......
This diff is collapsed.
......@@ -1318,6 +1318,9 @@ int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
mutex_unlock(&priv->mcg_table.mutex);
mlx4_free_cmd_mailbox(dev, mailbox);
if (err && dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
/* In case device is under an error, return success as a closing command */
err = 0;
return err;
}
......@@ -1347,6 +1350,9 @@ static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp,
MLX4_CMD_WRAPPED);
mlx4_free_cmd_mailbox(dev, mailbox);
if (err && !attach &&
dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
err = 0;
return err;
}
......
......@@ -85,7 +85,9 @@ enum {
MLX4_CLR_INT_SIZE = 0x00008,
MLX4_SLAVE_COMM_BASE = 0x0,
MLX4_COMM_PAGESIZE = 0x1000,
MLX4_CLOCK_SIZE = 0x00008
MLX4_CLOCK_SIZE = 0x00008,
MLX4_COMM_CHAN_CAPS = 0x8,
MLX4_COMM_CHAN_FLAGS = 0xc
};
enum {
......@@ -120,6 +122,10 @@ enum mlx4_mpt_state {
};
#define MLX4_COMM_TIME 10000
#define MLX4_COMM_OFFLINE_TIME_OUT 30000
#define MLX4_COMM_CMD_NA_OP 0x0
enum {
MLX4_COMM_CMD_RESET,
MLX4_COMM_CMD_VHCR0,
......@@ -221,19 +227,21 @@ extern int mlx4_debug_level;
#define mlx4_dbg(mdev, format, ...) \
do { \
if (mlx4_debug_level) \
dev_printk(KERN_DEBUG, &(mdev)->pdev->dev, format, \
dev_printk(KERN_DEBUG, \
&(mdev)->persist->pdev->dev, format, \
##__VA_ARGS__); \
} while (0)
#define mlx4_err(mdev, format, ...) \
dev_err(&(mdev)->pdev->dev, format, ##__VA_ARGS__)
dev_err(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__)
#define mlx4_info(mdev, format, ...) \
dev_info(&(mdev)->pdev->dev, format, ##__VA_ARGS__)
dev_info(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__)
#define mlx4_warn(mdev, format, ...) \
dev_warn(&(mdev)->pdev->dev, format, ##__VA_ARGS__)
dev_warn(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__)
extern int mlx4_log_num_mgm_entry_size;
extern int log_mtts_per_seg;
extern int mlx4_internal_err_reset;
#define MLX4_MAX_NUM_SLAVES (MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF)
#define ALL_SLAVES 0xff
......@@ -606,7 +614,6 @@ struct mlx4_mgm {
struct mlx4_cmd {
struct pci_pool *pool;
void __iomem *hcr;
struct mutex hcr_mutex;
struct mutex slave_cmd_mutex;
struct semaphore poll_sem;
struct semaphore event_sem;
......@@ -994,7 +1001,8 @@ void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn);
void mlx4_start_catas_poll(struct mlx4_dev *dev);
void mlx4_stop_catas_poll(struct mlx4_dev *dev);
void mlx4_catas_init(void);
int mlx4_catas_init(struct mlx4_dev *dev);
void mlx4_catas_end(struct mlx4_dev *dev);
int mlx4_restart_one(struct pci_dev *pdev);
int mlx4_register_device(struct mlx4_dev *dev);
void mlx4_unregister_device(struct mlx4_dev *dev);
......@@ -1160,13 +1168,14 @@ enum {
int mlx4_cmd_init(struct mlx4_dev *dev);
void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask);
int mlx4_multi_func_init(struct mlx4_dev *dev);
int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev);
void mlx4_multi_func_cleanup(struct mlx4_dev *dev);
void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
int mlx4_cmd_use_events(struct mlx4_dev *dev);
void mlx4_cmd_use_polling(struct mlx4_dev *dev);
int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
unsigned long timeout);
u16 op, unsigned long timeout);
void mlx4_cq_tasklet_cb(unsigned long data);
void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
......@@ -1176,7 +1185,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type);
void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type);
void mlx4_handle_catas_err(struct mlx4_dev *dev);
void mlx4_enter_error_state(struct mlx4_dev_persistent *persist);
int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
enum mlx4_port_type *type);
......
......@@ -708,13 +708,13 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
if (!mtts)
return -ENOMEM;
dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle,
dma_sync_single_for_cpu(&dev->persist->pdev->dev, dma_handle,
npages * sizeof (u64), DMA_TO_DEVICE);
for (i = 0; i < npages; ++i)
mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
dma_sync_single_for_device(&dev->pdev->dev, dma_handle,
dma_sync_single_for_device(&dev->persist->pdev->dev, dma_handle,
npages * sizeof (u64), DMA_TO_DEVICE);
return 0;
......@@ -1020,13 +1020,13 @@ int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list
/* Make sure MPT status is visible before writing MTT entries */
wmb();
dma_sync_single_for_cpu(&dev->pdev->dev, fmr->dma_handle,
dma_sync_single_for_cpu(&dev->persist->pdev->dev, fmr->dma_handle,
npages * sizeof(u64), DMA_TO_DEVICE);
for (i = 0; i < npages; ++i)
fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);
dma_sync_single_for_device(&dev->pdev->dev, fmr->dma_handle,
dma_sync_single_for_device(&dev->persist->pdev->dev, fmr->dma_handle,
npages * sizeof(u64), DMA_TO_DEVICE);
fmr->mpt->key = cpu_to_be32(key);
......
......@@ -151,11 +151,13 @@ int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar)
return -ENOMEM;
if (mlx4_is_slave(dev))
offset = uar->index % ((int) pci_resource_len(dev->pdev, 2) /
offset = uar->index % ((int)pci_resource_len(dev->persist->pdev,
2) /
dev->caps.uar_page_size);
else
offset = uar->index;
uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + offset;
uar->pfn = (pci_resource_start(dev->persist->pdev, 2) >> PAGE_SHIFT)
+ offset;
uar->map = NULL;
return 0;
}
......
......@@ -553,9 +553,9 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port)
slaves_pport_actv = mlx4_phys_to_slaves_pport_actv(
dev, &exclusive_ports);
slave_gid -= bitmap_weight(slaves_pport_actv.slaves,
dev->num_vfs + 1);
dev->persist->num_vfs + 1);
}
vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1;
vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1;
if (slave_gid <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % vfs))
return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs) + 1;
return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs;
......@@ -590,10 +590,10 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port)
slaves_pport_actv = mlx4_phys_to_slaves_pport_actv(
dev, &exclusive_ports);
slave_gid -= bitmap_weight(slaves_pport_actv.slaves,
dev->num_vfs + 1);
dev->persist->num_vfs + 1);
}
gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS;
vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1;
vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1;
if (slave_gid <= gids % vfs)
return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave_gid - 1);
......@@ -644,7 +644,7 @@ void mlx4_reset_roce_gids(struct mlx4_dev *dev, int slave)
int num_eth_ports, err;
int i;
if (slave < 0 || slave > dev->num_vfs)
if (slave < 0 || slave > dev->persist->num_vfs)
return;
actv_ports = mlx4_get_active_ports(dev, slave);
......@@ -1214,7 +1214,8 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid,
return -EINVAL;
slaves_pport = mlx4_phys_to_slaves_pport(dev, port);
num_vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1;
num_vfs = bitmap_weight(slaves_pport.slaves,
dev->persist->num_vfs + 1) - 1;
for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) {
if (!memcmp(priv->port[port].gid_table.roce_gids[i].raw, gid,
......@@ -1258,7 +1259,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid,
dev, &exclusive_ports);
num_vfs_before += bitmap_weight(
slaves_pport_actv.slaves,
dev->num_vfs + 1);
dev->persist->num_vfs + 1);
}
/* candidate_slave_gid isn't necessarily the correct slave, but
......@@ -1288,7 +1289,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid,
dev, &exclusive_ports);
slave_gid += bitmap_weight(
slaves_pport_actv.slaves,
dev->num_vfs + 1);
dev->persist->num_vfs + 1);
}
}
*slave_id = slave_gid;
......
......@@ -76,19 +76,21 @@ int mlx4_reset(struct mlx4_dev *dev)
goto out;
}
pcie_cap = pci_pcie_cap(dev->pdev);
pcie_cap = pci_pcie_cap(dev->persist->pdev);
for (i = 0; i < 64; ++i) {
if (i == 22 || i == 23)
continue;
if (pci_read_config_dword(dev->pdev, i * 4, hca_header + i)) {
if (pci_read_config_dword(dev->persist->pdev, i * 4,
hca_header + i)) {
err = -ENODEV;
mlx4_err(dev, "Couldn't save HCA PCI header, aborting\n");
goto out;
}
}
reset = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_RESET_BASE,
reset = ioremap(pci_resource_start(dev->persist->pdev, 0) +
MLX4_RESET_BASE,
MLX4_RESET_SIZE);
if (!reset) {
err = -ENOMEM;
......@@ -122,8 +124,8 @@ int mlx4_reset(struct mlx4_dev *dev)
end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES;
do {
if (!pci_read_config_word(dev->pdev, PCI_VENDOR_ID, &vendor) &&
vendor != 0xffff)
if (!pci_read_config_word(dev->persist->pdev, PCI_VENDOR_ID,
&vendor) && vendor != 0xffff)
break;
msleep(1);
......@@ -138,14 +140,16 @@ int mlx4_reset(struct mlx4_dev *dev)
/* Now restore the PCI headers */
if (pcie_cap) {
devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4];
if (pcie_capability_write_word(dev->pdev, PCI_EXP_DEVCTL,
if (pcie_capability_write_word(dev->persist->pdev,
PCI_EXP_DEVCTL,
devctl)) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA PCI Express Device Control register, aborting\n");
goto out;
}
linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4];
if (pcie_capability_write_word(dev->pdev, PCI_EXP_LNKCTL,
if (pcie_capability_write_word(dev->persist->pdev,
PCI_EXP_LNKCTL,
linkctl)) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA PCI Express Link control register, aborting\n");
......@@ -157,7 +161,8 @@ int mlx4_reset(struct mlx4_dev *dev)
if (i * 4 == PCI_COMMAND)
continue;
if (pci_write_config_dword(dev->pdev, i * 4, hca_header[i])) {
if (pci_write_config_dword(dev->persist->pdev, i * 4,
hca_header[i])) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA reg %x, aborting\n",
i);
......@@ -165,7 +170,7 @@ int mlx4_reset(struct mlx4_dev *dev)
}
}
if (pci_write_config_dword(dev->pdev, PCI_COMMAND,
if (pci_write_config_dword(dev->persist->pdev, PCI_COMMAND,
hca_header[PCI_COMMAND / 4])) {
err = -ENODEV;
mlx4_err(dev, "Couldn't restore HCA COMMAND, aborting\n");
......
......@@ -309,12 +309,13 @@ static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave,
int allocated, free, reserved, guaranteed, from_free;
int from_rsvd;
if (slave > dev->num_vfs)
if (slave > dev->persist->num_vfs)
return -EINVAL;
spin_lock(&res_alloc->alloc_lock);
allocated = (port > 0) ?
res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] :
res_alloc->allocated[(port - 1) *
(dev->persist->num_vfs + 1) + slave] :
res_alloc->allocated[slave];
free = (port > 0) ? res_alloc->res_port_free[port - 1] :
res_alloc->res_free;
......@@ -352,7 +353,8 @@ static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave,
if (!err) {
/* grant the request */
if (port > 0) {
res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] += count;
res_alloc->allocated[(port - 1) *
(dev->persist->num_vfs + 1) + slave] += count;
res_alloc->res_port_free[port - 1] -= count;
res_alloc->res_port_rsvd[port - 1] -= from_rsvd;
} else {
......@@ -376,13 +378,14 @@ static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave,
&priv->mfunc.master.res_tracker.res_alloc[res_type];
int allocated, guaranteed, from_rsvd;
if (slave > dev->num_vfs)
if (slave > dev->persist->num_vfs)
return;
spin_lock(&res_alloc->alloc_lock);
allocated = (port > 0) ?
res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] :
res_alloc->allocated[(port - 1) *
(dev->persist->num_vfs + 1) + slave] :
res_alloc->allocated[slave];
guaranteed = res_alloc->guaranteed[slave];
......@@ -397,7 +400,8 @@ static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave,
}
if (port > 0) {
res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] -= count;
res_alloc->allocated[(port - 1) *
(dev->persist->num_vfs + 1) + slave] -= count;
res_alloc->res_port_free[port - 1] += count;
res_alloc->res_port_rsvd[port - 1] += from_rsvd;
} else {
......@@ -415,7 +419,8 @@ static inline void initialize_res_quotas(struct mlx4_dev *dev,
enum mlx4_resource res_type,
int vf, int num_instances)
{
res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1));
res_alloc->guaranteed[vf] = num_instances /
(2 * (dev->persist->num_vfs + 1));
res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf];
if (vf == mlx4_master_func_num(dev)) {
res_alloc->res_free = num_instances;
......@@ -486,21 +491,26 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) {
struct resource_allocator *res_alloc =
&priv->mfunc.master.res_tracker.res_alloc[i];
res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
res_alloc->quota = kmalloc((dev->persist->num_vfs + 1) *
sizeof(int), GFP_KERNEL);
res_alloc->guaranteed = kmalloc((dev->persist->num_vfs + 1) *
sizeof(int), GFP_KERNEL);
if (i == RES_MAC || i == RES_VLAN)
res_alloc->allocated = kzalloc(MLX4_MAX_PORTS *
(dev->num_vfs + 1) * sizeof(int),
GFP_KERNEL);
(dev->persist->num_vfs
+ 1) *
sizeof(int), GFP_KERNEL);
else
res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL);
res_alloc->allocated = kzalloc((dev->persist->
num_vfs + 1) *
sizeof(int), GFP_KERNEL);
if (!res_alloc->quota || !res_alloc->guaranteed ||
!res_alloc->allocated)
goto no_mem_err;
spin_lock_init(&res_alloc->alloc_lock);
for (t = 0; t < dev->num_vfs + 1; t++) {
for (t = 0; t < dev->persist->num_vfs + 1; t++) {
struct mlx4_active_ports actv_ports =
mlx4_get_active_ports(dev, t);
switch (i) {
......
......@@ -279,6 +279,8 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in
int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state);
int mlx4_config_dev_retrieval(struct mlx4_dev *dev,
struct mlx4_config_dev_params *params);
void mlx4_cmd_wake_completions(struct mlx4_dev *dev);
void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev);
/*
* mlx4_get_slave_default_vlan -
* return true if VST ( default vlan)
......@@ -288,5 +290,6 @@ bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave,
u16 *vlan, u8 *qos);
#define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8)
#define COMM_CHAN_EVENT_INTERNAL_ERR (1 << 17)
#endif /* MLX4_CMD_H */
......@@ -208,6 +208,10 @@ enum {
MLX4_QUERY_FUNC_FLAGS_A0_RES_QP = 1LL << 1
};
enum {
MLX4_VF_CAP_FLAG_RESET = 1 << 0
};
/* bit enums for an 8-bit flags field indicating special use
* QPs which require special handling in qp_reserve_range.
* Currently, this only includes QPs used by the ETH interface,
......@@ -411,6 +415,16 @@ enum {
MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4,
};
enum {
MLX4_DEVICE_STATE_UP = 1 << 0,
MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1,
};
enum {
MLX4_INTERFACE_STATE_UP = 1 << 0,
MLX4_INTERFACE_STATE_DELETION = 1 << 1,
};
#define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \
MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK)
......@@ -535,6 +549,7 @@ struct mlx4_caps {
u8 alloc_res_qp_mask;
u32 dmfs_high_rate_qpn_base;
u32 dmfs_high_rate_qpn_range;
u32 vf_caps;
};
struct mlx4_buf_list {
......@@ -744,8 +759,23 @@ struct mlx4_vf_dev {
u8 n_ports;
};
struct mlx4_dev {
struct mlx4_dev_persistent {
struct pci_dev *pdev;
struct mlx4_dev *dev;
int nvfs[MLX4_MAX_PORTS + 1];
int num_vfs;
enum mlx4_port_type curr_port_type[MLX4_MAX_PORTS + 1];
enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1];
struct work_struct catas_work;
struct workqueue_struct *catas_wq;
struct mutex device_state_mutex; /* protect HW state */
u8 state;
struct mutex interface_state_mutex; /* protect SW state */
u8 interface_state;
};
struct mlx4_dev {
struct mlx4_dev_persistent *persist;
unsigned long flags;
unsigned long num_slaves;
struct mlx4_caps caps;
......@@ -754,13 +784,11 @@ struct mlx4_dev {
struct radix_tree_root qp_table_tree;
u8 rev_id;
char board_id[MLX4_BOARD_ID_LEN];
int num_vfs;
int numa_node;
int oper_log_mgm_entry_size;
u64 regid_promisc_array[MLX4_MAX_PORTS + 1];
u64 regid_allmulti_array[MLX4_MAX_PORTS + 1];
struct mlx4_vf_dev *dev_vfs;
int nvfs[MLX4_MAX_PORTS + 1];
};
struct mlx4_eqe {
......