Commit b898ce7b authored by Saeed Mahameed

net/mlx5: cmdif, Avoid skipping reclaim pages if FW is not accessible

If the PCI channel is offline, reclaim_pages_cmd() will still try to call
the FW to release FW pages; cmd_exec() in this case will return a silent
success without actually calling the FW.

This is wrong and will cause page leaks. What we should do is detect that
the PCI channel is offline or the command interface is unavailable before
trying to access the FW, and manually release the FW pages in the driver.

In this patch we share the code to check for FW command interface
availability and we call it in sensitive places e.g. reclaim_pages_cmd().

Alternative fix:
 1. Remove MLX5_CMD_OP_MANAGE_PAGES from the mlx5_internal_err_ret_value
    command success simulation list.
 2. Always Release FW pages even if cmd_exec fails in reclaim_pages_cmd().
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent 410bd754
...@@ -902,6 +902,13 @@ static int cmd_alloc_index_retry(struct mlx5_cmd *cmd) ...@@ -902,6 +902,13 @@ static int cmd_alloc_index_retry(struct mlx5_cmd *cmd)
return idx; return idx;
} }
/*
 * mlx5_cmd_is_down - report whether the FW command interface is unusable.
 * @dev: mlx5 core device to inspect.
 *
 * Returns true if any of the following holds, checked in this order:
 *   - the PCI channel of dev->pdev is offline,
 *   - the command interface state is not MLX5_CMDIF_STATE_UP,
 *   - the device state is MLX5_DEVICE_STATE_INTERNAL_ERROR.
 * Returns false otherwise.
 */
bool mlx5_cmd_is_down(struct mlx5_core_dev *dev)
{
	if (pci_channel_offline(dev->pdev))
		return true;

	if (dev->cmd.state != MLX5_CMDIF_STATE_UP)
		return true;

	return dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR;
}
static void cmd_work_handler(struct work_struct *work) static void cmd_work_handler(struct work_struct *work)
{ {
struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
...@@ -967,10 +974,7 @@ static void cmd_work_handler(struct work_struct *work) ...@@ -967,10 +974,7 @@ static void cmd_work_handler(struct work_struct *work)
set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
/* Skip sending command to fw if internal error */ /* Skip sending command to fw if internal error */
if (pci_channel_offline(dev->pdev) || if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) {
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
cmd->state != MLX5_CMDIF_STATE_UP ||
!opcode_allowed(&dev->cmd, ent->op)) {
u8 status = 0; u8 status = 0;
u32 drv_synd; u32 drv_synd;
...@@ -1800,10 +1804,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, ...@@ -1800,10 +1804,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
u8 token; u8 token;
opcode = MLX5_GET(mbox_in, in, opcode); opcode = MLX5_GET(mbox_in, in, opcode);
if (pci_channel_offline(dev->pdev) || if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) {
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
dev->cmd.state != MLX5_CMDIF_STATE_UP ||
!opcode_allowed(&dev->cmd, opcode)) {
err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status); err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
MLX5_SET(mbox_out, out, status, status); MLX5_SET(mbox_out, out, status, status);
MLX5_SET(mbox_out, out, syndrome, drv_synd); MLX5_SET(mbox_out, out, syndrome, drv_synd);
......
...@@ -432,7 +432,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, ...@@ -432,7 +432,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
u32 npages; u32 npages;
u32 i = 0; u32 i = 0;
if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) if (!mlx5_cmd_is_down(dev))
return mlx5_cmd_exec(dev, in, in_size, out, out_size); return mlx5_cmd_exec(dev, in, in_size, out, out_size);
/* No hard feelings, we want our pages back! */ /* No hard feelings, we want our pages back! */
......
...@@ -935,6 +935,7 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, ...@@ -935,6 +935,7 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
void *out, int out_size); void *out, int out_size);
void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
bool mlx5_cmd_is_down(struct mlx5_core_dev *dev);
int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment