Commit 6de04224 authored by Yishai Hadas's avatar Yishai Hadas Committed by Alex Williamson

vfio/mlx5: Let firmware know upon leaving PRE_COPY back to RUNNING

Let firmware know upon leaving PRE_COPY back to RUNNING, as may happen
due to some error on the target or upon migration cancellation.

This will let firmware clean up its internal resources that were turned
on upon PRE_COPY.

The flow is based on the device specification in this area.
Signed-off-by: default avatarYishai Hadas <yishaih@nvidia.com>
Reviewed-by: default avatarKevin Tian <kevin.tian@intel.com>
Acked-by: default avatarLeon Romanovsky <leon@kernel.org>
Link: https://lore.kernel.org/r/20240205124828.232701-6-yishaih@nvidia.com
Signed-off-by: default avatarAlex Williamson <alex.williamson@redhat.com>
parent d8d577b5
...@@ -108,8 +108,9 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, ...@@ -108,8 +108,9 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp); ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp);
if (ret) if (ret)
return ret; return ret;
if (mvdev->saving_migf->state == /* Upon cleanup, ignore previous pre_copy error state */
MLX5_MIGF_STATE_PRE_COPY_ERROR) { if (mvdev->saving_migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR &&
!(query_flags & MLX5VF_QUERY_CLEANUP)) {
/* /*
* In case we had a PRE_COPY error, only query full * In case we had a PRE_COPY error, only query full
* image for final image * image for final image
...@@ -200,7 +201,7 @@ void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev) ...@@ -200,7 +201,7 @@ void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
/* Must be done outside the lock to let it progress */ /* Must be done outside the lock to let it progress */
set_tracker_error(mvdev); set_tracker_error(mvdev);
mutex_lock(&mvdev->state_mutex); mutex_lock(&mvdev->state_mutex);
mlx5vf_disable_fds(mvdev); mlx5vf_disable_fds(mvdev, NULL);
_mlx5vf_free_page_tracker_resources(mvdev); _mlx5vf_free_page_tracker_resources(mvdev);
mlx5vf_state_mutex_unlock(mvdev); mlx5vf_state_mutex_unlock(mvdev);
} }
...@@ -639,6 +640,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, ...@@ -639,6 +640,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
struct mlx5_vhca_data_buffer *header_buf = NULL; struct mlx5_vhca_data_buffer *header_buf = NULL;
struct mlx5vf_async_data *async_data; struct mlx5vf_async_data *async_data;
bool pre_copy_cleanup = false;
int err; int err;
lockdep_assert_held(&mvdev->state_mutex); lockdep_assert_held(&mvdev->state_mutex);
...@@ -649,6 +651,10 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, ...@@ -649,6 +651,10 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
if (err) if (err)
return err; return err;
if ((migf->state == MLX5_MIGF_STATE_PRE_COPY ||
migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) && !track && !inc)
pre_copy_cleanup = true;
if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR)
/* /*
* In case we had a PRE_COPY error, SAVE is triggered only for * In case we had a PRE_COPY error, SAVE is triggered only for
...@@ -667,7 +673,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, ...@@ -667,7 +673,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
async_data = &migf->async_data; async_data = &migf->async_data;
async_data->buf = buf; async_data->buf = buf;
async_data->stop_copy_chunk = !track; async_data->stop_copy_chunk = (!track && !pre_copy_cleanup);
async_data->out = kvzalloc(out_size, GFP_KERNEL); async_data->out = kvzalloc(out_size, GFP_KERNEL);
if (!async_data->out) { if (!async_data->out) {
err = -ENOMEM; err = -ENOMEM;
......
...@@ -197,6 +197,7 @@ struct mlx5vf_pci_core_device { ...@@ -197,6 +197,7 @@ struct mlx5vf_pci_core_device {
enum { enum {
MLX5VF_QUERY_INC = (1UL << 0), MLX5VF_QUERY_INC = (1UL << 0),
MLX5VF_QUERY_FINAL = (1UL << 1), MLX5VF_QUERY_FINAL = (1UL << 1),
MLX5VF_QUERY_CLEANUP = (1UL << 2),
}; };
int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
...@@ -232,7 +233,8 @@ int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, ...@@ -232,7 +233,8 @@ int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
unsigned long offset); unsigned long offset);
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev,
enum mlx5_vf_migf_state *last_save_state);
void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work); void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf, void mlx5vf_mig_file_set_save_work(struct mlx5_vf_migration_file *migf,
u8 chunk_num, size_t next_required_umem_size); u8 chunk_num, size_t next_required_umem_size);
......
...@@ -1146,7 +1146,8 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) ...@@ -1146,7 +1146,8 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
return ERR_PTR(ret); return ERR_PTR(ret);
} }
void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev,
enum mlx5_vf_migf_state *last_save_state)
{ {
if (mvdev->resuming_migf) { if (mvdev->resuming_migf) {
mlx5vf_disable_fd(mvdev->resuming_migf); mlx5vf_disable_fd(mvdev->resuming_migf);
...@@ -1157,6 +1158,8 @@ void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) ...@@ -1157,6 +1158,8 @@ void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
if (mvdev->saving_migf) { if (mvdev->saving_migf) {
mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx); mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx);
cancel_work_sync(&mvdev->saving_migf->async_data.work); cancel_work_sync(&mvdev->saving_migf->async_data.work);
if (last_save_state)
*last_save_state = mvdev->saving_migf->state;
mlx5vf_disable_fd(mvdev->saving_migf); mlx5vf_disable_fd(mvdev->saving_migf);
wake_up_interruptible(&mvdev->saving_migf->poll_wait); wake_up_interruptible(&mvdev->saving_migf->poll_wait);
mlx5fv_cmd_clean_migf_resources(mvdev->saving_migf); mlx5fv_cmd_clean_migf_resources(mvdev->saving_migf);
...@@ -1217,12 +1220,34 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, ...@@ -1217,12 +1220,34 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
return migf->filp; return migf->filp;
} }
if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) || if (cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) {
(cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) || mlx5vf_disable_fds(mvdev, NULL);
return NULL;
}
if ((cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
(cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P &&
new == VFIO_DEVICE_STATE_RUNNING_P2P)) { new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
mlx5vf_disable_fds(mvdev); struct mlx5_vf_migration_file *migf = mvdev->saving_migf;
return NULL; struct mlx5_vhca_data_buffer *buf;
enum mlx5_vf_migf_state state;
size_t size;
ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &size, NULL,
MLX5VF_QUERY_INC | MLX5VF_QUERY_CLEANUP);
if (ret)
return ERR_PTR(ret);
buf = mlx5vf_get_data_buffer(migf, size, DMA_FROM_DEVICE);
if (IS_ERR(buf))
return ERR_CAST(buf);
/* pre_copy cleanup */
ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, false, false);
if (ret) {
mlx5vf_put_data_buffer(buf);
return ERR_PTR(ret);
}
mlx5vf_disable_fds(mvdev, &state);
return (state != MLX5_MIGF_STATE_ERROR) ? NULL : ERR_PTR(-EIO);
} }
if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) { if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) {
...@@ -1244,7 +1269,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, ...@@ -1244,7 +1269,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
} }
mlx5vf_disable_fds(mvdev); mlx5vf_disable_fds(mvdev, NULL);
return NULL; return NULL;
} }
...@@ -1289,7 +1314,7 @@ void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev) ...@@ -1289,7 +1314,7 @@ void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev)
mvdev->deferred_reset = false; mvdev->deferred_reset = false;
spin_unlock(&mvdev->reset_lock); spin_unlock(&mvdev->reset_lock);
mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING; mvdev->mig_state = VFIO_DEVICE_STATE_RUNNING;
mlx5vf_disable_fds(mvdev); mlx5vf_disable_fds(mvdev, NULL);
goto again; goto again;
} }
mutex_unlock(&mvdev->state_mutex); mutex_unlock(&mvdev->state_mutex);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment