Commit 821b8f6b authored by Yishai Hadas, committed by Alex Williamson

vfio/mlx5: Enforce PRE_COPY support

Enable live migration only once the firmware supports PRE_COPY.

PRE_COPY has been supported by the firmware for a long time [1]
and is required to achieve low downtime during live migration.

This lets us clean up some old code that no longer applies now that
PRE_COPY is fully supported by the firmware.

[1] The minimum firmware version that supports PRE_COPY is 28.36.1010,
which was released in January 2023.

No firmware without PRE_COPY support was ever made available to users.
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20240306105624.114830-1-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
parent 626f534d
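
As context for the change (not part of the commit itself), below is a minimal userspace sketch of how a VMM could confirm that an mlx5 VF now always reports VFIO_MIGRATION_PRE_COPY alongside STOP_COPY and P2P. It uses only the standard VFIO_DEVICE_FEATURE ioctl and structures from <linux/vfio.h>; the function name and the assumption that a VFIO device fd is already open are illustrative.

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Query the migration feature flags of an already-open VFIO device fd. */
static int probe_migration_flags(int device_fd)
{
	/* __u64 backing storage keeps the trailing payload 8-byte aligned. */
	__u64 buf[(sizeof(struct vfio_device_feature) +
		   sizeof(struct vfio_device_feature_migration) + 7) / 8] = {};
	struct vfio_device_feature *feature = (void *)buf;
	struct vfio_device_feature_migration *mig = (void *)feature->data;

	feature->argsz = sizeof(buf);
	feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION;

	if (ioctl(device_fd, VFIO_DEVICE_FEATURE, feature))
		return -1;	/* device does not report migration support */

	/* With this commit, PRE_COPY is reported whenever migration is. */
	printf("STOP_COPY:%d P2P:%d PRE_COPY:%d\n",
	       !!(mig->flags & VFIO_MIGRATION_STOP_COPY),
	       !!(mig->flags & VFIO_MIGRATION_P2P),
	       !!(mig->flags & VFIO_MIGRATION_PRE_COPY));
	return 0;
}

After this commit, such a probe on a bound mlx5 VF should never show migration support without PRE_COPY, since the driver bails out of mlx5vf_cmd_set_migratable() when the firmware lacks the required capabilities.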
@@ -233,6 +233,10 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
 	if (!MLX5_CAP_GEN(mvdev->mdev, migration))
 		goto end;
 
+	if (!(MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
+	      MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)))
+		goto end;
+
 	mvdev->vf_id = pci_iov_vf_id(pdev);
 	if (mvdev->vf_id < 0)
 		goto end;
@@ -262,17 +266,14 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
 	mvdev->migrate_cap = 1;
 	mvdev->core_device.vdev.migration_flags =
 		VFIO_MIGRATION_STOP_COPY |
-		VFIO_MIGRATION_P2P;
+		VFIO_MIGRATION_P2P |
+		VFIO_MIGRATION_PRE_COPY;
 	mvdev->core_device.vdev.mig_ops = mig_ops;
 	init_completion(&mvdev->tracker_comp);
 	if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
 		mvdev->core_device.vdev.log_ops = log_ops;
-	if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
-	    MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))
-		mvdev->core_device.vdev.migration_flags |=
-			VFIO_MIGRATION_PRE_COPY;
 	if (MLX5_CAP_GEN_2(mvdev->mdev, migration_in_chunks))
 		mvdev->chunk_mode = 1;
@@ -414,6 +415,50 @@ void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
 	kfree(buf);
 }
 
+static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
+				      unsigned int npages)
+{
+	unsigned int to_alloc = npages;
+	struct page **page_list;
+	unsigned long filled;
+	unsigned int to_fill;
+	int ret;
+
+	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
+	page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
+	if (!page_list)
+		return -ENOMEM;
+
+	do {
+		filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
+						page_list);
+		if (!filled) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		to_alloc -= filled;
+		ret = sg_alloc_append_table_from_pages(
+			&buf->table, page_list, filled, 0,
+			filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
+			GFP_KERNEL_ACCOUNT);
+		if (ret)
+			goto err;
+		buf->allocated_length += filled * PAGE_SIZE;
+		/* clean input for another bulk allocation */
+		memset(page_list, 0, filled * sizeof(*page_list));
+		to_fill = min_t(unsigned int, to_alloc,
+				PAGE_SIZE / sizeof(*page_list));
+	} while (to_alloc > 0);
+
+	kvfree(page_list);
+	return 0;
+
+err:
+	kvfree(page_list);
+	return ret;
+}
+
 struct mlx5_vhca_data_buffer *
 mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
 			 size_t length,
@@ -680,22 +725,20 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 		goto err_out;
 	}
 
-	if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
-		if (async_data->stop_copy_chunk) {
-			u8 header_idx = buf->stop_copy_chunk_num ?
-				buf->stop_copy_chunk_num - 1 : 0;
+	if (async_data->stop_copy_chunk) {
+		u8 header_idx = buf->stop_copy_chunk_num ?
+			buf->stop_copy_chunk_num - 1 : 0;
 
-			header_buf = migf->buf_header[header_idx];
-			migf->buf_header[header_idx] = NULL;
-		}
+		header_buf = migf->buf_header[header_idx];
+		migf->buf_header[header_idx] = NULL;
+	}
 
-		if (!header_buf) {
-			header_buf = mlx5vf_get_data_buffer(migf,
-				sizeof(struct mlx5_vf_migration_header), DMA_NONE);
-			if (IS_ERR(header_buf)) {
-				err = PTR_ERR(header_buf);
-				goto err_free;
-			}
+	if (!header_buf) {
+		header_buf = mlx5vf_get_data_buffer(migf,
+			sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+		if (IS_ERR(header_buf)) {
+			err = PTR_ERR(header_buf);
+			goto err_free;
 		}
 	}
...
@@ -13,9 +13,6 @@
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/qp.h>
 
-#define MLX5VF_PRE_COPY_SUPP(mvdev) \
-	((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY)
-
 enum mlx5_vf_migf_state {
 	MLX5_MIGF_STATE_ERROR = 1,
 	MLX5_MIGF_STATE_PRE_COPY_ERROR,
@@ -25,7 +22,6 @@ enum mlx5_vf_migf_state {
 };
 
 enum mlx5_vf_load_state {
-	MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER,
 	MLX5_VF_LOAD_STATE_READ_HEADER,
 	MLX5_VF_LOAD_STATE_PREP_HEADER_DATA,
 	MLX5_VF_LOAD_STATE_READ_HEADER_DATA,
@@ -228,8 +224,6 @@ struct mlx5_vhca_data_buffer *
 mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
 		       size_t length, enum dma_data_direction dma_dir);
 void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf);
-int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
-			       unsigned int npages);
 struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
 				       unsigned long offset);
 void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
...
@@ -65,50 +65,6 @@ mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
 	return NULL;
 }
 
-int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
-			       unsigned int npages)
-{
-	unsigned int to_alloc = npages;
-	struct page **page_list;
-	unsigned long filled;
-	unsigned int to_fill;
-	int ret;
-
-	to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
-	page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
-	if (!page_list)
-		return -ENOMEM;
-
-	do {
-		filled = alloc_pages_bulk_array(GFP_KERNEL_ACCOUNT, to_fill,
-						page_list);
-		if (!filled) {
-			ret = -ENOMEM;
-			goto err;
-		}
-
-		to_alloc -= filled;
-		ret = sg_alloc_append_table_from_pages(
-			&buf->table, page_list, filled, 0,
-			filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
-			GFP_KERNEL_ACCOUNT);
-		if (ret)
-			goto err;
-		buf->allocated_length += filled * PAGE_SIZE;
-		/* clean input for another bulk allocation */
-		memset(page_list, 0, filled * sizeof(*page_list));
-		to_fill = min_t(unsigned int, to_alloc,
-				PAGE_SIZE / sizeof(*page_list));
-	} while (to_alloc > 0);
-
-	kvfree(page_list);
-	return 0;
-
-err:
-	kvfree(page_list);
-	return ret;
-}
-
 static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
 {
 	mutex_lock(&migf->lock);
@@ -777,36 +733,6 @@ mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf,
 	return 0;
 }
 
-static int
-mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf,
-				   loff_t requested_length,
-				   const char __user **buf, size_t *len,
-				   loff_t *pos, ssize_t *done)
-{
-	int ret;
-
-	if (requested_length > MAX_LOAD_SIZE)
-		return -ENOMEM;
-
-	if (vhca_buf->allocated_length < requested_length) {
-		ret = mlx5vf_add_migration_pages(
-			vhca_buf,
-			DIV_ROUND_UP(requested_length - vhca_buf->allocated_length,
-				     PAGE_SIZE));
-		if (ret)
-			return ret;
-	}
-
-	while (*len) {
-		ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos,
-						    done);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
 static ssize_t
 mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf,
 			 struct mlx5_vhca_data_buffer *vhca_buf,
@@ -1038,13 +964,6 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 		migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE;
 		break;
 	}
-	case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER:
-		ret = mlx5vf_resume_read_image_no_header(vhca_buf,
-						requested_length,
-						&buf, &len, pos, &done);
-		if (ret)
-			goto out_unlock;
-		break;
 	case MLX5_VF_LOAD_STATE_READ_IMAGE:
 		ret = mlx5vf_resume_read_image(migf, vhca_buf,
 					       migf->record_size,
@@ -1114,21 +1033,16 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
 	}
 	migf->buf[0] = buf;
 
-	if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
-		buf = mlx5vf_alloc_data_buffer(migf,
-			sizeof(struct mlx5_vf_migration_header), DMA_NONE);
-		if (IS_ERR(buf)) {
-			ret = PTR_ERR(buf);
-			goto out_buf;
-		}
-
-		migf->buf_header[0] = buf;
-		migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
-	} else {
-		/* Initial state will be to read the image */
-		migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER;
+	buf = mlx5vf_alloc_data_buffer(migf,
+		sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+	if (IS_ERR(buf)) {
+		ret = PTR_ERR(buf);
+		goto out_buf;
 	}
 
+	migf->buf_header[0] = buf;
+	migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
+
 	stream_open(migf->filp->f_inode, migf->filp);
 	mutex_init(&migf->lock);
 	INIT_LIST_HEAD(&migf->buf_list);
@@ -1262,13 +1176,6 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
 	}
 
 	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
-		if (!MLX5VF_PRE_COPY_SUPP(mvdev)) {
-			ret = mlx5vf_cmd_load_vhca_state(mvdev,
-							 mvdev->resuming_migf,
-							 mvdev->resuming_migf->buf[0]);
-			if (ret)
-				return ERR_PTR(ret);
-		}
 		mlx5vf_disable_fds(mvdev, NULL);
 		return NULL;
 	}
...