Commit 77fa2fbe authored by Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:
 "vhost,virtio,vdpa: features, fixes, cleanups.

  vdpa/mlx5:
   - VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK
   - new maintainer

  vdpa:
   - support for vq descriptor mappings
   - decouple reset of iotlb mapping from device reset

  and fixes, cleanups all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (34 commits)
  vdpa_sim: implement .reset_map support
  vdpa/mlx5: implement .reset_map driver op
  vhost-vdpa: clean iotlb map during reset for older userspace
  vdpa: introduce .compat_reset operation callback
  vhost-vdpa: introduce IOTLB_PERSIST backend feature bit
  vhost-vdpa: reset vendor specific mapping to initial state in .release
  vdpa: introduce .reset_map operation callback
  virtio_pci: add check for common cfg size
  virtio-blk: fix implicit overflow on virtio_max_dma_size
  virtio_pci: add build offset check for the new common cfg items
  virtio: add definition of VIRTIO_F_NOTIF_CONFIG_DATA feature bit
  vduse: make vduse_class constant
  vhost-scsi: Spelling s/preceeding/preceding/g
  virtio: kdoc for struct virtio_pci_modern_device
  vdpa: Update sysfs ABI documentation
  MAINTAINERS: Add myself as mlx5_vdpa driver
  virtio-balloon: correct the comment of virtballoon_migratepage()
  mlx5_vdpa: offer VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK
  vdpa/mlx5: Update cvq iotlb mapping on ASID change
  vdpa/mlx5: Make iotlb helper functions more generic
  ...
parents 1cfb7511 86f6c224
What: /sys/bus/vdpa/driver_autoprobe What: /sys/bus/vdpa/drivers_autoprobe
Date: March 2020 Date: March 2020
Contact: virtualization@lists.linux-foundation.org Contact: virtualization@lists.linux-foundation.org
Description: Description:
...@@ -17,7 +17,7 @@ Description: ...@@ -17,7 +17,7 @@ Description:
Writing a device name to this file will cause the kernel binds Writing a device name to this file will cause the kernel binds
devices to a compatible driver. devices to a compatible driver.
This can be useful when /sys/bus/vdpa/driver_autoprobe is This can be useful when /sys/bus/vdpa/drivers_autoprobe is
disabled. disabled.
What: /sys/bus/vdpa/drivers/.../bind What: /sys/bus/vdpa/drivers/.../bind
......
...@@ -13790,6 +13790,12 @@ F: drivers/infiniband/hw/mlx5/ ...@@ -13790,6 +13790,12 @@ F: drivers/infiniband/hw/mlx5/
F: include/linux/mlx5/ F: include/linux/mlx5/
F: include/uapi/rdma/mlx5-abi.h F: include/uapi/rdma/mlx5-abi.h
MELLANOX MLX5 VDPA DRIVER
M: Dragos Tatulea <dtatulea@nvidia.com>
L: virtualization@lists.linux-foundation.org
S: Supported
F: drivers/vdpa/mlx5/
MELLANOX MLXCPLD I2C AND MUX DRIVER MELLANOX MLXCPLD I2C AND MUX DRIVER
M: Vadim Pasternak <vadimp@nvidia.com> M: Vadim Pasternak <vadimp@nvidia.com>
M: Michael Shych <michaelsh@nvidia.com> M: Michael Shych <michaelsh@nvidia.com>
......
...@@ -1311,6 +1311,7 @@ static int virtblk_probe(struct virtio_device *vdev) ...@@ -1311,6 +1311,7 @@ static int virtblk_probe(struct virtio_device *vdev)
u16 min_io_size; u16 min_io_size;
u8 physical_block_exp, alignment_offset; u8 physical_block_exp, alignment_offset;
unsigned int queue_depth; unsigned int queue_depth;
size_t max_dma_size;
if (!vdev->config->get) { if (!vdev->config->get) {
dev_err(&vdev->dev, "%s failure: config access disabled\n", dev_err(&vdev->dev, "%s failure: config access disabled\n",
...@@ -1409,7 +1410,8 @@ static int virtblk_probe(struct virtio_device *vdev) ...@@ -1409,7 +1410,8 @@ static int virtblk_probe(struct virtio_device *vdev)
/* No real sector limit. */ /* No real sector limit. */
blk_queue_max_hw_sectors(q, UINT_MAX); blk_queue_max_hw_sectors(q, UINT_MAX);
max_size = virtio_max_dma_size(vdev); max_dma_size = virtio_max_dma_size(vdev);
max_size = max_dma_size > U32_MAX ? U32_MAX : max_dma_size;
/* Host can optionally specify maximum segment size and number of /* Host can optionally specify maximum segment size and number of
* segments. */ * segments. */
......
...@@ -31,11 +31,9 @@ struct mlx5_vdpa_mr { ...@@ -31,11 +31,9 @@ struct mlx5_vdpa_mr {
struct list_head head; struct list_head head;
unsigned long num_directs; unsigned long num_directs;
unsigned long num_klms; unsigned long num_klms;
/* state of dvq mr */
bool initialized;
/* serialize mkey creation and destruction */ struct vhost_iotlb *iotlb;
struct mutex mkey_mtx;
bool user_mr; bool user_mr;
}; };
...@@ -74,11 +72,12 @@ struct mlx5_vdpa_wq_ent { ...@@ -74,11 +72,12 @@ struct mlx5_vdpa_wq_ent {
enum { enum {
MLX5_VDPA_DATAVQ_GROUP, MLX5_VDPA_DATAVQ_GROUP,
MLX5_VDPA_CVQ_GROUP, MLX5_VDPA_CVQ_GROUP,
MLX5_VDPA_DATAVQ_DESC_GROUP,
MLX5_VDPA_NUMVQ_GROUPS MLX5_VDPA_NUMVQ_GROUPS
}; };
enum { enum {
MLX5_VDPA_NUM_AS = MLX5_VDPA_NUMVQ_GROUPS MLX5_VDPA_NUM_AS = 2
}; };
struct mlx5_vdpa_dev { struct mlx5_vdpa_dev {
...@@ -93,7 +92,9 @@ struct mlx5_vdpa_dev { ...@@ -93,7 +92,9 @@ struct mlx5_vdpa_dev {
u16 max_idx; u16 max_idx;
u32 generation; u32 generation;
struct mlx5_vdpa_mr mr; struct mlx5_vdpa_mr *mr[MLX5_VDPA_NUM_AS];
/* serialize mr access */
struct mutex mr_mtx;
struct mlx5_control_vq cvq; struct mlx5_control_vq cvq;
struct workqueue_struct *wq; struct workqueue_struct *wq;
unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS]; unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
...@@ -114,12 +115,19 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev); ...@@ -114,12 +115,19 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev);
int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in, int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in,
int inlen); int inlen);
int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey); int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey);
int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
bool *change_map, unsigned int asid); struct vhost_iotlb *iotlb);
int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev);
unsigned int asid); void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev,
void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev); struct mlx5_vdpa_mr *mr);
void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid); void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
struct mlx5_vdpa_mr *mr,
unsigned int asid);
int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev,
struct vhost_iotlb *iotlb,
unsigned int asid);
int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev);
int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid);
#define mlx5_vdpa_warn(__dev, format, ...) \ #define mlx5_vdpa_warn(__dev, format, ...) \
dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \ dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \
......
...@@ -301,10 +301,13 @@ static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct ...@@ -301,10 +301,13 @@ static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct
sg_free_table(&mr->sg_head); sg_free_table(&mr->sg_head);
} }
static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm, static int add_direct_chain(struct mlx5_vdpa_dev *mvdev,
struct mlx5_vdpa_mr *mr,
u64 start,
u64 size,
u8 perm,
struct vhost_iotlb *iotlb) struct vhost_iotlb *iotlb)
{ {
struct mlx5_vdpa_mr *mr = &mvdev->mr;
struct mlx5_vdpa_direct_mr *dmr; struct mlx5_vdpa_direct_mr *dmr;
struct mlx5_vdpa_direct_mr *n; struct mlx5_vdpa_direct_mr *n;
LIST_HEAD(tmp); LIST_HEAD(tmp);
...@@ -354,9 +357,10 @@ static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 ...@@ -354,9 +357,10 @@ static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8
* indirect memory key that provides access to the enitre address space given * indirect memory key that provides access to the enitre address space given
* by iotlb. * by iotlb.
*/ */
static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) static int create_user_mr(struct mlx5_vdpa_dev *mvdev,
struct mlx5_vdpa_mr *mr,
struct vhost_iotlb *iotlb)
{ {
struct mlx5_vdpa_mr *mr = &mvdev->mr;
struct mlx5_vdpa_direct_mr *dmr; struct mlx5_vdpa_direct_mr *dmr;
struct mlx5_vdpa_direct_mr *n; struct mlx5_vdpa_direct_mr *n;
struct vhost_iotlb_map *map; struct vhost_iotlb_map *map;
...@@ -384,7 +388,7 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb ...@@ -384,7 +388,7 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb
LOG_MAX_KLM_SIZE); LOG_MAX_KLM_SIZE);
mr->num_klms += nnuls; mr->num_klms += nnuls;
} }
err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb); err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
if (err) if (err)
goto err_chain; goto err_chain;
} }
...@@ -393,7 +397,7 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb ...@@ -393,7 +397,7 @@ static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb
pperm = map->perm; pperm = map->perm;
} }
} }
err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb); err = add_direct_chain(mvdev, mr, ps, pe - ps, pperm, iotlb);
if (err) if (err)
goto err_chain; goto err_chain;
...@@ -450,20 +454,23 @@ static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) ...@@ -450,20 +454,23 @@ static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
mlx5_vdpa_destroy_mkey(mvdev, mr->mkey); mlx5_vdpa_destroy_mkey(mvdev, mr->mkey);
} }
static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src) static int dup_iotlb(struct vhost_iotlb *dst, struct vhost_iotlb *src)
{ {
struct vhost_iotlb_map *map; struct vhost_iotlb_map *map;
u64 start = 0, last = ULLONG_MAX; u64 start = 0, last = ULLONG_MAX;
int err; int err;
if (dst == src)
return -EINVAL;
if (!src) { if (!src) {
err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW); err = vhost_iotlb_add_range(dst, start, last, start, VHOST_ACCESS_RW);
return err; return err;
} }
for (map = vhost_iotlb_itree_first(src, start, last); map; for (map = vhost_iotlb_itree_first(src, start, last); map;
map = vhost_iotlb_itree_next(map, start, last)) { map = vhost_iotlb_itree_next(map, start, last)) {
err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last, err = vhost_iotlb_add_range(dst, map->start, map->last,
map->addr, map->perm); map->addr, map->perm);
if (err) if (err)
return err; return err;
...@@ -471,9 +478,9 @@ static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src) ...@@ -471,9 +478,9 @@ static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
return 0; return 0;
} }
static void prune_iotlb(struct mlx5_vdpa_dev *mvdev) static void prune_iotlb(struct vhost_iotlb *iotlb)
{ {
vhost_iotlb_del_range(mvdev->cvq.iotlb, 0, ULLONG_MAX); vhost_iotlb_del_range(iotlb, 0, ULLONG_MAX);
} }
static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr) static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
...@@ -489,133 +496,169 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr ...@@ -489,133 +496,169 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
} }
} }
static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) static void _mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
{ {
if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) if (mr->user_mr)
return; destroy_user_mr(mvdev, mr);
else
destroy_dma_mr(mvdev, mr);
prune_iotlb(mvdev); vhost_iotlb_free(mr->iotlb);
} }
static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid) void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev,
struct mlx5_vdpa_mr *mr)
{ {
struct mlx5_vdpa_mr *mr = &mvdev->mr; if (!mr)
if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
return; return;
if (!mr->initialized) mutex_lock(&mvdev->mr_mtx);
return;
if (mr->user_mr) _mlx5_vdpa_destroy_mr(mvdev, mr);
destroy_user_mr(mvdev, mr);
else for (int i = 0; i < MLX5_VDPA_NUM_AS; i++) {
destroy_dma_mr(mvdev, mr); if (mvdev->mr[i] == mr)
mvdev->mr[i] = NULL;
}
mr->initialized = false; mutex_unlock(&mvdev->mr_mtx);
kfree(mr);
} }
void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid) void mlx5_vdpa_update_mr(struct mlx5_vdpa_dev *mvdev,
struct mlx5_vdpa_mr *new_mr,
unsigned int asid)
{ {
struct mlx5_vdpa_mr *mr = &mvdev->mr; struct mlx5_vdpa_mr *old_mr = mvdev->mr[asid];
mutex_lock(&mr->mkey_mtx); mutex_lock(&mvdev->mr_mtx);
_mlx5_vdpa_destroy_dvq_mr(mvdev, asid); mvdev->mr[asid] = new_mr;
_mlx5_vdpa_destroy_cvq_mr(mvdev, asid); if (old_mr) {
_mlx5_vdpa_destroy_mr(mvdev, old_mr);
kfree(old_mr);
}
mutex_unlock(&mr->mkey_mtx); mutex_unlock(&mvdev->mr_mtx);
}
void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
{
mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
} }
static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev, void mlx5_vdpa_destroy_mr_resources(struct mlx5_vdpa_dev *mvdev)
struct vhost_iotlb *iotlb,
unsigned int asid)
{ {
if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid) for (int i = 0; i < MLX5_VDPA_NUM_AS; i++)
return 0; mlx5_vdpa_destroy_mr(mvdev, mvdev->mr[i]);
return dup_iotlb(mvdev, iotlb); prune_iotlb(mvdev->cvq.iotlb);
} }
static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev, static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
struct vhost_iotlb *iotlb, struct mlx5_vdpa_mr *mr,
unsigned int asid) struct vhost_iotlb *iotlb)
{ {
struct mlx5_vdpa_mr *mr = &mvdev->mr;
int err; int err;
if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
return 0;
if (mr->initialized)
return 0;
if (iotlb) if (iotlb)
err = create_user_mr(mvdev, iotlb); err = create_user_mr(mvdev, mr, iotlb);
else else
err = create_dma_mr(mvdev, mr); err = create_dma_mr(mvdev, mr);
if (err) if (err)
return err; return err;
mr->initialized = true; mr->iotlb = vhost_iotlb_alloc(0, 0);
if (!mr->iotlb) {
err = -ENOMEM;
goto err_mr;
}
err = dup_iotlb(mr->iotlb, iotlb);
if (err)
goto err_iotlb;
return 0; return 0;
err_iotlb:
vhost_iotlb_free(mr->iotlb);
err_mr:
if (iotlb)
destroy_user_mr(mvdev, mr);
else
destroy_dma_mr(mvdev, mr);
return err;
} }
static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
struct vhost_iotlb *iotlb, unsigned int asid) struct vhost_iotlb *iotlb)
{ {
struct mlx5_vdpa_mr *mr;
int err; int err;
err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid); mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (err) if (!mr)
return err; return ERR_PTR(-ENOMEM);
mutex_lock(&mvdev->mr_mtx);
err = _mlx5_vdpa_create_mr(mvdev, mr, iotlb);
mutex_unlock(&mvdev->mr_mtx);
err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
if (err) if (err)
goto out_err; goto out_err;
return 0; return mr;
out_err: out_err:
_mlx5_vdpa_destroy_dvq_mr(mvdev, asid); kfree(mr);
return ERR_PTR(err);
return err;
} }
int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, int mlx5_vdpa_update_cvq_iotlb(struct mlx5_vdpa_dev *mvdev,
unsigned int asid) struct vhost_iotlb *iotlb,
unsigned int asid)
{ {
int err; int err;
mutex_lock(&mvdev->mr.mkey_mtx); if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid); return 0;
mutex_unlock(&mvdev->mr.mkey_mtx);
spin_lock(&mvdev->cvq.iommu_lock);
prune_iotlb(mvdev->cvq.iotlb);
err = dup_iotlb(mvdev->cvq.iotlb, iotlb);
spin_unlock(&mvdev->cvq.iommu_lock);
return err; return err;
} }
int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, int mlx5_vdpa_create_dma_mr(struct mlx5_vdpa_dev *mvdev)
bool *change_map, unsigned int asid)
{ {
struct mlx5_vdpa_mr *mr = &mvdev->mr; struct mlx5_vdpa_mr *mr;
int err = 0;
mr = mlx5_vdpa_create_mr(mvdev, NULL);
if (IS_ERR(mr))
return PTR_ERR(mr);
*change_map = false; mlx5_vdpa_update_mr(mvdev, mr, 0);
mutex_lock(&mr->mkey_mtx);
if (mr->initialized) { return mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, 0);
mlx5_vdpa_info(mvdev, "memory map update\n"); }
*change_map = true;
int mlx5_vdpa_reset_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
{
if (asid >= MLX5_VDPA_NUM_AS)
return -EINVAL;
mlx5_vdpa_destroy_mr(mvdev, mvdev->mr[asid]);
if (asid == 0 && MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
if (mlx5_vdpa_create_dma_mr(mvdev))
mlx5_vdpa_warn(mvdev, "create DMA MR failed\n");
} else {
mlx5_vdpa_update_cvq_iotlb(mvdev, NULL, asid);
} }
if (!*change_map)
err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid);
mutex_unlock(&mr->mkey_mtx);
return err; return 0;
} }
...@@ -256,7 +256,7 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) ...@@ -256,7 +256,7 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
mlx5_vdpa_warn(mvdev, "resources already allocated\n"); mlx5_vdpa_warn(mvdev, "resources already allocated\n");
return -EINVAL; return -EINVAL;
} }
mutex_init(&mvdev->mr.mkey_mtx); mutex_init(&mvdev->mr_mtx);
res->uar = mlx5_get_uars_page(mdev); res->uar = mlx5_get_uars_page(mdev);
if (IS_ERR(res->uar)) { if (IS_ERR(res->uar)) {
err = PTR_ERR(res->uar); err = PTR_ERR(res->uar);
...@@ -301,7 +301,7 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev) ...@@ -301,7 +301,7 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
err_uctx: err_uctx:
mlx5_put_uars_page(mdev, res->uar); mlx5_put_uars_page(mdev, res->uar);
err_uars: err_uars:
mutex_destroy(&mvdev->mr.mkey_mtx); mutex_destroy(&mvdev->mr_mtx);
return err; return err;
} }
...@@ -318,6 +318,6 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev) ...@@ -318,6 +318,6 @@ void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev)
dealloc_pd(mvdev, res->pdn, res->uid); dealloc_pd(mvdev, res->pdn, res->uid);
destroy_uctx(mvdev, res->uid); destroy_uctx(mvdev, res->uid);
mlx5_put_uars_page(mvdev->mdev, res->uar); mlx5_put_uars_page(mvdev->mdev, res->uar);
mutex_destroy(&mvdev->mr.mkey_mtx); mutex_destroy(&mvdev->mr_mtx);
res->valid = false; res->valid = false;
} }
This diff is collapsed.
...@@ -139,7 +139,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim, ...@@ -139,7 +139,7 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim,
vq->vring.notify = NULL; vq->vring.notify = NULL;
} }
static void vdpasim_do_reset(struct vdpasim *vdpasim) static void vdpasim_do_reset(struct vdpasim *vdpasim, u32 flags)
{ {
int i; int i;
...@@ -151,11 +151,13 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim) ...@@ -151,11 +151,13 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
&vdpasim->iommu_lock); &vdpasim->iommu_lock);
} }
for (i = 0; i < vdpasim->dev_attr.nas; i++) { if (flags & VDPA_RESET_F_CLEAN_MAP) {
vhost_iotlb_reset(&vdpasim->iommu[i]); for (i = 0; i < vdpasim->dev_attr.nas; i++) {
vhost_iotlb_add_range(&vdpasim->iommu[i], 0, ULONG_MAX, vhost_iotlb_reset(&vdpasim->iommu[i]);
0, VHOST_MAP_RW); vhost_iotlb_add_range(&vdpasim->iommu[i], 0, ULONG_MAX,
vdpasim->iommu_pt[i] = true; 0, VHOST_MAP_RW);
vdpasim->iommu_pt[i] = true;
}
} }
vdpasim->running = true; vdpasim->running = true;
...@@ -259,8 +261,12 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr, ...@@ -259,8 +261,12 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
if (!vdpasim->iommu_pt) if (!vdpasim->iommu_pt)
goto err_iommu; goto err_iommu;
for (i = 0; i < vdpasim->dev_attr.nas; i++) for (i = 0; i < vdpasim->dev_attr.nas; i++) {
vhost_iotlb_init(&vdpasim->iommu[i], max_iotlb_entries, 0); vhost_iotlb_init(&vdpasim->iommu[i], max_iotlb_entries, 0);
vhost_iotlb_add_range(&vdpasim->iommu[i], 0, ULONG_MAX, 0,
VHOST_MAP_RW);
vdpasim->iommu_pt[i] = true;
}
for (i = 0; i < dev_attr->nvqs; i++) for (i = 0; i < dev_attr->nvqs; i++)
vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0], vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0],
...@@ -480,18 +486,23 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) ...@@ -480,18 +486,23 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
mutex_unlock(&vdpasim->mutex); mutex_unlock(&vdpasim->mutex);
} }
static int vdpasim_reset(struct vdpa_device *vdpa) static int vdpasim_compat_reset(struct vdpa_device *vdpa, u32 flags)
{ {
struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
mutex_lock(&vdpasim->mutex); mutex_lock(&vdpasim->mutex);
vdpasim->status = 0; vdpasim->status = 0;
vdpasim_do_reset(vdpasim); vdpasim_do_reset(vdpasim, flags);
mutex_unlock(&vdpasim->mutex); mutex_unlock(&vdpasim->mutex);
return 0; return 0;
} }
static int vdpasim_reset(struct vdpa_device *vdpa)
{
return vdpasim_compat_reset(vdpa, 0);
}
static int vdpasim_suspend(struct vdpa_device *vdpa) static int vdpasim_suspend(struct vdpa_device *vdpa)
{ {
struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
...@@ -637,6 +648,25 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid, ...@@ -637,6 +648,25 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid,
return ret; return ret;
} }
static int vdpasim_reset_map(struct vdpa_device *vdpa, unsigned int asid)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
if (asid >= vdpasim->dev_attr.nas)
return -EINVAL;
spin_lock(&vdpasim->iommu_lock);
if (vdpasim->iommu_pt[asid])
goto out;
vhost_iotlb_reset(&vdpasim->iommu[asid]);
vhost_iotlb_add_range(&vdpasim->iommu[asid], 0, ULONG_MAX,
0, VHOST_MAP_RW);
vdpasim->iommu_pt[asid] = true;
out:
spin_unlock(&vdpasim->iommu_lock);
return 0;
}
static int vdpasim_bind_mm(struct vdpa_device *vdpa, struct mm_struct *mm) static int vdpasim_bind_mm(struct vdpa_device *vdpa, struct mm_struct *mm)
{ {
struct vdpasim *vdpasim = vdpa_to_sim(vdpa); struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
...@@ -749,6 +779,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = { ...@@ -749,6 +779,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
.get_status = vdpasim_get_status, .get_status = vdpasim_get_status,
.set_status = vdpasim_set_status, .set_status = vdpasim_set_status,
.reset = vdpasim_reset, .reset = vdpasim_reset,
.compat_reset = vdpasim_compat_reset,
.suspend = vdpasim_suspend, .suspend = vdpasim_suspend,
.resume = vdpasim_resume, .resume = vdpasim_resume,
.get_config_size = vdpasim_get_config_size, .get_config_size = vdpasim_get_config_size,
...@@ -759,6 +790,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = { ...@@ -759,6 +790,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
.set_group_asid = vdpasim_set_group_asid, .set_group_asid = vdpasim_set_group_asid,
.dma_map = vdpasim_dma_map, .dma_map = vdpasim_dma_map,
.dma_unmap = vdpasim_dma_unmap, .dma_unmap = vdpasim_dma_unmap,
.reset_map = vdpasim_reset_map,
.bind_mm = vdpasim_bind_mm, .bind_mm = vdpasim_bind_mm,
.unbind_mm = vdpasim_unbind_mm, .unbind_mm = vdpasim_unbind_mm,
.free = vdpasim_free, .free = vdpasim_free,
...@@ -787,6 +819,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { ...@@ -787,6 +819,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
.get_status = vdpasim_get_status, .get_status = vdpasim_get_status,
.set_status = vdpasim_set_status, .set_status = vdpasim_set_status,
.reset = vdpasim_reset, .reset = vdpasim_reset,
.compat_reset = vdpasim_compat_reset,
.suspend = vdpasim_suspend, .suspend = vdpasim_suspend,
.resume = vdpasim_resume, .resume = vdpasim_resume,
.get_config_size = vdpasim_get_config_size, .get_config_size = vdpasim_get_config_size,
...@@ -796,6 +829,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { ...@@ -796,6 +829,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
.get_iova_range = vdpasim_get_iova_range, .get_iova_range = vdpasim_get_iova_range,
.set_group_asid = vdpasim_set_group_asid, .set_group_asid = vdpasim_set_group_asid,
.set_map = vdpasim_set_map, .set_map = vdpasim_set_map,
.reset_map = vdpasim_reset_map,
.bind_mm = vdpasim_bind_mm, .bind_mm = vdpasim_bind_mm,
.unbind_mm = vdpasim_unbind_mm, .unbind_mm = vdpasim_unbind_mm,
.free = vdpasim_free, .free = vdpasim_free,
......
...@@ -134,7 +134,6 @@ static DEFINE_MUTEX(vduse_lock); ...@@ -134,7 +134,6 @@ static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr); static DEFINE_IDR(vduse_idr);
static dev_t vduse_major; static dev_t vduse_major;
static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev; static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev; static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq; static struct workqueue_struct *vduse_irq_wq;
...@@ -1528,6 +1527,16 @@ static const struct kobj_type vq_type = { ...@@ -1528,6 +1527,16 @@ static const struct kobj_type vq_type = {
.default_groups = vq_groups, .default_groups = vq_groups,
}; };
static char *vduse_devnode(const struct device *dev, umode_t *mode)
{
return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}
static const struct class vduse_class = {
.name = "vduse",
.devnode = vduse_devnode,
};
static void vduse_dev_deinit_vqs(struct vduse_dev *dev) static void vduse_dev_deinit_vqs(struct vduse_dev *dev)
{ {
int i; int i;
...@@ -1638,7 +1647,7 @@ static int vduse_destroy_dev(char *name) ...@@ -1638,7 +1647,7 @@ static int vduse_destroy_dev(char *name)
mutex_unlock(&dev->lock); mutex_unlock(&dev->lock);
vduse_dev_reset(dev); vduse_dev_reset(dev);
device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor)); device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
idr_remove(&vduse_idr, dev->minor); idr_remove(&vduse_idr, dev->minor);
kvfree(dev->config); kvfree(dev->config);
vduse_dev_deinit_vqs(dev); vduse_dev_deinit_vqs(dev);
...@@ -1805,7 +1814,7 @@ static int vduse_create_dev(struct vduse_dev_config *config, ...@@ -1805,7 +1814,7 @@ static int vduse_create_dev(struct vduse_dev_config *config,
dev->minor = ret; dev->minor = ret;
dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT; dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
dev->dev = device_create_with_groups(vduse_class, NULL, dev->dev = device_create_with_groups(&vduse_class, NULL,
MKDEV(MAJOR(vduse_major), dev->minor), MKDEV(MAJOR(vduse_major), dev->minor),
dev, vduse_dev_groups, "%s", config->name); dev, vduse_dev_groups, "%s", config->name);
if (IS_ERR(dev->dev)) { if (IS_ERR(dev->dev)) {
...@@ -1821,7 +1830,7 @@ static int vduse_create_dev(struct vduse_dev_config *config, ...@@ -1821,7 +1830,7 @@ static int vduse_create_dev(struct vduse_dev_config *config,
return 0; return 0;
err_vqs: err_vqs:
device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor)); device_destroy(&vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
err_dev: err_dev:
idr_remove(&vduse_idr, dev->minor); idr_remove(&vduse_idr, dev->minor);
err_idr: err_idr:
...@@ -1934,11 +1943,6 @@ static const struct file_operations vduse_ctrl_fops = { ...@@ -1934,11 +1943,6 @@ static const struct file_operations vduse_ctrl_fops = {
.llseek = noop_llseek, .llseek = noop_llseek,
}; };
static char *vduse_devnode(const struct device *dev, umode_t *mode)
{
return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}
struct vduse_mgmt_dev { struct vduse_mgmt_dev {
struct vdpa_mgmt_dev mgmt_dev; struct vdpa_mgmt_dev mgmt_dev;
struct device dev; struct device dev;
...@@ -2082,11 +2086,9 @@ static int vduse_init(void) ...@@ -2082,11 +2086,9 @@ static int vduse_init(void)
int ret; int ret;
struct device *dev; struct device *dev;
vduse_class = class_create("vduse"); ret = class_register(&vduse_class);
if (IS_ERR(vduse_class)) if (ret)
return PTR_ERR(vduse_class); return ret;
vduse_class->devnode = vduse_devnode;
ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse"); ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
if (ret) if (ret)
...@@ -2099,7 +2101,7 @@ static int vduse_init(void) ...@@ -2099,7 +2101,7 @@ static int vduse_init(void)
if (ret) if (ret)
goto err_ctrl_cdev; goto err_ctrl_cdev;
dev = device_create(vduse_class, NULL, vduse_major, NULL, "control"); dev = device_create(&vduse_class, NULL, vduse_major, NULL, "control");
if (IS_ERR(dev)) { if (IS_ERR(dev)) {
ret = PTR_ERR(dev); ret = PTR_ERR(dev);
goto err_device; goto err_device;
...@@ -2141,13 +2143,13 @@ static int vduse_init(void) ...@@ -2141,13 +2143,13 @@ static int vduse_init(void)
err_wq: err_wq:
cdev_del(&vduse_cdev); cdev_del(&vduse_cdev);
err_cdev: err_cdev:
device_destroy(vduse_class, vduse_major); device_destroy(&vduse_class, vduse_major);
err_device: err_device:
cdev_del(&vduse_ctrl_cdev); cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev: err_ctrl_cdev:
unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX); unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region: err_chardev_region:
class_destroy(vduse_class); class_unregister(&vduse_class);
return ret; return ret;
} }
module_init(vduse_init); module_init(vduse_init);
...@@ -2159,10 +2161,10 @@ static void vduse_exit(void) ...@@ -2159,10 +2161,10 @@ static void vduse_exit(void)
destroy_workqueue(vduse_irq_bound_wq); destroy_workqueue(vduse_irq_bound_wq);
destroy_workqueue(vduse_irq_wq); destroy_workqueue(vduse_irq_wq);
cdev_del(&vduse_cdev); cdev_del(&vduse_cdev);
device_destroy(vduse_class, vduse_major); device_destroy(&vduse_class, vduse_major);
cdev_del(&vduse_ctrl_cdev); cdev_del(&vduse_ctrl_cdev);
unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX); unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
class_destroy(vduse_class); class_unregister(&vduse_class);
} }
module_exit(vduse_exit); module_exit(vduse_exit);
......
...@@ -1158,7 +1158,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) ...@@ -1158,7 +1158,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
/* /*
* Set prot_iter to data_iter and truncate it to * Set prot_iter to data_iter and truncate it to
* prot_bytes, and advance data_iter past any * prot_bytes, and advance data_iter past any
* preceeding prot_bytes that may be present. * preceding prot_bytes that may be present.
* *
* Also fix up the exp_data_len to reflect only the * Also fix up the exp_data_len to reflect only the
* actual data payload length. * actual data payload length.
......
...@@ -131,6 +131,15 @@ static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v, ...@@ -131,6 +131,15 @@ static struct vhost_vdpa_as *vhost_vdpa_find_alloc_as(struct vhost_vdpa *v,
return vhost_vdpa_alloc_as(v, asid); return vhost_vdpa_alloc_as(v, asid);
} }
static void vhost_vdpa_reset_map(struct vhost_vdpa *v, u32 asid)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
if (ops->reset_map)
ops->reset_map(vdpa, asid);
}
static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
{ {
struct vhost_vdpa_as *as = asid_to_as(v, asid); struct vhost_vdpa_as *as = asid_to_as(v, asid);
...@@ -140,6 +149,14 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) ...@@ -140,6 +149,14 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid)
hlist_del(&as->hash_link); hlist_del(&as->hash_link);
vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid); vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid);
/*
* Devices with vendor specific IOMMU may need to restore
* iotlb to the initial or default state, which cannot be
* cleaned up in the all range unmap call above. Give them
* a chance to clean up or reset the map to the desired
* state.
*/
vhost_vdpa_reset_map(v, asid);
kfree(as); kfree(as);
return 0; return 0;
...@@ -210,13 +227,24 @@ static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) ...@@ -210,13 +227,24 @@ static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
irq_bypass_unregister_producer(&vq->call_ctx.producer); irq_bypass_unregister_producer(&vq->call_ctx.producer);
} }
static int vhost_vdpa_reset(struct vhost_vdpa *v) static int _compat_vdpa_reset(struct vhost_vdpa *v)
{ {
struct vdpa_device *vdpa = v->vdpa; struct vdpa_device *vdpa = v->vdpa;
u32 flags = 0;
v->in_batch = 0; if (v->vdev.vqs) {
flags |= !vhost_backend_has_feature(v->vdev.vqs[0],
VHOST_BACKEND_F_IOTLB_PERSIST) ?
VDPA_RESET_F_CLEAN_MAP : 0;
}
return vdpa_reset(vdpa); return vdpa_reset(vdpa, flags);
}
static int vhost_vdpa_reset(struct vhost_vdpa *v)
{
v->in_batch = 0;
return _compat_vdpa_reset(v);
} }
static long vhost_vdpa_bind_mm(struct vhost_vdpa *v) static long vhost_vdpa_bind_mm(struct vhost_vdpa *v)
...@@ -295,7 +323,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) ...@@ -295,7 +323,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
vhost_vdpa_unsetup_vq_irq(v, i); vhost_vdpa_unsetup_vq_irq(v, i);
if (status == 0) { if (status == 0) {
ret = vdpa_reset(vdpa); ret = _compat_vdpa_reset(v);
if (ret) if (ret)
return ret; return ret;
} else } else
...@@ -389,6 +417,14 @@ static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v) ...@@ -389,6 +417,14 @@ static bool vhost_vdpa_can_resume(const struct vhost_vdpa *v)
return ops->resume; return ops->resume;
} }
/*
 * Report whether the parent driver implements the optional
 * .get_vq_desc_group config op.
 */
static bool vhost_vdpa_has_desc_group(const struct vhost_vdpa *v)
{
	return !!v->vdpa->config->get_vq_desc_group;
}
static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{ {
struct vdpa_device *vdpa = v->vdpa; struct vdpa_device *vdpa = v->vdpa;
...@@ -414,6 +450,15 @@ static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v) ...@@ -414,6 +450,15 @@ static u64 vhost_vdpa_get_backend_features(const struct vhost_vdpa *v)
return ops->get_backend_features(vdpa); return ops->get_backend_features(vdpa);
} }
static bool vhost_vdpa_has_persistent_map(const struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
return (!ops->set_map && !ops->dma_map) || ops->reset_map ||
vhost_vdpa_get_backend_features(v) & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
}
static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{ {
struct vdpa_device *vdpa = v->vdpa; struct vdpa_device *vdpa = v->vdpa;
...@@ -605,6 +650,16 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, ...@@ -605,6 +650,16 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
else if (copy_to_user(argp, &s, sizeof(s))) else if (copy_to_user(argp, &s, sizeof(s)))
return -EFAULT; return -EFAULT;
return 0; return 0;
case VHOST_VDPA_GET_VRING_DESC_GROUP:
if (!vhost_vdpa_has_desc_group(v))
return -EOPNOTSUPP;
s.index = idx;
s.num = ops->get_vq_desc_group(vdpa, idx);
if (s.num >= vdpa->ngroups)
return -EIO;
else if (copy_to_user(argp, &s, sizeof(s)))
return -EFAULT;
return 0;
case VHOST_VDPA_SET_GROUP_ASID: case VHOST_VDPA_SET_GROUP_ASID:
if (copy_from_user(&s, argp, sizeof(s))) if (copy_from_user(&s, argp, sizeof(s)))
return -EFAULT; return -EFAULT;
...@@ -690,6 +745,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, ...@@ -690,6 +745,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
if (copy_from_user(&features, featurep, sizeof(features))) if (copy_from_user(&features, featurep, sizeof(features)))
return -EFAULT; return -EFAULT;
if (features & ~(VHOST_VDPA_BACKEND_FEATURES | if (features & ~(VHOST_VDPA_BACKEND_FEATURES |
BIT_ULL(VHOST_BACKEND_F_DESC_ASID) |
BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST) |
BIT_ULL(VHOST_BACKEND_F_SUSPEND) | BIT_ULL(VHOST_BACKEND_F_SUSPEND) |
BIT_ULL(VHOST_BACKEND_F_RESUME) | BIT_ULL(VHOST_BACKEND_F_RESUME) |
BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK))) BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK)))
...@@ -700,6 +757,15 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, ...@@ -700,6 +757,15 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) && if ((features & BIT_ULL(VHOST_BACKEND_F_RESUME)) &&
!vhost_vdpa_can_resume(v)) !vhost_vdpa_can_resume(v))
return -EOPNOTSUPP; return -EOPNOTSUPP;
if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
!(features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)))
return -EINVAL;
if ((features & BIT_ULL(VHOST_BACKEND_F_DESC_ASID)) &&
!vhost_vdpa_has_desc_group(v))
return -EOPNOTSUPP;
if ((features & BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST)) &&
!vhost_vdpa_has_persistent_map(v))
return -EOPNOTSUPP;
vhost_set_backend_features(&v->vdev, features); vhost_set_backend_features(&v->vdev, features);
return 0; return 0;
} }
...@@ -753,6 +819,10 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, ...@@ -753,6 +819,10 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND); features |= BIT_ULL(VHOST_BACKEND_F_SUSPEND);
if (vhost_vdpa_can_resume(v)) if (vhost_vdpa_can_resume(v))
features |= BIT_ULL(VHOST_BACKEND_F_RESUME); features |= BIT_ULL(VHOST_BACKEND_F_RESUME);
if (vhost_vdpa_has_desc_group(v))
features |= BIT_ULL(VHOST_BACKEND_F_DESC_ASID);
if (vhost_vdpa_has_persistent_map(v))
features |= BIT_ULL(VHOST_BACKEND_F_IOTLB_PERSIST);
features |= vhost_vdpa_get_backend_features(v); features |= vhost_vdpa_get_backend_features(v);
if (copy_to_user(featurep, &features, sizeof(features))) if (copy_to_user(featurep, &features, sizeof(features)))
r = -EFAULT; r = -EFAULT;
...@@ -1285,6 +1355,7 @@ static void vhost_vdpa_cleanup(struct vhost_vdpa *v) ...@@ -1285,6 +1355,7 @@ static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
vhost_vdpa_free_domain(v); vhost_vdpa_free_domain(v);
vhost_dev_cleanup(&v->vdev); vhost_dev_cleanup(&v->vdev);
kfree(v->vdev.vqs); kfree(v->vdev.vqs);
v->vdev.vqs = NULL;
} }
static int vhost_vdpa_open(struct inode *inode, struct file *filep) static int vhost_vdpa_open(struct inode *inode, struct file *filep)
......
...@@ -745,7 +745,7 @@ static void report_free_page_func(struct work_struct *work) ...@@ -745,7 +745,7 @@ static void report_free_page_func(struct work_struct *work)
* 2) update the host about the old page removed from vb->pages list; * 2) update the host about the old page removed from vb->pages list;
* *
* This function performs the balloon page migration task. * This function performs the balloon page migration task.
* Called through balloon_mapping->a_ops->migratepage * Called through movable_operations->migrate_page
*/ */
static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
struct page *newpage, struct page *page, enum migrate_mode mode) struct page *newpage, struct page *page, enum migrate_mode mode)
......
...@@ -39,6 +39,39 @@ static void vp_transport_features(struct virtio_device *vdev, u64 features) ...@@ -39,6 +39,39 @@ static void vp_transport_features(struct virtio_device *vdev, u64 features)
__virtio_set_bit(vdev, VIRTIO_F_RING_RESET); __virtio_set_bit(vdev, VIRTIO_F_RING_RESET);
} }
/*
 * Verify that the mapped common cfg region is large enough for one feature.
 *
 * @vdev:   virtio device being checked
 * @fbit:   feature bit; the check only applies when it is set on @vdev
 * @offset: minimum common cfg length needed by the feature
 * @fname:  feature name used in the error message
 *
 * Returns 0 when the feature is not set or the region is long enough,
 * -EINVAL (after logging an error) otherwise.
 */
static int __vp_check_common_size_one_feature(struct virtio_device *vdev, u32 fbit,
					      u32 offset, const char *fname)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	/* Nothing to verify unless the feature bit is set on @vdev. */
	if (!__virtio_test_bit(vdev, fbit))
		return 0;

	if (!likely(vp_dev->mdev.common_len >= offset)) {
		dev_err(&vdev->dev,
			"virtio: common cfg size(%zu) does not match the feature %s\n",
			vp_dev->mdev.common_len, fname);
		return -EINVAL;
	}

	return 0;
}
/*
 * Convenience wrapper around __vp_check_common_size_one_feature(): the
 * required length is the end offset of @field within
 * struct virtio_pci_modern_common_cfg, and the feature name passed on is
 * the stringified @fbit token (so @fbit must be given as the macro name).
 */
#define vp_check_common_size_one_feature(vdev, fbit, field) \
__vp_check_common_size_one_feature(vdev, fbit, \
offsetofend(struct virtio_pci_modern_common_cfg, field), #fbit)
static int vp_check_common_size(struct virtio_device *vdev)
{
if (vp_check_common_size_one_feature(vdev, VIRTIO_F_NOTIF_CONFIG_DATA, queue_notify_data))
return -EINVAL;
if (vp_check_common_size_one_feature(vdev, VIRTIO_F_RING_RESET, queue_reset))
return -EINVAL;
return 0;
}
/* virtio config->finalize_features() implementation */ /* virtio config->finalize_features() implementation */
static int vp_finalize_features(struct virtio_device *vdev) static int vp_finalize_features(struct virtio_device *vdev)
{ {
...@@ -57,6 +90,9 @@ static int vp_finalize_features(struct virtio_device *vdev) ...@@ -57,6 +90,9 @@ static int vp_finalize_features(struct virtio_device *vdev)
return -EINVAL; return -EINVAL;
} }
if (vp_check_common_size(vdev))
return -EINVAL;
vp_modern_set_features(&vp_dev->mdev, vdev->features); vp_modern_set_features(&vp_dev->mdev, vdev->features);
return 0; return 0;
......
...@@ -203,6 +203,10 @@ static inline void check_offsets(void) ...@@ -203,6 +203,10 @@ static inline void check_offsets(void)
offsetof(struct virtio_pci_common_cfg, queue_used_lo)); offsetof(struct virtio_pci_common_cfg, queue_used_lo));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDHI != BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDHI !=
offsetof(struct virtio_pci_common_cfg, queue_used_hi)); offsetof(struct virtio_pci_common_cfg, queue_used_hi));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_NDATA !=
offsetof(struct virtio_pci_modern_common_cfg, queue_notify_data));
BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_RESET !=
offsetof(struct virtio_pci_modern_common_cfg, queue_reset));
} }
/* /*
...@@ -292,7 +296,7 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev) ...@@ -292,7 +296,7 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev)
mdev->common = vp_modern_map_capability(mdev, common, mdev->common = vp_modern_map_capability(mdev, common,
sizeof(struct virtio_pci_common_cfg), 4, sizeof(struct virtio_pci_common_cfg), 4,
0, sizeof(struct virtio_pci_modern_common_cfg), 0, sizeof(struct virtio_pci_modern_common_cfg),
NULL, NULL); &mdev->common_len, NULL);
if (!mdev->common) if (!mdev->common)
goto err_map_common; goto err_map_common;
mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1, mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,
......
...@@ -100,7 +100,7 @@ static void virtio_vdpa_reset(struct virtio_device *vdev) ...@@ -100,7 +100,7 @@ static void virtio_vdpa_reset(struct virtio_device *vdev)
{ {
struct vdpa_device *vdpa = vd_get_vdpa(vdev); struct vdpa_device *vdpa = vd_get_vdpa(vdev);
vdpa_reset(vdpa); vdpa_reset(vdpa, 0);
} }
static bool virtio_vdpa_notify(struct virtqueue *vq) static bool virtio_vdpa_notify(struct virtqueue *vq)
......
...@@ -1232,7 +1232,13 @@ struct mlx5_ifc_virtio_emulation_cap_bits { ...@@ -1232,7 +1232,13 @@ struct mlx5_ifc_virtio_emulation_cap_bits {
u8 max_emulated_devices[0x8]; u8 max_emulated_devices[0x8];
u8 max_num_virtio_queues[0x18]; u8 max_num_virtio_queues[0x18];
u8 reserved_at_a0[0x60]; u8 reserved_at_a0[0x20];
u8 reserved_at_c0[0x13];
u8 desc_group_mkey_supported[0x1];
u8 reserved_at_d4[0xc];
u8 reserved_at_e0[0x20];
u8 umem_1_buffer_param_a[0x20]; u8 umem_1_buffer_param_a[0x20];
......
...@@ -74,7 +74,11 @@ struct mlx5_ifc_virtio_q_bits { ...@@ -74,7 +74,11 @@ struct mlx5_ifc_virtio_q_bits {
u8 reserved_at_320[0x8]; u8 reserved_at_320[0x8];
u8 pd[0x18]; u8 pd[0x18];
u8 reserved_at_340[0xc0]; u8 reserved_at_340[0x20];
u8 desc_group_mkey[0x20];
u8 reserved_at_380[0x80];
}; };
struct mlx5_ifc_virtio_net_q_object_bits { struct mlx5_ifc_virtio_net_q_object_bits {
...@@ -141,6 +145,7 @@ enum { ...@@ -141,6 +145,7 @@ enum {
MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0, MLX5_VIRTQ_MODIFY_MASK_STATE = (u64)1 << 0,
MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3, MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_PARAMS = (u64)1 << 3,
MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4, MLX5_VIRTQ_MODIFY_MASK_DIRTY_BITMAP_DUMP_ENABLE = (u64)1 << 4,
MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY = (u64)1 << 14,
}; };
enum { enum {
......
...@@ -204,6 +204,16 @@ struct vdpa_map_file { ...@@ -204,6 +204,16 @@ struct vdpa_map_file {
* @vdev: vdpa device * @vdev: vdpa device
* @idx: virtqueue index * @idx: virtqueue index
* Returns u32: group id for this virtqueue * Returns u32: group id for this virtqueue
* @get_vq_desc_group: Get the group id for the descriptor table of
* a specific virtqueue (optional)
* @vdev: vdpa device
* @idx: virtqueue index
* Returns u32: group id for the descriptor table
* portion of this virtqueue. Could be different
* than the one from @get_vq_group, in which case
* the access to the descriptor table can be
* confined to a separate asid, isolating from
* the virtqueue's buffer address access.
* @get_device_features: Get virtio features supported by the device * @get_device_features: Get virtio features supported by the device
* @vdev: vdpa device * @vdev: vdpa device
* Returns the virtio features support by the * Returns the virtio features support by the
...@@ -242,6 +252,17 @@ struct vdpa_map_file { ...@@ -242,6 +252,17 @@ struct vdpa_map_file {
* @reset: Reset device * @reset: Reset device
* @vdev: vdpa device * @vdev: vdpa device
* Returns integer: success (0) or error (< 0) * Returns integer: success (0) or error (< 0)
* @compat_reset: Reset device with compatibility quirks to
* accommodate older userspace. Only needed by
* parent driver which used to have bogus reset
* behaviour, and has to maintain such behaviour
* for compatibility with older userspace.
* Historically compliant drivers only have to
* implement .reset; historically non-compliant
* drivers should implement both.
* @vdev: vdpa device
* @flags: compatibility quirks for reset
* Returns integer: success (0) or error (< 0)
* @suspend: Suspend the device (optional) * @suspend: Suspend the device (optional)
* @vdev: vdpa device * @vdev: vdpa device
* Returns integer: success (0) or error (< 0) * Returns integer: success (0) or error (< 0)
...@@ -317,6 +338,15 @@ struct vdpa_map_file { ...@@ -317,6 +338,15 @@ struct vdpa_map_file {
* @iova: iova to be unmapped * @iova: iova to be unmapped
* @size: size of the area * @size: size of the area
* Returns integer: success (0) or error (< 0) * Returns integer: success (0) or error (< 0)
* @reset_map: Reset device memory mapping to the default
* state (optional)
* Needed for devices that are using device
* specific DMA translation and prefer mapping
* to be decoupled from the virtio life cycle,
* i.e. device .reset op does not reset mapping
* @vdev: vdpa device
* @asid: address space identifier
* Returns integer: success (0) or error (< 0)
* @get_vq_dma_dev: Get the dma device for a specific * @get_vq_dma_dev: Get the dma device for a specific
* virtqueue (optional) * virtqueue (optional)
* @vdev: vdpa device * @vdev: vdpa device
...@@ -360,6 +390,7 @@ struct vdpa_config_ops { ...@@ -360,6 +390,7 @@ struct vdpa_config_ops {
/* Device ops */ /* Device ops */
u32 (*get_vq_align)(struct vdpa_device *vdev); u32 (*get_vq_align)(struct vdpa_device *vdev);
u32 (*get_vq_group)(struct vdpa_device *vdev, u16 idx); u32 (*get_vq_group)(struct vdpa_device *vdev, u16 idx);
u32 (*get_vq_desc_group)(struct vdpa_device *vdev, u16 idx);
u64 (*get_device_features)(struct vdpa_device *vdev); u64 (*get_device_features)(struct vdpa_device *vdev);
u64 (*get_backend_features)(const struct vdpa_device *vdev); u64 (*get_backend_features)(const struct vdpa_device *vdev);
int (*set_driver_features)(struct vdpa_device *vdev, u64 features); int (*set_driver_features)(struct vdpa_device *vdev, u64 features);
...@@ -373,6 +404,8 @@ struct vdpa_config_ops { ...@@ -373,6 +404,8 @@ struct vdpa_config_ops {
u8 (*get_status)(struct vdpa_device *vdev); u8 (*get_status)(struct vdpa_device *vdev);
void (*set_status)(struct vdpa_device *vdev, u8 status); void (*set_status)(struct vdpa_device *vdev, u8 status);
int (*reset)(struct vdpa_device *vdev); int (*reset)(struct vdpa_device *vdev);
int (*compat_reset)(struct vdpa_device *vdev, u32 flags);
#define VDPA_RESET_F_CLEAN_MAP 1
int (*suspend)(struct vdpa_device *vdev); int (*suspend)(struct vdpa_device *vdev);
int (*resume)(struct vdpa_device *vdev); int (*resume)(struct vdpa_device *vdev);
size_t (*get_config_size)(struct vdpa_device *vdev); size_t (*get_config_size)(struct vdpa_device *vdev);
...@@ -394,6 +427,7 @@ struct vdpa_config_ops { ...@@ -394,6 +427,7 @@ struct vdpa_config_ops {
u64 iova, u64 size, u64 pa, u32 perm, void *opaque); u64 iova, u64 size, u64 pa, u32 perm, void *opaque);
int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid, int (*dma_unmap)(struct vdpa_device *vdev, unsigned int asid,
u64 iova, u64 size); u64 iova, u64 size);
int (*reset_map)(struct vdpa_device *vdev, unsigned int asid);
int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group, int (*set_group_asid)(struct vdpa_device *vdev, unsigned int group,
unsigned int asid); unsigned int asid);
struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx); struct device *(*get_vq_dma_dev)(struct vdpa_device *vdev, u16 idx);
...@@ -485,14 +519,17 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) ...@@ -485,14 +519,17 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev)
return vdev->dma_dev; return vdev->dma_dev;
} }
static inline int vdpa_reset(struct vdpa_device *vdev) static inline int vdpa_reset(struct vdpa_device *vdev, u32 flags)
{ {
const struct vdpa_config_ops *ops = vdev->config; const struct vdpa_config_ops *ops = vdev->config;
int ret; int ret;
down_write(&vdev->cf_lock); down_write(&vdev->cf_lock);
vdev->features_valid = false; vdev->features_valid = false;
ret = ops->reset(vdev); if (ops->compat_reset && flags)
ret = ops->compat_reset(vdev, flags);
else
ret = ops->reset(vdev);
up_write(&vdev->cf_lock); up_write(&vdev->cf_lock);
return ret; return ret;
} }
......
...@@ -12,37 +12,48 @@ struct virtio_pci_modern_common_cfg { ...@@ -12,37 +12,48 @@ struct virtio_pci_modern_common_cfg {
__le16 queue_reset; /* read-write */ __le16 queue_reset; /* read-write */
}; };
/**
* struct virtio_pci_modern_device - info for modern PCI virtio
* @pci_dev: Ptr to the PCI device struct
* @common: Position of the common capability in the PCI config
* @device: Device-specific data (non-legacy mode)
* @notify_base: Base of vq notifications (non-legacy mode)
* @notify_pa: Physical base of vq notifications
* @isr: Where to read and clear interrupt
* @notify_len: So we can sanity-check accesses
* @device_len: So we can sanity-check accesses
* @common_len: So we can sanity-check accesses
* @notify_map_cap: Capability for when we need to map notifications per-vq
* @notify_offset_multiplier: Multiply queue_notify_off by this value
* (non-legacy mode).
* @modern_bars: Bitmask of BARs
* @id: Device and vendor id
* @device_id_check: Callback defined before vp_modern_probe() to be used to
* verify the PCI device is a vendor's expected device rather
* than the standard virtio PCI device
* Returns the found device id or ERRNO
* @dma_mask: Optional mask instead of the traditional DMA_BIT_MASK(64),
* for vendor devices with DMA space address limitations
*/
struct virtio_pci_modern_device { struct virtio_pci_modern_device {
struct pci_dev *pci_dev; struct pci_dev *pci_dev;
struct virtio_pci_common_cfg __iomem *common; struct virtio_pci_common_cfg __iomem *common;
/* Device-specific data (non-legacy mode) */
void __iomem *device; void __iomem *device;
/* Base of vq notifications (non-legacy mode). */
void __iomem *notify_base; void __iomem *notify_base;
/* Physical base of vq notifications */
resource_size_t notify_pa; resource_size_t notify_pa;
/* Where to read and clear interrupt */
u8 __iomem *isr; u8 __iomem *isr;
/* So we can sanity-check accesses. */
size_t notify_len; size_t notify_len;
size_t device_len; size_t device_len;
size_t common_len;
/* Capability for when we need to map notifications per-vq. */
int notify_map_cap; int notify_map_cap;
/* Multiply queue_notify_off by this value. (non-legacy mode). */
u32 notify_offset_multiplier; u32 notify_offset_multiplier;
int modern_bars; int modern_bars;
struct virtio_device_id id; struct virtio_device_id id;
/* optional check for vendor virtio device, returns dev_id or -ERRNO */
int (*device_id_check)(struct pci_dev *pdev); int (*device_id_check)(struct pci_dev *pdev);
/* optional mask for devices with limited DMA space */
u64 dma_mask; u64 dma_mask;
}; };
......
...@@ -219,4 +219,12 @@ ...@@ -219,4 +219,12 @@
*/ */
#define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E) #define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E)
/* Get the group for the descriptor table including driver & device areas
* of a virtqueue: read index, write group in num.
* The virtqueue index is stored in the index field of vhost_vring_state.
* The group ID of the descriptor table for this specific virtqueue
* is returned via num field of vhost_vring_state.
*/
#define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \
struct vhost_vring_state)
#endif #endif
...@@ -185,5 +185,12 @@ struct vhost_vdpa_iova_range { ...@@ -185,5 +185,12 @@ struct vhost_vdpa_iova_range {
* DRIVER_OK * DRIVER_OK
*/ */
#define VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK 0x6 #define VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK 0x6
/* Device may expose the virtqueue's descriptor area, driver area and
* device area to a different group for ASID binding than where its
* buffers may reside. Requires VHOST_BACKEND_F_IOTLB_ASID.
*/
#define VHOST_BACKEND_F_DESC_ASID 0x7
/* IOTLB doesn't flush memory mapping across device reset */
#define VHOST_BACKEND_F_IOTLB_PERSIST 0x8
#endif #endif
...@@ -105,6 +105,11 @@ ...@@ -105,6 +105,11 @@
*/ */
#define VIRTIO_F_NOTIFICATION_DATA 38 #define VIRTIO_F_NOTIFICATION_DATA 38
/* This feature indicates that the driver uses the data provided by the device
* as a virtqueue identifier in available buffer notifications.
*/
#define VIRTIO_F_NOTIF_CONFIG_DATA 39
/* /*
* This feature indicates that the driver can reset a queue individually. * This feature indicates that the driver can reset a queue individually.
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment