Commit 3c81bdd9 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull vhost infrastructure updates from Michael S. Tsirkin:
 "This reworks vhost core dropping unnecessary RCU uses in favor of VQ
  mutexes which are used on fast path anyway.  This fixes worst-case
  latency for users which change the memory mappings a lot.  Memory
  allocation for vhost-net now supports fallback on vmalloc (same as for
  vhost-scsi) this makes it possible to create the device on systems
  where memory is very fragmented, with slightly lower performance"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  vhost: move memory pointer to VQs
  vhost: move acked_features to VQs
  vhost: replace rcu with mutex
  vhost-net: extend device allocation to vmalloc
parents 7ec6131b 47283bef
......@@ -17,6 +17,7 @@
#include <linux/workqueue.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/net.h>
#include <linux/if_packet.h>
......@@ -373,7 +374,7 @@ static void handle_tx(struct vhost_net *net)
% UIO_MAXIOV == nvq->done_idx))
break;
head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
head = vhost_get_vq_desc(vq, vq->iov,
ARRAY_SIZE(vq->iov),
&out, &in,
NULL, NULL);
......@@ -505,7 +506,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
r = -ENOBUFS;
goto err;
}
r = vhost_get_vq_desc(vq->dev, vq, vq->iov + seg,
r = vhost_get_vq_desc(vq, vq->iov + seg,
ARRAY_SIZE(vq->iov) - seg, &out,
&in, log, log_num);
if (unlikely(r < 0))
......@@ -584,9 +585,9 @@ static void handle_rx(struct vhost_net *net)
vhost_hlen = nvq->vhost_hlen;
sock_hlen = nvq->sock_hlen;
vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ?
vq->log : NULL;
mergeable = vhost_has_feature(&net->dev, VIRTIO_NET_F_MRG_RXBUF);
mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
while ((sock_len = peek_head_len(sock->sk))) {
sock_len += sock_hlen;
......@@ -699,18 +700,30 @@ static void handle_rx_net(struct vhost_work *work)
handle_rx(net);
}
static void vhost_net_free(void *addr)
{
if (is_vmalloc_addr(addr))
vfree(addr);
else
kfree(addr);
}
static int vhost_net_open(struct inode *inode, struct file *f)
{
struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
struct vhost_net *n;
struct vhost_dev *dev;
struct vhost_virtqueue **vqs;
int i;
if (!n)
return -ENOMEM;
n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
if (!n) {
n = vmalloc(sizeof *n);
if (!n)
return -ENOMEM;
}
vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
if (!vqs) {
kfree(n);
vhost_net_free(n);
return -ENOMEM;
}
......@@ -827,7 +840,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
* since jobs can re-queue themselves. */
vhost_net_flush(n);
kfree(n->dev.vqs);
kfree(n);
vhost_net_free(n);
return 0;
}
......@@ -1038,15 +1051,13 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
mutex_unlock(&n->dev.mutex);
return -EFAULT;
}
n->dev.acked_features = features;
smp_wmb();
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
mutex_lock(&n->vqs[i].vq.mutex);
n->vqs[i].vq.acked_features = features;
n->vqs[i].vhost_hlen = vhost_hlen;
n->vqs[i].sock_hlen = sock_hlen;
mutex_unlock(&n->vqs[i].vq.mutex);
}
vhost_net_flush(n);
mutex_unlock(&n->dev.mutex);
return 0;
}
......
......@@ -606,7 +606,7 @@ tcm_vhost_do_evt_work(struct vhost_scsi *vs, struct tcm_vhost_evt *evt)
again:
vhost_disable_notify(&vs->dev, vq);
head = vhost_get_vq_desc(&vs->dev, vq, vq->iov,
head = vhost_get_vq_desc(vq, vq->iov,
ARRAY_SIZE(vq->iov), &out, &in,
NULL, NULL);
if (head < 0) {
......@@ -945,7 +945,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
vhost_disable_notify(&vs->dev, vq);
for (;;) {
head = vhost_get_vq_desc(&vs->dev, vq, vq->iov,
head = vhost_get_vq_desc(vq, vq->iov,
ARRAY_SIZE(vq->iov), &out, &in,
NULL, NULL);
pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
......@@ -1373,6 +1373,9 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
{
struct vhost_virtqueue *vq;
int i;
if (features & ~VHOST_SCSI_FEATURES)
return -EOPNOTSUPP;
......@@ -1382,9 +1385,13 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
mutex_unlock(&vs->dev.mutex);
return -EFAULT;
}
vs->dev.acked_features = features;
smp_wmb();
vhost_scsi_flush(vs);
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex);
vq->acked_features = features;
mutex_unlock(&vq->mutex);
}
mutex_unlock(&vs->dev.mutex);
return 0;
}
......@@ -1591,10 +1598,6 @@ tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg,
return;
mutex_lock(&vs->dev.mutex);
if (!vhost_has_feature(&vs->dev, VIRTIO_SCSI_F_HOTPLUG)) {
mutex_unlock(&vs->dev.mutex);
return;
}
if (plug)
reason = VIRTIO_SCSI_EVT_RESET_RESCAN;
......@@ -1603,8 +1606,9 @@ tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg,
vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
mutex_lock(&vq->mutex);
tcm_vhost_send_evt(vs, tpg, lun,
VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
if (vhost_has_feature(vq, VIRTIO_SCSI_F_HOTPLUG))
tcm_vhost_send_evt(vs, tpg, lun,
VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
mutex_unlock(&vq->mutex);
mutex_unlock(&vs->dev.mutex);
}
......
......@@ -53,7 +53,7 @@ static void handle_vq(struct vhost_test *n)
vhost_disable_notify(&n->dev, vq);
for (;;) {
head = vhost_get_vq_desc(&n->dev, vq, vq->iov,
head = vhost_get_vq_desc(vq, vq->iov,
ARRAY_SIZE(vq->iov),
&out, &in,
NULL, NULL);
......@@ -241,15 +241,18 @@ static long vhost_test_reset_owner(struct vhost_test *n)
static int vhost_test_set_features(struct vhost_test *n, u64 features)
{
struct vhost_virtqueue *vq;
mutex_lock(&n->dev.mutex);
if ((features & (1 << VHOST_F_LOG_ALL)) &&
!vhost_log_access_ok(&n->dev)) {
mutex_unlock(&n->dev.mutex);
return -EFAULT;
}
n->dev.acked_features = features;
smp_wmb();
vhost_test_flush(n);
vq = &n->vqs[VHOST_TEST_VQ];
mutex_lock(&vq->mutex);
vq->acked_features = features;
mutex_unlock(&vq->mutex);
mutex_unlock(&n->dev.mutex);
return 0;
}
......
This diff is collapsed.
......@@ -104,20 +104,18 @@ struct vhost_virtqueue {
struct iovec *indirect;
struct vring_used_elem *heads;
/* Protected by virtqueue mutex. */
struct vhost_memory *memory;
void *private_data;
unsigned acked_features;
/* Log write descriptors */
void __user *log_base;
struct vhost_log *log;
};
struct vhost_dev {
/* Readers use RCU to access memory table pointer
* log base pointer and features.
* Writers use mutex below.*/
struct vhost_memory __rcu *memory;
struct vhost_memory *memory;
struct mm_struct *mm;
struct mutex mutex;
unsigned acked_features;
struct vhost_virtqueue **vqs;
int nvqs;
struct file *log_file;
......@@ -140,7 +138,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp);
int vhost_vq_access_ok(struct vhost_virtqueue *vq);
int vhost_log_access_ok(struct vhost_dev *);
int vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *,
int vhost_get_vq_desc(struct vhost_virtqueue *,
struct iovec iov[], unsigned int iov_count,
unsigned int *out_num, unsigned int *in_num,
struct vhost_log *log, unsigned int *log_num);
......@@ -174,13 +172,8 @@ enum {
(1ULL << VHOST_F_LOG_ALL),
};
static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
static inline int vhost_has_feature(struct vhost_virtqueue *vq, int bit)
{
unsigned acked_features;
/* TODO: check that we are running from vhost_worker or dev mutex is
* held? */
acked_features = rcu_dereference_index_check(dev->acked_features, 1);
return acked_features & (1 << bit);
return vq->acked_features & (1 << bit);
}
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment