Commit 1801314e authored by Jason Wang, committed by Khalid Elmously

vhost: introduce vhost_exceeds_weight()

We used to have vhost_exceeds_weight() for vhost-net to:

- prevent vhost kthread from hogging the cpu
- balance the time spent between TX and RX

This function could be useful for vsock and scsi as well. So move it
to vhost.c. A device must specify a weight, which counts the number of
requests; it can also specify a byte_weight, which counts the number
of bytes that have been processed.
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

CVE-2019-3900

(backported from commit e82b9b07)
[tyhicks: Backport to Xenial:
 - Adjust handle_tx() instead of handle_tx_{copy,zerocopy}() due to
   missing commit 0d20bdf3 ("vhost_net: split out datacopy logic")
 - Considerable context adjustments throughout the patch due to the
   missing iov_limit member of the vhost_dev struct, which was added
   later in commit b46a0bf7 ("vhost: fix OOB in get_rx_bufs()")
 - Context adjustment in call to vhost_log_write() in hunk #3 of net.c due to
   missing and unneeded commit cc5e7107 ("vhost: log dirty page correctly")
 - Context adjustment in hunk #3 of net.c due to using break instead of goto
   out
 - Context adjustment in hunk #4 of net.c due to missing and unneeded commit
   c67df11f ("vhost_net: try batch dequing from skb array")
 - Don't patch vsock.c since Xenial doesn't have vhost vsock support
 - Adjust context in vhost_dev_init() to account for different local variables
 - Adjust context in struct vhost_dev to account for different struct members]
Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Acked-by: Stefan Bader <stefan.bader@canonical.com>
Acked-by: Connor Kuehl <connor.kuehl@canonical.com>
Signed-off-by: Khalid Elmously <khalid.elmously@canonical.com>
parent edc26183
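
For context, the usage pattern this patch establishes for vhost backends is
sketched below. The vhost_dev_init() and vhost_exceeds_weight() signatures are
the ones introduced in the diff that follows; the backend name, limits, and
loop body are hypothetical and shown only for illustration. Note that a
byte_weight of 0 disables the byte-based limit (the check in vhost.c applies
it only when byte_weight is non-zero), which is why vhost-scsi passes 0.

	/* Illustrative sketch only -- not part of this patch. "my_dev" is a
	 * hypothetical vhost backend. */
	#define MY_DEV_WEIGHT      256	/* max requests handled per work-loop run */
	#define MY_DEV_BYTE_WEIGHT 0	/* 0 disables the byte-based limit */

	static int my_dev_open(struct inode *inode, struct file *f)
	{
		...
		/* Register the per-run limits with the vhost core. */
		vhost_dev_init(&d->dev, vqs, MY_DEV_NVQS,
			       MY_DEV_WEIGHT, MY_DEV_BYTE_WEIGHT);
		...
	}

	static void my_dev_handle_kick(struct vhost_work *work)
	{
		struct vhost_virtqueue *vq = ...;	/* virtqueue being serviced */
		int pkts = 0, total_len = 0;

		...
		for (;;) {
			/* ... pop and process one request, adding its size
			 * to total_len ... */

			/* Yield the vhost worker once the limits are hit:
			 * vhost_exceeds_weight() requeues the virtqueue poll
			 * and returns true, so the handler just breaks out. */
			if (unlikely(vhost_exceeds_weight(vq, ++pkts, total_len)))
				break;
		}
		...
	}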
drivers/vhost/net.c
@@ -302,12 +302,6 @@ static bool vhost_exceeds_maxpend(struct vhost_net *net)
 		min_t(unsigned int, VHOST_MAX_PEND, vq->num >> 2);
 }
 
-static bool vhost_exceeds_weight(int pkts, int total_len)
-{
-	return total_len >= VHOST_NET_WEIGHT ||
-	       pkts >= VHOST_NET_PKT_WEIGHT;
-}
-
 /* Expects to be always run from workqueue - which acts as
  * read-size critical section for our kind of RCU. */
 static void handle_tx(struct vhost_net *net)
@@ -431,10 +425,9 @@ static void handle_tx(struct vhost_net *net)
 			else
 				vhost_zerocopy_signal_used(net, vq);
 		vhost_net_tx_packet(net);
-		if (unlikely(vhost_exceeds_weight(++sent_pkts, total_len))) {
-			vhost_poll_queue(&vq->poll);
+		if (unlikely(vhost_exceeds_weight(vq, ++sent_pkts,
+						  total_len)))
 			break;
-		}
 	}
 out:
 	mutex_unlock(&vq->mutex);
@@ -655,10 +648,8 @@ static void handle_rx(struct vhost_net *net)
 		if (unlikely(vq_log))
 			vhost_log_write(vq, vq_log, log, vhost_len);
 		total_len += vhost_len;
-		if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) {
-			vhost_poll_queue(&vq->poll);
+		if (unlikely(vhost_exceeds_weight(vq, ++recv_pkts, total_len)))
 			break;
-		}
 	}
 out:
 	mutex_unlock(&vq->mutex);
@@ -728,7 +719,8 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 		n->vqs[i].vhost_hlen = 0;
 		n->vqs[i].sock_hlen = 0;
 	}
-	vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
+	vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
+		       VHOST_NET_WEIGHT, VHOST_NET_PKT_WEIGHT);
 
 	vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
 	vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
drivers/vhost/scsi.c
@@ -58,6 +58,12 @@
 #define VHOST_SCSI_PREALLOC_UPAGES 2048
 #define VHOST_SCSI_PREALLOC_PROT_SGLS 512
 
+/* Max number of requests before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others with
+ * request.
+ */
+#define VHOST_SCSI_WEIGHT 256
+
 struct vhost_scsi_inflight {
 	/* Wait for the flush operation to finish */
 	struct completion comp;
@@ -1443,7 +1449,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
 		vqs[i] = &vs->vqs[i].vq;
 		vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
 	}
-	vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ);
+	vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, VHOST_SCSI_WEIGHT, 0);
 
 	vhost_scsi_init_inflight(vs, NULL);
 
drivers/vhost/vhost.c
@@ -370,8 +370,24 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev)
 		vhost_vq_free_iovecs(dev->vqs[i]);
 }
 
+bool vhost_exceeds_weight(struct vhost_virtqueue *vq,
+			  int pkts, int total_len)
+{
+	struct vhost_dev *dev = vq->dev;
+
+	if ((dev->byte_weight && total_len >= dev->byte_weight) ||
+	    pkts >= dev->weight) {
+		vhost_poll_queue(&vq->poll);
+		return true;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(vhost_exceeds_weight);
+
 void vhost_dev_init(struct vhost_dev *dev,
-		    struct vhost_virtqueue **vqs, int nvqs)
+		    struct vhost_virtqueue **vqs, int nvqs,
+		    int weight, int byte_weight)
 {
 	struct vhost_virtqueue *vq;
 	int i;
@@ -386,6 +402,8 @@ void vhost_dev_init(struct vhost_dev *dev,
 	spin_lock_init(&dev->work_lock);
 	INIT_LIST_HEAD(&dev->work_list);
 	dev->worker = NULL;
+	dev->weight = weight;
+	dev->byte_weight = byte_weight;
 
 	for (i = 0; i < dev->nvqs; ++i) {
 		vq = dev->vqs[i];
drivers/vhost/vhost.h
@@ -127,9 +127,13 @@ struct vhost_dev {
 	spinlock_t work_lock;
 	struct list_head work_list;
 	struct task_struct *worker;
+	int weight;
+	int byte_weight;
 };
 
-void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs);
+bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len);
+void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs,
+		    int nvqs, int weight, int byte_weight);
 long vhost_dev_set_owner(struct vhost_dev *dev);
 bool vhost_dev_has_owner(struct vhost_dev *dev);
 long vhost_dev_check_owner(struct vhost_dev *);