Commit dbfa18c5 authored by Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio bug fixes from Michael Tsirkin:
 "A bunch of fixes all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  tools/virtio: use canonical ftrace path
  vhost_vdpa: support PACKED when setting-getting vring_base
  vhost: support PACKED when setting-getting vring_base
  vhost: Fix worker hangs due to missed wake up calls
  vhost: Fix crash during early vhost_transport_send_pkt calls
  vhost_net: revert upend_idx only on retriable error
  vhost_vdpa: tell vqs about the negotiated
  vdpa/mlx5: Fix hang when cvq commands are triggered during device unregister
  tools/virtio: Add .gitignore for ringtest
  tools/virtio: Fix arm64 ringtest compilation error
  vduse: avoid empty string for dev name
  vhost: use kzalloc() instead of kmalloc() followed by memset()
parents 7e8c948b 07496eea
@@ -3349,10 +3349,10 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
 	mlx5_vdpa_remove_debugfs(ndev->debugfs);
 	ndev->debugfs = NULL;
 	unregister_link_notifier(ndev);
+	_vdpa_unregister_device(dev);
 	wq = mvdev->wq;
 	mvdev->wq = NULL;
 	destroy_workqueue(wq);
-	_vdpa_unregister_device(dev);
 	mgtdev->ndev = NULL;
 }
......
@@ -1685,6 +1685,9 @@ static bool vduse_validate_config(struct vduse_dev_config *config)
 	if (config->vq_num > 0xffff)
 		return false;
 
+	if (!config->name[0])
+		return false;
+
 	if (!device_is_allowed(config->device_id))
 		return false;
......
@@ -935,13 +935,18 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
 
 		err = sock->ops->sendmsg(sock, &msg, len);
 		if (unlikely(err < 0)) {
+			bool retry = err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS;
+
 			if (zcopy_used) {
 				if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS)
 					vhost_net_ubuf_put(ubufs);
-				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
-					% UIO_MAXIOV;
+				if (retry)
+					nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
+						% UIO_MAXIOV;
+				else
+					vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
 			}
-			if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) {
+			if (retry) {
 				vhost_discard_vq_desc(vq, 1);
 				vhost_net_enable_vq(net, vq);
 				break;
......
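A note on the rewind expression kept in the retry branch: upend_idx is stepped back as ((unsigned)nvq->upend_idx - 1) % UIO_MAXIOV so the index wraps cleanly from 0 to the last slot instead of producing a negative signed remainder. A minimal standalone sketch of the idiom (plain C; ring_rewind is an illustrative name, and UIO_MAXIOV's kernel value of 1024 is assumed):

#include <stdio.h>

#define UIO_MAXIOV 1024	/* kernel value, assumed here for illustration */

/* Step a ring index back one slot, wrapping 0 -> UIO_MAXIOV - 1. */
static unsigned ring_rewind(unsigned idx)
{
	return (idx - 1) % UIO_MAXIOV;
}

int main(void)
{
	printf("%u\n", ring_rewind(5));	/* 4 */
	printf("%u\n", ring_rewind(0));	/* 1023: unsigned wraparound, then modulo */
	return 0;
}

This lands exactly on the last slot because UIO_MAXIOV is a power of two, so 2^32 is an exact multiple of it.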
@@ -407,7 +407,10 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 {
 	struct vdpa_device *vdpa = v->vdpa;
 	const struct vdpa_config_ops *ops = vdpa->config;
+	struct vhost_dev *d = &v->vdev;
+	u64 actual_features;
 	u64 features;
+	int i;
 
 	/*
 	 * It's not allowed to change the features after they have
@@ -422,6 +425,16 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 	if (vdpa_set_features(vdpa, features))
 		return -EINVAL;
 
+	/* let the vqs know what has been configured */
+	actual_features = ops->get_driver_features(vdpa);
+	for (i = 0; i < d->nvqs; ++i) {
+		struct vhost_virtqueue *vq = d->vqs[i];
+
+		mutex_lock(&vq->mutex);
+		vq->acked_features = actual_features;
+		mutex_unlock(&vq->mutex);
+	}
+
 	return 0;
 }
@@ -594,7 +607,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 		if (r)
 			return r;
 
-		vq->last_avail_idx = vq_state.split.avail_index;
+		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+			vq->last_avail_idx = vq_state.packed.last_avail_idx |
+					     (vq_state.packed.last_avail_counter << 15);
+			vq->last_used_idx = vq_state.packed.last_used_idx |
+					    (vq_state.packed.last_used_counter << 15);
+		} else {
+			vq->last_avail_idx = vq_state.split.avail_index;
+		}
 		break;
 	}
@@ -612,9 +632,15 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 		break;
 
 	case VHOST_SET_VRING_BASE:
-		vq_state.split.avail_index = vq->last_avail_idx;
-		if (ops->set_vq_state(vdpa, idx, &vq_state))
-			r = -EINVAL;
+		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+			vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
+			vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
+			vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
+			vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
+		} else {
+			vq_state.split.avail_index = vq->last_avail_idx;
+		}
+		r = ops->set_vq_state(vdpa, idx, &vq_state);
 		break;
 
 	case VHOST_SET_VRING_CALL:
......
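Both directions above use one convention for packed rings: the 15-bit ring index sits in the low bits of last_avail_idx/last_used_idx and the virtio wrap counter is folded into bit 15. A minimal sketch of that encode/decode pair, using only the masks visible in the hunks (helper names are illustrative, not kernel API):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Fold a 15-bit packed-ring index and its wrap counter into one u16. */
static uint16_t vq_idx_pack(uint16_t idx, bool wrap)
{
	return (idx & 0x7fff) | ((uint16_t)wrap << 15);
}

/* Split the combined value back into index and wrap counter. */
static void vq_idx_unpack(uint16_t v, uint16_t *idx, bool *wrap)
{
	*idx = v & 0x7fff;
	*wrap = !!(v & 0x8000);
}

int main(void)
{
	uint16_t idx;
	bool wrap;

	vq_idx_unpack(vq_idx_pack(0x1234, true), &idx, &wrap);
	assert(idx == 0x1234 && wrap);
	return 0;
}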
@@ -235,7 +235,7 @@ void vhost_dev_flush(struct vhost_dev *dev)
 {
 	struct vhost_flush_struct flush;
 
-	if (dev->worker) {
+	if (dev->worker.vtsk) {
 		init_completion(&flush.wait_event);
 		vhost_work_init(&flush.work, vhost_flush_work);
@@ -247,7 +247,7 @@ EXPORT_SYMBOL_GPL(vhost_dev_flush);
 
 void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 {
-	if (!dev->worker)
+	if (!dev->worker.vtsk)
 		return;
 
 	if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
@@ -255,8 +255,8 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 		 * sure it was not in the list.
 		 * test_and_set_bit() implies a memory barrier.
 		 */
-		llist_add(&work->node, &dev->worker->work_list);
-		vhost_task_wake(dev->worker->vtsk);
+		llist_add(&work->node, &dev->worker.work_list);
+		vhost_task_wake(dev->worker.vtsk);
 	}
 }
 EXPORT_SYMBOL_GPL(vhost_work_queue);
@@ -264,7 +264,7 @@ EXPORT_SYMBOL_GPL(vhost_work_queue);
 /* A lockless hint for busy polling code to exit the loop */
 bool vhost_has_work(struct vhost_dev *dev)
 {
-	return dev->worker && !llist_empty(&dev->worker->work_list);
+	return !llist_empty(&dev->worker.work_list);
 }
 EXPORT_SYMBOL_GPL(vhost_has_work);
@@ -341,6 +341,8 @@ static bool vhost_worker(void *data)
 	node = llist_del_all(&worker->work_list);
 	if (node) {
+		__set_current_state(TASK_RUNNING);
+
 		node = llist_reverse_order(node);
 		/* make sure flag is seen after deletion */
 		smp_wmb();
@@ -456,7 +458,8 @@ void vhost_dev_init(struct vhost_dev *dev,
 	dev->umem = NULL;
 	dev->iotlb = NULL;
 	dev->mm = NULL;
-	dev->worker = NULL;
+	memset(&dev->worker, 0, sizeof(dev->worker));
+	init_llist_head(&dev->worker.work_list);
 	dev->iov_limit = iov_limit;
 	dev->weight = weight;
 	dev->byte_weight = byte_weight;
@@ -530,47 +533,30 @@ static void vhost_detach_mm(struct vhost_dev *dev)
 
 static void vhost_worker_free(struct vhost_dev *dev)
 {
-	struct vhost_worker *worker = dev->worker;
-
-	if (!worker)
+	if (!dev->worker.vtsk)
 		return;
 
-	dev->worker = NULL;
-	WARN_ON(!llist_empty(&worker->work_list));
-	vhost_task_stop(worker->vtsk);
-	kfree(worker);
+	WARN_ON(!llist_empty(&dev->worker.work_list));
+	vhost_task_stop(dev->worker.vtsk);
+	dev->worker.kcov_handle = 0;
+	dev->worker.vtsk = NULL;
 }
 
 static int vhost_worker_create(struct vhost_dev *dev)
 {
-	struct vhost_worker *worker;
 	struct vhost_task *vtsk;
 	char name[TASK_COMM_LEN];
-	int ret;
-
-	worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
-	if (!worker)
-		return -ENOMEM;
-
-	dev->worker = worker;
-	worker->kcov_handle = kcov_common_handle();
-	init_llist_head(&worker->work_list);
 
 	snprintf(name, sizeof(name), "vhost-%d", current->pid);
 
-	vtsk = vhost_task_create(vhost_worker, worker, name);
-	if (!vtsk) {
-		ret = -ENOMEM;
-		goto free_worker;
-	}
+	vtsk = vhost_task_create(vhost_worker, &dev->worker, name);
+	if (!vtsk)
+		return -ENOMEM;
 
-	worker->vtsk = vtsk;
+	dev->worker.kcov_handle = kcov_common_handle();
+	dev->worker.vtsk = vtsk;
 	vhost_task_start(vtsk);
 	return 0;
-
-free_worker:
-	kfree(worker);
-	dev->worker = NULL;
-	return ret;
 }
 
 /* Caller should have device mutex */
@@ -1614,17 +1600,25 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
 			r = -EFAULT;
 			break;
 		}
-		if (s.num > 0xffff) {
-			r = -EINVAL;
-			break;
+		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+			vq->last_avail_idx = s.num & 0xffff;
+			vq->last_used_idx = (s.num >> 16) & 0xffff;
+		} else {
+			if (s.num > 0xffff) {
+				r = -EINVAL;
+				break;
+			}
+			vq->last_avail_idx = s.num;
 		}
-		vq->last_avail_idx = s.num;
 		/* Forget the cached index value. */
 		vq->avail_idx = vq->last_avail_idx;
 		break;
 	case VHOST_GET_VRING_BASE:
 		s.index = idx;
-		s.num = vq->last_avail_idx;
+		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+			s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16);
+		else
+			s.num = vq->last_avail_idx;
 		if (copy_to_user(argp, &s, sizeof s))
 			r = -EFAULT;
 		break;
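For packed rings, VHOST_GET_VRING_BASE and VHOST_SET_VRING_BASE thus carry both indexes in a single 32-bit s.num: last_avail_idx in the low 16 bits and last_used_idx in the high 16 bits, each with its wrap counter in bit 15 as described above. A hedged sketch of how userspace might unpack the returned value (the sample value is made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t num = 0x90018002;	/* hypothetical VHOST_GET_VRING_BASE result */
	uint16_t avail = num & 0xffff;	/* last_avail_idx with wrap bit */
	uint16_t used = num >> 16;	/* last_used_idx with wrap bit */

	printf("avail idx %u wrap %u, used idx %u wrap %u\n",
	       avail & 0x7fff, avail >> 15, used & 0x7fff, used >> 15);
	return 0;
}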
@@ -2563,12 +2557,11 @@ EXPORT_SYMBOL_GPL(vhost_disable_notify);
 /* Create a new message. */
 struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type)
 {
-	struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL);
+	/* Make sure all padding within the structure is initialized. */
+	struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
 	if (!node)
 		return NULL;
 
-	/* Make sure all padding within the structure is initialized. */
-	memset(&node->msg, 0, sizeof node->msg);
 	node->vq = vq;
 	node->msg.type = type;
 	return node;
......
@@ -92,13 +92,17 @@ struct vhost_virtqueue {
 	/* The routine to call when the Guest pings us, or timeout. */
 	vhost_work_fn_t handle_kick;
 
-	/* Last available index we saw. */
+	/* Last available index we saw.
+	 * Values are limited to 0x7fff, and the high bit is used as
+	 * a wrap counter when using VIRTIO_F_RING_PACKED. */
 	u16 last_avail_idx;
 
 	/* Caches available index value from user. */
 	u16 avail_idx;
 
-	/* Last index we used. */
+	/* Last index we used.
+	 * Values are limited to 0x7fff, and the high bit is used as
+	 * a wrap counter when using VIRTIO_F_RING_PACKED. */
 	u16 last_used_idx;
 
 	/* Used flags */
@@ -154,7 +158,7 @@ struct vhost_dev {
 	struct vhost_virtqueue **vqs;
 	int nvqs;
 	struct eventfd_ctx *log_ctx;
-	struct vhost_worker *worker;
+	struct vhost_worker worker;
 	struct vhost_iotlb *umem;
 	struct vhost_iotlb *iotlb;
 	spinlock_t iotlb_lock;
......
@@ -28,10 +28,6 @@ static int vhost_task_fn(void *data)
 	for (;;) {
 		bool did_work;
 
-		/* mb paired w/ vhost_task_stop */
-		if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags))
-			break;
-
 		if (!dead && signal_pending(current)) {
 			struct ksignal ksig;
 			/*
@@ -48,11 +44,17 @@ static int vhost_task_fn(void *data)
 			clear_thread_flag(TIF_SIGPENDING);
 		}
 
+		/* mb paired w/ vhost_task_stop */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) {
+			__set_current_state(TASK_RUNNING);
+			break;
+		}
+
 		did_work = vtsk->fn(vtsk->data);
-		if (!did_work) {
-			set_current_state(TASK_INTERRUPTIBLE);
+		if (!did_work)
 			schedule();
-		}
 	}
 
 	complete(&vtsk->exited);
......
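The reordering above is the standard cure for a lost wakeup: the worker publishes its intent to sleep (set_current_state(TASK_INTERRUPTIBLE)) before re-checking VHOST_TASK_FLAGS_STOP, so a stopper that sets the flag and wakes the task between the check and the sleep can no longer be missed. A userspace analogue of the same ordering, sketched with a Linux futex standing in for schedule()/wakeup (illustrative only, not the kernel code; build with -pthread):

#include <linux/futex.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

static atomic_int sleep_word;	/* 1 = about to sleep, like TASK_INTERRUPTIBLE */
static atomic_int stop_flag;	/* like VHOST_TASK_FLAGS_STOP */

static long futex(atomic_int *addr, int op, int val)
{
	return syscall(SYS_futex, addr, op, val, NULL, NULL, 0);
}

static void *worker(void *arg)
{
	(void)arg;
	for (;;) {
		/* Publish "going to sleep" BEFORE re-checking the flag... */
		atomic_store(&sleep_word, 1);
		if (atomic_load(&stop_flag))
			break;
		/* ...so the wait returns at once if the stopper ran first
		 * (the futex word no longer holds the expected value 1). */
		futex(&sleep_word, FUTEX_WAIT, 1);
	}
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	usleep(1000);
	atomic_store(&stop_flag, 1);		/* set the condition first */
	atomic_store(&sleep_word, 0);
	futex(&sleep_word, FUTEX_WAKE, 1);	/* then wake, like vhost_task_stop() */
	pthread_join(t, NULL);
	puts("worker stopped without a missed wakeup");
	return 0;
}

Whichever way the two threads interleave, the worker either sees stop_flag set on its re-check or finds the futex word already changed and falls through, so the shutdown cannot hang.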
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/noring
+/ptr_ring
+/ring
+/virtio_ring_0_9
+/virtio_ring_inorder
+/virtio_ring_poll
@@ -8,6 +8,7 @@
 #ifndef MAIN_H
 #define MAIN_H
 
 #include <assert.h>
+#include <stdbool.h>
 
 extern int param;
@@ -95,6 +96,8 @@ extern unsigned ring_size;
 #define cpu_relax() asm ("rep; nop" ::: "memory")
 #elif defined(__s390x__)
 #define cpu_relax() barrier()
+#elif defined(__aarch64__)
+#define cpu_relax() asm ("yield" ::: "memory")
 #else
 #define cpu_relax() assert(0)
 #endif
@@ -112,6 +115,8 @@ static inline void busy_wait(void)
 
 #if defined(__x86_64__) || defined(__i386__)
 #define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
+#elif defined(__aarch64__)
+#define smp_mb() asm volatile("dmb ish" ::: "memory")
 #else
 /*
  * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
@@ -136,10 +141,16 @@ static inline void busy_wait(void)
 
 #if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
 #define smp_wmb() barrier()
+#elif defined(__aarch64__)
+#define smp_wmb() asm volatile("dmb ishst" ::: "memory")
 #else
 #define smp_wmb() smp_release()
 #endif
 
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline))
+#endif
+
 static __always_inline
 void __read_once_size(const volatile void *p, void *res, int size)
 {
......
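The aarch64 branches added above give ringtest the same primitives the x86 and s390x paths already had: a spin hint (yield), a full barrier (dmb ish) and a store-store barrier (dmb ishst). A minimal sketch of the ordering smp_wmb() buys a ring producer, with the macro body copied from the hunk and a portable fallback for other targets (the matching consumer-side read barrier is omitted here for brevity; build with -pthread):

#include <pthread.h>
#include <stdio.h>

#if defined(__aarch64__)
#define smp_wmb() asm volatile("dmb ishst" ::: "memory")
#else
#define smp_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)	/* fallback */
#endif

static int payload;
static volatile int ready;

static void *producer(void *arg)
{
	(void)arg;
	payload = 42;	/* write the data... */
	smp_wmb();	/* ...and order it before the flag becomes visible */
	ready = 1;
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, producer, NULL);
	while (!ready)
		;	/* spin; a real ring would use cpu_relax() here */
	printf("%d\n", payload);	/* 42, thanks to the write barrier */
	pthread_join(t, NULL);
	return 0;
}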
@@ -95,7 +95,7 @@ Run
 1) Enable ftrace in the guest
  <Example>
-	# echo 1 > /sys/kernel/debug/tracing/events/sched/enable
+	# echo 1 > /sys/kernel/tracing/events/sched/enable
 
 2) Run trace agent in the guest
  This agent must be operated as root.
......
@@ -18,8 +18,9 @@
 #define PIPE_DEF_BUFS		16
 #define PIPE_MIN_SIZE		(PAGE_SIZE*PIPE_DEF_BUFS)
 #define PIPE_MAX_SIZE		(1024*1024)
-#define READ_PATH_FMT	\
-		"/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw"
+#define TRACEFS		"/sys/kernel/tracing"
+#define DEBUGFS		"/sys/kernel/debug/tracing"
+#define READ_PATH_FMT	"%s/per_cpu/cpu%d/trace_pipe_raw"
 #define WRITE_PATH_FMT	"/dev/virtio-ports/trace-path-cpu%d"
 #define CTL_PATH	"/dev/virtio-ports/agent-ctl-path"
@@ -120,9 +121,12 @@ static const char *make_path(int cpu_num, bool this_is_write_path)
 	if (this_is_write_path)
 		/* write(output) path */
 		ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num);
-	else
+	else {
 		/* read(input) path */
-		ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num);
+		ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, TRACEFS, cpu_num);
+		if (ret > 0 && access(buf, F_OK) != 0)
+			ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, DEBUGFS, cpu_num);
+	}
 
 	if (ret <= 0) {
 		pr_err("Failed to generate %s path(CPU#%d):%d\n",
......
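The fallback above generalizes to any tool that must run both where tracefs is mounted at its canonical /sys/kernel/tracing location and on older setups that only expose the debugfs mirror. A standalone sketch of the probe-then-fall-back idiom (tracing_path is an illustrative name, not part of the trace agent):

#include <limits.h>
#include <stdio.h>
#include <unistd.h>

#define TRACEFS "/sys/kernel/tracing"
#define DEBUGFS "/sys/kernel/debug/tracing"

/* Resolve a tracing-relative path, preferring the canonical tracefs mount. */
static int tracing_path(char *buf, size_t len, const char *rel)
{
	int ret = snprintf(buf, len, "%s/%s", TRACEFS, rel);

	if (ret > 0 && access(buf, F_OK) != 0)
		ret = snprintf(buf, len, "%s/%s", DEBUGFS, rel);
	return ret;
}

int main(void)
{
	char buf[PATH_MAX];

	if (tracing_path(buf, sizeof(buf), "trace_pipe") > 0)
		printf("using %s\n", buf);
	return 0;
}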