Commit dbfa18c5 authored by Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio bug fixes from Michael Tsirkin:
 "A bunch of fixes all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  tools/virtio: use canonical ftrace path
  vhost_vdpa: support PACKED when setting-getting vring_base
  vhost: support PACKED when setting-getting vring_base
  vhost: Fix worker hangs due to missed wake up calls
  vhost: Fix crash during early vhost_transport_send_pkt calls
  vhost_net: revert upend_idx only on retriable error
  vhost_vdpa: tell vqs about the negotiated
  vdpa/mlx5: Fix hang when cvq commands are triggered during device unregister
  tools/virtio: Add .gitignore for ringtest
  tools/virtio: Fix arm64 ringtest compilation error
  vduse: avoid empty string for dev name
  vhost: use kzalloc() instead of kmalloc() followed by memset()
parents 7e8c948b 07496eea
drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -3349,10 +3349,10 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
 	mlx5_vdpa_remove_debugfs(ndev->debugfs);
 	ndev->debugfs = NULL;
 	unregister_link_notifier(ndev);
-	_vdpa_unregister_device(dev);
 	wq = mvdev->wq;
 	mvdev->wq = NULL;
 	destroy_workqueue(wq);
+	_vdpa_unregister_device(dev);
 	mgtdev->ndev = NULL;
 }
...
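The ordering is the whole fix here: the workqueue can still be running control-vq commands that dereference the device, so it must be drained and destroyed before the device is unregistered. A minimal userspace sketch of the same drain-before-free rule, with a pthread standing in for the kernel workqueue (all names illustrative, not from the driver):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct dev { long hits; };

static struct dev *dev;
static atomic_bool stop;

/* Stands in for the cvq workqueue: keeps dereferencing dev. */
static void *worker_fn(void *arg)
{
	while (!atomic_load(&stop))
		dev->hits++;	/* use-after-free if dev were freed first */
	return NULL;
}

int main(void)
{
	pthread_t worker;

	dev = calloc(1, sizeof(*dev));
	pthread_create(&worker, NULL, worker_fn, NULL);

	/* Correct teardown order, as in the fix: stop the async
	 * worker and wait for it... */
	atomic_store(&stop, 1);
	pthread_join(worker, NULL);
	printf("worker made %ld accesses\n", dev->hits);
	/* ...and only then release the object it was using. */
	free(dev);
	return 0;
}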
drivers/vdpa/vdpa_user/vduse_dev.c
@@ -1685,6 +1685,9 @@ static bool vduse_validate_config(struct vduse_dev_config *config)
 	if (config->vq_num > 0xffff)
 		return false;
 
+	if (!config->name[0])
+		return false;
+
 	if (!device_is_allowed(config->device_id))
 		return false;
...
drivers/vhost/net.c
@@ -935,13 +935,18 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
 		err = sock->ops->sendmsg(sock, &msg, len);
 		if (unlikely(err < 0)) {
+			bool retry = err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS;
+
 			if (zcopy_used) {
 				if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS)
 					vhost_net_ubuf_put(ubufs);
-				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
-					% UIO_MAXIOV;
+				if (retry)
+					nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
+						% UIO_MAXIOV;
+				else
+					vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
 			}
-			if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) {
+			if (retry) {
 				vhost_discard_vq_desc(vq, 1);
 				vhost_net_enable_vq(net, vq);
 				break;
...
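The fix computes the retry decision once: only a transient resource error (-EAGAIN/-ENOMEM/-ENOBUFS) rewinds upend_idx and requeues the descriptor; any other error marks the zerocopy slot done so completion accounting stays consistent. The predicate itself, as a userspace sketch (positive errno values as seen via errno, rather than the kernel's negative returns):

#include <errno.h>
#include <stdbool.h>

/* Same classification as the vhost_net fix: retry only on transient
 * resource pressure, treat everything else as fatal for this packet. */
static bool tx_err_retriable(int err)
{
	return err == EAGAIN || err == ENOMEM || err == ENOBUFS;
}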
drivers/vhost/vdpa.c
@@ -407,7 +410,10 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 {
 	struct vdpa_device *vdpa = v->vdpa;
 	const struct vdpa_config_ops *ops = vdpa->config;
+	struct vhost_dev *d = &v->vdev;
+	u64 actual_features;
 	u64 features;
+	int i;
 
 	/*
 	 * It's not allowed to change the features after they have
@@ -422,6 +425,16 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 	if (vdpa_set_features(vdpa, features))
 		return -EINVAL;
 
+	/* let the vqs know what has been configured */
+	actual_features = ops->get_driver_features(vdpa);
+	for (i = 0; i < d->nvqs; ++i) {
+		struct vhost_virtqueue *vq = d->vqs[i];
+
+		mutex_lock(&vq->mutex);
+		vq->acked_features = actual_features;
+		mutex_unlock(&vq->mutex);
+	}
+
 	return 0;
 }
@@ -594,7 +607,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 		if (r)
 			return r;
 
-		vq->last_avail_idx = vq_state.split.avail_index;
+		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+			vq->last_avail_idx = vq_state.packed.last_avail_idx |
+					     (vq_state.packed.last_avail_counter << 15);
+			vq->last_used_idx = vq_state.packed.last_used_idx |
+					    (vq_state.packed.last_used_counter << 15);
+		} else {
+			vq->last_avail_idx = vq_state.split.avail_index;
+		}
 		break;
 	}
@@ -612,9 +632,15 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 		break;
 
 	case VHOST_SET_VRING_BASE:
-		vq_state.split.avail_index = vq->last_avail_idx;
-		if (ops->set_vq_state(vdpa, idx, &vq_state))
-			r = -EINVAL;
+		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+			vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
+			vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
+			vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
+			vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
+		} else {
+			vq_state.split.avail_index = vq->last_avail_idx;
+		}
+		r = ops->set_vq_state(vdpa, idx, &vq_state);
 		break;
 
 	case VHOST_SET_VRING_CALL:
...
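Both hunks implement one encoding: with VIRTIO_F_RING_PACKED there is no split-ring avail index, so vhost keeps last_avail_idx/last_used_idx as 15-bit ring indices with the wrap counter in bit 15, and the GET/SET_VRING_BASE payload carries both states in one 32-bit value, avail in the low half and used in the high half. The same layout is used by vhost_vring_ioctl() further down. A small standalone sketch of the encode/decode (helper names are mine, not from the source):

#include <assert.h>
#include <stdint.h>

/* 15-bit ring index plus the wrap counter in bit 15, as vhost
 * stores it in last_avail_idx / last_used_idx. */
static uint16_t pack_state(uint16_t idx, int wrap_counter)
{
	return (idx & 0x7fff) | ((uint16_t)!!wrap_counter << 15);
}

/* The u32 exchanged via VHOST_GET/SET_VRING_BASE for packed rings:
 * avail state in bits 0..15, used state in bits 16..31. */
static uint32_t vring_base_num(uint16_t last_avail, uint16_t last_used)
{
	return (uint32_t)last_avail | ((uint32_t)last_used << 16);
}

int main(void)
{
	uint16_t avail = pack_state(0x1234, 1);
	uint16_t used = pack_state(0x0042, 0);
	uint32_t num = vring_base_num(avail, used);

	assert(avail == 0x9234);		/* index | wrap bit */
	assert((num & 0xffff) == 0x9234);	/* low half: avail */
	assert((num >> 16) == 0x0042);		/* high half: used */
	return 0;
}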
drivers/vhost/vhost.c
@@ -235,7 +235,7 @@ void vhost_dev_flush(struct vhost_dev *dev)
 {
 	struct vhost_flush_struct flush;
 
-	if (dev->worker) {
+	if (dev->worker.vtsk) {
 		init_completion(&flush.wait_event);
 		vhost_work_init(&flush.work, vhost_flush_work);
@@ -247,7 +247,7 @@ EXPORT_SYMBOL_GPL(vhost_dev_flush);
 
 void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 {
-	if (!dev->worker)
+	if (!dev->worker.vtsk)
 		return;
 
 	if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
@@ -255,8 +255,8 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 		 * sure it was not in the list.
 		 * test_and_set_bit() implies a memory barrier.
 		 */
-		llist_add(&work->node, &dev->worker->work_list);
-		vhost_task_wake(dev->worker->vtsk);
+		llist_add(&work->node, &dev->worker.work_list);
+		vhost_task_wake(dev->worker.vtsk);
 	}
 }
 EXPORT_SYMBOL_GPL(vhost_work_queue);
@@ -264,7 +264,7 @@ EXPORT_SYMBOL_GPL(vhost_work_queue);
 /* A lockless hint for busy polling code to exit the loop */
 bool vhost_has_work(struct vhost_dev *dev)
 {
-	return dev->worker && !llist_empty(&dev->worker->work_list);
+	return !llist_empty(&dev->worker.work_list);
 }
 EXPORT_SYMBOL_GPL(vhost_has_work);
@@ -341,6 +341,8 @@ static bool vhost_worker(void *data)
 	node = llist_del_all(&worker->work_list);
 	if (node) {
+		__set_current_state(TASK_RUNNING);
+
 		node = llist_reverse_order(node);
 		/* make sure flag is seen after deletion */
 		smp_wmb();
@@ -456,7 +458,8 @@ void vhost_dev_init(struct vhost_dev *dev,
 	dev->umem = NULL;
 	dev->iotlb = NULL;
 	dev->mm = NULL;
-	dev->worker = NULL;
+	memset(&dev->worker, 0, sizeof(dev->worker));
+	init_llist_head(&dev->worker.work_list);
 	dev->iov_limit = iov_limit;
 	dev->weight = weight;
 	dev->byte_weight = byte_weight;
@@ -530,47 +533,30 @@ static void vhost_detach_mm(struct vhost_dev *dev)
 
 static void vhost_worker_free(struct vhost_dev *dev)
 {
-	struct vhost_worker *worker = dev->worker;
-
-	if (!worker)
+	if (!dev->worker.vtsk)
 		return;
 
-	dev->worker = NULL;
-	WARN_ON(!llist_empty(&worker->work_list));
-	vhost_task_stop(worker->vtsk);
-	kfree(worker);
+	WARN_ON(!llist_empty(&dev->worker.work_list));
+	vhost_task_stop(dev->worker.vtsk);
+	dev->worker.kcov_handle = 0;
+	dev->worker.vtsk = NULL;
 }
 
 static int vhost_worker_create(struct vhost_dev *dev)
 {
-	struct vhost_worker *worker;
 	struct vhost_task *vtsk;
 	char name[TASK_COMM_LEN];
-	int ret;
-
-	worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
-	if (!worker)
-		return -ENOMEM;
 
-	dev->worker = worker;
-	worker->kcov_handle = kcov_common_handle();
-	init_llist_head(&worker->work_list);
 	snprintf(name, sizeof(name), "vhost-%d", current->pid);
 
-	vtsk = vhost_task_create(vhost_worker, worker, name);
-	if (!vtsk) {
-		ret = -ENOMEM;
-		goto free_worker;
-	}
+	vtsk = vhost_task_create(vhost_worker, &dev->worker, name);
+	if (!vtsk)
+		return -ENOMEM;
 
-	worker->vtsk = vtsk;
+	dev->worker.kcov_handle = kcov_common_handle();
+	dev->worker.vtsk = vtsk;
 	vhost_task_start(vtsk);
 	return 0;
-
-free_worker:
-	kfree(worker);
-	dev->worker = NULL;
-	return ret;
 }
 
 /* Caller should have device mutex */
@@ -1614,17 +1600,25 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
 			r = -EFAULT;
 			break;
 		}
-		if (s.num > 0xffff) {
-			r = -EINVAL;
-			break;
+		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+			vq->last_avail_idx = s.num & 0xffff;
+			vq->last_used_idx = (s.num >> 16) & 0xffff;
+		} else {
+			if (s.num > 0xffff) {
+				r = -EINVAL;
+				break;
+			}
+			vq->last_avail_idx = s.num;
 		}
-		vq->last_avail_idx = s.num;
 		/* Forget the cached index value. */
 		vq->avail_idx = vq->last_avail_idx;
 		break;
 	case VHOST_GET_VRING_BASE:
 		s.index = idx;
-		s.num = vq->last_avail_idx;
+		if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+			s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16);
+		else
+			s.num = vq->last_avail_idx;
 		if (copy_to_user(argp, &s, sizeof s))
 			r = -EFAULT;
 		break;
@@ -2563,12 +2557,11 @@ EXPORT_SYMBOL_GPL(vhost_disable_notify);
 /* Create a new message. */
 struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type)
 {
-	struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL);
+	/* Make sure all padding within the structure is initialized. */
+	struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
+
 	if (!node)
 		return NULL;
 
-	/* Make sure all padding within the structure is initialized. */
-	memset(&node->msg, 0, sizeof node->msg);
 	node->vq = vq;
 	node->msg.type = type;
 	return node;
...
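This one is a cleanup rather than a behavior change: kzalloc() zeroes the entire allocation in a single step, so the separate memset of node->msg (including its padding) becomes unnecessary and cannot be forgotten. A userspace analog of the swap, with calloc standing in for kzalloc (the struct is illustrative):

#include <stdlib.h>
#include <string.h>

struct msg_node {
	struct { int type; char body[12]; } msg;
	void *vq;
};

/* Before: allocate, then zero the message part by hand. */
struct msg_node *new_msg_v1(void)
{
	struct msg_node *node = malloc(sizeof(*node));

	if (!node)
		return NULL;
	memset(&node->msg, 0, sizeof(node->msg));
	return node;
}

/* After: one call zeroes the whole allocation, padding included. */
struct msg_node *new_msg_v2(void)
{
	return calloc(1, sizeof(struct msg_node));
}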
drivers/vhost/vhost.h
@@ -92,13 +92,17 @@ struct vhost_virtqueue {
 	/* The routine to call when the Guest pings us, or timeout. */
 	vhost_work_fn_t handle_kick;
 
-	/* Last available index we saw. */
+	/* Last available index we saw.
+	 * Values are limited to 0x7fff, and the high bit is used as
+	 * a wrap counter when using VIRTIO_F_RING_PACKED. */
 	u16 last_avail_idx;
 
 	/* Caches available index value from user. */
 	u16 avail_idx;
 
-	/* Last index we used. */
+	/* Last index we used.
+	 * Values are limited to 0x7fff, and the high bit is used as
+	 * a wrap counter when using VIRTIO_F_RING_PACKED. */
 	u16 last_used_idx;
 
 	/* Used flags */
@@ -154,7 +158,7 @@ struct vhost_dev {
 	struct vhost_virtqueue **vqs;
 	int nvqs;
 	struct eventfd_ctx *log_ctx;
-	struct vhost_worker *worker;
+	struct vhost_worker worker;
 	struct vhost_iotlb *umem;
 	struct vhost_iotlb *iotlb;
 	spinlock_t iotlb_lock;
...
kernel/vhost_task.c
@@ -28,10 +28,6 @@ static int vhost_task_fn(void *data)
 	for (;;) {
 		bool did_work;
 
-		/* mb paired w/ vhost_task_stop */
-		if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags))
-			break;
-
 		if (!dead && signal_pending(current)) {
 			struct ksignal ksig;
 			/*
@@ -48,11 +44,17 @@ static int vhost_task_fn(void *data)
 			clear_thread_flag(TIF_SIGPENDING);
 		}
 
+		/* mb paired w/ vhost_task_stop */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) {
+			__set_current_state(TASK_RUNNING);
+			break;
+		}
+
 		did_work = vtsk->fn(vtsk->data);
-		if (!did_work) {
-			set_current_state(TASK_INTERRUPTIBLE);
+		if (!did_work)
 			schedule();
-		}
 	}
 
 	complete(&vtsk->exited);
...
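This hang fix is the classic lost-wakeup ordering: the task must advertise that it is about to sleep, via set_current_state(TASK_INTERRUPTIBLE), before it tests the stop flag and runs the work function. A vhost_task_wake() racing with those checks then puts the task back to TASK_RUNNING, so the later schedule() returns immediately instead of sleeping forever. In outline (a kernel-style sketch, with illustrative condition helpers rather than the real vhost_task fields):

/* Lost-wakeup-safe worker loop: publish sleep intent first,
 * then test conditions, then sleep. */
for (;;) {
	set_current_state(TASK_INTERRUPTIBLE);

	if (should_stop()) {		/* illustrative helper */
		__set_current_state(TASK_RUNNING);
		break;
	}

	if (!handle_pending_work())	/* illustrative helper */
		schedule();		/* no-op if a wake-up raced in */
}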
tools/virtio/ringtest/.gitignore (new file)
+# SPDX-License-Identifier: GPL-2.0-only
+/noring
+/ptr_ring
+/ring
+/virtio_ring_0_9
+/virtio_ring_inorder
+/virtio_ring_poll
tools/virtio/ringtest/main.h
@@ -8,6 +8,7 @@
 #ifndef MAIN_H
 #define MAIN_H
 
+#include <assert.h>
 #include <stdbool.h>
 
 extern int param;
@@ -95,6 +96,8 @@ extern unsigned ring_size;
 #define cpu_relax() asm ("rep; nop" ::: "memory")
 #elif defined(__s390x__)
 #define cpu_relax() barrier()
+#elif defined(__aarch64__)
+#define cpu_relax() asm ("yield" ::: "memory")
 #else
 #define cpu_relax() assert(0)
 #endif
@@ -112,6 +115,8 @@ static inline void busy_wait(void)
 
 #if defined(__x86_64__) || defined(__i386__)
 #define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
+#elif defined(__aarch64__)
+#define smp_mb() asm volatile("dmb ish" ::: "memory")
 #else
 /*
  * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
@@ -136,10 +141,16 @@ static inline void busy_wait(void)
 
 #if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
 #define smp_wmb() barrier()
+#elif defined(__aarch64__)
+#define smp_wmb() asm volatile("dmb ishst" ::: "memory")
 #else
 #define smp_wmb() smp_release()
 #endif
 
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline))
+#endif
+
 static __always_inline
 void __read_once_size(const volatile void *p, void *res, int size)
 {
...
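The new arm64 branches mirror the existing per-arch definitions: yield is the arm64 spin-loop hint, dmb ish is a full barrier over the inner shareable domain for smp_mb(), and dmb ishst orders stores only, which is all smp_wmb() needs for the publish pattern these ring tests rely on. A self-contained sketch of that pattern (the non-arm64 fallback fence below is my stand-in, not what main.h uses):

#include <stdio.h>

#if defined(__aarch64__)
#define smp_wmb() asm volatile("dmb ishst" ::: "memory")
#else
/* stand-in for other hosts; main.h selects per-arch versions */
#define smp_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)
#endif

static int payload;
static volatile int ready;

/* Producer publish pattern: the data store must be visible
 * before the flag store that announces it. */
static void publish(int v)
{
	payload = v;
	smp_wmb();
	ready = 1;
}

int main(void)
{
	publish(42);
	if (ready)
		printf("payload = %d\n", payload);
	return 0;
}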
tools/virtio/virtio-trace/README
@@ -95,7 +95,7 @@ Run
 1) Enable ftrace in the guest
  <Example>
-	# echo 1 > /sys/kernel/debug/tracing/events/sched/enable
+	# echo 1 > /sys/kernel/tracing/events/sched/enable
 
 2) Run trace agent in the guest
  This agent must be operated as root.
...
tools/virtio/virtio-trace/trace-agent.c
@@ -18,8 +18,9 @@
 #define PIPE_DEF_BUFS	16
 #define PIPE_MIN_SIZE	(PAGE_SIZE*PIPE_DEF_BUFS)
 #define PIPE_MAX_SIZE	(1024*1024)
-#define READ_PATH_FMT	\
-	"/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw"
+#define TRACEFS	"/sys/kernel/tracing"
+#define DEBUGFS	"/sys/kernel/debug/tracing"
+#define READ_PATH_FMT	"%s/per_cpu/cpu%d/trace_pipe_raw"
 #define WRITE_PATH_FMT	"/dev/virtio-ports/trace-path-cpu%d"
 #define CTL_PATH	"/dev/virtio-ports/agent-ctl-path"
@@ -120,9 +121,12 @@ static const char *make_path(int cpu_num, bool this_is_write_path)
 	if (this_is_write_path)
 		/* write(output) path */
 		ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num);
-	else
+	else {
 		/* read(input) path */
-		ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num);
+		ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, TRACEFS, cpu_num);
+		if (ret > 0 && access(buf, F_OK) != 0)
+			ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, DEBUGFS, cpu_num);
+	}
 
 	if (ret <= 0) {
 		pr_err("Failed to generate %s path(CPU#%d):%d\n",
...