Commit 7f466032 authored by Jason Wang, committed by Michael S. Tsirkin

vhost: access vq metadata through kernel virtual address

It was noticed that the copy_to/from_user() friends used to access
virtqueue metadata tend to be very expensive for a dataplane
implementation like vhost, since they involve lots of software checks,
speculation barriers and hardware feature toggling (e.g. SMAP). The
extra cost is most visible when transferring small packets, where the
time spent accessing metadata becomes more significant.

This patch tries to eliminate those overheads by accessing the metadata
through a direct kernel mapping of the pages that back it. Invalidation
callbacks are implemented to cooperate with general VM management
(swap, KSM, THP and NUMA balancing). Before each round of packet
processing we try to obtain the direct mapping of the vq metadata if it
does not already exist; if that fails, we simply fall back to the
copy_to/from_user() friends.
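
As an illustration, the read path has roughly the shape sketched below.
This is a simplified, hypothetical helper modeled on the patch's avail
index read; the name vhost_get_avail_idx_sketch and the elided error
handling are not the real code:

	static inline int vhost_get_avail_idx_sketch(struct vhost_virtqueue *vq,
						     __virtio16 *idx)
	{
		struct vhost_map *map;
		struct vring_avail *avail;

		rcu_read_lock();
		map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]);
		if (likely(map)) {
			/* Fast path: plain load through the kernel mapping */
			avail = map->addr;
			*idx = avail->idx;
			rcu_read_unlock();
			return 0;
		}
		rcu_read_unlock();

		/* Slow path: no direct mapping, use uaccess as before */
		return __get_user(*idx, &vq->avail->idx);
	}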

Invalidation and direct-map access are synchronized through a spinlock
and RCU: all metadata accesses through the direct mapping are protected
by RCU, while setup and invalidation are done under the spinlock.
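
The invalidation side then looks roughly like this sketch (same
caveats: a simplified, hypothetical helper; the real code also dirties
and unpins the pages and maintains invalidate_count):

	static void vhost_map_invalidate_sketch(struct vhost_virtqueue *vq,
						int index)
	{
		struct vhost_map *map;

		spin_lock(&vq->mmu_lock);
		map = rcu_dereference_protected(vq->maps[index],
						lockdep_is_held(&vq->mmu_lock));
		RCU_INIT_POINTER(vq->maps[index], NULL);
		spin_unlock(&vq->mmu_lock);

		if (map) {
			/* Wait for datapath readers before releasing pages */
			synchronize_rcu();
			/* ...set_page_dirty()/put_page() the pinned pages... */
		}
	}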

This method does not work for highmem pages, which require a temporary
mapping, so for those we simply fall back to the normal
copy_to/from_user() path. It also may not work on architectures with
virtually tagged caches, since extra cache flushing would be needed to
eliminate aliases, which would mean complex logic and bad performance.
For those architectures this patch simply sticks with the
copy_to/from_user() friends, by ruling out the kernel-mapping code at
build time through ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE (see the
VHOST_ARCH_CAN_ACCEL_UACCESS definition in the diff below).

Note that this is only done when the device IOTLB is not enabled. A
similar method could be used to optimize the IOTLB path in the future.

Tests show at most about a 23% improvement in TX PPS when using
virtio-user + vhost_net + xdp1 + TAP on a 2.6GHz Broadwell:

        SMAP on | SMAP off
Before: 5.2Mpps | 7.1Mpps
After:  6.4Mpps | 8.2Mpps

Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: James Bottomley <James.Bottomley@hansenpartnership.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David Miller <davem@davemloft.net>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: linux-mm@kvack.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-parisc@vger.kernel.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
parent feebcaea
drivers/vhost/vhost.h
@@ -12,6 +12,9 @@
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
 #include <linux/atomic.h>
+#include <linux/pagemap.h>
+#include <linux/mmu_notifier.h>
+#include <asm/cacheflush.h>

 struct vhost_work;
 typedef void (*vhost_work_fn_t)(struct vhost_work *work);
@@ -80,6 +83,21 @@ enum vhost_uaddr_type {
 	VHOST_NUM_ADDRS = 3,
 };

+struct vhost_map {
+	int npages;
+	void *addr;
+	struct page **pages;
+};
+
+struct vhost_uaddr {
+	unsigned long uaddr;
+	size_t size;
+	bool write;
+};
+
+#define VHOST_ARCH_CAN_ACCEL_UACCESS	defined(CONFIG_MMU_NOTIFIER) && \
+					ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0
+
 /* The virtqueue structure describes a queue attached to a device. */
 struct vhost_virtqueue {
 	struct vhost_dev *dev;
@@ -90,7 +108,22 @@ struct vhost_virtqueue {
 	struct vring_desc __user *desc;
 	struct vring_avail __user *avail;
 	struct vring_used __user *used;
+
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+	/* Read by memory accessors, modified by meta data
+	 * prefetching, MMU notifier and vring ioctl().
+	 * Synchronized through mmu_lock (writers) and RCU (writers
+	 * and readers).
+	 */
+	struct vhost_map __rcu *maps[VHOST_NUM_ADDRS];
+	/* Read by MMU notifier, modified by vring ioctl(),
+	 * synchronized through MMU notifier
+	 * registering/unregistering.
+	 */
+	struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS];
+#endif
+
 	const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
 	struct file *kick;
 	struct eventfd_ctx *call_ctx;
 	struct eventfd_ctx *error_ctx;
@@ -145,6 +178,8 @@ struct vhost_virtqueue {
 	bool user_be;
 #endif
 	u32 busyloop_timeout;
+	spinlock_t mmu_lock;
+	int invalidate_count;
 };

 struct vhost_msg_node {
@@ -158,6 +193,9 @@ struct vhost_msg_node {
 struct vhost_dev {
 	struct mm_struct *mm;
+#ifdef CONFIG_MMU_NOTIFIER
+	struct mmu_notifier mmu_notifier;
+#endif
 	struct mutex mutex;
 	struct vhost_virtqueue **vqs;
 	int nvqs;
...