Commit 108d9c71 authored by David S. Miller

Merge branch 'virtio_net-XDP-adjust_head'

John Fastabend says:

====================
XDP adjust head support for virtio

This series adds adjust head support for virtio. The following is my
test setup. I use qemu + virtio as follows,

./x86_64-softmmu/qemu-system-x86_64 \
  -hda /var/lib/libvirt/images/Fedora-test0.img \
  -m 4096  -enable-kvm -smp 2 -netdev tap,id=hn0,queues=4,vhost=on \
  -device virtio-net-pci,netdev=hn0,mq=on,guest_tso4=off,guest_tso6=off,guest_ecn=off,guest_ufo=off,vectors=9

Until LRO is supported, TSO must be turned off in the host in order to
use XDP with virtio. The important fields in the above command line
are the following,

  guest_tso4=off,guest_tso6=off,guest_ecn=off,guest_ufo=off

Also note it is possible to consume more queues than can be supported,
because when XDP is enabled it attempts to use a transmit queue per CPU
for XDP_TX (see the sketch below). My standard queue count is 'queues=4'.
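For reference, the per-CPU transmit queue selection works out to the logic
added to virtnet_xdp_xmit() in the patch below. A condensed sketch (the
helper name here is mine, not part of the driver):

  /* The last xdp_queue_pairs TX queues are reserved for XDP_TX,
   * one per CPU, on top of the queues the stack is already using.
   */
  static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
  {
          unsigned int qp = vi->curr_queue_pairs - vi->xdp_queue_pairs +
                            smp_processor_id();

          return &vi->sq[qp];
  }

This is also why attaching an XDP program can fail if the device does not
expose enough queue pairs for one extra TX queue per CPU.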

After loading the VM I run the relevant XDP test programs in,

  ./samples/bpf

For this series I tested xdp1, xdp2, and xdp_tx_iptunnel. I usually test
with iperf (-d option to get bidirectional traffic), ping, and pktgen.
I also have a modified xdp1 that returns XDP_PASS on any packet to ensure
the normal traffic path to the stack continues to work with XDP loaded.
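For completeness, here is a minimal XDP program in the spirit of those
tests that exercises the new headroom via bpf_xdp_adjust_head(). It is
only a sketch: the section name, include paths and the 16-byte reservation
are illustrative, not part of the series.

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  SEC("xdp")
  int xdp_adjust_head_demo(struct xdp_md *ctx)
  {
          void *data, *data_end;

          /* Claim 16 bytes of the headroom the driver now reserves
           * (VIRTIO_XDP_HEADROOM); fails if no headroom is available.
           */
          if (bpf_xdp_adjust_head(ctx, -16))
                  return XDP_DROP;

          data = (void *)(long)ctx->data;
          data_end = (void *)(long)ctx->data_end;
          if (data + 16 > data_end)
                  return XDP_DROP;

          /* A real program (xdp_tx_iptunnel, for example) would build an
           * encapsulation header here and return XDP_TX; this sketch just
           * passes the packet up the stack like the modified xdp1.
           */
          return XDP_PASS;
  }

  char _license[] SEC("license") = "GPL";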

It would be great to automate this soon. At the moment I do it by hand,
which is starting to get tedious.

v2: original series dropped trace points after merge.
====================
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 152bff37 2de2f7f4
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -41,6 +41,9 @@ module_param(gso, bool, 0444);
 #define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
 #define GOOD_COPY_LEN	128
 
+/* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
+#define VIRTIO_XDP_HEADROOM 256
+
 /* RX packet size EWMA. The average packet size is used to determine the packet
  * buffer size when refilling RX rings. As the entire RX ring may be refilled
  * at once, the weight is chosen so that the EWMA will be insensitive to short-
@@ -340,15 +343,19 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 
 static bool virtnet_xdp_xmit(struct virtnet_info *vi,
                             struct receive_queue *rq,
-                            struct send_queue *sq,
                             struct xdp_buff *xdp,
                             void *data)
 {
        struct virtio_net_hdr_mrg_rxbuf *hdr;
        unsigned int num_sg, len;
+       struct send_queue *sq;
+       unsigned int qp;
        void *xdp_sent;
        int err;
 
+       qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+       sq = &vi->sq[qp];
+
        /* Free up any pending old buffers before queueing new ones. */
        while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
                if (vi->mergeable_rx_bufs) {
@@ -363,6 +370,7 @@ static bool virtnet_xdp_xmit(struct virtnet_info *vi,
        }
 
        if (vi->mergeable_rx_bufs) {
+               xdp->data -= sizeof(struct virtio_net_hdr_mrg_rxbuf);
                /* Zero header and leave csum up to XDP layers */
                hdr = xdp->data;
                memset(hdr, 0, vi->hdr_len);
@@ -379,7 +387,9 @@ static bool virtnet_xdp_xmit(struct virtnet_info *vi,
                num_sg = 2;
                sg_init_table(sq->sg, 2);
                sg_set_buf(sq->sg, hdr, vi->hdr_len);
-               skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
+               skb_to_sgvec(skb, sq->sg + 1,
+                            xdp->data - xdp->data_hard_start,
+                            xdp->data_end - xdp->data);
        }
        err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
                                   data, GFP_ATOMIC);
@@ -398,52 +408,6 @@ static bool virtnet_xdp_xmit(struct virtnet_info *vi,
        return true;
 }
 
-static u32 do_xdp_prog(struct virtnet_info *vi,
-                      struct receive_queue *rq,
-                      struct bpf_prog *xdp_prog,
-                      void *data, int len)
-{
-       int hdr_padded_len;
-       struct xdp_buff xdp;
-       void *buf;
-       unsigned int qp;
-       u32 act;
-
-       if (vi->mergeable_rx_bufs) {
-               hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
-               xdp.data = data + hdr_padded_len;
-               xdp.data_end = xdp.data + (len - vi->hdr_len);
-               buf = data;
-       } else { /* small buffers */
-               struct sk_buff *skb = data;
-
-               xdp.data = skb->data;
-               xdp.data_end = xdp.data + len;
-               buf = skb->data;
-       }
-
-       act = bpf_prog_run_xdp(xdp_prog, &xdp);
-       switch (act) {
-       case XDP_PASS:
-               return XDP_PASS;
-       case XDP_TX:
-               qp = vi->curr_queue_pairs -
-                       vi->xdp_queue_pairs +
-                       smp_processor_id();
-               xdp.data = buf;
-               if (unlikely(!virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp,
-                                              data)))
-                       trace_xdp_exception(vi->dev, xdp_prog, act);
-               return XDP_TX;
-       default:
-               bpf_warn_invalid_xdp_action(act);
-       case XDP_ABORTED:
-               trace_xdp_exception(vi->dev, xdp_prog, act);
-       case XDP_DROP:
-               return XDP_DROP;
-       }
-}
-
 static struct sk_buff *receive_small(struct net_device *dev,
                                     struct virtnet_info *vi,
                                     struct receive_queue *rq,
@@ -453,30 +417,44 @@ static struct sk_buff *receive_small(struct net_device *dev,
        struct bpf_prog *xdp_prog;
 
        len -= vi->hdr_len;
-       skb_trim(skb, len);
 
        rcu_read_lock();
        xdp_prog = rcu_dereference(rq->xdp_prog);
        if (xdp_prog) {
                struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
+               struct xdp_buff xdp;
                u32 act;
 
                if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
                        goto err_xdp;
-               act = do_xdp_prog(vi, rq, xdp_prog, skb, len);
+
+               xdp.data_hard_start = skb->data;
+               xdp.data = skb->data + VIRTIO_XDP_HEADROOM;
+               xdp.data_end = xdp.data + len;
+               act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
                switch (act) {
                case XDP_PASS:
+                       /* Recalculate length in case bpf program changed it */
+                       __skb_pull(skb, xdp.data - xdp.data_hard_start);
+                       len = xdp.data_end - xdp.data;
                        break;
                case XDP_TX:
+                       if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp, skb)))
+                               trace_xdp_exception(vi->dev, xdp_prog, act);
                        rcu_read_unlock();
                        goto xdp_xmit;
-               case XDP_DROP:
                default:
+                       bpf_warn_invalid_xdp_action(act);
+               case XDP_ABORTED:
+                       trace_xdp_exception(vi->dev, xdp_prog, act);
+               case XDP_DROP:
                        goto err_xdp;
                }
        }
        rcu_read_unlock();
 
+       skb_trim(skb, len);
        return skb;
 
 err_xdp:
@@ -525,7 +503,7 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
                                       unsigned int *len)
 {
        struct page *page = alloc_page(GFP_ATOMIC);
-       unsigned int page_off = 0;
+       unsigned int page_off = VIRTIO_XDP_HEADROOM;
 
        if (!page)
                return NULL;
@@ -561,7 +539,8 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
                put_page(p);
        }
 
-       *len = page_off;
+       /* Headroom does not contribute to packet length */
+       *len = page_off - VIRTIO_XDP_HEADROOM;
        return page;
 err_buf:
        __free_pages(page, 0);
@@ -589,6 +568,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
        xdp_prog = rcu_dereference(rq->xdp_prog);
        if (xdp_prog) {
                struct page *xdp_page;
+               struct xdp_buff xdp;
+               void *data;
                u32 act;
 
                /* This happens when rx buffer size is underestimated */
@@ -598,7 +579,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                                                      page, offset, &len);
                        if (!xdp_page)
                                goto err_xdp;
-                       offset = 0;
+                       offset = VIRTIO_XDP_HEADROOM;
                } else {
                        xdp_page = page;
                }
@@ -611,28 +592,47 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                if (unlikely(hdr->hdr.gso_type))
                        goto err_xdp;
 
-               act = do_xdp_prog(vi, rq, xdp_prog,
-                                 page_address(xdp_page) + offset, len);
+               /* Allow consuming headroom but reserve enough space to push
+                * the descriptor on if we get an XDP_TX return code.
+                */
+               data = page_address(xdp_page) + offset;
+               xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
+               xdp.data = data + vi->hdr_len;
+               xdp.data_end = xdp.data + (len - vi->hdr_len);
+               act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
                switch (act) {
                case XDP_PASS:
+                       /* recalculate offset to account for any header
+                        * adjustments. Note other cases do not build an
+                        * skb and avoid using offset
+                        */
+                       offset = xdp.data -
+                                       page_address(xdp_page) - vi->hdr_len;
+
                        /* We can only create skb based on xdp_page. */
                        if (unlikely(xdp_page != page)) {
                                rcu_read_unlock();
                                put_page(page);
                                head_skb = page_to_skb(vi, rq, xdp_page,
-                                                      0, len, PAGE_SIZE);
+                                                      offset, len, PAGE_SIZE);
                                ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
                                return head_skb;
                        }
                        break;
                case XDP_TX:
+                       if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp, data)))
+                               trace_xdp_exception(vi->dev, xdp_prog, act);
                        ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
                        if (unlikely(xdp_page != page))
                                goto err_xdp;
                        rcu_read_unlock();
                        goto xdp_xmit;
-               case XDP_DROP:
                default:
+                       bpf_warn_invalid_xdp_action(act);
+               case XDP_ABORTED:
+                       trace_xdp_exception(vi->dev, xdp_prog, act);
+               case XDP_DROP:
                        if (unlikely(xdp_page != page))
                                __free_pages(xdp_page, 0);
                        ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
@@ -782,23 +782,30 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
        dev_kfree_skb(skb);
 }
 
+static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
+{
+       return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
+}
+
 static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
                             gfp_t gfp)
 {
+       int headroom = GOOD_PACKET_LEN + virtnet_get_headroom(vi);
+       unsigned int xdp_headroom = virtnet_get_headroom(vi);
        struct sk_buff *skb;
        struct virtio_net_hdr_mrg_rxbuf *hdr;
        int err;
 
-       skb = __netdev_alloc_skb_ip_align(vi->dev, GOOD_PACKET_LEN, gfp);
+       skb = __netdev_alloc_skb_ip_align(vi->dev, headroom, gfp);
        if (unlikely(!skb))
                return -ENOMEM;
 
-       skb_put(skb, GOOD_PACKET_LEN);
+       skb_put(skb, headroom);
 
        hdr = skb_vnet_hdr(skb);
        sg_init_table(rq->sg, 2);
        sg_set_buf(rq->sg, hdr, vi->hdr_len);
-       skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
+       skb_to_sgvec(skb, rq->sg + 1, xdp_headroom, skb->len - xdp_headroom);
 
        err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp);
        if (err < 0)
@@ -866,24 +873,27 @@ static unsigned int get_mergeable_buf_len(struct ewma_pkt_len *avg_pkt_len)
        return ALIGN(len, MERGEABLE_BUFFER_ALIGN);
 }
 
-static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
+static int add_recvbuf_mergeable(struct virtnet_info *vi,
+                                struct receive_queue *rq, gfp_t gfp)
 {
        struct page_frag *alloc_frag = &rq->alloc_frag;
+       unsigned int headroom = virtnet_get_headroom(vi);
        char *buf;
        unsigned long ctx;
        int err;
        unsigned int len, hole;
 
        len = get_mergeable_buf_len(&rq->mrg_avg_pkt_len);
-       if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
+       if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
                return -ENOMEM;
 
        buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+       buf += headroom; /* advance address leaving hole at front of pkt */
        ctx = mergeable_buf_to_ctx(buf, len);
        get_page(alloc_frag->page);
-       alloc_frag->offset += len;
+       alloc_frag->offset += len + headroom;
        hole = alloc_frag->size - alloc_frag->offset;
-       if (hole < len) {
+       if (hole < len + headroom) {
                /* To avoid internal fragmentation, if there is very likely not
                 * enough space for another buffer, add the remaining space to
                 * the current buffer. This extra space is not included in
@@ -917,7 +927,7 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
        gfp |= __GFP_COLD;
        do {
                if (vi->mergeable_rx_bufs)
-                       err = add_recvbuf_mergeable(rq, gfp);
+                       err = add_recvbuf_mergeable(vi, rq, gfp);
                else if (vi->big_packets)
                        err = add_recvbuf_big(vi, rq, gfp);
                else
@@ -1305,7 +1315,7 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi)
        rtnl_unlock();
 }
 
-static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
+static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
 {
        struct scatterlist sg;
        struct net_device *dev = vi->dev;
@@ -1331,6 +1341,16 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
        return 0;
 }
 
+static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
+{
+       int err;
+
+       rtnl_lock();
+       err = _virtnet_set_queues(vi, queue_pairs);
+       rtnl_unlock();
+       return err;
+}
+
 static int virtnet_close(struct net_device *dev)
 {
        struct virtnet_info *vi = netdev_priv(dev);
@@ -1583,7 +1603,7 @@ static int virtnet_set_channels(struct net_device *dev,
                return -EINVAL;
 
        get_online_cpus();
-       err = virtnet_set_queues(vi, queue_pairs);
+       err = _virtnet_set_queues(vi, queue_pairs);
        if (!err) {
                netif_set_real_num_tx_queues(dev, queue_pairs);
                netif_set_real_num_rx_queues(dev, queue_pairs);
@@ -1673,19 +1693,91 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
        .set_settings = virtnet_set_settings,
 };
 
+static void virtnet_freeze_down(struct virtio_device *vdev)
+{
+       struct virtnet_info *vi = vdev->priv;
+       int i;
+
+       /* Make sure no work handler is accessing the device */
+       flush_work(&vi->config_work);
+
+       netif_device_detach(vi->dev);
+       cancel_delayed_work_sync(&vi->refill);
+
+       if (netif_running(vi->dev)) {
+               for (i = 0; i < vi->max_queue_pairs; i++)
+                       napi_disable(&vi->rq[i].napi);
+       }
+}
+
+static int init_vqs(struct virtnet_info *vi);
+static void _remove_vq_common(struct virtnet_info *vi);
+
+static int virtnet_restore_up(struct virtio_device *vdev)
+{
+       struct virtnet_info *vi = vdev->priv;
+       int err, i;
+
+       err = init_vqs(vi);
+       if (err)
+               return err;
+
+       virtio_device_ready(vdev);
+
+       if (netif_running(vi->dev)) {
+               for (i = 0; i < vi->curr_queue_pairs; i++)
+                       if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
+                               schedule_delayed_work(&vi->refill, 0);
+
+               for (i = 0; i < vi->max_queue_pairs; i++)
+                       virtnet_napi_enable(&vi->rq[i]);
+       }
+
+       netif_device_attach(vi->dev);
+       return err;
+}
+
+static int virtnet_reset(struct virtnet_info *vi)
+{
+       struct virtio_device *dev = vi->vdev;
+       int ret;
+
+       virtio_config_disable(dev);
+       dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
+       virtnet_freeze_down(dev);
+       _remove_vq_common(vi);
+
+       dev->config->reset(dev);
+       virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+       virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
+
+       ret = virtio_finalize_features(dev);
+       if (ret)
+               goto err;
+
+       ret = virtnet_restore_up(dev);
+       if (ret)
+               goto err;
+
+       ret = _virtnet_set_queues(vi, vi->curr_queue_pairs);
+       if (ret)
+               goto err;
+
+       virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+       virtio_config_enable(dev);
+       return 0;
+
+err:
+       virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
+       return ret;
+}
+
 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 {
        unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
        struct virtnet_info *vi = netdev_priv(dev);
        struct bpf_prog *old_prog;
-       u16 xdp_qp = 0, curr_qp;
+       u16 oxdp_qp, xdp_qp = 0, curr_qp;
        int i, err;
 
-       if (prog && prog->xdp_adjust_head) {
-               netdev_warn(dev, "Does not support bpf_xdp_adjust_head()\n");
-               return -EOPNOTSUPP;
-       }
-
        if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
            virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
            virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
@@ -1715,21 +1807,32 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
                return -ENOMEM;
        }
 
-       err = virtnet_set_queues(vi, curr_qp + xdp_qp);
+       if (prog) {
+               prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
+               if (IS_ERR(prog))
+                       return PTR_ERR(prog);
+       }
+
+       err = _virtnet_set_queues(vi, curr_qp + xdp_qp);
        if (err) {
                dev_warn(&dev->dev, "XDP Device queue allocation failure.\n");
-               return err;
+               goto virtio_queue_err;
        }
 
-       if (prog) {
-               prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
-               if (IS_ERR(prog)) {
-                       virtnet_set_queues(vi, curr_qp);
-                       return PTR_ERR(prog);
-               }
-       }
+       oxdp_qp = vi->xdp_queue_pairs;
+
+       /* Changing the headroom in buffers is a disruptive operation because
+        * existing buffers must be flushed and reallocated. This will happen
+        * when a xdp program is initially added or xdp is disabled by removing
+        * the xdp program resulting in number of XDP queues changing.
+        */
+       if (vi->xdp_queue_pairs != xdp_qp) {
+               vi->xdp_queue_pairs = xdp_qp;
+               err = virtnet_reset(vi);
+               if (err)
+                       goto virtio_reset_err;
+       }
 
-       vi->xdp_queue_pairs = xdp_qp;
        netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
 
        for (i = 0; i < vi->max_queue_pairs; i++) {
@@ -1740,6 +1843,21 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
        }
 
        return 0;
+
+virtio_reset_err:
+       /* On reset error do our best to unwind XDP changes inflight and return
+        * error up to user space for resolution. The underlying reset hung on
+        * us so not much we can do here.
+        */
+       dev_warn(&dev->dev, "XDP reset failure and queues unstable\n");
+       vi->xdp_queue_pairs = oxdp_qp;
+virtio_queue_err:
+       /* On queue set error we can unwind bpf ref count and user space can
+        * retry this is most likely an allocation failure.
+        */
+       if (prog)
+               bpf_prog_sub(prog, vi->max_queue_pairs - 1);
+       return err;
 }
 
 static bool virtnet_xdp_query(struct net_device *dev)
@@ -1840,12 +1958,11 @@ static void virtnet_free_queues(struct virtnet_info *vi)
        kfree(vi->sq);
 }
 
-static void free_receive_bufs(struct virtnet_info *vi)
+static void _free_receive_bufs(struct virtnet_info *vi)
 {
        struct bpf_prog *old_prog;
        int i;
 
-       rtnl_lock();
        for (i = 0; i < vi->max_queue_pairs; i++) {
                while (vi->rq[i].pages)
                        __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
@@ -1855,6 +1972,12 @@ static void free_receive_bufs(struct virtnet_info *vi)
                if (old_prog)
                        bpf_prog_put(old_prog);
        }
+}
+
+static void free_receive_bufs(struct virtnet_info *vi)
+{
+       rtnl_lock();
+       _free_receive_bufs(vi);
        rtnl_unlock();
 }
 
@@ -2293,9 +2416,7 @@ static int virtnet_probe(struct virtio_device *vdev)
                goto free_unregister_netdev;
        }
 
-       rtnl_lock();
        virtnet_set_queues(vi, vi->curr_queue_pairs);
-       rtnl_unlock();
 
        /* Assume link up if device can't report link status,
           otherwise get link status from config. */
@@ -2327,6 +2448,15 @@ static int virtnet_probe(struct virtio_device *vdev)
        return err;
 }
 
+static void _remove_vq_common(struct virtnet_info *vi)
+{
+       vi->vdev->config->reset(vi->vdev);
+       free_unused_bufs(vi);
+       _free_receive_bufs(vi);
+       free_receive_page_frags(vi);
+       virtnet_del_vqs(vi);
+}
+
 static void remove_vq_common(struct virtnet_info *vi)
 {
        vi->vdev->config->reset(vi->vdev);
@@ -2362,21 +2492,9 @@ static void virtnet_remove(struct virtio_device *vdev)
 static int virtnet_freeze(struct virtio_device *vdev)
 {
        struct virtnet_info *vi = vdev->priv;
-       int i;
 
        virtnet_cpu_notif_remove(vi);
-
-       /* Make sure no work handler is accessing the device */
-       flush_work(&vi->config_work);
-
-       netif_device_detach(vi->dev);
-       cancel_delayed_work_sync(&vi->refill);
-
-       if (netif_running(vi->dev)) {
-               for (i = 0; i < vi->max_queue_pairs; i++)
-                       napi_disable(&vi->rq[i].napi);
-       }
-
+       virtnet_freeze_down(vdev);
        remove_vq_common(vi);
 
        return 0;
@@ -2385,28 +2503,12 @@ static int virtnet_freeze(struct virtio_device *vdev)
 static int virtnet_restore(struct virtio_device *vdev)
 {
        struct virtnet_info *vi = vdev->priv;
-       int err, i;
+       int err;
 
-       err = init_vqs(vi);
+       err = virtnet_restore_up(vdev);
        if (err)
                return err;
 
-       virtio_device_ready(vdev);
-
-       if (netif_running(vi->dev)) {
-               for (i = 0; i < vi->curr_queue_pairs; i++)
-                       if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
-                               schedule_delayed_work(&vi->refill, 0);
-
-               for (i = 0; i < vi->max_queue_pairs; i++)
-                       virtnet_napi_enable(&vi->rq[i]);
-       }
-
-       netif_device_attach(vi->dev);
-
-       rtnl_lock();
        virtnet_set_queues(vi, vi->curr_queue_pairs);
-       rtnl_unlock();
 
        err = virtnet_cpu_notif_add(vi);
        if (err)
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -100,11 +100,6 @@ static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env)
                              dev->id.device, dev->id.vendor);
 }
 
-static void add_status(struct virtio_device *dev, unsigned status)
-{
-       dev->config->set_status(dev, dev->config->get_status(dev) | status);
-}
-
 void virtio_check_driver_offered_feature(const struct virtio_device *vdev,
                                         unsigned int fbit)
 {
@@ -145,14 +140,15 @@ void virtio_config_changed(struct virtio_device *dev)
 }
 EXPORT_SYMBOL_GPL(virtio_config_changed);
 
-static void virtio_config_disable(struct virtio_device *dev)
+void virtio_config_disable(struct virtio_device *dev)
 {
        spin_lock_irq(&dev->config_lock);
        dev->config_enabled = false;
        spin_unlock_irq(&dev->config_lock);
 }
+EXPORT_SYMBOL_GPL(virtio_config_disable);
 
-static void virtio_config_enable(struct virtio_device *dev)
+void virtio_config_enable(struct virtio_device *dev)
 {
        spin_lock_irq(&dev->config_lock);
        dev->config_enabled = true;
@@ -161,8 +157,15 @@ static void virtio_config_enable(struct virtio_device *dev)
        dev->config_change_pending = false;
        spin_unlock_irq(&dev->config_lock);
 }
+EXPORT_SYMBOL_GPL(virtio_config_enable);
+
+void virtio_add_status(struct virtio_device *dev, unsigned int status)
+{
+       dev->config->set_status(dev, dev->config->get_status(dev) | status);
+}
+EXPORT_SYMBOL_GPL(virtio_add_status);
 
-static int virtio_finalize_features(struct virtio_device *dev)
+int virtio_finalize_features(struct virtio_device *dev)
 {
        int ret = dev->config->finalize_features(dev);
        unsigned status;
@@ -173,7 +176,7 @@ static int virtio_finalize_features(struct virtio_device *dev)
        if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1))
                return 0;
 
-       add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
+       virtio_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
        status = dev->config->get_status(dev);
        if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
                dev_err(&dev->dev, "virtio: device refuses features: %x\n",
@@ -182,6 +185,7 @@ static int virtio_finalize_features(struct virtio_device *dev)
        }
        return 0;
 }
+EXPORT_SYMBOL_GPL(virtio_finalize_features);
 
 static int virtio_dev_probe(struct device *_d)
 {
@@ -193,7 +197,7 @@ static int virtio_dev_probe(struct device *_d)
        u64 driver_features_legacy;
 
        /* We have a driver! */
-       add_status(dev, VIRTIO_CONFIG_S_DRIVER);
+       virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
 
        /* Figure out what features the device supports. */
        device_features = dev->config->get_features(dev);
@@ -247,7 +251,7 @@ static int virtio_dev_probe(struct device *_d)
        return 0;
 
 err:
-       add_status(dev, VIRTIO_CONFIG_S_FAILED);
+       virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
        return err;
 
 }
@@ -265,7 +269,7 @@ static int virtio_dev_remove(struct device *_d)
        WARN_ON_ONCE(dev->config->get_status(dev));
 
        /* Acknowledge the device's existence again. */
-       add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+       virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
        return 0;
 }
@@ -316,7 +320,7 @@ int register_virtio_device(struct virtio_device *dev)
        dev->config->reset(dev);
 
        /* Acknowledge that we've seen the device. */
-       add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+       virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
 
        INIT_LIST_HEAD(&dev->vqs);
@@ -325,7 +329,7 @@ int register_virtio_device(struct virtio_device *dev)
        err = device_register(&dev->dev);
 out:
        if (err)
-               add_status(dev, VIRTIO_CONFIG_S_FAILED);
+               virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
        return err;
 }
 EXPORT_SYMBOL_GPL(register_virtio_device);
@@ -365,18 +369,18 @@ int virtio_device_restore(struct virtio_device *dev)
        dev->config->reset(dev);
 
        /* Acknowledge that we've seen the device. */
-       add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+       virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
 
        /* Maybe driver failed before freeze.
         * Restore the failed status, for debugging. */
        if (dev->failed)
-               add_status(dev, VIRTIO_CONFIG_S_FAILED);
+               virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
 
        if (!drv)
                return 0;
 
        /* We have a driver! */
-       add_status(dev, VIRTIO_CONFIG_S_DRIVER);
+       virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
 
        ret = virtio_finalize_features(dev);
        if (ret)
@@ -389,14 +393,14 @@ int virtio_device_restore(struct virtio_device *dev)
        }
 
        /* Finally, tell the device we're all set */
-       add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
+       virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
 
        virtio_config_enable(dev);
 
        return 0;
 
 err:
-       add_status(dev, VIRTIO_CONFIG_S_FAILED);
+       virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
        return ret;
 }
 EXPORT_SYMBOL_GPL(virtio_device_restore);
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -132,12 +132,16 @@ static inline struct virtio_device *dev_to_virtio(struct device *_dev)
        return container_of(_dev, struct virtio_device, dev);
 }
 
+void virtio_add_status(struct virtio_device *dev, unsigned int status);
 int register_virtio_device(struct virtio_device *dev);
 void unregister_virtio_device(struct virtio_device *dev);
 
 void virtio_break_device(struct virtio_device *dev);
 
 void virtio_config_changed(struct virtio_device *dev);
+void virtio_config_disable(struct virtio_device *dev);
+void virtio_config_enable(struct virtio_device *dev);
+int virtio_finalize_features(struct virtio_device *dev);
 #ifdef CONFIG_PM_SLEEP
 int virtio_device_freeze(struct virtio_device *dev);
 int virtio_device_restore(struct virtio_device *dev);