Commit fd0bfa8d authored by Daniel Borkmann

Merge branch 'bpf-af-xdp-cleanups'

Björn Töpel says:

====================
This is the second follow-up set. The first four patches are uapi
changes:

* Removing rebind support
* Getting rid of structure hole
* Removing explicit cache line alignment
* Stricter bind checks

The remaining patches are cleanups; the umem and refcount_t changes
were suggested by Daniel.

* Add a missing write-barrier and use READ_ONCE for data-dependencies
* Clean up umem and do proper locking
* Convert atomic_t to refcount_t
====================
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents d849f9f9 d3b42f14
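
For reference, here is a minimal userspace sketch of the ring setup that the new
XDP_MMAP_OFFSETS option enables (an illustration only, not part of the applied
patches; the socket descriptor sfd and the FQ_NUM_DESCS constant are borrowed
from the xdpsock sample further down, and error handling is omitted). Instead of
hard-coding a ring layout from the uapi header, the application asks the kernel
for the producer/consumer/descriptor offsets and resolves them against the
mmapped area:

    struct xdp_mmap_offsets off;
    socklen_t optlen = sizeof(off);
    __u32 *fq_prod, *fq_cons, *fq_ring;
    void *map;

    /* Query the ring layout chosen by the kernel (sfd is an AF_XDP socket
     * on which XDP_UMEM_REG and XDP_UMEM_FILL_RING have already been set).
     */
    getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);

    /* Map the umem fill ring and locate its parts via the offsets. */
    map = mmap(NULL, off.fr.desc + FQ_NUM_DESCS * sizeof(__u32),
               PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
               sfd, XDP_UMEM_PGOFF_FILL_RING);

    fq_prod = (__u32 *)((char *)map + off.fr.producer);
    fq_cons = (__u32 *)((char *)map + off.fr.consumer);
    fq_ring = (__u32 *)((char *)map + off.fr.desc);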
@@ -17,19 +17,33 @@
 struct sockaddr_xdp {
 	__u16 sxdp_family;
+	__u16 sxdp_flags;
 	__u32 sxdp_ifindex;
 	__u32 sxdp_queue_id;
 	__u32 sxdp_shared_umem_fd;
-	__u16 sxdp_flags;
+};
+
+struct xdp_ring_offset {
+	__u64 producer;
+	__u64 consumer;
+	__u64 desc;
+};
+
+struct xdp_mmap_offsets {
+	struct xdp_ring_offset rx;
+	struct xdp_ring_offset tx;
+	struct xdp_ring_offset fr; /* Fill */
+	struct xdp_ring_offset cr; /* Completion */
 };
 
 /* XDP socket options */
-#define XDP_RX_RING			1
-#define XDP_TX_RING			2
-#define XDP_UMEM_REG			3
-#define XDP_UMEM_FILL_RING		4
-#define XDP_UMEM_COMPLETION_RING	5
-#define XDP_STATISTICS			6
+#define XDP_MMAP_OFFSETS		1
+#define XDP_RX_RING			2
+#define XDP_TX_RING			3
+#define XDP_UMEM_REG			4
+#define XDP_UMEM_FILL_RING		5
+#define XDP_UMEM_COMPLETION_RING	6
+#define XDP_STATISTICS			7
 
 struct xdp_umem_reg {
 	__u64 addr; /* Start of packet data area */
@@ -50,6 +64,7 @@ struct xdp_statistics {
 #define XDP_UMEM_PGOFF_FILL_RING	0x100000000
 #define XDP_UMEM_PGOFF_COMPLETION_RING	0x180000000
 
+/* Rx/Tx descriptor */
 struct xdp_desc {
 	__u32 idx;
 	__u32 len;
@@ -58,21 +73,6 @@ struct xdp_desc {
 	__u8 padding[5];
 };
 
-struct xdp_ring {
-	__u32 producer __attribute__((aligned(64)));
-	__u32 consumer __attribute__((aligned(64)));
-};
-
-/* Used for the RX and TX queues for packets */
-struct xdp_rxtx_ring {
-	struct xdp_ring ptrs;
-	struct xdp_desc desc[0] __attribute__((aligned(64)));
-};
-
-/* Used for the fill and completion queues for buffers */
-struct xdp_umem_ring {
-	struct xdp_ring ptrs;
-	__u32 desc[0] __attribute__((aligned(64)));
-};
+/* UMEM descriptor is __u32 */
 
 #endif /* _LINUX_IF_XDP_H */
@@ -16,21 +16,10 @@
 
 #define XDP_UMEM_MIN_FRAME_SIZE 2048
 
-int xdp_umem_create(struct xdp_umem **umem)
-{
-	*umem = kzalloc(sizeof(**umem), GFP_KERNEL);
-
-	if (!*umem)
-		return -ENOMEM;
-
-	return 0;
-}
-
 static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 {
 	unsigned int i;
 
-	if (umem->pgs) {
-		for (i = 0; i < umem->npgs; i++) {
-			struct page *page = umem->pgs[i];
+	for (i = 0; i < umem->npgs; i++) {
+		struct page *page = umem->pgs[i];
@@ -40,15 +29,12 @@ static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 	kfree(umem->pgs);
 	umem->pgs = NULL;
-	}
 }
 
 static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
 {
-	if (umem->user) {
-		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
-		free_uid(umem->user);
-	}
+	atomic_long_sub(umem->npgs, &umem->user->locked_vm);
+	free_uid(umem->user);
 }
 
 static void xdp_umem_release(struct xdp_umem *umem)
@@ -66,7 +52,6 @@ static void xdp_umem_release(struct xdp_umem *umem)
 		umem->cq = NULL;
 	}
 
-	if (umem->pgs) {
-		xdp_umem_unpin_pages(umem);
+	xdp_umem_unpin_pages(umem);
 
 	task = get_pid_task(umem->pid, PIDTYPE_PID);
@@ -79,9 +64,6 @@ static void xdp_umem_release(struct xdp_umem *umem)
 		goto out;
 
 	mmput(mm);
-		umem->pgs = NULL;
-	}
-
 	xdp_umem_unaccount_pages(umem);
 
 out:
 	kfree(umem);
@@ -96,7 +78,7 @@ static void xdp_umem_release_deferred(struct work_struct *work)
 void xdp_get_umem(struct xdp_umem *umem)
 {
-	atomic_inc(&umem->users);
+	refcount_inc(&umem->users);
 }
 
 void xdp_put_umem(struct xdp_umem *umem)
@@ -104,7 +86,7 @@ void xdp_put_umem(struct xdp_umem *umem)
 	if (!umem)
 		return;
 
-	if (atomic_dec_and_test(&umem->users)) {
+	if (refcount_dec_and_test(&umem->users)) {
 		INIT_WORK(&umem->work, xdp_umem_release_deferred);
 		schedule_work(&umem->work);
 	}
@@ -167,16 +149,13 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
 	return 0;
 }
 
-int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
+static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 {
 	u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
 	u64 addr = mr->addr, size = mr->len;
 	unsigned int nframes, nfpp;
 	int size_chk, err;
 
-	if (!umem)
-		return -EINVAL;
-
 	if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
 		/* Strictly speaking we could support this, if:
 		 * - huge pages, or*
@@ -227,7 +206,7 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	umem->frame_size_log2 = ilog2(frame_size);
 	umem->nfpp_mask = nfpp - 1;
 	umem->nfpplog2 = ilog2(nfpp);
-	atomic_set(&umem->users, 1);
+	refcount_set(&umem->users, 1);
 
 	err = xdp_umem_account_pages(umem);
 	if (err)
@@ -245,6 +224,24 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	return err;
 }
 
+struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
+{
+	struct xdp_umem *umem;
+	int err;
+
+	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+	if (!umem)
+		return ERR_PTR(-ENOMEM);
+
+	err = xdp_umem_reg(umem, mr);
+	if (err) {
+		kfree(umem);
+		return ERR_PTR(err);
+	}
+
+	return umem;
+}
+
 bool xdp_umem_validate_queues(struct xdp_umem *umem)
 {
 	return umem->fq && umem->cq;
......
@@ -27,7 +27,7 @@ struct xdp_umem {
 	struct pid *pid;
 	unsigned long address;
 	size_t size;
-	atomic_t users;
+	refcount_t users;
 	struct work_struct work;
 };
@@ -50,9 +50,8 @@ static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem,
 }
 
 bool xdp_umem_validate_queues(struct xdp_umem *umem);
-int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
 void xdp_get_umem(struct xdp_umem *umem);
 void xdp_put_umem(struct xdp_umem *umem);
-int xdp_umem_create(struct xdp_umem **umem);
+struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
 
 #endif /* XDP_UMEM_H_ */
@@ -142,6 +142,11 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 			goto out;
 		}
 
+		if (xs->queue_id >= xs->dev->real_num_tx_queues) {
+			err = -ENXIO;
+			goto out;
+		}
+
 		skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
 		if (unlikely(!skb)) {
 			err = -EAGAIN;
@@ -223,18 +228,12 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
 	if (!q)
 		return -ENOMEM;
 
+	/* Make sure queue is ready before it can be seen by others */
+	smp_wmb();
 	*queue = q;
 	return 0;
 }
 
-static void __xsk_release(struct xdp_sock *xs)
-{
-	/* Wait for driver to stop using the xdp socket. */
-	synchronize_net();
-	dev_put(xs->dev);
-}
-
 static int xsk_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
@@ -251,7 +250,9 @@ static int xsk_release(struct socket *sock)
 	local_bh_enable();
 
 	if (xs->dev) {
-		__xsk_release(xs);
+		/* Wait for driver to stop using the xdp socket. */
+		synchronize_net();
+		dev_put(xs->dev);
 		xs->dev = NULL;
 	}
@@ -285,9 +286,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 {
 	struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
 	struct sock *sk = sock->sk;
-	struct net_device *dev, *dev_curr;
 	struct xdp_sock *xs = xdp_sk(sk);
-	struct xdp_umem *old_umem = NULL;
+	struct net_device *dev;
 	int err = 0;
 
 	if (addr_len < sizeof(struct sockaddr_xdp))
@@ -296,7 +296,11 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		return -EINVAL;
 
 	mutex_lock(&xs->mutex);
-	dev_curr = xs->dev;
+	if (xs->dev) {
+		err = -EBUSY;
+		goto out_release;
+	}
+
 	dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
 	if (!dev) {
 		err = -ENODEV;
@@ -308,7 +312,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		goto out_unlock;
 	}
 
-	if (sxdp->sxdp_queue_id >= dev->num_rx_queues) {
+	if ((xs->rx && sxdp->sxdp_queue_id >= dev->real_num_rx_queues) ||
+	    (xs->tx && sxdp->sxdp_queue_id >= dev->real_num_tx_queues)) {
 		err = -EINVAL;
 		goto out_unlock;
 	}
@@ -343,7 +348,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		}
 
 		xdp_get_umem(umem_xs->umem);
-		old_umem = xs->umem;
 		xs->umem = umem_xs->umem;
 		sockfd_put(sock);
 	} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
@@ -355,14 +359,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		xskq_set_umem(xs->umem->cq, &xs->umem->props);
 	}
 
-	/* Rebind? */
-	if (dev_curr && (dev_curr != dev ||
-			 xs->queue_id != sxdp->sxdp_queue_id)) {
-		__xsk_release(xs);
-		if (old_umem)
-			xdp_put_umem(old_umem);
-	}
-
 	xs->dev = dev;
 	xs->queue_id = sxdp->sxdp_queue_id;
@@ -410,25 +406,23 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
 		struct xdp_umem_reg mr;
 		struct xdp_umem *umem;
 
-		if (xs->umem)
-			return -EBUSY;
-
 		if (copy_from_user(&mr, optval, sizeof(mr)))
 			return -EFAULT;
 
 		mutex_lock(&xs->mutex);
-		err = xdp_umem_create(&umem);
+		if (xs->umem) {
+			mutex_unlock(&xs->mutex);
+			return -EBUSY;
+		}
 
-		err = xdp_umem_reg(umem, &mr);
-		if (err) {
-			kfree(umem);
+		umem = xdp_umem_create(&mr);
+		if (IS_ERR(umem)) {
 			mutex_unlock(&xs->mutex);
-			return err;
+			return PTR_ERR(umem);
 		}
 
 		/* Make sure umem is ready before it can be seen by others */
 		smp_wmb();
-
 		xs->umem = umem;
 		mutex_unlock(&xs->mutex);
 		return 0;
@@ -439,13 +433,15 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
 		struct xsk_queue **q;
 		int entries;
 
-		if (!xs->umem)
-			return -EINVAL;
-
 		if (copy_from_user(&entries, optval, sizeof(entries)))
 			return -EFAULT;
 
 		mutex_lock(&xs->mutex);
+		if (!xs->umem) {
+			mutex_unlock(&xs->mutex);
+			return -EINVAL;
+		}
+
 		q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
 			&xs->umem->cq;
 		err = xsk_init_queue(entries, q, true);
@@ -495,6 +491,35 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
 		return 0;
 	}
+	case XDP_MMAP_OFFSETS:
+	{
+		struct xdp_mmap_offsets off;
+
+		if (len < sizeof(off))
+			return -EINVAL;
+
+		off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
+		off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
+		off.rx.desc = offsetof(struct xdp_rxtx_ring, desc);
+		off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
+		off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
+		off.tx.desc = offsetof(struct xdp_rxtx_ring, desc);
+		off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
+		off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
+		off.fr.desc = offsetof(struct xdp_umem_ring, desc);
+		off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
+		off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
+		off.cr.desc = offsetof(struct xdp_umem_ring, desc);
+
+		len = sizeof(off);
+		if (copy_to_user(optval, &off, len))
+			return -EFAULT;
+		if (put_user(len, optlen))
+			return -EFAULT;
+
+		return 0;
+	}
 	default:
 		break;
 	}
@@ -509,21 +534,23 @@ static int xsk_mmap(struct file *file, struct socket *sock,
 	unsigned long size = vma->vm_end - vma->vm_start;
 	struct xdp_sock *xs = xdp_sk(sock->sk);
 	struct xsk_queue *q = NULL;
+	struct xdp_umem *umem;
 	unsigned long pfn;
 	struct page *qpg;
 
 	if (offset == XDP_PGOFF_RX_RING) {
-		q = xs->rx;
+		q = READ_ONCE(xs->rx);
 	} else if (offset == XDP_PGOFF_TX_RING) {
-		q = xs->tx;
+		q = READ_ONCE(xs->tx);
 	} else {
-		if (!xs->umem)
+		umem = READ_ONCE(xs->umem);
+		if (!umem)
 			return -EINVAL;
 
 		if (offset == XDP_UMEM_PGOFF_FILL_RING)
-			q = xs->umem->fq;
+			q = READ_ONCE(umem->fq);
 		else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
-			q = xs->umem->cq;
+			q = READ_ONCE(umem->cq);
 	}
 
 	if (!q)
......
@@ -13,6 +13,23 @@
 
 #define RX_BATCH_SIZE 16
 
+struct xdp_ring {
+	u32 producer ____cacheline_aligned_in_smp;
+	u32 consumer ____cacheline_aligned_in_smp;
+};
+
+/* Used for the RX and TX queues for packets */
+struct xdp_rxtx_ring {
+	struct xdp_ring ptrs;
+	struct xdp_desc desc[0] ____cacheline_aligned_in_smp;
+};
+
+/* Used for the fill and completion queues for buffers */
+struct xdp_umem_ring {
+	struct xdp_ring ptrs;
+	u32 desc[0] ____cacheline_aligned_in_smp;
+};
+
 struct xsk_queue {
 	struct xdp_umem_props umem_props;
 	u32 ring_mask;
......
@@ -79,7 +79,10 @@ struct xdp_umem_uqueue {
 	u32 cached_cons;
 	u32 mask;
 	u32 size;
-	struct xdp_umem_ring *ring;
+	u32 *producer;
+	u32 *consumer;
+	u32 *ring;
+	void *map;
 };
 
 struct xdp_umem {
@@ -94,7 +97,10 @@ struct xdp_uqueue {
 	u32 cached_cons;
 	u32 mask;
 	u32 size;
-	struct xdp_rxtx_ring *ring;
+	u32 *producer;
+	u32 *consumer;
+	struct xdp_desc *ring;
+	void *map;
 };
 
 struct xdpsock {
@@ -155,7 +161,7 @@ static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
 		return free_entries;
 
 	/* Refresh the local tail pointer */
-	q->cached_cons = q->ring->ptrs.consumer;
+	q->cached_cons = *q->consumer;
 
 	return q->size - (q->cached_prod - q->cached_cons);
 }
@@ -168,7 +174,7 @@ static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
 		return free_entries;
 
 	/* Refresh the local tail pointer */
-	q->cached_cons = q->ring->ptrs.consumer + q->size;
+	q->cached_cons = *q->consumer + q->size;
 
 	return q->cached_cons - q->cached_prod;
 }
@@ -177,7 +183,7 @@ static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb)
 	u32 entries = q->cached_prod - q->cached_cons;
 
 	if (entries == 0) {
-		q->cached_prod = q->ring->ptrs.producer;
+		q->cached_prod = *q->producer;
 		entries = q->cached_prod - q->cached_cons;
 	}
@@ -189,7 +195,7 @@ static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs)
 	u32 entries = q->cached_prod - q->cached_cons;
 
 	if (entries == 0) {
-		q->cached_prod = q->ring->ptrs.producer;
+		q->cached_prod = *q->producer;
 		entries = q->cached_prod - q->cached_cons;
 	}
@@ -208,12 +214,12 @@ static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
 	for (i = 0; i < nb; i++) {
 		u32 idx = fq->cached_prod++ & fq->mask;
 
-		fq->ring->desc[idx] = d[i].idx;
+		fq->ring[idx] = d[i].idx;
 	}
 
 	u_smp_wmb();
 
-	fq->ring->ptrs.producer = fq->cached_prod;
+	*fq->producer = fq->cached_prod;
 
 	return 0;
 }
@@ -229,12 +235,12 @@ static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d,
 	for (i = 0; i < nb; i++) {
 		u32 idx = fq->cached_prod++ & fq->mask;
 
-		fq->ring->desc[idx] = d[i];
+		fq->ring[idx] = d[i];
 	}
 
 	u_smp_wmb();
 
-	fq->ring->ptrs.producer = fq->cached_prod;
+	*fq->producer = fq->cached_prod;
 
 	return 0;
 }
@@ -248,13 +254,13 @@ static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
 	for (i = 0; i < entries; i++) {
 		idx = cq->cached_cons++ & cq->mask;
-		d[i] = cq->ring->desc[idx];
+		d[i] = cq->ring[idx];
 	}
 
 	if (entries > 0) {
 		u_smp_wmb();
 
-		cq->ring->ptrs.consumer = cq->cached_cons;
+		*cq->consumer = cq->cached_cons;
 	}
 
 	return entries;
@@ -270,7 +276,7 @@ static inline int xq_enq(struct xdp_uqueue *uq,
 		      const struct xdp_desc *descs,
 		      unsigned int ndescs)
 {
-	struct xdp_rxtx_ring *r = uq->ring;
+	struct xdp_desc *r = uq->ring;
 	unsigned int i;
 
 	if (xq_nb_free(uq, ndescs) < ndescs)
@@ -279,21 +285,21 @@ static inline int xq_enq(struct xdp_uqueue *uq,
 	for (i = 0; i < ndescs; i++) {
 		u32 idx = uq->cached_prod++ & uq->mask;
 
-		r->desc[idx].idx = descs[i].idx;
-		r->desc[idx].len = descs[i].len;
-		r->desc[idx].offset = descs[i].offset;
+		r[idx].idx = descs[i].idx;
+		r[idx].len = descs[i].len;
+		r[idx].offset = descs[i].offset;
 	}
 
 	u_smp_wmb();
 
-	r->ptrs.producer = uq->cached_prod;
+	*uq->producer = uq->cached_prod;
 
 	return 0;
 }
 
 static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
 				 __u32 idx, unsigned int ndescs)
 {
-	struct xdp_rxtx_ring *q = uq->ring;
+	struct xdp_desc *r = uq->ring;
 	unsigned int i;
 
 	if (xq_nb_free(uq, ndescs) < ndescs)
@@ -302,14 +308,14 @@ static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
 	for (i = 0; i < ndescs; i++) {
 		u32 idx = uq->cached_prod++ & uq->mask;
 
-		q->desc[idx].idx = idx + i;
-		q->desc[idx].len = sizeof(pkt_data) - 1;
-		q->desc[idx].offset = 0;
+		r[idx].idx = idx + i;
+		r[idx].len = sizeof(pkt_data) - 1;
+		r[idx].offset = 0;
 	}
 
 	u_smp_wmb();
 
-	q->ptrs.producer = uq->cached_prod;
+	*uq->producer = uq->cached_prod;
 
 	return 0;
 }
@@ -317,7 +323,7 @@ static inline int xq_deq(struct xdp_uqueue *uq,
 			 struct xdp_desc *descs,
 			 int ndescs)
 {
-	struct xdp_rxtx_ring *r = uq->ring;
+	struct xdp_desc *r = uq->ring;
 	unsigned int idx;
 	int i, entries;
@@ -327,13 +333,13 @@ static inline int xq_deq(struct xdp_uqueue *uq,
 	for (i = 0; i < entries; i++) {
 		idx = uq->cached_cons++ & uq->mask;
-		descs[i] = r->desc[idx];
+		descs[i] = r[idx];
 	}
 
 	if (entries > 0) {
 		u_smp_wmb();
 
-		r->ptrs.consumer = uq->cached_cons;
+		*uq->consumer = uq->cached_cons;
 	}
 
 	return entries;
@@ -392,8 +398,10 @@ static size_t gen_eth_frame(char *frame)
 static struct xdp_umem *xdp_umem_configure(int sfd)
 {
 	int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
+	struct xdp_mmap_offsets off;
 	struct xdp_umem_reg mr;
 	struct xdp_umem *umem;
+	socklen_t optlen;
 	void *bufs;
 
 	umem = calloc(1, sizeof(*umem));
@@ -413,25 +421,35 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
 	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
 			   sizeof(int)) == 0);
 
-	umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
-			     FQ_NUM_DESCS * sizeof(u32),
-			     PROT_READ | PROT_WRITE,
-			     MAP_SHARED | MAP_POPULATE, sfd,
-			     XDP_UMEM_PGOFF_FILL_RING);
-	lassert(umem->fq.ring != MAP_FAILED);
+	optlen = sizeof(off);
+	lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
+			   &optlen) == 0);
+
+	umem->fq.map = mmap(0, off.fr.desc +
+			    FQ_NUM_DESCS * sizeof(u32),
+			    PROT_READ | PROT_WRITE,
+			    MAP_SHARED | MAP_POPULATE, sfd,
+			    XDP_UMEM_PGOFF_FILL_RING);
+	lassert(umem->fq.map != MAP_FAILED);
 
 	umem->fq.mask = FQ_NUM_DESCS - 1;
 	umem->fq.size = FQ_NUM_DESCS;
+	umem->fq.producer = umem->fq.map + off.fr.producer;
+	umem->fq.consumer = umem->fq.map + off.fr.consumer;
+	umem->fq.ring = umem->fq.map + off.fr.desc;
 
-	umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
-			     CQ_NUM_DESCS * sizeof(u32),
-			     PROT_READ | PROT_WRITE,
-			     MAP_SHARED | MAP_POPULATE, sfd,
-			     XDP_UMEM_PGOFF_COMPLETION_RING);
-	lassert(umem->cq.ring != MAP_FAILED);
+	umem->cq.map = mmap(0, off.cr.desc +
+			    CQ_NUM_DESCS * sizeof(u32),
+			    PROT_READ | PROT_WRITE,
+			    MAP_SHARED | MAP_POPULATE, sfd,
+			    XDP_UMEM_PGOFF_COMPLETION_RING);
+	lassert(umem->cq.map != MAP_FAILED);
 
 	umem->cq.mask = CQ_NUM_DESCS - 1;
 	umem->cq.size = CQ_NUM_DESCS;
+	umem->cq.producer = umem->cq.map + off.cr.producer;
+	umem->cq.consumer = umem->cq.map + off.cr.consumer;
+	umem->cq.ring = umem->cq.map + off.cr.desc;
 
 	umem->frames = (char (*)[FRAME_SIZE])bufs;
 	umem->fd = sfd;
@@ -449,9 +467,11 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
 static struct xdpsock *xsk_configure(struct xdp_umem *umem)
 {
 	struct sockaddr_xdp sxdp = {};
+	struct xdp_mmap_offsets off;
 	int sfd, ndescs = NUM_DESCS;
 	struct xdpsock *xsk;
 	bool shared = true;
+	socklen_t optlen;
 	u32 i;
 
 	sfd = socket(PF_XDP, SOCK_RAW, 0);
@@ -474,15 +494,18 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
 			   &ndescs, sizeof(int)) == 0);
 	lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
 			   &ndescs, sizeof(int)) == 0);
+	optlen = sizeof(off);
+	lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
+			   &optlen) == 0);
 
 	/* Rx */
-	xsk->rx.ring = mmap(NULL,
-			    sizeof(struct xdp_ring) +
-			    NUM_DESCS * sizeof(struct xdp_desc),
-			    PROT_READ | PROT_WRITE,
-			    MAP_SHARED | MAP_POPULATE, sfd,
-			    XDP_PGOFF_RX_RING);
-	lassert(xsk->rx.ring != MAP_FAILED);
+	xsk->rx.map = mmap(NULL,
+			   off.rx.desc +
+			   NUM_DESCS * sizeof(struct xdp_desc),
+			   PROT_READ | PROT_WRITE,
+			   MAP_SHARED | MAP_POPULATE, sfd,
+			   XDP_PGOFF_RX_RING);
+	lassert(xsk->rx.map != MAP_FAILED);
 
 	if (!shared) {
 		for (i = 0; i < NUM_DESCS / 2; i++)
@@ -491,19 +514,25 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
 	}
 
 	/* Tx */
-	xsk->tx.ring = mmap(NULL,
-			    sizeof(struct xdp_ring) +
-			    NUM_DESCS * sizeof(struct xdp_desc),
-			    PROT_READ | PROT_WRITE,
-			    MAP_SHARED | MAP_POPULATE, sfd,
-			    XDP_PGOFF_TX_RING);
-	lassert(xsk->tx.ring != MAP_FAILED);
+	xsk->tx.map = mmap(NULL,
+			   off.tx.desc +
+			   NUM_DESCS * sizeof(struct xdp_desc),
+			   PROT_READ | PROT_WRITE,
+			   MAP_SHARED | MAP_POPULATE, sfd,
+			   XDP_PGOFF_TX_RING);
+	lassert(xsk->tx.map != MAP_FAILED);
 
 	xsk->rx.mask = NUM_DESCS - 1;
 	xsk->rx.size = NUM_DESCS;
+	xsk->rx.producer = xsk->rx.map + off.rx.producer;
+	xsk->rx.consumer = xsk->rx.map + off.rx.consumer;
+	xsk->rx.ring = xsk->rx.map + off.rx.desc;
 
 	xsk->tx.mask = NUM_DESCS - 1;
 	xsk->tx.size = NUM_DESCS;
+	xsk->tx.producer = xsk->tx.map + off.tx.producer;
+	xsk->tx.consumer = xsk->tx.map + off.tx.consumer;
+	xsk->tx.ring = xsk->tx.map + off.tx.desc;
 
 	sxdp.sxdp_family = PF_XDP;
 	sxdp.sxdp_ifindex = opt_ifindex;
......