Commit 10f67868 authored by Alexei Starovoitov

Merge branch 'xdp_xmit-bulking'

Jesper Dangaard Brouer says:

====================
This patchset changes the ndo_xdp_xmit API to take a bulk of xdp frames.
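
For orientation, the API shape after this change (as introduced in the
include/linux/netdevice.h hunk further down) is, in sketch form:

    /* Old: one frame per (indirect) call */
    int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp);

    /* New: a bulk of frames per call.  Returns the number of frames
     * successfully sent; frames the driver drops are freed by the driver
     * via the XDP return API.  A negative errno means nothing was sent
     * and the caller frees the whole bulk.
     */
    int (*ndo_xdp_xmit)(struct net_device *dev, int n,
                        struct xdp_frame **xdp);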

When the kernel is compiled with CONFIG_RETPOLINE, every indirect function
pointer (branch) call hurts performance. For XDP this has a huge
negative performance impact.

This patchset reduces the needed (indirect) calls to ndo_xdp_xmit, but
also prepares for further optimizations.  The DMA API's use of indirect
function pointer calls is the primary source of the regression.  Using
bulking calls towards the DMA API (via the scatter-gather calls) is left
for a followup patchset.
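
Concretely, the reduction comes from bulking in devmap (see the
kernel/bpf/devmap.c hunk further down): redirected frames are staged in
a small per-CPU queue and handed to the driver in one ndo_xdp_xmit()
call.  A rough sketch of that mechanism:

    #define DEV_MAP_BULK_SIZE 16

    struct xdp_bulk_queue {
            struct xdp_frame *q[DEV_MAP_BULK_SIZE];
            struct net_device *dev_rx; /* ingress dev, same for whole bulk */
            unsigned int count;
    };

    /* bq_enqueue() fills q[] on the XDP_REDIRECT path; bq_xmit_all()
     * flushes it with a single ndo_xdp_xmit(dev, bq->count, bq->q),
     * either when the queue is full or at the NAPI flush point
     * (__dev_map_flush).
     */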

The other advantage of this API change is that drivers can more easily
amortize the cost of any sync/locking scheme over the bulk of packets.
The assumption of the current API is that the driver implementing the
NDO will also allocate a dedicated XDP TX queue for every CPU in the
system, which is not always possible or practical to configure.
E.g. ixgbe cannot load an XDP program on a machine with more than 96
CPUs, due to limited hardware TX queues.  E.g. virtio_net is hard to
configure as it requires manually increasing the number of queues.
E.g. the tun driver chooses to take a per-XDP-frame producer lock on the
queue selected by smp_processor_id modulo the available queues.
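
As a sketch of that amortization (mirroring the drivers/net/tun.c hunk
further down), the bulk API lets tun take the ptr_ring producer lock
once per bulk instead of once per frame:

    spin_lock(&tfile->tx_ring.producer_lock);
    for (i = 0; i < n; i++) {
            struct xdp_frame *xdp = frames[i];

            /* the real code also bumps the tx_dropped counter here */
            if (__ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(xdp))) {
                    xdp_return_frame_rx_napi(xdp);
                    drops++;
            }
    }
    spin_unlock(&tfile->tx_ring.producer_lock);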

I've considered adding 'flags' to ndo_xdp_xmit, but it's not part of
this patchset.  This will be a followup patchset, once we know whether
it will be needed (e.g. for a non-map xdp_redirect flush-flag, and if
AF_XDP chooses to use ndo_xdp_xmit for TX).

---
V5: Fixed up issues spotted by Daniel and John

V4: Split the series out from 4 to 8 patches.  I cannot split the
driver changes from the NDO change, but I've tried to isolate the NDO
change together with the driver change as much as possible.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents f80acbd2 a570e48f
...@@ -3664,14 +3664,19 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
* @dev: netdev * @dev: netdev
* @xdp: XDP buffer * @xdp: XDP buffer
* *
* Returns Zero if sent, else an error code * Returns number of frames successfully sent. Frames that fail are
* free'ed via XDP return API.
*
* For error cases, a negative errno code is returned and no-frames
* are transmitted (caller must handle freeing frames).
**/ **/
int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
{ {
struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_netdev_priv *np = netdev_priv(dev);
unsigned int queue_index = smp_processor_id(); unsigned int queue_index = smp_processor_id();
struct i40e_vsi *vsi = np->vsi; struct i40e_vsi *vsi = np->vsi;
int err; int drops = 0;
int i;
if (test_bit(__I40E_VSI_DOWN, vsi->state)) if (test_bit(__I40E_VSI_DOWN, vsi->state))
return -ENETDOWN; return -ENETDOWN;
...@@ -3679,11 +3684,18 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs) if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
return -ENXIO; return -ENXIO;
err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]); for (i = 0; i < n; i++) {
if (err != I40E_XDP_TX) struct xdp_frame *xdpf = frames[i];
return -ENOSPC; int err;
return 0; err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
if (err != I40E_XDP_TX) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}
return n - drops;
} }
/** /**
......
...@@ -487,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
void i40e_detect_recover_hung(struct i40e_vsi *vsi); void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb); bool __i40e_chk_linearize(struct sk_buff *skb);
int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf); int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
void i40e_xdp_flush(struct net_device *dev); void i40e_xdp_flush(struct net_device *dev);
/** /**
......
...@@ -10017,11 +10017,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
} }
} }
static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) static int ixgbe_xdp_xmit(struct net_device *dev, int n,
struct xdp_frame **frames)
{ {
struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_adapter *adapter = netdev_priv(dev);
struct ixgbe_ring *ring; struct ixgbe_ring *ring;
int err; int drops = 0;
int i;
if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state))) if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
return -ENETDOWN; return -ENETDOWN;
...@@ -10033,11 +10035,18 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (unlikely(!ring)) if (unlikely(!ring))
return -ENXIO; return -ENXIO;
err = ixgbe_xmit_xdp_ring(adapter, xdpf); for (i = 0; i < n; i++) {
if (err != IXGBE_XDP_TX) struct xdp_frame *xdpf = frames[i];
return -ENOSPC; int err;
return 0; err = ixgbe_xmit_xdp_ring(adapter, xdpf);
if (err != IXGBE_XDP_TX) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}
return n - drops;
} }
static void ixgbe_xdp_flush(struct net_device *dev) static void ixgbe_xdp_flush(struct net_device *dev)
......
...@@ -70,6 +70,7 @@
#include <net/netns/generic.h> #include <net/netns/generic.h>
#include <net/rtnetlink.h> #include <net/rtnetlink.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/xdp.h>
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/skb_array.h> #include <linux/skb_array.h>
...@@ -1290,34 +1291,44 @@ static const struct net_device_ops tun_netdev_ops = {
.ndo_get_stats64 = tun_net_get_stats64, .ndo_get_stats64 = tun_net_get_stats64,
}; };
static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame) static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
{ {
struct tun_struct *tun = netdev_priv(dev); struct tun_struct *tun = netdev_priv(dev);
struct tun_file *tfile; struct tun_file *tfile;
u32 numqueues; u32 numqueues;
int ret = 0; int drops = 0;
int cnt = n;
int i;
rcu_read_lock(); rcu_read_lock();
numqueues = READ_ONCE(tun->numqueues); numqueues = READ_ONCE(tun->numqueues);
if (!numqueues) { if (!numqueues) {
ret = -ENOSPC; rcu_read_unlock();
goto out; return -ENXIO; /* Caller will free/return all frames */
} }
tfile = rcu_dereference(tun->tfiles[smp_processor_id() % tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
numqueues]); numqueues]);
/* Encode the XDP flag into lowest bit for consumer to differ
* XDP buffer from sk_buff. spin_lock(&tfile->tx_ring.producer_lock);
*/ for (i = 0; i < n; i++) {
if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) { struct xdp_frame *xdp = frames[i];
this_cpu_inc(tun->pcpu_stats->tx_dropped); /* Encode the XDP flag into lowest bit for consumer to differ
ret = -ENOSPC; * XDP buffer from sk_buff.
*/
void *frame = tun_xdp_to_ptr(xdp);
if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
this_cpu_inc(tun->pcpu_stats->tx_dropped);
xdp_return_frame_rx_napi(xdp);
drops++;
}
} }
spin_unlock(&tfile->tx_ring.producer_lock);
out:
rcu_read_unlock(); rcu_read_unlock();
return ret; return cnt - drops;
} }
static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp) static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
...@@ -1327,7 +1338,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
if (unlikely(!frame)) if (unlikely(!frame))
return -EOVERFLOW; return -EOVERFLOW;
return tun_xdp_xmit(dev, frame); return tun_xdp_xmit(dev, 1, &frame);
} }
static void tun_xdp_flush(struct net_device *dev) static void tun_xdp_flush(struct net_device *dev)
......
...@@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev)
virtqueue_kick(sq->vq); virtqueue_kick(sq->vq);
} }
static int __virtnet_xdp_xmit(struct virtnet_info *vi, static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
struct xdp_frame *xdpf) struct send_queue *sq,
struct xdp_frame *xdpf)
{ {
struct virtio_net_hdr_mrg_rxbuf *hdr; struct virtio_net_hdr_mrg_rxbuf *hdr;
struct xdp_frame *xdpf_sent;
struct send_queue *sq;
unsigned int len;
unsigned int qp;
int err; int err;
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];
/* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
xdp_return_frame(xdpf_sent);
/* virtqueue want to use data area in-front of packet */ /* virtqueue want to use data area in-front of packet */
if (unlikely(xdpf->metasize > 0)) if (unlikely(xdpf->metasize > 0))
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi,
return 0; return 0;
} }
static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf) static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
struct xdp_frame *xdpf)
{
struct xdp_frame *xdpf_sent;
struct send_queue *sq;
unsigned int len;
unsigned int qp;
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];
/* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
xdp_return_frame(xdpf_sent);
return __virtnet_xdp_xmit_one(vi, sq, xdpf);
}
static int virtnet_xdp_xmit(struct net_device *dev,
int n, struct xdp_frame **frames)
{ {
struct virtnet_info *vi = netdev_priv(dev); struct virtnet_info *vi = netdev_priv(dev);
struct receive_queue *rq = vi->rq; struct receive_queue *rq = vi->rq;
struct xdp_frame *xdpf_sent;
struct bpf_prog *xdp_prog; struct bpf_prog *xdp_prog;
struct send_queue *sq;
unsigned int len;
unsigned int qp;
int drops = 0;
int err;
int i;
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
* indicate XDP resources have been successfully allocated. * indicate XDP resources have been successfully allocated.
...@@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (!xdp_prog) if (!xdp_prog)
return -ENXIO; return -ENXIO;
return __virtnet_xdp_xmit(vi, xdpf); /* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
xdp_return_frame(xdpf_sent);
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
if (err) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}
return n - drops;
} }
static unsigned int virtnet_get_headroom(struct virtnet_info *vi) static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
...@@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
xdpf = convert_to_xdp_frame(&xdp); xdpf = convert_to_xdp_frame(&xdp);
if (unlikely(!xdpf)) if (unlikely(!xdpf))
goto err_xdp; goto err_xdp;
err = __virtnet_xdp_xmit(vi, xdpf); err = __virtnet_xdp_tx_xmit(vi, xdpf);
if (unlikely(err)) { if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act); trace_xdp_exception(vi->dev, xdp_prog, act);
goto err_xdp; goto err_xdp;
...@@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
xdpf = convert_to_xdp_frame(&xdp); xdpf = convert_to_xdp_frame(&xdp);
if (unlikely(!xdpf)) if (unlikely(!xdpf))
goto err_xdp; goto err_xdp;
err = __virtnet_xdp_xmit(vi, xdpf); err = __virtnet_xdp_tx_xmit(vi, xdpf);
if (unlikely(err)) { if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act); trace_xdp_exception(vi->dev, xdp_prog, act);
if (unlikely(xdp_page != page)) if (unlikely(xdp_page != page))
......
...@@ -487,14 +487,17 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
/* Map specifics */ /* Map specifics */
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); struct xdp_buff;
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map); void __dev_map_flush(struct bpf_map *map);
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx);
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
void __cpu_map_flush(struct bpf_map *map); void __cpu_map_flush(struct bpf_map *map);
struct xdp_buff;
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx); struct net_device *dev_rx);
...@@ -573,6 +576,16 @@ static inline void __dev_map_flush(struct bpf_map *map)
{ {
} }
struct xdp_buff;
struct bpf_dtab_netdev;
static inline
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
return 0;
}
static inline static inline
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
{ {
...@@ -587,7 +600,6 @@ static inline void __cpu_map_flush(struct bpf_map *map)
{ {
} }
struct xdp_buff;
static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
struct xdp_buff *xdp, struct xdp_buff *xdp,
struct net_device *dev_rx) struct net_device *dev_rx)
......
...@@ -1185,9 +1185,13 @@ struct dev_ifalias {
* This function is used to set or query state related to XDP on the * This function is used to set or query state related to XDP on the
* netdevice and manage BPF offload. See definition of * netdevice and manage BPF offload. See definition of
* enum bpf_netdev_command for details. * enum bpf_netdev_command for details.
* int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp); * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
* This function is used to submit a XDP packet for transmit on a * This function is used to submit @n XDP packets for transmit on a
* netdevice. * netdevice. Returns number of frames successfully transmitted, frames
* that got dropped are freed/returned via xdp_return_frame().
* Returns negative number, means general error invoking ndo, meaning
* no frames were xmit'ed and core-caller will free all frames.
* TODO: Consider add flag to allow sending flush operation.
* void (*ndo_xdp_flush)(struct net_device *dev); * void (*ndo_xdp_flush)(struct net_device *dev);
* This function is used to inform the driver to flush a particular * This function is used to inform the driver to flush a particular
* xdp tx queue. Must be called on same CPU as xdp_xmit. * xdp tx queue. Must be called on same CPU as xdp_xmit.
...@@ -1375,8 +1379,8 @@ struct net_device_ops {
int needed_headroom); int needed_headroom);
int (*ndo_bpf)(struct net_device *dev, int (*ndo_bpf)(struct net_device *dev,
struct netdev_bpf *bpf); struct netdev_bpf *bpf);
int (*ndo_xdp_xmit)(struct net_device *dev, int (*ndo_xdp_xmit)(struct net_device *dev, int n,
struct xdp_frame *xdp); struct xdp_frame **xdp);
void (*ndo_xdp_flush)(struct net_device *dev); void (*ndo_xdp_flush)(struct net_device *dev);
}; };
......
...@@ -115,13 +115,14 @@ void page_pool_destroy(struct page_pool *pool);
void __page_pool_put_page(struct page_pool *pool, void __page_pool_put_page(struct page_pool *pool,
struct page *page, bool allow_direct); struct page *page, bool allow_direct);
static inline void page_pool_put_page(struct page_pool *pool, struct page *page) static inline void page_pool_put_page(struct page_pool *pool,
struct page *page, bool allow_direct)
{ {
/* When page_pool isn't compiled-in, net/core/xdp.c doesn't /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
* allow registering MEM_TYPE_PAGE_POOL, but shield linker. * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/ */
#ifdef CONFIG_PAGE_POOL #ifdef CONFIG_PAGE_POOL
__page_pool_put_page(pool, page, false); __page_pool_put_page(pool, page, allow_direct);
#endif #endif
} }
/* Very limited use-cases allow recycle direct */ /* Very limited use-cases allow recycle direct */
......
...@@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
} }
void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp); void xdp_return_buff(struct xdp_buff *xdp);
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
......
...@@ -138,11 +138,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
__entry->map_id, __entry->map_index) __entry->map_id, __entry->map_index)
); );
#ifndef __DEVMAP_OBJ_TYPE
#define __DEVMAP_OBJ_TYPE
struct _bpf_dtab_netdev {
struct net_device *dev;
};
#endif /* __DEVMAP_OBJ_TYPE */
#define devmap_ifindex(fwd, map) \ #define devmap_ifindex(fwd, map) \
(!fwd ? 0 : \ (!fwd ? 0 : \
(!map ? 0 : \ (!map ? 0 : \
((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \
((struct net_device *)fwd)->ifindex : 0))) ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)))
#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \ trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \
...@@ -222,6 +229,47 @@ TRACE_EVENT(xdp_cpumap_enqueue,
__entry->to_cpu) __entry->to_cpu)
); );
TRACE_EVENT(xdp_devmap_xmit,
TP_PROTO(const struct bpf_map *map, u32 map_index,
int sent, int drops,
const struct net_device *from_dev,
const struct net_device *to_dev, int err),
TP_ARGS(map, map_index, sent, drops, from_dev, to_dev, err),
TP_STRUCT__entry(
__field(int, map_id)
__field(u32, act)
__field(u32, map_index)
__field(int, drops)
__field(int, sent)
__field(int, from_ifindex)
__field(int, to_ifindex)
__field(int, err)
),
TP_fast_assign(
__entry->map_id = map->id;
__entry->act = XDP_REDIRECT;
__entry->map_index = map_index;
__entry->drops = drops;
__entry->sent = sent;
__entry->from_ifindex = from_dev->ifindex;
__entry->to_ifindex = to_dev->ifindex;
__entry->err = err;
),
TP_printk("ndo_xdp_xmit"
" map_id=%d map_index=%d action=%s"
" sent=%d drops=%d"
" from_ifindex=%d to_ifindex=%d err=%d",
__entry->map_id, __entry->map_index,
__print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
__entry->sent, __entry->drops,
__entry->from_ifindex, __entry->to_ifindex, __entry->err)
);
#endif /* _TRACE_XDP_H */ #endif /* _TRACE_XDP_H */
#include <trace/define_trace.h> #include <trace/define_trace.h>
...@@ -578,7 +578,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
err = __ptr_ring_produce(q, xdpf); err = __ptr_ring_produce(q, xdpf);
if (err) { if (err) {
drops++; drops++;
xdp_return_frame(xdpf); xdp_return_frame_rx_napi(xdpf);
} }
processed++; processed++;
} }
......
...@@ -48,15 +48,25 @@
* calls will fail at this point. * calls will fail at this point.
*/ */
#include <linux/bpf.h> #include <linux/bpf.h>
#include <net/xdp.h>
#include <linux/filter.h> #include <linux/filter.h>
#include <trace/events/xdp.h>
#define DEV_CREATE_FLAG_MASK \ #define DEV_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
#define DEV_MAP_BULK_SIZE 16
struct xdp_bulk_queue {
struct xdp_frame *q[DEV_MAP_BULK_SIZE];
struct net_device *dev_rx;
unsigned int count;
};
struct bpf_dtab_netdev { struct bpf_dtab_netdev {
struct net_device *dev; struct net_device *dev; /* must be first member, due to tracepoint */
struct bpf_dtab *dtab; struct bpf_dtab *dtab;
unsigned int bit; unsigned int bit;
struct xdp_bulk_queue __percpu *bulkq;
struct rcu_head rcu; struct rcu_head rcu;
}; };
...@@ -206,6 +216,50 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
__set_bit(bit, bitmap); __set_bit(bit, bitmap);
} }
static int bq_xmit_all(struct bpf_dtab_netdev *obj,
struct xdp_bulk_queue *bq)
{
struct net_device *dev = obj->dev;
int sent = 0, drops = 0, err = 0;
int i;
if (unlikely(!bq->count))
return 0;
for (i = 0; i < bq->count; i++) {
struct xdp_frame *xdpf = bq->q[i];
prefetch(xdpf);
}
sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
if (sent < 0) {
err = sent;
sent = 0;
goto error;
}
drops = bq->count - sent;
out:
bq->count = 0;
trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
sent, drops, bq->dev_rx, dev, err);
bq->dev_rx = NULL;
return 0;
error:
/* If ndo_xdp_xmit fails with an errno, no frames have been
* xmit'ed and it's our responsibility to them free all.
*/
for (i = 0; i < bq->count; i++) {
struct xdp_frame *xdpf = bq->q[i];
/* RX path under NAPI protection, can return frames faster */
xdp_return_frame_rx_napi(xdpf);
drops++;
}
goto out;
}
/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled /* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
* from the driver before returning from its napi->poll() routine. The poll() * from the driver before returning from its napi->poll() routine. The poll()
* routine is called either from busy_poll context or net_rx_action signaled * routine is called either from busy_poll context or net_rx_action signaled
...@@ -221,6 +275,7 @@ void __dev_map_flush(struct bpf_map *map)
for_each_set_bit(bit, bitmap, map->max_entries) { for_each_set_bit(bit, bitmap, map->max_entries) {
struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]); struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
struct xdp_bulk_queue *bq;
struct net_device *netdev; struct net_device *netdev;
/* This is possible if the dev entry is removed by user space /* This is possible if the dev entry is removed by user space
...@@ -230,6 +285,9 @@ void __dev_map_flush(struct bpf_map *map)
continue; continue;
__clear_bit(bit, bitmap); __clear_bit(bit, bitmap);
bq = this_cpu_ptr(dev->bulkq);
bq_xmit_all(dev, bq);
netdev = dev->dev; netdev = dev->dev;
if (likely(netdev->netdev_ops->ndo_xdp_flush)) if (likely(netdev->netdev_ops->ndo_xdp_flush))
netdev->netdev_ops->ndo_xdp_flush(netdev); netdev->netdev_ops->ndo_xdp_flush(netdev);
...@@ -240,21 +298,61 @@ void __dev_map_flush(struct bpf_map *map)
* update happens in parallel here a dev_put wont happen until after reading the * update happens in parallel here a dev_put wont happen until after reading the
* ifindex. * ifindex.
*/ */
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key) struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
{ {
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_dtab_netdev *dev; struct bpf_dtab_netdev *obj;
if (key >= map->max_entries) if (key >= map->max_entries)
return NULL; return NULL;
dev = READ_ONCE(dtab->netdev_map[key]); obj = READ_ONCE(dtab->netdev_map[key]);
return dev ? dev->dev : NULL; return obj;
}
/* Runs under RCU-read-side, plus in softirq under NAPI protection.
* Thus, safe percpu variable access.
*/
static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
struct net_device *dev_rx)
{
struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
bq_xmit_all(obj, bq);
/* Ingress dev_rx will be the same for all xdp_frame's in
* bulk_queue, because bq stored per-CPU and must be flushed
* from net_device drivers NAPI func end.
*/
if (!bq->dev_rx)
bq->dev_rx = dev_rx;
bq->q[bq->count++] = xdpf;
return 0;
}
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
struct net_device *dev = dst->dev;
struct xdp_frame *xdpf;
if (!dev->netdev_ops->ndo_xdp_xmit)
return -EOPNOTSUPP;
xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
return bq_enqueue(dst, xdpf, dev_rx);
} }
static void *dev_map_lookup_elem(struct bpf_map *map, void *key) static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
{ {
struct net_device *dev = __dev_map_lookup_elem(map, *(u32 *)key); struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
struct net_device *dev = dev = obj ? obj->dev : NULL;
return dev ? &dev->ifindex : NULL; return dev ? &dev->ifindex : NULL;
} }
...@@ -263,13 +361,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
{ {
if (dev->dev->netdev_ops->ndo_xdp_flush) { if (dev->dev->netdev_ops->ndo_xdp_flush) {
struct net_device *fl = dev->dev; struct net_device *fl = dev->dev;
struct xdp_bulk_queue *bq;
unsigned long *bitmap; unsigned long *bitmap;
int cpu; int cpu;
for_each_online_cpu(cpu) { for_each_online_cpu(cpu) {
bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu); bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
__clear_bit(dev->bit, bitmap); __clear_bit(dev->bit, bitmap);
bq = per_cpu_ptr(dev->bulkq, cpu);
bq_xmit_all(dev, bq);
fl->netdev_ops->ndo_xdp_flush(dev->dev); fl->netdev_ops->ndo_xdp_flush(dev->dev);
} }
} }
...@@ -281,6 +384,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
dev = container_of(rcu, struct bpf_dtab_netdev, rcu); dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
dev_map_flush_old(dev); dev_map_flush_old(dev);
free_percpu(dev->bulkq);
dev_put(dev->dev); dev_put(dev->dev);
kfree(dev); kfree(dev);
} }
...@@ -313,6 +417,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
{ {
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct net *net = current->nsproxy->net_ns; struct net *net = current->nsproxy->net_ns;
gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
struct bpf_dtab_netdev *dev, *old_dev; struct bpf_dtab_netdev *dev, *old_dev;
u32 i = *(u32 *)key; u32 i = *(u32 *)key;
u32 ifindex = *(u32 *)value; u32 ifindex = *(u32 *)value;
...@@ -327,13 +432,20 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
if (!ifindex) { if (!ifindex) {
dev = NULL; dev = NULL;
} else { } else {
dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN, dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
map->numa_node);
if (!dev) if (!dev)
return -ENOMEM; return -ENOMEM;
dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
sizeof(void *), gfp);
if (!dev->bulkq) {
kfree(dev);
return -ENOMEM;
}
dev->dev = dev_get_by_index(net, ifindex); dev->dev = dev_get_by_index(net, ifindex);
if (!dev->dev) { if (!dev->dev) {
free_percpu(dev->bulkq);
kfree(dev); kfree(dev);
return -EINVAL; return -EINVAL;
} }
...@@ -405,6 +517,9 @@ static struct notifier_block dev_map_notifier = {
static int __init dev_map_init(void) static int __init dev_map_init(void)
{ {
/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
offsetof(struct _bpf_dtab_netdev, dev));
register_netdevice_notifier(&dev_map_notifier); register_netdevice_notifier(&dev_map_notifier);
return 0; return 0;
} }
......
...@@ -3039,7 +3039,7 @@ static int __bpf_tx_xdp(struct net_device *dev,
u32 index) u32 index)
{ {
struct xdp_frame *xdpf; struct xdp_frame *xdpf;
int err; int sent;
if (!dev->netdev_ops->ndo_xdp_xmit) { if (!dev->netdev_ops->ndo_xdp_xmit) {
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -3049,9 +3049,9 @@ static int __bpf_tx_xdp(struct net_device *dev,
if (unlikely(!xdpf)) if (unlikely(!xdpf))
return -EOVERFLOW; return -EOVERFLOW;
err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf); sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf);
if (err) if (sent <= 0)
return err; return sent;
dev->netdev_ops->ndo_xdp_flush(dev); dev->netdev_ops->ndo_xdp_flush(dev);
return 0; return 0;
} }
...@@ -3065,20 +3065,9 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
switch (map->map_type) { switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP: { case BPF_MAP_TYPE_DEVMAP: {
struct net_device *dev = fwd; struct bpf_dtab_netdev *dst = fwd;
struct xdp_frame *xdpf;
if (!dev->netdev_ops->ndo_xdp_xmit) err = dev_map_enqueue(dst, xdp, dev_rx);
return -EOPNOTSUPP;
xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
/* TODO: move to inside map code instead, for bulk support
* err = dev_map_enqueue(dev, xdp);
*/
err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
if (err) if (err)
return err; return err;
__dev_map_insert_ctx(map, index); __dev_map_insert_ctx(map, index);
......
...@@ -308,7 +308,13 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
} }
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
static void xdp_return(void *data, struct xdp_mem_info *mem) /* XDP RX runs under NAPI protection, and in different delivery error
* scenarios (e.g. queue full), it is possible to return the xdp_frame
* while still leveraging this protection. The @napi_direct boolian
* is used for those calls sites. Thus, allowing for faster recycling
* of xdp_frames/pages in those cases.
*/
static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct)
{ {
struct xdp_mem_allocator *xa; struct xdp_mem_allocator *xa;
struct page *page; struct page *page;
...@@ -320,7 +326,7 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
page = virt_to_head_page(data); page = virt_to_head_page(data);
if (xa) if (xa)
page_pool_put_page(xa->page_pool, page); page_pool_put_page(xa->page_pool, page, napi_direct);
else else
put_page(page); put_page(page);
rcu_read_unlock(); rcu_read_unlock();
...@@ -340,12 +346,18 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)
void xdp_return_frame(struct xdp_frame *xdpf) void xdp_return_frame(struct xdp_frame *xdpf)
{ {
xdp_return(xdpf->data, &xdpf->mem); __xdp_return(xdpf->data, &xdpf->mem, false);
} }
EXPORT_SYMBOL_GPL(xdp_return_frame); EXPORT_SYMBOL_GPL(xdp_return_frame);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
__xdp_return(xdpf->data, &xdpf->mem, true);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
void xdp_return_buff(struct xdp_buff *xdp) void xdp_return_buff(struct xdp_buff *xdp)
{ {
xdp_return(xdp->data, &xdp->rxq->mem); __xdp_return(xdp->data, &xdp->rxq->mem, true);
} }
EXPORT_SYMBOL_GPL(xdp_return_buff); EXPORT_SYMBOL_GPL(xdp_return_buff);
...@@ -125,6 +125,7 @@ struct datarec {
u64 processed; u64 processed;
u64 dropped; u64 dropped;
u64 info; u64 info;
u64 err;
}; };
#define MAX_CPUS 64 #define MAX_CPUS 64
...@@ -208,3 +209,51 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
return 0; return 0;
} }
struct bpf_map_def SEC("maps") devmap_xmit_cnt = {
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
.key_size = sizeof(u32),
.value_size = sizeof(struct datarec),
.max_entries = 1,
};
/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format
* Code in: kernel/include/trace/events/xdp.h
*/
struct devmap_xmit_ctx {
u64 __pad; // First 8 bytes are not accessible by bpf code
int map_id; // offset:8; size:4; signed:1;
u32 act; // offset:12; size:4; signed:0;
u32 map_index; // offset:16; size:4; signed:0;
int drops; // offset:20; size:4; signed:1;
int sent; // offset:24; size:4; signed:1;
int from_ifindex; // offset:28; size:4; signed:1;
int to_ifindex; // offset:32; size:4; signed:1;
int err; // offset:36; size:4; signed:1;
};
SEC("tracepoint/xdp/xdp_devmap_xmit")
int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx)
{
struct datarec *rec;
u32 key = 0;
rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &key);
if (!rec)
return 0;
rec->processed += ctx->sent;
rec->dropped += ctx->drops;
/* Record bulk events, then userspace can calc average bulk size */
rec->info += 1;
/* Record error cases, where no frame were sent */
if (ctx->err)
rec->err++;
/* Catch API error of drv ndo_xdp_xmit sent more than count */
if (ctx->drops < 0)
rec->err++;
return 1;
}
...@@ -117,6 +117,7 @@ struct datarec {
__u64 processed; __u64 processed;
__u64 dropped; __u64 dropped;
__u64 info; __u64 info;
__u64 err;
}; };
#define MAX_CPUS 64 #define MAX_CPUS 64
...@@ -141,6 +142,7 @@ struct stats_record {
struct record_u64 xdp_exception[XDP_ACTION_MAX]; struct record_u64 xdp_exception[XDP_ACTION_MAX];
struct record xdp_cpumap_kthread; struct record xdp_cpumap_kthread;
struct record xdp_cpumap_enqueue[MAX_CPUS]; struct record xdp_cpumap_enqueue[MAX_CPUS];
struct record xdp_devmap_xmit;
}; };
static bool map_collect_record(int fd, __u32 key, struct record *rec) static bool map_collect_record(int fd, __u32 key, struct record *rec)
...@@ -151,6 +153,7 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec)
__u64 sum_processed = 0; __u64 sum_processed = 0;
__u64 sum_dropped = 0; __u64 sum_dropped = 0;
__u64 sum_info = 0; __u64 sum_info = 0;
__u64 sum_err = 0;
int i; int i;
if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
...@@ -169,10 +172,13 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec)
sum_dropped += values[i].dropped; sum_dropped += values[i].dropped;
rec->cpu[i].info = values[i].info; rec->cpu[i].info = values[i].info;
sum_info += values[i].info; sum_info += values[i].info;
rec->cpu[i].err = values[i].err;
sum_err += values[i].err;
} }
rec->total.processed = sum_processed; rec->total.processed = sum_processed;
rec->total.dropped = sum_dropped; rec->total.dropped = sum_dropped;
rec->total.info = sum_info; rec->total.info = sum_info;
rec->total.err = sum_err;
return true; return true;
} }
...@@ -273,6 +279,18 @@ static double calc_info(struct datarec *r, struct datarec *p, double period)
return pps; return pps;
} }
static double calc_err(struct datarec *r, struct datarec *p, double period)
{
__u64 packets = 0;
double pps = 0;
if (period > 0) {
packets = r->err - p->err;
pps = packets / period;
}
return pps;
}
static void stats_print(struct stats_record *stats_rec, static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev, struct stats_record *stats_prev,
bool err_only) bool err_only)
...@@ -397,7 +415,7 @@ static void stats_print(struct stats_record *stats_rec,
info = calc_info(r, p, t); info = calc_info(r, p, t);
if (info > 0) if (info > 0)
i_str = "sched"; i_str = "sched";
if (pps > 0) if (pps > 0 || drop > 0)
printf(fmt1, "cpumap-kthread", printf(fmt1, "cpumap-kthread",
i, pps, drop, info, i_str); i, pps, drop, info, i_str);
} }
...@@ -409,6 +427,50 @@ static void stats_print(struct stats_record *stats_rec,
printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str); printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
} }
/* devmap ndo_xdp_xmit stats */
{
char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s %s\n";
char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s %s\n";
struct record *rec, *prev;
double drop, info, err;
char *i_str = "";
char *err_str = "";
rec = &stats_rec->xdp_devmap_xmit;
prev = &stats_prev->xdp_devmap_xmit;
t = calc_period(rec, prev);
for (i = 0; i < nr_cpus; i++) {
struct datarec *r = &rec->cpu[i];
struct datarec *p = &prev->cpu[i];
pps = calc_pps(r, p, t);
drop = calc_drop(r, p, t);
info = calc_info(r, p, t);
err = calc_err(r, p, t);
if (info > 0) {
i_str = "bulk-average";
info = (pps+drop) / info; /* calc avg bulk */
}
if (err > 0)
err_str = "drv-err";
if (pps > 0 || drop > 0)
printf(fmt1, "devmap-xmit",
i, pps, drop, info, i_str, err_str);
}
pps = calc_pps(&rec->total, &prev->total, t);
drop = calc_drop(&rec->total, &prev->total, t);
info = calc_info(&rec->total, &prev->total, t);
err = calc_err(&rec->total, &prev->total, t);
if (info > 0) {
i_str = "bulk-average";
info = (pps+drop) / info; /* calc avg bulk */
}
if (err > 0)
err_str = "drv-err";
printf(fmt2, "devmap-xmit", "total", pps, drop,
info, i_str, err_str);
}
printf("\n"); printf("\n");
} }
...@@ -437,6 +499,9 @@ static bool stats_collect(struct stats_record *rec)
fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */ fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
map_collect_record(fd, 0, &rec->xdp_cpumap_kthread); map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
fd = map_data[4].fd; /* map4: devmap_xmit_cnt */
map_collect_record(fd, 0, &rec->xdp_devmap_xmit);
return true; return true;
} }
...@@ -480,6 +545,7 @@ static struct stats_record *alloc_stats_record(void)
rec_sz = sizeof(struct datarec); rec_sz = sizeof(struct datarec);
rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz); rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
rec->xdp_devmap_xmit.cpu = alloc_rec_per_cpu(rec_sz);
for (i = 0; i < MAX_CPUS; i++) for (i = 0; i < MAX_CPUS; i++)
rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz); rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
...@@ -498,6 +564,7 @@ static void free_stats_record(struct stats_record *r)
free(r->xdp_exception[i].cpu); free(r->xdp_exception[i].cpu);
free(r->xdp_cpumap_kthread.cpu); free(r->xdp_cpumap_kthread.cpu);
free(r->xdp_devmap_xmit.cpu);
for (i = 0; i < MAX_CPUS; i++) for (i = 0; i < MAX_CPUS; i++)
free(r->xdp_cpumap_enqueue[i].cpu); free(r->xdp_cpumap_enqueue[i].cpu);
......