Commit ea9916ea authored by Alexei Starovoitov

Merge branch 'ndo_xdp_xmit-cleanup'

Jesper Dangaard Brouer says:

====================
As I mentioned in merge commit 10f67868 ("Merge branch 'xdp_xmit-bulking'")
I plan to change the API for ndo_xdp_xmit once more, by adding a flags
argument, which is done in this patchset.

I know it is late in the cycle (currently at rc7), but it would be
nice to avoid changing NDOs over several kernel releases, as that is
annoying to vendors and distro backporters.  This is not strictly
UAPI, so the change is allowed (according to Alexei).

The end-goal is getting rid of the ndo_xdp_flush operation, as it will
make it possible for drivers to implement a TXQ synchronization mechanism
that is not necessarily derived from the CPU id (smp_processor_id).

This patchset removes all callers of the ndo_xdp_flush operation, but
it doesn't take the last step of removing it from all drivers.  This
can be done later, or I can update the patchset on request.

Micro-benchmarks only show a very small performance improvement:
around 2 ns for map-redirect and around 7 ns for non-map redirect.
I have not benchmarked this with CONFIG_RETPOLINE, but the performance
benefit should be more visible there, given we end up removing an
indirect call.

---
V2: Updated based on feedback from Song Liu <songliubraving@fb.com>
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 69b45078 c1ece6b2
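
Before the per-driver hunks below, a rough orientation: each driver's ndo_xdp_xmit
now takes a flags argument, rejects bits outside XDP_XMIT_FLAGS_MASK, and rings its
own TX doorbell when XDP_XMIT_FLUSH is set, so no separate ndo_xdp_flush call is
needed. The stand-alone C sketch below models that contract; the ring and frame
types and the helper names are illustrative stubs, not the kernel's definitions.

/*
 * Stand-alone model of the ndo_xdp_xmit contract this series establishes:
 * reject unknown flag bits, queue the bulk of frames, and only ring the
 * TX doorbell when the caller set XDP_XMIT_FLUSH.
 * All types and helpers below are illustrative stubs, not kernel structures.
 */
#include <errno.h>
#include <stdio.h>

/* Flag values mirror those added to include/net/xdp.h by this series. */
#define XDP_XMIT_FLUSH      (1U << 0)   /* doorbell signal consumer */
#define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH

struct fake_frame { int id; };                /* stand-in for struct xdp_frame */
struct fake_ring  { int tail; int pending; }; /* stand-in for a driver TX ring */

static void ring_update_tail(struct fake_ring *ring)
{
        /* A real driver does a wmb() followed by a writel() of the tail. */
        ring->tail += ring->pending;
        ring->pending = 0;
        printf("doorbell rung, tail=%d\n", ring->tail);
}

/* Shape of the new NDO: returns the number of frames sent or -errno. */
static int model_xdp_xmit(struct fake_ring *ring, int n,
                          struct fake_frame **frames, unsigned int flags)
{
        int i, drops = 0;

        if (flags & ~XDP_XMIT_FLAGS_MASK)       /* unknown flag bit -> reject */
                return -EINVAL;

        for (i = 0; i < n; i++) {
                printf("queueing frame %d\n", frames[i]->id);
                ring->pending++;
        }

        if (flags & XDP_XMIT_FLUSH)             /* flush folded into xmit */
                ring_update_tail(ring);

        return n - drops;
}

int main(void)
{
        struct fake_frame f = { 1 };
        struct fake_frame *frames[] = { &f };
        struct fake_ring ring = { 0, 0 };

        model_xdp_xmit(&ring, 1, frames, 0);              /* queue only */
        model_xdp_xmit(&ring, 1, frames, XDP_XMIT_FLUSH); /* queue + doorbell */
        return 0;
}
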
@@ -3670,11 +3670,13 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
  * For error cases, a negative errno code is returned and no-frames
  * are transmitted (caller must handle freeing frames).
  **/
-int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+                  u32 flags)
 {
         struct i40e_netdev_priv *np = netdev_priv(dev);
         unsigned int queue_index = smp_processor_id();
         struct i40e_vsi *vsi = np->vsi;
+        struct i40e_ring *xdp_ring;
         int drops = 0;
         int i;
 
@@ -3684,17 +3686,25 @@ int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
         if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
                 return -ENXIO;
 
+        if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+                return -EINVAL;
+
+        xdp_ring = vsi->xdp_rings[queue_index];
+
         for (i = 0; i < n; i++) {
                 struct xdp_frame *xdpf = frames[i];
                 int err;
 
-                err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
+                err = i40e_xmit_xdp_ring(xdpf, xdp_ring);
                 if (err != I40E_XDP_TX) {
                         xdp_return_frame_rx_napi(xdpf);
                         drops++;
                 }
         }
 
+        if (unlikely(flags & XDP_XMIT_FLUSH))
+                i40e_xdp_ring_update_tail(xdp_ring);
+
         return n - drops;
 }
...
@@ -487,7 +487,8 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
 void i40e_detect_recover_hung(struct i40e_vsi *vsi);
 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
-int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
+                  u32 flags);
 void i40e_xdp_flush(struct net_device *dev);
 
 /**
...
@@ -10022,8 +10022,17 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
         }
 }
 
+static void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring)
+{
+        /* Force memory writes to complete before letting h/w know there
+         * are new descriptors to fetch.
+         */
+        wmb();
+        writel(ring->next_to_use, ring->tail);
+}
+
 static int ixgbe_xdp_xmit(struct net_device *dev, int n,
-                          struct xdp_frame **frames)
+                          struct xdp_frame **frames, u32 flags)
 {
         struct ixgbe_adapter *adapter = netdev_priv(dev);
         struct ixgbe_ring *ring;
@@ -10033,6 +10042,9 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n,
         if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
                 return -ENETDOWN;
 
+        if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+                return -EINVAL;
+
         /* During program transitions its possible adapter->xdp_prog is assigned
          * but ring has not been configured yet. In this case simply abort xmit.
          */
@@ -10051,6 +10063,9 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n,
                 }
         }
 
+        if (unlikely(flags & XDP_XMIT_FLUSH))
+                ixgbe_xdp_ring_update_tail(ring);
+
         return n - drops;
 }
 
@@ -10069,11 +10084,7 @@ static void ixgbe_xdp_flush(struct net_device *dev)
         if (unlikely(!ring))
                 return;
 
-        /* Force memory writes to complete before letting h/w know there
-         * are new descriptors to fetch.
-         */
-        wmb();
-        writel(ring->next_to_use, ring->tail);
+        ixgbe_xdp_ring_update_tail(ring);
 
         return;
 }
...
@@ -1285,7 +1285,16 @@ static const struct net_device_ops tun_netdev_ops = {
         .ndo_get_stats64        = tun_net_get_stats64,
 };
 
-static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
+static void __tun_xdp_flush_tfile(struct tun_file *tfile)
+{
+        /* Notify and wake up reader process */
+        if (tfile->flags & TUN_FASYNC)
+                kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
+        tfile->socket.sk->sk_data_ready(tfile->socket.sk);
+}
+
+static int tun_xdp_xmit(struct net_device *dev, int n,
+                        struct xdp_frame **frames, u32 flags)
 {
         struct tun_struct *tun = netdev_priv(dev);
         struct tun_file *tfile;
@@ -1294,6 +1303,9 @@ static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames
         int cnt = n;
         int i;
 
+        if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+                return -EINVAL;
+
         rcu_read_lock();
 
         numqueues = READ_ONCE(tun->numqueues);
@@ -1321,6 +1333,9 @@ static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames
         }
         spin_unlock(&tfile->tx_ring.producer_lock);
 
+        if (flags & XDP_XMIT_FLUSH)
+                __tun_xdp_flush_tfile(tfile);
+
         rcu_read_unlock();
         return cnt - drops;
 }
@@ -1332,7 +1347,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
         if (unlikely(!frame))
                 return -EOVERFLOW;
 
-        return tun_xdp_xmit(dev, 1, &frame);
+        return tun_xdp_xmit(dev, 1, &frame, 0);
 }
 
 static void tun_xdp_flush(struct net_device *dev)
@@ -1349,11 +1364,7 @@ static void tun_xdp_flush(struct net_device *dev)
         tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
                                             numqueues]);
 
-        /* Notify and wake up reader process */
-        if (tfile->flags & TUN_FASYNC)
-                kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
-        tfile->socket.sk->sk_data_ready(tfile->socket.sk);
-
+        __tun_xdp_flush_tfile(tfile);
 out:
         rcu_read_unlock();
 }
...
@@ -468,7 +468,7 @@ static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
 }
 
 static int virtnet_xdp_xmit(struct net_device *dev,
-                            int n, struct xdp_frame **frames)
+                            int n, struct xdp_frame **frames, u32 flags)
 {
         struct virtnet_info *vi = netdev_priv(dev);
         struct receive_queue *rq = vi->rq;
@@ -481,6 +481,9 @@ static int virtnet_xdp_xmit(struct net_device *dev,
         int err;
         int i;
 
+        if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+                return -EINVAL;
+
         qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
         sq = &vi->sq[qp];
 
@@ -504,6 +507,10 @@ static int virtnet_xdp_xmit(struct net_device *dev,
                         drops++;
                 }
         }
+
+        if (flags & XDP_XMIT_FLUSH)
+                virtqueue_kick(sq->vq);
+
         return n - drops;
 }
...
@@ -1185,13 +1185,13 @@ struct dev_ifalias {
  *      This function is used to set or query state related to XDP on the
  *      netdevice and manage BPF offload. See definition of
  *      enum bpf_netdev_command for details.
- * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
+ * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp,
+ *                     u32 flags);
  *      This function is used to submit @n XDP packets for transmit on a
  *      netdevice. Returns number of frames successfully transmitted, frames
  *      that got dropped are freed/returned via xdp_return_frame().
  *      Returns negative number, means general error invoking ndo, meaning
  *      no frames were xmit'ed and core-caller will free all frames.
- *      TODO: Consider add flag to allow sending flush operation.
  * void (*ndo_xdp_flush)(struct net_device *dev);
  *      This function is used to inform the driver to flush a particular
  *      xdp tx queue. Must be called on same CPU as xdp_xmit.
@@ -1380,7 +1380,8 @@ struct net_device_ops {
         int                     (*ndo_bpf)(struct net_device *dev,
                                            struct netdev_bpf *bpf);
         int                     (*ndo_xdp_xmit)(struct net_device *dev, int n,
-                                                struct xdp_frame **xdp);
+                                                struct xdp_frame **xdp,
+                                                u32 flags);
         void                    (*ndo_xdp_flush)(struct net_device *dev);
 };
...
@@ -40,6 +40,10 @@ enum xdp_mem_type {
         MEM_TYPE_MAX,
 };
 
+/* XDP flags for ndo_xdp_xmit */
+#define XDP_XMIT_FLUSH          (1U << 0)       /* doorbell signal consumer */
+#define XDP_XMIT_FLAGS_MASK     XDP_XMIT_FLUSH
+
 struct xdp_mem_info {
         u32 type; /* enum xdp_mem_type, but known size type */
         u32 id;
...
@@ -217,7 +217,7 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
 }
 
 static int bq_xmit_all(struct bpf_dtab_netdev *obj,
-                       struct xdp_bulk_queue *bq)
+                       struct xdp_bulk_queue *bq, u32 flags)
 {
         struct net_device *dev = obj->dev;
         int sent = 0, drops = 0, err = 0;
@@ -232,7 +232,7 @@ static int bq_xmit_all(struct bpf_dtab_netdev *obj,
                 prefetch(xdpf);
         }
 
-        sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
+        sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
         if (sent < 0) {
                 err = sent;
                 sent = 0;
@@ -276,7 +276,6 @@ void __dev_map_flush(struct bpf_map *map)
         for_each_set_bit(bit, bitmap, map->max_entries) {
                 struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
                 struct xdp_bulk_queue *bq;
-                struct net_device *netdev;
 
                 /* This is possible if the dev entry is removed by user space
                  * between xdp redirect and flush op.
@@ -287,10 +286,7 @@ void __dev_map_flush(struct bpf_map *map)
                 __clear_bit(bit, bitmap);
 
                 bq = this_cpu_ptr(dev->bulkq);
-                bq_xmit_all(dev, bq);
-
-                netdev = dev->dev;
-                if (likely(netdev->netdev_ops->ndo_xdp_flush))
-                        netdev->netdev_ops->ndo_xdp_flush(netdev);
+                bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
         }
 }
 
@@ -320,7 +316,7 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
         struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
 
         if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
-                bq_xmit_all(obj, bq);
+                bq_xmit_all(obj, bq, 0);
 
         /* Ingress dev_rx will be the same for all xdp_frame's in
          * bulk_queue, because bq stored per-CPU and must be flushed
@@ -359,8 +355,7 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 
 static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
 {
-        if (dev->dev->netdev_ops->ndo_xdp_flush) {
-                struct net_device *fl = dev->dev;
+        if (dev->dev->netdev_ops->ndo_xdp_xmit) {
                 struct xdp_bulk_queue *bq;
                 unsigned long *bitmap;
 
@@ -371,9 +366,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
                         __clear_bit(dev->bit, bitmap);
 
                         bq = per_cpu_ptr(dev->bulkq, cpu);
-                        bq_xmit_all(dev, bq);
-
-                        fl->netdev_ops->ndo_xdp_flush(dev->dev);
+                        bq_xmit_all(dev, bq, XDP_XMIT_FLUSH);
                 }
         }
 }
...
@@ -3056,10 +3056,9 @@ static int __bpf_tx_xdp(struct net_device *dev,
         if (unlikely(!xdpf))
                 return -EOVERFLOW;
 
-        sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf);
+        sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf, XDP_XMIT_FLUSH);
         if (sent <= 0)
                 return sent;
-        dev->netdev_ops->ndo_xdp_flush(dev);
         return 0;
 }
...
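
On the caller side shown above (devmap and the non-map redirect path in
filter.c), the flush now travels as a flag on the last ndo_xdp_xmit call of a
cycle rather than as a separate ndo_xdp_flush invocation. A stand-alone sketch
of that bulk-queue pattern follows; the queue layout and helper bodies are
simplified stand-ins, not the kernel's devmap implementation.

/*
 * Stand-alone model of the caller-side change: frames are queued into a
 * per-CPU bulk queue, and the doorbell rides on the final xmit of a flush
 * cycle via XDP_XMIT_FLUSH instead of a separate ndo_xdp_flush call.
 * Types, sizes and helper bodies are illustrative stubs, not kernel code.
 */
#include <stdio.h>

#define XDP_XMIT_FLUSH  (1U << 0)
#define BULK_SIZE       16

struct fake_frame { int id; };

struct bulk_queue {
        struct fake_frame *q[BULK_SIZE];
        unsigned int count;
};

/* Stand-in for dev->netdev_ops->ndo_xdp_xmit(dev, n, frames, flags). */
static int model_xdp_xmit(int n, struct fake_frame **frames, unsigned int flags)
{
        (void)frames;
        printf("xmit %d frame(s)%s\n", n,
               (flags & XDP_XMIT_FLUSH) ? " + flush" : "");
        return n;
}

static void bq_xmit_all(struct bulk_queue *bq, unsigned int flags)
{
        if (bq->count)
                model_xdp_xmit(bq->count, bq->q, flags);
        bq->count = 0;
}

/* Enqueue path: spill without a doorbell only when the queue overflows. */
static void bq_enqueue(struct bulk_queue *bq, struct fake_frame *f)
{
        if (bq->count == BULK_SIZE)
                bq_xmit_all(bq, 0);
        bq->q[bq->count++] = f;
}

/* End-of-cycle flush: the doorbell is folded into the last xmit call. */
static void map_flush(struct bulk_queue *bq)
{
        bq_xmit_all(bq, XDP_XMIT_FLUSH);
}

int main(void)
{
        struct fake_frame f = { 1 };
        struct bulk_queue bq = { { 0 }, 0 };

        bq_enqueue(&bq, &f);
        map_flush(&bq);         /* one xmit call, flush flag set */
        return 0;
}
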