Commit 92470808 authored by John Fastabend, committed by Jeff Kirsher

ixgbe: add XDP support for pass and drop actions

Basic XDP drop support for ixgbe. Uses READ_ONCE/xchg semantics on XDP
programs instead of RCU primitives as suggested by Daniel Borkmann and
Alex Duyck.

v2: fixed the build issues seen with XDP when page sizes are larger than 4K
    and made minor fixes based on feedback from Jakub Kicinski
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
parent 6133406b
...@@ -318,6 +318,7 @@ struct ixgbe_ring { ...@@ -318,6 +318,7 @@ struct ixgbe_ring {
struct ixgbe_ring *next; /* pointer to next ring in q_vector */ struct ixgbe_ring *next; /* pointer to next ring in q_vector */
struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */ struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */
struct net_device *netdev; /* netdev ring belongs to */ struct net_device *netdev; /* netdev ring belongs to */
struct bpf_prog *xdp_prog;
struct device *dev; /* device for DMA mapping */ struct device *dev; /* device for DMA mapping */
struct ixgbe_fwd_adapter *l2_accel_priv; struct ixgbe_fwd_adapter *l2_accel_priv;
void *desc; /* descriptor ring memory */ void *desc; /* descriptor ring memory */
...@@ -555,6 +556,7 @@ struct ixgbe_adapter { ...@@ -555,6 +556,7 @@ struct ixgbe_adapter {
unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
/* OS defined structs */ /* OS defined structs */
struct net_device *netdev; struct net_device *netdev;
struct bpf_prog *xdp_prog;
struct pci_dev *pdev; struct pci_dev *pdev;
unsigned long state; unsigned long state;
...@@ -835,7 +837,7 @@ void ixgbe_down(struct ixgbe_adapter *adapter); ...@@ -835,7 +837,7 @@ void ixgbe_down(struct ixgbe_adapter *adapter);
void ixgbe_reinit_locked(struct ixgbe_adapter *adapter); void ixgbe_reinit_locked(struct ixgbe_adapter *adapter);
void ixgbe_reset(struct ixgbe_adapter *adapter); void ixgbe_reset(struct ixgbe_adapter *adapter);
void ixgbe_set_ethtool_ops(struct net_device *netdev); void ixgbe_set_ethtool_ops(struct net_device *netdev);
int ixgbe_setup_rx_resources(struct ixgbe_ring *); int ixgbe_setup_rx_resources(struct ixgbe_adapter *, struct ixgbe_ring *);
int ixgbe_setup_tx_resources(struct ixgbe_ring *); int ixgbe_setup_tx_resources(struct ixgbe_ring *);
void ixgbe_free_rx_resources(struct ixgbe_ring *); void ixgbe_free_rx_resources(struct ixgbe_ring *);
void ixgbe_free_tx_resources(struct ixgbe_ring *); void ixgbe_free_tx_resources(struct ixgbe_ring *);
......
...@@ -1128,7 +1128,7 @@ static int ixgbe_set_ringparam(struct net_device *netdev, ...@@ -1128,7 +1128,7 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
sizeof(struct ixgbe_ring)); sizeof(struct ixgbe_ring));
temp_ring[i].count = new_rx_count; temp_ring[i].count = new_rx_count;
err = ixgbe_setup_rx_resources(&temp_ring[i]); err = ixgbe_setup_rx_resources(adapter, &temp_ring[i]);
if (err) { if (err) {
while (i) { while (i) {
i--; i--;
...@@ -1761,7 +1761,7 @@ static int ixgbe_setup_desc_rings(struct ixgbe_adapter *adapter) ...@@ -1761,7 +1761,7 @@ static int ixgbe_setup_desc_rings(struct ixgbe_adapter *adapter)
rx_ring->netdev = adapter->netdev; rx_ring->netdev = adapter->netdev;
rx_ring->reg_idx = adapter->rx_ring[0]->reg_idx; rx_ring->reg_idx = adapter->rx_ring[0]->reg_idx;
err = ixgbe_setup_rx_resources(rx_ring); err = ixgbe_setup_rx_resources(adapter, rx_ring);
if (err) { if (err) {
ret_val = 4; ret_val = 4;
goto err_nomem; goto err_nomem;
......
...@@ -49,6 +49,9 @@ ...@@ -49,6 +49,9 @@
#include <linux/if_macvlan.h> #include <linux/if_macvlan.h>
#include <linux/if_bridge.h> #include <linux/if_bridge.h>
#include <linux/prefetch.h> #include <linux/prefetch.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/atomic.h>
#include <scsi/fc/fc_fcoe.h> #include <scsi/fc/fc_fcoe.h>
#include <net/udp_tunnel.h> #include <net/udp_tunnel.h>
#include <net/pkt_cls.h> #include <net/pkt_cls.h>
...@@ -1855,6 +1858,10 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, ...@@ -1855,6 +1858,10 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
* @rx_desc: pointer to the EOP Rx descriptor * @rx_desc: pointer to the EOP Rx descriptor
* @skb: pointer to current skb being fixed * @skb: pointer to current skb being fixed
* *
* Check if the skb is valid; in the XDP case it will be an error pointer.
* Return true in this case to abort processing and advance to next
* descriptor.
*
* Check for corrupted packet headers caused by senders on the local L2 * Check for corrupted packet headers caused by senders on the local L2
* embedded NIC switch not setting up their Tx Descriptors right. These * embedded NIC switch not setting up their Tx Descriptors right. These
* should be very rare. * should be very rare.
...@@ -1873,6 +1880,10 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, ...@@ -1873,6 +1880,10 @@ static bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
{ {
struct net_device *netdev = rx_ring->netdev; struct net_device *netdev = rx_ring->netdev;
/* XDP packets use error pointer so abort at this point */
if (IS_ERR(skb))
return true;
/* verify that the packet does not have any known errors */ /* verify that the packet does not have any known errors */
if (unlikely(ixgbe_test_staterr(rx_desc, if (unlikely(ixgbe_test_staterr(rx_desc,
IXGBE_RXDADV_ERR_FRAME_ERR_MASK) && IXGBE_RXDADV_ERR_FRAME_ERR_MASK) &&
...@@ -2048,7 +2059,7 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring, ...@@ -2048,7 +2059,7 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
/* hand second half of page back to the ring */ /* hand second half of page back to the ring */
ixgbe_reuse_rx_page(rx_ring, rx_buffer); ixgbe_reuse_rx_page(rx_ring, rx_buffer);
} else { } else {
if (IXGBE_CB(skb)->dma == rx_buffer->dma) { if (!IS_ERR(skb) && IXGBE_CB(skb)->dma == rx_buffer->dma) {
/* the page has been released from the ring */ /* the page has been released from the ring */
IXGBE_CB(skb)->page_released = true; IXGBE_CB(skb)->page_released = true;
} else { } else {
...@@ -2069,21 +2080,22 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring, ...@@ -2069,21 +2080,22 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring, static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
struct ixgbe_rx_buffer *rx_buffer, struct ixgbe_rx_buffer *rx_buffer,
union ixgbe_adv_rx_desc *rx_desc, struct xdp_buff *xdp,
unsigned int size) union ixgbe_adv_rx_desc *rx_desc)
{ {
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; unsigned int size = xdp->data_end - xdp->data;
#if (PAGE_SIZE < 8192) #if (PAGE_SIZE < 8192)
unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2; unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
#else #else
unsigned int truesize = SKB_DATA_ALIGN(size); unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end -
xdp->data_hard_start);
#endif #endif
struct sk_buff *skb; struct sk_buff *skb;
/* prefetch first cache line of first page */ /* prefetch first cache line of first page */
prefetch(va); prefetch(xdp->data);
#if L1_CACHE_BYTES < 128 #if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES); prefetch(xdp->data + L1_CACHE_BYTES);
#endif #endif
/* allocate a skb to store the frags */ /* allocate a skb to store the frags */
...@@ -2096,7 +2108,7 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring, ...@@ -2096,7 +2108,7 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
IXGBE_CB(skb)->dma = rx_buffer->dma; IXGBE_CB(skb)->dma = rx_buffer->dma;
skb_add_rx_frag(skb, 0, rx_buffer->page, skb_add_rx_frag(skb, 0, rx_buffer->page,
rx_buffer->page_offset, xdp->data - page_address(rx_buffer->page),
size, truesize); size, truesize);
#if (PAGE_SIZE < 8192) #if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize; rx_buffer->page_offset ^= truesize;
...@@ -2104,7 +2116,8 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring, ...@@ -2104,7 +2116,8 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
rx_buffer->page_offset += truesize; rx_buffer->page_offset += truesize;
#endif #endif
} else { } else {
memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); memcpy(__skb_put(skb, size),
xdp->data, ALIGN(size, sizeof(long)));
rx_buffer->pagecnt_bias++; rx_buffer->pagecnt_bias++;
} }
...@@ -2113,32 +2126,32 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring, ...@@ -2113,32 +2126,32 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
struct ixgbe_rx_buffer *rx_buffer, struct ixgbe_rx_buffer *rx_buffer,
union ixgbe_adv_rx_desc *rx_desc, struct xdp_buff *xdp,
unsigned int size) union ixgbe_adv_rx_desc *rx_desc)
{ {
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
#if (PAGE_SIZE < 8192) #if (PAGE_SIZE < 8192)
unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2; unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
#else #else
unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
SKB_DATA_ALIGN(IXGBE_SKB_PAD + size); SKB_DATA_ALIGN(xdp->data_end -
xdp->data_hard_start);
#endif #endif
struct sk_buff *skb; struct sk_buff *skb;
/* prefetch first cache line of first page */ /* prefetch first cache line of first page */
prefetch(va); prefetch(xdp->data);
#if L1_CACHE_BYTES < 128 #if L1_CACHE_BYTES < 128
prefetch(va + L1_CACHE_BYTES); prefetch(xdp->data + L1_CACHE_BYTES);
#endif #endif
/* build an skb around the page buffer */ /* build an skb to around the page buffer */
skb = build_skb(va - IXGBE_SKB_PAD, truesize); skb = build_skb(xdp->data_hard_start, truesize);
if (unlikely(!skb)) if (unlikely(!skb))
return NULL; return NULL;
/* update pointers within the skb to store the data */ /* update pointers within the skb to store the data */
skb_reserve(skb, IXGBE_SKB_PAD); skb_reserve(skb, xdp->data - xdp->data_hard_start);
__skb_put(skb, size); __skb_put(skb, xdp->data_end - xdp->data);
/* record DMA address if this is the start of a chain of buffers */ /* record DMA address if this is the start of a chain of buffers */
if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
...@@ -2154,6 +2167,41 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring, ...@@ -2154,6 +2167,41 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
return skb; return skb;
} }
/* Verdicts produced by ixgbe_run_xdp(); returned negated inside ERR_PTR() */
#define IXGBE_XDP_PASS 0
#define IXGBE_XDP_CONSUMED 1

/**
 * ixgbe_run_xdp - run the ring's XDP program, if any, on a received buffer
 * @rx_ring: Rx ring whose xdp_prog pointer is sampled
 * @xdp: buffer description handed to the program
 *
 * The program pointer is read once with READ_ONCE() inside an RCU read-side
 * section. When no program is installed, or the program returns XDP_PASS,
 * the verdict stays IXGBE_XDP_PASS. Every other action ends with
 * IXGBE_XDP_CONSUMED: XDP_DROP silently, XDP_TX and XDP_ABORTED after a
 * trace_xdp_exception(), and unrecognised actions after an additional
 * bpf_warn_invalid_xdp_action().
 *
 * Returns ERR_PTR(-verdict), i.e. ERR_PTR(0) for a pass and
 * ERR_PTR(-IXGBE_XDP_CONSUMED) when the packet was consumed.
 **/
static struct sk_buff *ixgbe_run_xdp(struct ixgbe_ring *rx_ring,
				     struct xdp_buff *xdp)
{
	struct bpf_prog *prog;
	int verdict = IXGBE_XDP_PASS;
	u32 action;

	rcu_read_lock();

	prog = READ_ONCE(rx_ring->xdp_prog);
	if (prog) {
		action = bpf_prog_run_xdp(prog, xdp);

		if (action != XDP_PASS) {
			if (action != XDP_DROP) {
				/* anything besides TX/ABORTED is unknown */
				if (action != XDP_TX && action != XDP_ABORTED)
					bpf_warn_invalid_xdp_action(action);

				trace_xdp_exception(rx_ring->netdev, prog,
						    action);
			}

			/* aborts and unsupported actions drop the packet */
			verdict = IXGBE_XDP_CONSUMED;
		}
	}

	rcu_read_unlock();

	return ERR_PTR(-verdict);
}
/** /**
* ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @q_vector: structure containing interrupt and ring information * @q_vector: structure containing interrupt and ring information
...@@ -2183,6 +2231,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, ...@@ -2183,6 +2231,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
union ixgbe_adv_rx_desc *rx_desc; union ixgbe_adv_rx_desc *rx_desc;
struct ixgbe_rx_buffer *rx_buffer; struct ixgbe_rx_buffer *rx_buffer;
struct sk_buff *skb; struct sk_buff *skb;
struct xdp_buff xdp;
unsigned int size; unsigned int size;
/* return some buffers to hardware, one at a time is too slow */ /* return some buffers to hardware, one at a time is too slow */
...@@ -2205,14 +2254,29 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, ...@@ -2205,14 +2254,29 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size); rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size);
/* retrieve a buffer from the ring */ /* retrieve a buffer from the ring */
if (skb) if (!skb) {
xdp.data = page_address(rx_buffer->page) +
rx_buffer->page_offset;
xdp.data_hard_start = xdp.data -
ixgbe_rx_offset(rx_ring);
xdp.data_end = xdp.data + size;
skb = ixgbe_run_xdp(rx_ring, &xdp);
}
if (IS_ERR(skb)) {
total_rx_packets++;
total_rx_bytes += size;
rx_buffer->pagecnt_bias++;
} else if (skb) {
ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size); ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size);
else if (ring_uses_build_skb(rx_ring)) } else if (ring_uses_build_skb(rx_ring)) {
skb = ixgbe_build_skb(rx_ring, rx_buffer, skb = ixgbe_build_skb(rx_ring, rx_buffer,
rx_desc, size); &xdp, rx_desc);
else } else {
skb = ixgbe_construct_skb(rx_ring, rx_buffer, skb = ixgbe_construct_skb(rx_ring, rx_buffer,
rx_desc, size); &xdp, rx_desc);
}
/* exit if we failed to retrieve a buffer */ /* exit if we failed to retrieve a buffer */
if (!skb) { if (!skb) {
...@@ -6073,7 +6137,8 @@ static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter) ...@@ -6073,7 +6137,8 @@ static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
* *
* Returns 0 on success, negative on failure * Returns 0 on success, negative on failure
**/ **/
int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
struct ixgbe_ring *rx_ring)
{ {
struct device *dev = rx_ring->dev; struct device *dev = rx_ring->dev;
int orig_node = dev_to_node(dev); int orig_node = dev_to_node(dev);
...@@ -6112,6 +6177,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) ...@@ -6112,6 +6177,8 @@ int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring)
rx_ring->next_to_clean = 0; rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0; rx_ring->next_to_use = 0;
rx_ring->xdp_prog = adapter->xdp_prog;
return 0; return 0;
err: err:
vfree(rx_ring->rx_buffer_info); vfree(rx_ring->rx_buffer_info);
...@@ -6135,7 +6202,7 @@ static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter) ...@@ -6135,7 +6202,7 @@ static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
int i, err = 0; int i, err = 0;
for (i = 0; i < adapter->num_rx_queues; i++) { for (i = 0; i < adapter->num_rx_queues; i++) {
err = ixgbe_setup_rx_resources(adapter->rx_ring[i]); err = ixgbe_setup_rx_resources(adapter, adapter->rx_ring[i]);
if (!err) if (!err)
continue; continue;
...@@ -6203,6 +6270,7 @@ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring) ...@@ -6203,6 +6270,7 @@ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
{ {
ixgbe_clean_rx_ring(rx_ring); ixgbe_clean_rx_ring(rx_ring);
rx_ring->xdp_prog = NULL;
vfree(rx_ring->rx_buffer_info); vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL; rx_ring->rx_buffer_info = NULL;
...@@ -9468,6 +9536,54 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev, ...@@ -9468,6 +9536,54 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
return features; return features;
} }
/**
 * ixgbe_xdp_setup - install an XDP program on the adapter and its Rx rings
 * @dev: network device whose private data is the ixgbe adapter
 * @prog: program to store in the adapter and in every Rx ring
 *
 * Refuses the request when SR-IOV or DCB is enabled, when any Rx ring has
 * RSC enabled, or when the frame size derived from the MTU (plus Ethernet
 * header, FCS and VLAN header) exceeds a ring's Rx buffer size. Otherwise
 * the adapter's program pointer is exchanged, the new pointer is published
 * to each Rx ring, and the previously installed program, if any, is put.
 *
 * Returns 0 on success, -EINVAL if any of the checks above fails
 **/
static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
{
	int q, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
	struct ixgbe_adapter *adapter = netdev_priv(dev);
	struct bpf_prog *prev;

	if (adapter->flags &
	    (IXGBE_FLAG_SRIOV_ENABLED | IXGBE_FLAG_DCB_ENABLED))
		return -EINVAL;

	/* every Rx ring must have RSC off and fit a full frame in one buffer */
	for (q = 0; q < adapter->num_rx_queues; q++) {
		struct ixgbe_ring *rx = adapter->rx_ring[q];

		if (ring_is_rsc_enabled(rx) ||
		    frame_size > ixgbe_rx_bufsz(rx))
			return -EINVAL;
	}

	/* swap the adapter-level pointer first, then mirror it to the rings */
	prev = xchg(&adapter->xdp_prog, prog);

	for (q = 0; q < adapter->num_rx_queues; q++)
		xchg(&adapter->rx_ring[q]->xdp_prog, adapter->xdp_prog);

	/* release the program that was replaced, if there was one */
	if (prev)
		bpf_prog_put(prev);

	return 0;
}
static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp)
{
struct ixgbe_adapter *adapter = netdev_priv(dev);
switch (xdp->command) {
case XDP_SETUP_PROG:
return ixgbe_xdp_setup(dev, xdp->prog);
case XDP_QUERY_PROG:
xdp->prog_attached = !!(adapter->xdp_prog);
return 0;
default:
return -EINVAL;
}
}
static const struct net_device_ops ixgbe_netdev_ops = { static const struct net_device_ops ixgbe_netdev_ops = {
.ndo_open = ixgbe_open, .ndo_open = ixgbe_open,
.ndo_stop = ixgbe_close, .ndo_stop = ixgbe_close,
...@@ -9513,6 +9629,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { ...@@ -9513,6 +9629,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
.ndo_udp_tunnel_add = ixgbe_add_udp_tunnel_port, .ndo_udp_tunnel_add = ixgbe_add_udp_tunnel_port,
.ndo_udp_tunnel_del = ixgbe_del_udp_tunnel_port, .ndo_udp_tunnel_del = ixgbe_del_udp_tunnel_port,
.ndo_features_check = ixgbe_features_check, .ndo_features_check = ixgbe_features_check,
.ndo_xdp = ixgbe_xdp,
}; };
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment