Commit b39212d5 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue

Tony Nguyen says:

====================
i40e: support XDP multi-buffer

Tirthendu Sarkar says:

This patchset adds multi-buffer support for XDP. Tx side already has
support for multi-buffer. This patchset focuses on Rx side. The last
patch contains actual multi-buffer changes while the previous ones are
preparatory patches.

On receiving the first buffer of a packet, xdp_buff is built and its
subsequent buffers are added to it as frags. While 'next_to_clean' keeps
pointing to the first descriptor, the newly introduced 'next_to_process'
keeps track of every descriptor for the packet.

On receiving EOP buffer the XDP program is called and appropriate action
is taken (building skb for XDP_PASS, reusing page for XDP_DROP, adjusting
page offsets for XDP_{REDIRECT,TX}).

The patchset also streamlines page offset adjustments for buffer reuse
to make it easier to post process the rx_buffers after running XDP prog.

With this patchset there does not seem to be any performance degradation
for XDP_PASS and some improvement (~1% for XDP_TX, ~5% for XDP_DROP) when
measured using xdp_rxq_info program from samples/bpf/ for 64B packets.

v1: https://lore.kernel.org/netdev/20230306210822.3381942-1-anthony.l.nguyen@intel.com/

* '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue:
  i40e: add support for XDP multi-buffer Rx
  i40e: add xdp_buff to i40e_ring struct
  i40e: introduce next_to_process to i40e_ring
  i40e: use frame_sz instead of recalculating truesize for building skb
  i40e: Change size to truesize when using i40e_rx_buffer_flip()
  i40e: add pre-xdp page_count in rx_buffer
  i40e: change Rx buffer size for legacy-rx to support XDP multi-buffer
  i40e: consolidate maximum frame size calculation for vsi
====================

Link: https://lore.kernel.org/r/20230309212819.1198218-1-anthony.l.nguyen@intel.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents c66b2111 e213ced1
......@@ -5402,6 +5402,13 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
return -EOPNOTSUPP;
}
if ((changed_flags & I40E_FLAG_LEGACY_RX) &&
I40E_2K_TOO_SMALL_WITH_PADDING) {
dev_warn(&pf->pdev->dev,
"2k Rx buffer is too small to fit standard MTU and skb_shared_info\n");
return -EOPNOTSUPP;
}
if ((changed_flags & new_flags &
I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) &&
(new_flags & I40E_FLAG_MFP_ENABLED))
......
......@@ -2896,15 +2896,35 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
}
/**
* i40e_max_xdp_frame_size - returns the maximum allowed frame size for XDP
* i40e_calculate_vsi_rx_buf_len - Calculates buffer length
*
* @vsi: VSI to calculate rx_buf_len from
*/
static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi)
{
if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
return SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048);
return PAGE_SIZE < 8192 ? I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048;
}
/**
* i40e_max_vsi_frame_size - returns the maximum allowed frame size for VSI
* @vsi: the vsi
* @xdp_prog: XDP program
**/
static int i40e_max_xdp_frame_size(struct i40e_vsi *vsi)
static int i40e_max_vsi_frame_size(struct i40e_vsi *vsi,
struct bpf_prog *xdp_prog)
{
if (PAGE_SIZE >= 8192 || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
return I40E_RXBUFFER_2048;
u16 rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi);
u16 chain_len;
if (xdp_prog && !xdp_prog->aux->xdp_has_frags)
chain_len = 1;
else
return I40E_RXBUFFER_3072;
chain_len = I40E_MAX_CHAINED_RX_BUFFERS;
return min_t(u16, rx_buf_len * chain_len, I40E_MAX_RXBUFFER);
}
/**
......@@ -2919,12 +2939,13 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
int frame_size;
if (i40e_enabled_xdp_vsi(vsi)) {
int frame_size = new_mtu + I40E_PACKET_HDR_PAD;
if (frame_size > i40e_max_xdp_frame_size(vsi))
return -EINVAL;
frame_size = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog);
if (new_mtu > frame_size - I40E_PACKET_HDR_PAD) {
netdev_err(netdev, "Error changing mtu to %d, Max is %d\n",
new_mtu, frame_size - I40E_PACKET_HDR_PAD);
return -EINVAL;
}
netdev_dbg(netdev, "changing MTU from %d to %d\n",
......@@ -3595,6 +3616,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
}
}
xdp_init_buff(&ring->xdp, i40e_rx_pg_size(ring) / 2, &ring->xdp_rxq);
rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
......@@ -3640,10 +3663,16 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
}
/* configure Rx buffer alignment */
if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) {
if (I40E_2K_TOO_SMALL_WITH_PADDING) {
dev_info(&vsi->back->pdev->dev,
"2k Rx buffer is too small to fit standard MTU and skb_shared_info\n");
return -EOPNOTSUPP;
}
clear_ring_build_skb_enabled(ring);
else
} else {
set_ring_build_skb_enabled(ring);
}
ring->rx_offset = i40e_rx_offset(ring);
......@@ -3693,24 +3722,6 @@ static int i40e_vsi_configure_tx(struct i40e_vsi *vsi)
return err;
}
/**
* i40e_calculate_vsi_rx_buf_len - Calculates buffer length
*
* @vsi: VSI to calculate rx_buf_len from
*/
static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi)
{
if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX))
return I40E_RXBUFFER_2048;
#if (PAGE_SIZE < 8192)
if (!I40E_2K_TOO_SMALL_WITH_PADDING && vsi->netdev->mtu <= ETH_DATA_LEN)
return I40E_RXBUFFER_1536 - NET_IP_ALIGN;
#endif
return PAGE_SIZE < 8192 ? I40E_RXBUFFER_3072 : I40E_RXBUFFER_2048;
}
/**
* i40e_vsi_configure_rx - Configure the VSI for Rx
* @vsi: the VSI being configured
......@@ -3722,13 +3733,15 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
int err = 0;
u16 i;
vsi->max_frame = I40E_MAX_RXBUFFER;
vsi->max_frame = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog);
vsi->rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi);
#if (PAGE_SIZE < 8192)
if (vsi->netdev && !I40E_2K_TOO_SMALL_WITH_PADDING &&
vsi->netdev->mtu <= ETH_DATA_LEN)
vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
vsi->netdev->mtu <= ETH_DATA_LEN) {
vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN;
vsi->max_frame = vsi->rx_buf_len;
}
#endif
/* set up individual rings */
......@@ -13316,15 +13329,15 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb,
static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
int frame_size = vsi->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
int frame_size = i40e_max_vsi_frame_size(vsi, prog);
struct i40e_pf *pf = vsi->back;
struct bpf_prog *old_prog;
bool need_reset;
int i;
/* Don't allow frames that span over multiple buffers */
if (frame_size > i40e_calculate_vsi_rx_buf_len(vsi)) {
NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) {
NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags");
return -EINVAL;
}
......@@ -13810,7 +13823,8 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
netdev->xdp_features = NETDEV_XDP_ACT_BASIC |
NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_XSK_ZEROCOPY;
NETDEV_XDP_ACT_XSK_ZEROCOPY |
NETDEV_XDP_ACT_RX_SG;
} else {
/* Relate the VSI_VMDQ name to the VSI_MAIN name. Note that we
* are still limited by IFNAMSIZ, but we're adding 'v%d\0' to
......
......@@ -162,45 +162,45 @@ DECLARE_EVENT_CLASS(
TP_PROTO(struct i40e_ring *ring,
union i40e_16byte_rx_desc *desc,
struct sk_buff *skb),
struct xdp_buff *xdp),
TP_ARGS(ring, desc, skb),
TP_ARGS(ring, desc, xdp),
TP_STRUCT__entry(
__field(void*, ring)
__field(void*, desc)
__field(void*, skb)
__field(void*, xdp)
__string(devname, ring->netdev->name)
),
TP_fast_assign(
__entry->ring = ring;
__entry->desc = desc;
__entry->skb = skb;
__entry->xdp = xdp;
__assign_str(devname, ring->netdev->name);
),
TP_printk(
"netdev: %s ring: %p desc: %p skb %p",
"netdev: %s ring: %p desc: %p xdp %p",
__get_str(devname), __entry->ring,
__entry->desc, __entry->skb)
__entry->desc, __entry->xdp)
);
DEFINE_EVENT(
i40e_rx_template, i40e_clean_rx_irq,
TP_PROTO(struct i40e_ring *ring,
union i40e_16byte_rx_desc *desc,
struct sk_buff *skb),
struct xdp_buff *xdp),
TP_ARGS(ring, desc, skb));
TP_ARGS(ring, desc, xdp));
DEFINE_EVENT(
i40e_rx_template, i40e_clean_rx_irq_rx,
TP_PROTO(struct i40e_ring *ring,
union i40e_16byte_rx_desc *desc,
struct sk_buff *skb),
struct xdp_buff *xdp),
TP_ARGS(ring, desc, skb));
TP_ARGS(ring, desc, xdp));
DECLARE_EVENT_CLASS(
i40e_xmit_template,
......
This diff is collapsed.
......@@ -277,6 +277,7 @@ struct i40e_rx_buffer {
struct page *page;
__u32 page_offset;
__u16 pagecnt_bias;
__u32 page_count;
};
struct i40e_queue_stats {
......@@ -336,6 +337,17 @@ struct i40e_ring {
u8 dcb_tc; /* Traffic class of ring */
u8 __iomem *tail;
/* Storing xdp_buff on ring helps in saving the state of partially built
* packet when i40e_clean_rx_ring_irq() must return before it sees EOP
* and to resume packet building for this ring in the next call to
* i40e_clean_rx_ring_irq().
*/
struct xdp_buff xdp;
/* Next descriptor to be processed; next_to_clean is updated only on
* processing EOP descriptor
*/
u16 next_to_process;
/* high bit set means dynamic, use accessor routines to read/write.
* hardware only supports 2us resolution for the ITR registers.
* these values always store the USER setting, and must be converted
......@@ -380,14 +392,6 @@ struct i40e_ring {
struct rcu_head rcu; /* to avoid race on free */
u16 next_to_alloc;
struct sk_buff *skb; /* When i40e_clean_rx_ring_irq() must
* return before it sees the EOP for
* the current packet, we save that skb
* here and resume receiving this
* packet the next time
* i40e_clean_rx_ring_irq() is called
* for this ring.
*/
struct i40e_channel *ch;
u16 rx_offset;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment