Commit 69cf8730 authored by Jakub Kicinski

Merge branch '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue

Tony Nguyen says:

====================
idpf: XDP chapter I: convert Rx to libeth

Alexander Lobakin says:

XDP for idpf is currently 5 chapters:
* convert Rx to libeth (this);
* convert Tx and stats to libeth;
* generic XDP and XSk code changes, libeth_xdp;
* actual XDP for idpf via libeth_xdp;
* XSk for idpf (^).

Part I does the following:
* splits &idpf_queue into 4 (RQ, SQ, FQ, CQ) and puts them on a diet;
* ensures optimal cacheline placement, strictly asserts CL sizes;
* moves currently unused/dead singleq mode out of line;
* reuses libeth's Rx ptype definitions and helpers;
* uses libeth's Rx buffer management for both header and payload;
* eliminates memcpy()s and coherent DMA uses on hotpath, uses
  napi_build_skb() instead of in-place short skb allocation.
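
As a hedged illustration of the last bullet, a before/after sketch of the
Rx skb hotpath (names, sizes and offsets are illustrative, not the literal
idpf code):

	/* before: allocate a fresh skb and memcpy() the short frame in */
	skb = napi_alloc_skb(napi, size);
	if (unlikely(!skb))
		return NULL;
	memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));

	/* after: wrap the page_pool buffer in place, no copy, no coherent DMA */
	skb = napi_build_skb(va, fqe->truesize);
	if (unlikely(!skb))
		return NULL;
	skb_mark_for_recycle(skb);	/* buffer remains page_pool-owned */
	skb_reserve(skb, offset);
	skb_put(skb, size);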

Most idpf patches, except for the queue split, remove more lines
than they add.

Expect far better memory utilization and +5-8% on Rx depending on
the case (+17% on skb XDP_DROP :>).

* '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue:
  idpf: use libeth Rx buffer management for payload buffer
  idpf: convert header split mode to libeth + napi_build_skb()
  libeth: support different types of buffers for Rx
  idpf: remove legacy Page Pool Ethtool stats
  idpf: reuse libeth's definitions of parsed ptype structures
  idpf: compile singleq code only under default-n CONFIG_IDPF_SINGLEQ
  idpf: merge singleq and splitq &net_device_ops
  idpf: strictly assert cachelines of queue and queue vector structures
  idpf: avoid bloating &idpf_q_vector with big %NR_CPUS
  idpf: split &idpf_queue into 4 strictly-typed queue structures
  idpf: stop using macros for accessing queue descriptors
  libeth: add cacheline / struct layout assertion helpers
  page_pool: use __cacheline_group_{begin, end}_aligned()
  cache: add __cacheline_group_{begin, end}_aligned() (+ couple more)
====================

Link: https://patch.msgid.link/20240710203031.188081-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 26f45317 74d1412a
@@ -384,17 +384,6 @@ config IGC_LEDS
Optional support for controlling the NIC LEDs with the netdev
LED trigger.
config IDPF
tristate "Intel(R) Infrastructure Data Path Function Support"
depends on PCI_MSI
select DIMLIB
select PAGE_POOL
select PAGE_POOL_STATS
help
This driver supports Intel(R) Infrastructure Data Path Function
devices.
To compile this driver as a module, choose M here. The module
will be called idpf.
source "drivers/net/ethernet/intel/idpf/Kconfig"
endif # NET_VENDOR_INTEL
# SPDX-License-Identifier: GPL-2.0-only
# Copyright (C) 2024 Intel Corporation
config IDPF
tristate "Intel(R) Infrastructure Data Path Function Support"
depends on PCI_MSI
select DIMLIB
select LIBETH
help
This driver supports Intel(R) Infrastructure Data Path Function
devices.
To compile this driver as a module, choose M here. The module
will be called idpf.
if IDPF
config IDPF_SINGLEQ
bool "idpf singleq support"
help
This option enables support for legacy single Rx/Tx queues w/ no
completion and fill queues. Only enable if you have hardware which
wants to work in this mode as it increases the driver size and adds
runtime checks on the hotpath.
endif # IDPF
@@ -12,7 +12,8 @@ idpf-y := \
idpf_ethtool.o \
idpf_lib.o \
idpf_main.o \
idpf_singleq_txrx.o \
idpf_txrx.o \
idpf_virtchnl.o \
idpf_vf_dev.o
idpf-$(CONFIG_IDPF_SINGLEQ) += idpf_singleq_txrx.o
@@ -17,10 +17,8 @@ struct idpf_vport_max_q;
#include <linux/sctp.h>
#include <linux/ethtool_netlink.h>
#include <net/gro.h>
#include <linux/dim.h>
#include "virtchnl2.h"
#include "idpf_lan_txrx.h"
#include "idpf_txrx.h"
#include "idpf_controlq.h"
@@ -266,7 +264,6 @@ struct idpf_port_stats {
* the worst case.
* @num_bufqs_per_qgrp: Buffer queues per RX queue in a given grouping
* @bufq_desc_count: Buffer queue descriptor count
* @bufq_size: Size of buffers in ring (e.g. 2K, 4K, etc)
* @num_rxq_grp: Number of RX queues in a group
* @rxq_grps: Total number of RX groups. Number of groups * number of RX per
* group will yield total number of RX queues.
@@ -302,7 +299,7 @@ struct idpf_vport {
u16 num_txq_grp;
struct idpf_txq_group *txq_grps;
u32 txq_model;
struct idpf_queue **txqs;
struct idpf_tx_queue **txqs;
bool crc_enable;
u16 num_rxq;
@@ -310,11 +307,10 @@
u32 rxq_desc_count;
u8 num_bufqs_per_qgrp;
u32 bufq_desc_count[IDPF_MAX_BUFQS_PER_RXQ_GRP];
u32 bufq_size[IDPF_MAX_BUFQS_PER_RXQ_GRP];
u16 num_rxq_grp;
struct idpf_rxq_group *rxq_grps;
u32 rxq_model;
struct idpf_rx_ptype_decoded rx_ptype_lkup[IDPF_RX_MAX_PTYPE];
struct libeth_rx_pt *rx_ptype_lkup;
struct idpf_adapter *adapter;
struct net_device *netdev;
@@ -601,7 +597,8 @@ struct idpf_adapter {
*/
static inline int idpf_is_queue_model_split(u16 q_model)
{
return q_model == VIRTCHNL2_QUEUE_MODEL_SPLIT;
return !IS_ENABLED(CONFIG_IDPF_SINGLEQ) ||
q_model == VIRTCHNL2_QUEUE_MODEL_SPLIT;
}
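
With default-n CONFIG_IDPF_SINGLEQ, the helper above is compile-time true,
so every singleq branch becomes dead code the compiler can drop. A hedged
sketch of the resulting pattern (caller and function names illustrative):

	/* when the helper folds to constant-true, the else branch and
	 * everything only it references are eliminated at build time
	 */
	if (idpf_is_queue_model_split(vport->rxq_model))
		work = idpf_rx_splitq_clean(rxq, budget);
	else
		work = idpf_rx_singleq_clean(rxq, budget);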
#define idpf_is_cap_ena(adapter, field, flag) \
@@ -4,6 +4,8 @@
#ifndef _IDPF_LAN_TXRX_H_
#define _IDPF_LAN_TXRX_H_
#include <linux/bits.h>
enum idpf_rss_hash {
IDPF_HASH_INVALID = 0,
/* Values 1 - 28 are reserved for future use */
@@ -4,8 +4,7 @@
#include "idpf.h"
#include "idpf_virtchnl.h"
static const struct net_device_ops idpf_netdev_ops_splitq;
static const struct net_device_ops idpf_netdev_ops_singleq;
static const struct net_device_ops idpf_netdev_ops;
/**
* idpf_init_vector_stack - Fill the MSIX vector stack with vector index
@@ -69,7 +68,7 @@ static void idpf_deinit_vector_stack(struct idpf_adapter *adapter)
static void idpf_mb_intr_rel_irq(struct idpf_adapter *adapter)
{
clear_bit(IDPF_MB_INTR_MODE, adapter->flags);
free_irq(adapter->msix_entries[0].vector, adapter);
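/* free_irq() returns the devname passed to request_irq(), which here is
 * the kasprintf()-allocated string from idpf_mb_intr_req_irq(), so free
 * it once the IRQ is released.
 */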
kfree(free_irq(adapter->msix_entries[0].vector, adapter));
queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0);
}
@@ -124,15 +123,14 @@
*/
static int idpf_mb_intr_req_irq(struct idpf_adapter *adapter)
{
struct idpf_q_vector *mb_vector = &adapter->mb_vector;
int irq_num, mb_vidx = 0, err;
char *name;
irq_num = adapter->msix_entries[mb_vidx].vector;
mb_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d",
dev_driver_string(&adapter->pdev->dev),
"Mailbox", mb_vidx);
err = request_irq(irq_num, adapter->irq_mb_handler, 0,
mb_vector->name, adapter);
name = kasprintf(GFP_KERNEL, "%s-%s-%d",
dev_driver_string(&adapter->pdev->dev),
"Mailbox", mb_vidx);
err = request_irq(irq_num, adapter->irq_mb_handler, 0, name, adapter);
if (err) {
dev_err(&adapter->pdev->dev,
"IRQ request for mailbox failed, error: %d\n", err);
@@ -765,10 +763,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport)
}
/* assign netdev_ops */
if (idpf_is_queue_model_split(vport->txq_model))
netdev->netdev_ops = &idpf_netdev_ops_splitq;
else
netdev->netdev_ops = &idpf_netdev_ops_singleq;
netdev->netdev_ops = &idpf_netdev_ops;
/* setup watchdog timeout value to be 5 second */
netdev->watchdog_timeo = 5 * HZ;
@@ -946,6 +941,9 @@ static void idpf_decfg_netdev(struct idpf_vport *vport)
{
struct idpf_adapter *adapter = vport->adapter;
kfree(vport->rx_ptype_lkup);
vport->rx_ptype_lkup = NULL;
unregister_netdev(vport->netdev);
free_netdev(vport->netdev);
vport->netdev = NULL;
@@ -1318,14 +1316,14 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport)
if (idpf_is_queue_model_split(vport->rxq_model)) {
for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
struct idpf_queue *q =
const struct idpf_buf_queue *q =
&grp->splitq.bufq_sets[j].bufq;
writel(q->next_to_alloc, q->tail);
}
} else {
for (j = 0; j < grp->singleq.num_rxq; j++) {
struct idpf_queue *q =
const struct idpf_rx_queue *q =
grp->singleq.rxqs[j];
writel(q->next_to_alloc, q->tail);
@@ -1855,7 +1853,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
enum idpf_vport_state current_state = np->state;
struct idpf_adapter *adapter = vport->adapter;
struct idpf_vport *new_vport;
int err, i;
int err;
/* If the system is low on memory, we can end up in bad state if we
* free all the memory for queue resources and try to allocate them
@@ -1929,46 +1927,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
*/
memcpy(vport, new_vport, offsetof(struct idpf_vport, link_speed_mbps));
/* Since idpf_vport_queues_alloc was called with new_port, the queue
* back pointers are currently pointing to the local new_vport. Reset
* the backpointers to the original vport here
*/
for (i = 0; i < vport->num_txq_grp; i++) {
struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
int j;
tx_qgrp->vport = vport;
for (j = 0; j < tx_qgrp->num_txq; j++)
tx_qgrp->txqs[j]->vport = vport;
if (idpf_is_queue_model_split(vport->txq_model))
tx_qgrp->complq->vport = vport;
}
for (i = 0; i < vport->num_rxq_grp; i++) {
struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
struct idpf_queue *q;
u16 num_rxq;
int j;
rx_qgrp->vport = vport;
for (j = 0; j < vport->num_bufqs_per_qgrp; j++)
rx_qgrp->splitq.bufq_sets[j].bufq.vport = vport;
if (idpf_is_queue_model_split(vport->rxq_model))
num_rxq = rx_qgrp->splitq.num_rxq_sets;
else
num_rxq = rx_qgrp->singleq.num_rxq;
for (j = 0; j < num_rxq; j++) {
if (idpf_is_queue_model_split(vport->rxq_model))
q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
else
q = rx_qgrp->singleq.rxqs[j];
q->vport = vport;
}
}
if (reset_cause == IDPF_SR_Q_CHANGE)
idpf_vport_alloc_vec_indexes(vport);
@@ -2393,24 +2351,10 @@ void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem)
mem->pa = 0;
}
static const struct net_device_ops idpf_netdev_ops_splitq = {
.ndo_open = idpf_open,
.ndo_stop = idpf_stop,
.ndo_start_xmit = idpf_tx_splitq_start,
.ndo_features_check = idpf_features_check,
.ndo_set_rx_mode = idpf_set_rx_mode,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = idpf_set_mac,
.ndo_change_mtu = idpf_change_mtu,
.ndo_get_stats64 = idpf_get_stats64,
.ndo_set_features = idpf_set_features,
.ndo_tx_timeout = idpf_tx_timeout,
};
static const struct net_device_ops idpf_netdev_ops_singleq = {
static const struct net_device_ops idpf_netdev_ops = {
.ndo_open = idpf_open,
.ndo_stop = idpf_stop,
.ndo_start_xmit = idpf_tx_singleq_start,
.ndo_start_xmit = idpf_tx_start,
.ndo_features_check = idpf_features_check,
.ndo_set_rx_mode = idpf_set_rx_mode,
.ndo_validate_addr = eth_validate_addr,
@@ -8,6 +8,7 @@
#define DRV_SUMMARY "Intel(R) Infrastructure Data Path Function Linux Driver"
MODULE_DESCRIPTION(DRV_SUMMARY);
MODULE_IMPORT_NS(LIBETH);
MODULE_LICENSE("GPL");
/**
@@ -6,7 +6,7 @@
/* Rx buffer management */
/**
* libeth_rx_hw_len - get the actual buffer size to be passed to HW
* libeth_rx_hw_len_mtu - get the actual buffer size to be passed to HW
* @pp: &page_pool_params of the netdev to calculate the size for
* @max_len: maximum buffer size for a single descriptor
*
@@ -14,7 +14,7 @@
* MTU the @dev has, HW required alignment, minimum and maximum allowed values,
* and system's page size.
*/
static u32 libeth_rx_hw_len(const struct page_pool_params *pp, u32 max_len)
static u32 libeth_rx_hw_len_mtu(const struct page_pool_params *pp, u32 max_len)
{
u32 len;
@@ -26,6 +26,118 @@ static u32 libeth_rx_hw_len(const struct page_pool_params *pp, u32 max_len)
return len;
}
/**
* libeth_rx_hw_len_truesize - get the short buffer size to be passed to HW
* @pp: &page_pool_params of the netdev to calculate the size for
* @max_len: maximum buffer size for a single descriptor
* @truesize: desired truesize for the buffers
*
* Return: HW-writeable length per one buffer to pass it to the HW ignoring the
* MTU and closest to the passed truesize. Can be used for "short" buffer
* queues to fragment pages more efficiently.
*/
static u32 libeth_rx_hw_len_truesize(const struct page_pool_params *pp,
u32 max_len, u32 truesize)
{
u32 min, len;
min = SKB_HEAD_ALIGN(pp->offset + LIBETH_RX_BUF_STRIDE);
truesize = clamp(roundup_pow_of_two(truesize), roundup_pow_of_two(min),
PAGE_SIZE << LIBETH_RX_PAGE_ORDER);
len = SKB_WITH_OVERHEAD(truesize - pp->offset);
len = ALIGN_DOWN(len, LIBETH_RX_BUF_STRIDE) ? : LIBETH_RX_BUF_STRIDE;
len = min3(len, ALIGN_DOWN(max_len ? : U32_MAX, LIBETH_RX_BUF_STRIDE),
pp->max_len);
return len;
}
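/* A rough worked example (illustrative, assuming 4 KiB order-0 pages,
 * pp->offset == 64, LIBETH_RX_BUF_STRIDE == 128 and a 320-byte aligned
 * struct skb_shared_info, as on x86_64):
 *
 *	truesize: clamp(roundup_pow_of_two(2048), min, 4096) = 2048
 *	len: SKB_WITH_OVERHEAD(2048 - 64) = 1984 - 320       = 1664
 *	len: ALIGN_DOWN(1664, 128)                           = 1664
 *
 * so a "short" queue asking for a 2 KiB truesize gets 1664 bytes of
 * HW-writeable space per buffer and fragments each page in two.
 */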
/**
* libeth_rx_page_pool_params - calculate params with the stack overhead
* @fq: buffer queue to calculate the size for
* @pp: &page_pool_params of the netdev
*
* Set the PP params to include all needed stack overhead (headroom, tailroom) and
* both the HW buffer length and the truesize for all types of buffers. For
* "short" buffers, truesize never exceeds the "wanted" one; for the rest,
* it can be up to the page size.
*
* Return: true on success, false on invalid input params.
*/
static bool libeth_rx_page_pool_params(struct libeth_fq *fq,
struct page_pool_params *pp)
{
pp->offset = LIBETH_SKB_HEADROOM;
/* HW-writeable / syncable length per one page */
pp->max_len = LIBETH_RX_PAGE_LEN(pp->offset);
/* HW-writeable length per buffer */
switch (fq->type) {
case LIBETH_FQE_MTU:
fq->buf_len = libeth_rx_hw_len_mtu(pp, fq->buf_len);
break;
case LIBETH_FQE_SHORT:
fq->buf_len = libeth_rx_hw_len_truesize(pp, fq->buf_len,
fq->truesize);
break;
case LIBETH_FQE_HDR:
fq->buf_len = ALIGN(LIBETH_MAX_HEAD, LIBETH_RX_BUF_STRIDE);
break;
default:
return false;
}
/* Buffer size to allocate */
fq->truesize = roundup_pow_of_two(SKB_HEAD_ALIGN(pp->offset +
fq->buf_len));
return true;
}
/**
* libeth_rx_page_pool_params_zc - calculate params without the stack overhead
* @fq: buffer queue to calculate the size for
* @pp: &page_pool_params of the netdev
*
* Set the PP params to exclude the stack overhead and both the buffer length
* and the truesize, which are equal for the data buffers. Note that this
* requires separate header buffers to always be active and to account for the
* overhead.
* With the MTU == ``PAGE_SIZE``, this allows the kernel to enable the zerocopy
* mode.
*
* Return: true on success, false on invalid input params.
*/
static bool libeth_rx_page_pool_params_zc(struct libeth_fq *fq,
struct page_pool_params *pp)
{
u32 mtu, max;
pp->offset = 0;
pp->max_len = PAGE_SIZE << LIBETH_RX_PAGE_ORDER;
switch (fq->type) {
case LIBETH_FQE_MTU:
mtu = READ_ONCE(pp->netdev->mtu);
break;
case LIBETH_FQE_SHORT:
mtu = fq->truesize;
break;
default:
return false;
}
mtu = roundup_pow_of_two(mtu);
max = min(rounddown_pow_of_two(fq->buf_len ? : U32_MAX),
pp->max_len);
fq->buf_len = clamp(mtu, LIBETH_RX_BUF_STRIDE, max);
fq->truesize = fq->buf_len;
return true;
}
/**
* libeth_rx_fq_create - create a PP with the default libeth settings
* @fq: buffer queue struct to fill
@@ -44,19 +156,17 @@ int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi)
.netdev = napi->dev,
.napi = napi,
.dma_dir = DMA_FROM_DEVICE,
.offset = LIBETH_SKB_HEADROOM,
};
struct libeth_fqe *fqes;
struct page_pool *pool;
bool ret;
/* HW-writeable / syncable length per one page */
pp.max_len = LIBETH_RX_PAGE_LEN(pp.offset);
/* HW-writeable length per buffer */
fq->buf_len = libeth_rx_hw_len(&pp, fq->buf_len);
/* Buffer size to allocate */
fq->truesize = roundup_pow_of_two(SKB_HEAD_ALIGN(pp.offset +
fq->buf_len));
if (!fq->hsplit)
ret = libeth_rx_page_pool_params(fq, &pp);
else
ret = libeth_rx_page_pool_params_zc(fq, &pp);
if (!ret)
return -EINVAL;
pool = page_pool_create(&pp);
if (IS_ERR(pool))
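
For context, a hedged sketch of how a driver might consume this API after
the series (field values and the napi reference are illustrative; with
.hsplit set, the zerocopy params from libeth_rx_page_pool_params_zc() are
used for the payload queue):

	struct libeth_fq fq = {
		.count	= 512,			/* ring length, illustrative */
		.type	= LIBETH_FQE_MTU,	/* payload buffers sized from the MTU */
		.hsplit	= true,			/* header split enabled */
		.nid	= NUMA_NO_NODE,
	};
	int err;

	err = libeth_rx_fq_create(&fq, &q_vector->napi);
	if (err)
		return err;

	/* on success, fq.pp, fq.fqes, fq.buf_len and fq.truesize are set */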
@@ -13,6 +13,32 @@
#define SMP_CACHE_BYTES L1_CACHE_BYTES
#endif
/**
* SMP_CACHE_ALIGN - align a value to the L2 cacheline size
* @x: value to align
*
* On some architectures, L2 ("SMP") CL size is bigger than L1, and sometimes,
* this needs to be accounted for.
*
* Return: aligned value.
*/
#ifndef SMP_CACHE_ALIGN
#define SMP_CACHE_ALIGN(x) ALIGN(x, SMP_CACHE_BYTES)
#endif
/*
* ``__aligned_largest`` aligns a field to the optimal value for the
* target architecture to perform memory operations. Get the actual value
* to be able to use it anywhere else.
*/
#ifndef __LARGEST_ALIGN
#define __LARGEST_ALIGN sizeof(struct { long x; } __aligned_largest)
#endif
#ifndef LARGEST_ALIGN
#define LARGEST_ALIGN(x) ALIGN(x, __LARGEST_ALIGN)
#endif
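/* A hedged usage sketch (hypothetical names): LARGEST_ALIGN() sizes a
 * stride the same way __aligned_largest would pad a struct field, and
 * SMP_CACHE_ALIGN() rounds a length up to the L2/SMP cacheline:
 *
 *	stride = LARGEST_ALIGN(sizeof(struct my_elem));
 *	total  = SMP_CACHE_ALIGN(count * stride);
 */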
/*
* __read_mostly is used to keep rarely changing variables out of frequently
* updated cachelines. Its use should be reserved for data that is used
@@ -95,6 +121,39 @@
__u8 __cacheline_group_end__##GROUP[0]
#endif
/**
* __cacheline_group_begin_aligned - declare an aligned group start
* @GROUP: name of the group
* @...: optional group alignment
*
* The following block inside a struct:
*
* __cacheline_group_begin_aligned(grp);
* field a;
* field b;
* __cacheline_group_end_aligned(grp);
*
* will always be aligned to either the specified alignment or
* ``SMP_CACHE_BYTES``.
*/
#define __cacheline_group_begin_aligned(GROUP, ...) \
__cacheline_group_begin(GROUP) \
__aligned((__VA_ARGS__ + 0) ? : SMP_CACHE_BYTES)
/**
* __cacheline_group_end_aligned - declare an aligned group end
* @GROUP: name of the group
* @...: optional alignment (same as was in __cacheline_group_begin_aligned())
*
* Note that the end marker is aligned to sizeof(long) to allow more precise
* size assertion. It also declares padding at the end to avoid the next
* field falling into this cacheline.
*/
#define __cacheline_group_end_aligned(GROUP, ...) \
__cacheline_group_end(GROUP) __aligned(sizeof(long)); \
struct { } __cacheline_group_pad__##GROUP \
__aligned((__VA_ARGS__ + 0) ? : SMP_CACHE_BYTES)
#ifndef CACHELINE_ASSERT_GROUP_MEMBER
#define CACHELINE_ASSERT_GROUP_MEMBER(TYPE, GROUP, MEMBER) \
BUILD_BUG_ON(!(offsetof(TYPE, MEMBER) >= \
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2024 Intel Corporation */
#ifndef __LIBETH_CACHE_H
#define __LIBETH_CACHE_H
#include <linux/cache.h>
/**
* libeth_cacheline_group_assert - make sure cacheline group size is expected
* @type: type of the structure containing the group
* @grp: group name inside the struct
* @sz: expected group size
*/
#if defined(CONFIG_64BIT) && SMP_CACHE_BYTES == 64
#define libeth_cacheline_group_assert(type, grp, sz) \
static_assert(offsetof(type, __cacheline_group_end__##grp) - \
offsetofend(type, __cacheline_group_begin__##grp) == \
(sz))
#define __libeth_cacheline_struct_assert(type, sz) \
static_assert(sizeof(type) == (sz))
#else /* !CONFIG_64BIT || SMP_CACHE_BYTES != 64 */
#define libeth_cacheline_group_assert(type, grp, sz) \
static_assert(offsetof(type, __cacheline_group_end__##grp) - \
offsetofend(type, __cacheline_group_begin__##grp) <= \
(sz))
#define __libeth_cacheline_struct_assert(type, sz) \
static_assert(sizeof(type) <= (sz))
#endif /* !CONFIG_64BIT || SMP_CACHE_BYTES != 64 */
#define __libeth_cls1(sz1) SMP_CACHE_ALIGN(sz1)
#define __libeth_cls2(sz1, sz2) (SMP_CACHE_ALIGN(sz1) + SMP_CACHE_ALIGN(sz2))
#define __libeth_cls3(sz1, sz2, sz3) \
(SMP_CACHE_ALIGN(sz1) + SMP_CACHE_ALIGN(sz2) + SMP_CACHE_ALIGN(sz3))
#define __libeth_cls(...) \
CONCATENATE(__libeth_cls, COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__)
/**
* libeth_cacheline_struct_assert - make sure CL-based struct size is expected
* @type: type of the struct
* @...: from 1 to 3 CL group sizes (read-mostly, read-write, cold)
*
* When a struct contains several CL groups, it's difficult to predict its size
* on different architectures. The macro instead takes sizes of all of the
* groups the structure contains and generates the final struct size.
*/
#define libeth_cacheline_struct_assert(type, ...) \
__libeth_cacheline_struct_assert(type, __libeth_cls(__VA_ARGS__)); \
static_assert(__alignof(type) >= SMP_CACHE_BYTES)
/**
* libeth_cacheline_set_assert - make sure CL-based struct layout is expected
* @type: type of the struct
* @ro: expected size of the read-mostly group
* @rw: expected size of the read-write group
* @c: expected size of the cold group
*
* Check that each group size is expected and then do final struct size check.
*/
#define libeth_cacheline_set_assert(type, ro, rw, c) \
libeth_cacheline_group_assert(type, read_mostly, ro); \
libeth_cacheline_group_assert(type, read_write, rw); \
libeth_cacheline_group_assert(type, cold, c); \
libeth_cacheline_struct_assert(type, ro, rw, c)
#endif /* __LIBETH_CACHE_H */
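
A hedged end-to-end sketch of the helpers above (hypothetical struct; the
expected sizes assume 64-bit with 64-byte cachelines and account for the
end markers rounding each group span up to sizeof(long)):

	struct sketch_rxq {
		__cacheline_group_begin_aligned(read_mostly);
		void *desc_ring;	/* 8 bytes */
		u32 size;		/* 4 bytes, group span rounds to 16 */
		__cacheline_group_end_aligned(read_mostly);

		__cacheline_group_begin_aligned(read_write);
		u32 next_to_use;
		u32 next_to_clean;	/* 8 bytes total */
		__cacheline_group_end_aligned(read_write);

		__cacheline_group_begin_aligned(cold);
		u32 q_id;		/* 4 bytes, group span rounds to 8 */
		__cacheline_group_end_aligned(cold);
	};
	libeth_cacheline_set_assert(struct sketch_rxq, 16, 8, 8);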
@@ -17,6 +17,8 @@
#define LIBETH_MAX_HEADROOM LIBETH_SKB_HEADROOM
/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
#define LIBETH_RX_LL_LEN (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)
/* Maximum supported L2-L4 header length */
#define LIBETH_MAX_HEAD roundup_pow_of_two(max(MAX_HEADER, 256))
/* Always use order-0 pages */
#define LIBETH_RX_PAGE_ORDER 0
@@ -43,6 +45,18 @@ struct libeth_fqe {
u32 truesize;
} __aligned_largest;
/**
* enum libeth_fqe_type - enum representing types of Rx buffers
* @LIBETH_FQE_MTU: buffer size is determined by MTU
* @LIBETH_FQE_SHORT: buffer size is smaller than MTU, for short frames
* @LIBETH_FQE_HDR: buffer size is ``LIBETH_MAX_HEAD``, for headers
*/
enum libeth_fqe_type {
LIBETH_FQE_MTU = 0U,
LIBETH_FQE_SHORT,
LIBETH_FQE_HDR,
};
/**
* struct libeth_fq - structure representing a buffer (fill) queue
* @fp: hotpath part of the structure
@@ -50,6 +64,8 @@ struct libeth_fqe {
* @fqes: array of Rx buffers
* @truesize: size to allocate per buffer, w/overhead
* @count: number of descriptors/buffers the queue has
* @type: type of the buffers this queue has
* @hsplit: flag whether header split is enabled
* @buf_len: HW-writeable length per each buffer
* @nid: ID of the closest NUMA node with memory
*/
@@ -63,6 +79,9 @@
);
/* Cold fields */
enum libeth_fqe_type type:2;
bool hsplit:1;
u32 buf_len;
int nid;
};
@@ -129,6 +129,16 @@
};
#endif
/* The whole frag API block must stay within one cacheline. On 32-bit systems,
* sizeof(long) == sizeof(int), so that the block size is ``3 * sizeof(long)``.
* On 64-bit systems, the actual size is ``2 * sizeof(long) + sizeof(int)``.
* The closest pow-2 to both of them is ``4 * sizeof(long)``, so just use that
* one for simplicity.
* Having it aligned to a cacheline boundary may be excessive and doesn't bring
* any benefit.
*/
#define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long))
struct page_pool {
struct page_pool_params_fast p;
@@ -142,19 +152,11 @@ struct page_pool {
bool system:1; /* This is a global percpu pool */
#endif
/* The following block must stay within one cacheline. On 32-bit
* systems, sizeof(long) == sizeof(int), so that the block size is
* ``3 * sizeof(long)``. On 64-bit systems, the actual size is
* ``2 * sizeof(long) + sizeof(int)``. The closest pow-2 to both of
* them is ``4 * sizeof(long)``, so just use that one for simplicity.
* Having it aligned to a cacheline boundary may be excessive and
* doesn't bring any good.
*/
__cacheline_group_begin(frag) __aligned(4 * sizeof(long));
__cacheline_group_begin_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);
long frag_users;
netmem_ref frag_page;
unsigned int frag_offset;
__cacheline_group_end(frag);
__cacheline_group_end_aligned(frag, PAGE_POOL_FRAG_GROUP_ALIGN);
struct delayed_work release_dw;
void (*disconnect)(void *pool);
@@ -178,7 +178,8 @@ static void page_pool_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users);
CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page);
CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset);
CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, 4 * sizeof(long));
CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag,
PAGE_POOL_FRAG_GROUP_ALIGN);
}
static int page_pool_init(struct page_pool *pool,