Commit 573a8095 authored by David S. Miller

Merge tag 'mlx5-updates-2020-09-21' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2020-09-21

Multi packet TX descriptor support for SKBs.

This series introduces some refactoring of the regular TX data path in
mlx5 and adds the Enhanced TX MPWQE feature support. MPWQE stands for
multi-packet work queue element, and it can serve multiple packets,
reducing the PCI bandwidth spent on control traffic. It should improve
performance in scenarios where PCI is the bottleneck, and xmit_more is
signaled by the kernel. The refactoring done in this series also
improves the packet rate on its own.

MPWQE is already implemented in the XDP TX path; this series adds
MPWQE support for the regular kernel SKB TX path.

MPWQE is supported from ConnectX-5 onward; for legacy devices we need
to keep backward compatibility with the regular (single-packet) WQE descriptor.

MPWQE is not compatible with certain offloads and features, such as TLS
offload, TSO, nonlinear SKBs. If such incompatible features are in use,
the driver gracefully falls back to non-MPWQE per SKB.

Prior to the final patch "net/mlx5e: Enhanced TX MPWQE for SKBs" that adds
the actual support, Maxim did some refactoring to the tx data path to
split it into stages and smaller helper functions that can be utilized and
reused for both legacy and new MPWQE feature.

Performance testing:

UDP performance is improved in a single stream pktgen test:
  Packet rate: 16.86 Mpps (±0.15 Mpps) -> 20.94 Mpps (±0.33 Mpps)
  Instructions per packet: 434 -> 329
  Cycles per packet: 158 -> 123
  Instructions per cycle: 2.75 -> 2.67

TCP and XDP_TX single stream tests show no performance difference.

MPWQE can reduce PCI bandwidth:
  PCI Gen2, pktgen at fixed rate of 36864000 pps on 24 CPU cores:
    Inbound PCI utilization with MPWQE off: 80.3%
    Inbound PCI utilization with MPWQE on: 59.0%
  PCI Gen3, pktgen at fixed rate of 56064000 pps on 24 CPU cores:
    Inbound PCI utilization with MPWQE off: 65.4%
    Inbound PCI utilization with MPWQE on: 49.3%

MPWQE can also reduce CPU load, increasing the packet rate in case of
CPU bottleneck:
  PCI Gen2, pktgen at full rate on 24 CPU cores:
    Packet rate with MPWQE off: 37.5 Mpps
    Packet rate with MPWQE on: 49.0 Mpps
  PCI Gen3, pktgen at full rate on 24 CPU cores:
    Packet rate with MPWQE off: 57.0 Mpps
    Packet rate with MPWQE on: 66.8 Mpps

Burst size in all pktgen tests is 32.

CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz (x86_64)
NIC: Mellanox ConnectX-6 Dx
GCC 10.2.0
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 748d1c8a 5af75c74
......@@ -221,6 +221,7 @@ enum mlx5e_priv_flag {
MLX5E_PFLAG_RX_STRIDING_RQ,
MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
MLX5E_PFLAG_XDP_TX_MPWQE,
MLX5E_PFLAG_SKB_TX_MPWQE,
MLX5E_NUM_PFLAGS, /* Keep last */
};
......@@ -305,6 +306,7 @@ struct mlx5e_sq_dma {
enum {
MLX5E_SQ_STATE_ENABLED,
MLX5E_SQ_STATE_MPWQE,
MLX5E_SQ_STATE_RECOVERING,
MLX5E_SQ_STATE_IPSEC,
MLX5E_SQ_STATE_AM,
......@@ -313,26 +315,40 @@ enum {
MLX5E_SQ_STATE_PENDING_XSK_TX,
};
/* State of the multi-packet WQE (MPWQE) session currently being built on a
 * send queue. The same structure is embedded in both the SKB TX queue
 * (struct mlx5e_txqsq) and the XDP TX queue (struct mlx5e_xdpsq).
 */
struct mlx5e_tx_mpwqe {
/* Current MPWQE session */
/* WQE being filled by the session */
struct mlx5e_tx_wqe *wqe;
/* Running sum of the lengths of the packets added to the session */
u32 bytes_count;
/* Data segments used so far; a fresh session starts at
 * MLX5E_TX_WQE_EMPTY_DS_COUNT and is full at MLX5E_TX_MPW_MAX_NUM_DS.
 */
u8 ds_count;
/* Number of packets added to the session */
u8 pkt_count;
/* Non-zero when packets may be inlined into the WQE instead of being
 * referenced by a data segment (used by the XDP path).
 */
u8 inline_on;
};
struct mlx5e_txqsq {
/* data path */
/* dirtied @completion */
u16 cc;
u16 skb_fifo_cc;
u32 dma_fifo_cc;
struct dim dim; /* Adaptive Moderation */
/* dirtied @xmit */
u16 pc ____cacheline_aligned_in_smp;
u16 skb_fifo_pc;
u32 dma_fifo_pc;
struct mlx5e_tx_mpwqe mpwqe;
struct mlx5e_cq cq;
/* read only */
struct mlx5_wq_cyc wq;
u32 dma_fifo_mask;
u16 skb_fifo_mask;
struct mlx5e_sq_stats *stats;
struct {
struct mlx5e_sq_dma *dma_fifo;
struct sk_buff **skb_fifo;
struct mlx5e_tx_wqe_info *wqe_info;
} db;
void __iomem *uar_map;
......@@ -399,7 +415,7 @@ struct mlx5e_xdp_info {
};
};
struct mlx5e_xdp_xmit_data {
struct mlx5e_xmit_data {
dma_addr_t dma_addr;
void *data;
u32 len;
......@@ -412,18 +428,10 @@ struct mlx5e_xdp_info_fifo {
u32 mask;
};
struct mlx5e_xdp_mpwqe {
/* Current MPWQE session */
struct mlx5e_tx_wqe *wqe;
u8 ds_count;
u8 pkt_count;
u8 inline_on;
};
struct mlx5e_xdpsq;
typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
struct mlx5e_xdp_xmit_data *,
struct mlx5e_xmit_data *,
struct mlx5e_xdp_info *,
int);
......@@ -438,7 +446,7 @@ struct mlx5e_xdpsq {
u32 xdpi_fifo_pc ____cacheline_aligned_in_smp;
u16 pc;
struct mlx5_wqe_ctrl_seg *doorbell_cseg;
struct mlx5e_xdp_mpwqe mpwqe;
struct mlx5e_tx_mpwqe mpwqe;
struct mlx5e_cq cq;
......
......@@ -7,6 +7,21 @@
#include "en.h"
#include <linux/indirect_call_wrapper.h>
#define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
/* The product MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
 * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
 * We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to let a
 * full-session WQE be cache-aligned.
 */
#if L1_CACHE_BYTES < 128
#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
#else
#define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
#endif
#define MLX5E_TX_MPW_MAX_NUM_DS (MLX5E_TX_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS)
#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
enum mlx5e_icosq_wqe_type {
......@@ -46,8 +61,6 @@ void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev);
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
......@@ -110,6 +123,7 @@ struct mlx5e_tx_wqe_info {
u32 num_bytes;
u8 num_wqebbs;
u8 num_dma;
u8 num_fifo_pkts;
#ifdef CONFIG_MLX5_EN_TLS
struct page *resync_dump_frag_page;
#endif
......@@ -193,23 +207,6 @@ static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size)
return pi;
}
static inline void
mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
u16 pi, u16 nnops)
{
struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
edge_wi = wi + nnops;
/* fill sq frag edge with nops to avoid wqe wrapping two pages */
for (; wi < edge_wi; wi++) {
memset(wi, 0, sizeof(*wi));
wi->num_wqebbs = 1;
mlx5e_post_nop(wq, sq->sqn, &sq->pc);
}
sq->stats->nop += nnops;
}
static inline void
mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
struct mlx5_wqe_ctrl_seg *ctrl)
......@@ -228,29 +225,6 @@ mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
mlx5_write64((__be32 *)ctrl, uar_map);
}
static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg)
{
return cseg && !!cseg->tis_tir_num;
}
static inline u8
mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg,
struct sk_buff *skb)
{
u8 mode;
if (mlx5e_transport_inline_tx_wqe(cseg))
return MLX5_INLINE_MODE_TCP_UDP;
mode = sq->min_inline_mode;
if (skb_vlan_tag_present(skb) &&
test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
mode = max_t(u8, MLX5_INLINE_MODE_L2, mode);
return mode;
}
static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
{
struct mlx5_core_cq *mcq;
......@@ -276,6 +250,23 @@ mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size,
dma->type = map_type;
}
/* Return the address of the SKB FIFO slot selected by counter @i.
 * The counter is reduced to an array index with sq->skb_fifo_mask.
 */
static inline struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_txqsq *sq, u16 i)
{
	u16 slot = i & sq->skb_fifo_mask;

	return sq->db.skb_fifo + slot;
}
/* Store @skb in the slot at the producer counter, then advance the
 * producer counter (sq->skb_fifo_pc) by one.
 */
static inline void mlx5e_skb_fifo_push(struct mlx5e_txqsq *sq, struct sk_buff *skb)
{
	*mlx5e_skb_fifo_get(sq, sq->skb_fifo_pc++) = skb;
}
/* Return the SKB stored in the slot at the consumer counter, then advance
 * the consumer counter (sq->skb_fifo_cc) by one. The slot is not cleared.
 */
static inline struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_txqsq *sq)
{
	struct sk_buff **slot = mlx5e_skb_fifo_get(sq, sq->skb_fifo_cc++);

	return *slot;
}
static inline void
mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
{
......@@ -291,6 +282,14 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
}
}
void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more);
void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq);
static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session)
{
return session->ds_count == MLX5E_TX_MPW_MAX_NUM_DS;
}
static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
{
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
......
......@@ -59,7 +59,7 @@ static inline bool
mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
struct mlx5e_dma_info *di, struct xdp_buff *xdp)
{
struct mlx5e_xdp_xmit_data xdptxd;
struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
struct xdp_frame *xdpf;
dma_addr_t dma_addr;
......@@ -194,18 +194,22 @@ static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
struct mlx5e_tx_wqe *wqe;
u16 pi;
pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS);
session->wqe = MLX5E_TX_FETCH_WQE(sq, pi);
pi = mlx5e_xdpsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
net_prefetchw(wqe->data);
net_prefetchw(session->wqe->data);
session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
session->pkt_count = 0;
mlx5e_xdp_update_inline_state(sq);
*session = (struct mlx5e_tx_mpwqe) {
.wqe = wqe,
.bytes_count = 0,
.ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
.pkt_count = 0,
.inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on),
};
stats->mpwqe++;
}
......@@ -213,7 +217,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
u16 ds_count = session->ds_count;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
......@@ -258,10 +262,10 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq
}
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd,
mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdp_info *xdpi, int check_result)
{
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
if (unlikely(xdptxd->len > sq->hw_mtu)) {
......@@ -284,8 +288,7 @@ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *x
mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) ||
session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS))
if (unlikely(mlx5e_xdp_mpqwe_is_full(session)))
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
......@@ -306,7 +309,7 @@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq)
}
INDIRECT_CALLABLE_SCOPE bool
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd,
mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdp_info *xdpi, int check_result)
{
struct mlx5_wq_cyc *wq = &sq->wq;
......@@ -503,7 +506,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
struct mlx5e_xdp_xmit_data xdptxd;
struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
bool ret;
......
......@@ -38,27 +38,12 @@
#include "en/txrx.h"
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg))
#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \
DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS)
/* The product MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
 * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
 * We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to let a
 * full-session WQE be cache-aligned.
 */
#if L1_CACHE_BYTES < 128
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
#else
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
#endif
#define MLX5E_XDP_TX_DS_COUNT (MLX5E_TX_WQE_EMPTY_DS_COUNT + 1 /* SG DS */)
#define MLX5E_XDP_MPW_MAX_NUM_DS \
(MLX5E_XDP_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS)
#define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT 16
#define MLX5E_XDP_INLINE_WQE_SZ_THRSD \
(MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \
sizeof(struct mlx5_wqe_inline_seg))
struct mlx5e_xsk_param;
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
......@@ -73,11 +58,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_xmit_data *xdptxd,
struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdp_info *xdpi,
int check_result));
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_xmit_data *xdptxd,
struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdp_info *xdpi,
int check_result));
INDIRECT_CALLABLE_DECLARE(int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq));
......@@ -122,30 +107,28 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
/* Enable inline WQEs to shift some load from a congested HCA (HW) to
* a less congested cpu (SW).
*/
static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq)
static inline bool mlx5e_xdp_get_inline_state(struct mlx5e_xdpsq *sq, bool cur)
{
u16 outstanding = sq->xdpi_fifo_pc - sq->xdpi_fifo_cc;
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
#define MLX5E_XDP_INLINE_WATERMARK_LOW 10
#define MLX5E_XDP_INLINE_WATERMARK_HIGH 128
if (session->inline_on) {
if (outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW)
session->inline_on = 0;
return;
}
if (cur && outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW)
return false;
if (!cur && outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH)
return true;
/* inline is false */
if (outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH)
session->inline_on = 1;
return cur;
}
static inline bool
mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session)
static inline bool mlx5e_xdp_mpqwe_is_full(struct mlx5e_tx_mpwqe *session)
{
return session->inline_on &&
session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS;
if (session->inline_on)
return session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT >
MLX5E_TX_MPW_MAX_NUM_DS;
return mlx5e_tx_mpwqe_is_full(session);
}
struct mlx5e_xdp_wqe_info {
......@@ -155,15 +138,16 @@ struct mlx5e_xdp_wqe_info {
static inline void
mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_xmit_data *xdptxd,
struct mlx5e_xmit_data *xdptxd,
struct mlx5e_xdpsq_stats *stats)
{
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
struct mlx5_wqe_data_seg *dseg =
(struct mlx5_wqe_data_seg *)session->wqe + session->ds_count;
u32 dma_len = xdptxd->len;
session->pkt_count++;
session->bytes_count += dma_len;
if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) {
struct mlx5_wqe_inline_seg *inline_dseg =
......
......@@ -67,8 +67,8 @@ static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq,
bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
{
struct xsk_buff_pool *pool = sq->xsk_pool;
struct mlx5e_xmit_data xdptxd;
struct mlx5e_xdp_info xdpi;
struct mlx5e_xdp_xmit_data xdptxd;
bool work_done = true;
bool flush = false;
......
......@@ -128,26 +128,38 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
return true;
}
static inline bool mlx5e_accel_tx_finish(struct mlx5e_priv *priv,
struct mlx5e_txqsq *sq,
struct sk_buff *skb,
struct mlx5e_tx_wqe *wqe,
struct mlx5e_accel_tx_state *state)
{
#ifdef CONFIG_MLX5_EN_TLS
mlx5e_tls_handle_tx_wqe(sq, &wqe->ctrl, &state->tls);
#endif
/* Part of the eseg touched by TX offloads */
#define MLX5E_ACCEL_ESEG_LEN offsetof(struct mlx5_wqe_eth_seg, mss)
static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
struct mlx5e_txqsq *sq,
struct sk_buff *skb,
struct mlx5_wqe_eth_seg *eseg)
{
#ifdef CONFIG_MLX5_EN_IPSEC
if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) {
if (unlikely(!mlx5e_ipsec_handle_tx_skb(priv, &wqe->eth, skb)))
if (unlikely(!mlx5e_ipsec_handle_tx_skb(priv, eseg, skb)))
return false;
}
#endif
#if IS_ENABLED(CONFIG_GENEVE)
if (skb->encapsulation)
mlx5e_tx_tunnel_accel(skb, eseg);
#endif
return true;
}
/* Last acceleration step for a TX WQE: hand the control segment and the
 * TLS part of the accel state to the TLS handler. Compiles to an empty
 * function when CONFIG_MLX5_EN_TLS is not set.
 */
static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq,
					 struct mlx5e_tx_wqe *wqe,
					 struct mlx5e_accel_tx_state *state)
{
#ifdef CONFIG_MLX5_EN_TLS
	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;

	mlx5e_tls_handle_tx_wqe(sq, cseg, &state->tls);
#endif
}
static inline int mlx5e_accel_init_rx(struct mlx5e_priv *priv)
{
return mlx5e_ktls_init_rx(priv);
......
......@@ -345,9 +345,6 @@ void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_sq_stats *stats;
struct mlx5e_sq_dma *dma;
if (!wi->resync_dump_frag_page)
return;
dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);
stats = sq->stats;
......
......@@ -29,12 +29,24 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc);
/* Handle the completion of a resync-dump WQE, if @wi describes one.
 *
 * Returns true when @wi carries a resync_dump_frag_page and the completion
 * was passed to mlx5e_ktls_tx_handle_resync_dump_comp(); false otherwise,
 * in which case nothing is touched.
 */
static inline bool
mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
					  struct mlx5e_tx_wqe_info *wi,
					  u32 *dma_fifo_cc)
{
	/* Common case: not a resync dump */
	if (likely(!wi->resync_dump_frag_page))
		return false;

	mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma_fifo_cc);

	return true;
}
#else
static inline void
mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc)
static inline bool
mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi,
u32 *dma_fifo_cc)
{
return false;
}
#endif /* CONFIG_MLX5_EN_TLS */
......
......@@ -189,12 +189,10 @@ static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
struct mlx5e_tls *tls)
{
u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
struct mlx5e_tx_wqe *wqe;
struct sync_info info;
struct sk_buff *nskb;
int linear_len = 0;
int headln;
u16 pi;
int i;
sq->stats->tls_ooo++;
......@@ -246,9 +244,7 @@ static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
sq->stats->tls_resync_bytes += nskb->len;
mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
cpu_to_be64(info.rcd_sn));
pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
mlx5e_sq_xmit(sq, nskb, wqe, pi, true);
mlx5e_sq_xmit_simple(sq, nskb, true);
return true;
......@@ -274,6 +270,8 @@ bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
if (!datalen)
return true;
mlx5e_tx_mpwqe_ensure_complete(sq);
tls_ctx = tls_get_ctx(skb->sk);
if (WARN_ON_ONCE(tls_ctx->netdev != netdev))
goto err_out;
......
......@@ -1908,7 +1908,7 @@ static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
return 0;
}
static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
static int set_pflag_tx_mpwqe_common(struct net_device *netdev, u32 flag, bool enable)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5_core_dev *mdev = priv->mdev;
......@@ -1920,7 +1920,7 @@ static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
new_channels.params = priv->channels.params;
MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_XDP_TX_MPWQE, enable);
MLX5E_SET_PFLAG(&new_channels.params, flag, enable);
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
priv->channels.params = new_channels.params;
......@@ -1931,6 +1931,16 @@ static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
return err;
}
/* Private-flag handler for "xdp_tx_mpwqe": delegates to the common MPWQE
 * setter with the XDP flag and returns its result.
 */
static int set_pflag_xdp_tx_mpwqe(struct net_device *netdev, bool enable)
{
	const u32 pflag = MLX5E_PFLAG_XDP_TX_MPWQE;

	return set_pflag_tx_mpwqe_common(netdev, pflag, enable);
}
/* Private-flag handler for "skb_tx_mpwqe": delegates to the common MPWQE
 * setter with the SKB flag and returns its result.
 */
static int set_pflag_skb_tx_mpwqe(struct net_device *netdev, bool enable)
{
	const u32 pflag = MLX5E_PFLAG_SKB_TX_MPWQE;

	return set_pflag_tx_mpwqe_common(netdev, pflag, enable);
}
static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = {
{ "rx_cqe_moder", set_pflag_rx_cqe_based_moder },
{ "tx_cqe_moder", set_pflag_tx_cqe_based_moder },
......@@ -1938,6 +1948,7 @@ static const struct pflag_desc mlx5e_priv_flags[MLX5E_NUM_PFLAGS] = {
{ "rx_striding_rq", set_pflag_rx_striding_rq },
{ "rx_no_csum_complete", set_pflag_rx_no_csum_complete },
{ "xdp_tx_mpwqe", set_pflag_xdp_tx_mpwqe },
{ "skb_tx_mpwqe", set_pflag_skb_tx_mpwqe },
};
static int mlx5e_handle_pflag(struct net_device *netdev,
......
......@@ -1043,6 +1043,7 @@ static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
/* Release the per-SQ bookkeeping arrays (WQE info, SKB FIFO, DMA FIFO)
 * that mlx5e_alloc_txqsq_db() allocates. The allocation error path also
 * calls this when only some of the arrays were allocated, so members may
 * be NULL here. The pointers are not reset after freeing.
 */
static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
{
kvfree(sq->db.wqe_info);
kvfree(sq->db.skb_fifo);
kvfree(sq->db.dma_fifo);
}
......@@ -1054,15 +1055,19 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
sizeof(*sq->db.dma_fifo)),
GFP_KERNEL, numa);
sq->db.skb_fifo = kvzalloc_node(array_size(df_sz,
sizeof(*sq->db.skb_fifo)),
GFP_KERNEL, numa);
sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
sizeof(*sq->db.wqe_info)),
GFP_KERNEL, numa);
if (!sq->db.dma_fifo || !sq->db.wqe_info) {
if (!sq->db.dma_fifo || !sq->db.skb_fifo || !sq->db.wqe_info) {
mlx5e_free_txqsq_db(sq);
return -ENOMEM;
}
sq->dma_fifo_mask = df_sz - 1;
sq->skb_fifo_mask = df_sz - 1;
return 0;
}
......@@ -1073,6 +1078,12 @@ static int mlx5e_calc_sq_stop_room(struct mlx5e_txqsq *sq, u8 log_sq_size)
sq->stop_room = mlx5e_tls_get_stop_room(sq);
sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state))
/* An MPWQE can take up to the maximum-sized WQE. On top of that,
 * all the normal stop room can be taken if a new packet breaks the
 * active MPWQE session and allocates its WQEs right away.
 */
sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
if (WARN_ON(sq->stop_room >= sq_size)) {
netdev_err(sq->channel->netdev, "Stop room %hu is bigger than the SQ size %d\n",
......@@ -1114,6 +1125,8 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
if (mlx5_accel_is_tls_device(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
if (param->is_mpw)
set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state);
err = mlx5e_calc_sq_stop_room(sq, params->log_sq_size);
if (err)
return err;
......@@ -2162,6 +2175,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv,
mlx5e_build_sq_param_common(priv, param);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
MLX5_SET(sqc, sqc, allow_swp, allow_swp);
param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
mlx5e_build_tx_cq_param(priv, params, &param->cqp);
}
......@@ -4703,6 +4717,8 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv,
params->log_sq_size = is_kdump_kernel() ?
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE,
MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe));
/* XDP SQ */
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE,
......
......@@ -110,6 +110,8 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_blks) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_pkts) },
#ifdef CONFIG_MLX5_EN_TLS
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) },
......@@ -365,6 +367,8 @@ static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(sw)
s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes;
s->tx_added_vlan_packets += sq_stats->added_vlan_packets;
s->tx_nop += sq_stats->nop;
s->tx_mpwqe_blks += sq_stats->mpwqe_blks;
s->tx_mpwqe_pkts += sq_stats->mpwqe_pkts;
s->tx_queue_stopped += sq_stats->stopped;
s->tx_queue_wake += sq_stats->wake;
s->tx_queue_dropped += sq_stats->dropped;
......@@ -1568,6 +1572,8 @@ static const struct counter_desc sq_stats_desc[] = {
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) },
#ifdef CONFIG_MLX5_EN_TLS
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
......
......@@ -121,6 +121,8 @@ struct mlx5e_sw_stats {
u64 tx_tso_inner_bytes;
u64 tx_added_vlan_packets;
u64 tx_nop;
u64 tx_mpwqe_blks;
u64 tx_mpwqe_pkts;
u64 rx_lro_packets;
u64 rx_lro_bytes;
u64 rx_mcast_packets;
......@@ -351,6 +353,8 @@ struct mlx5e_sq_stats {
u64 csum_partial_inner;
u64 added_vlan_packets;
u64 nop;
u64 mpwqe_blks;
u64 mpwqe_pkts;
#ifdef CONFIG_MLX5_EN_TLS
u64 tls_encrypted_packets;
u64 tls_encrypted_bytes;
......
......@@ -232,131 +232,180 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
return -ENOMEM;
}
/* Per-SKB transmit attributes, filled in by mlx5e_sq_xmit_prepare(). */
struct mlx5e_tx_attr {
/* Byte count added to the SQ byte statistics: for GSO, skb->len plus the
 * inline header size for every segment after the first; otherwise
 * skb->len raised to at least ETH_ZLEN.
 */
u32 num_bytes;
/* skb_headlen() minus the inline header size (ihs) */
u16 headlen;
/* Inline header size in bytes; may be zero */
u16 ihs;
/* Big-endian gso_size for GSO SKBs, zero otherwise */
__be16 mss;
/* MLX5_OPCODE_LSO for GSO SKBs, MLX5_OPCODE_SEND otherwise */
u8 opcode;
};
/* WQE sizing for a single-packet WQE, computed by mlx5e_sq_calc_wqe_attr(). */
struct mlx5e_tx_wqe_attr {
/* Total data segments: MLX5E_TX_WQE_EMPTY_DS_COUNT, plus one if the
 * SKB has a non-empty headlen, plus one per fragment, plus ds_cnt_inl.
 */
u16 ds_cnt;
/* Data segments taken by the inline header (and VLAN_HLEN when a VLAN
 * tag is present) beyond INL_HDR_START_SZ; zero when nothing is inlined.
 */
u16 ds_cnt_inl;
/* ds_cnt rounded up to whole WQEBBs (MLX5_SEND_WQEBB_NUM_DS each) */
u8 num_wqebbs;
};
/* Select the inline mode for a non-GSO SKB.
 *
 * With TLS support built in, a non-zero TLS TIS number in @accel forces
 * MLX5_INLINE_MODE_TCP_UDP. Otherwise the SQ's minimum inline mode is used,
 * raised to at least MLX5_INLINE_MODE_L2 when the SKB carries a VLAN tag
 * and the SQ has MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE set.
 *
 * @accel may be NULL.
 */
static u8
mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb,
			 struct mlx5e_accel_tx_state *accel)
{
	u8 inline_mode;

#ifdef CONFIG_MLX5_EN_TLS
	if (accel && accel->tls.tls_tisn)
		return MLX5_INLINE_MODE_TCP_UDP;
#endif

	inline_mode = sq->min_inline_mode;

	if (!skb_vlan_tag_present(skb))
		return inline_mode;
	if (!test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
		return inline_mode;

	return max_t(u8, MLX5_INLINE_MODE_L2, inline_mode);
}
/* First stage of the TX path: derive the per-SKB attributes in @attr and
 * update the SQ packet and byte counters.
 *
 * GSO SKBs use the LSO opcode, with the inline header size taken from
 * mlx5e_tx_get_gso_ihs(). All other SKBs use the SEND opcode, with the
 * inline size derived from the inline mode chosen for this SKB.
 */
static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb,
				  struct mlx5e_accel_tx_state *accel,
				  struct mlx5e_tx_attr *attr)
{
	struct mlx5e_sq_stats *sq_stats = sq->stats;

	if (!skb_is_gso(skb)) {
		u8 inline_mode = mlx5e_tx_wqe_inline_mode(sq, skb, accel);
		u16 hdr_sz = mlx5e_calc_min_inline(inline_mode, skb);

		*attr = (struct mlx5e_tx_attr) {
			/* Never account less than ETH_ZLEN bytes */
			.num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN),
			.headlen = skb_headlen(skb) - hdr_sz,
			.ihs = hdr_sz,
			.mss = cpu_to_be16(0),
			.opcode = MLX5_OPCODE_SEND,
		};

		sq_stats->packets += 1;
	} else {
		u16 hdr_sz = mlx5e_tx_get_gso_ihs(sq, skb);

		*attr = (struct mlx5e_tx_attr) {
			/* The inline headers are counted once per segment */
			.num_bytes = skb->len +
				     (skb_shinfo(skb)->gso_segs - 1) * hdr_sz,
			.headlen = skb_headlen(skb) - hdr_sz,
			.ihs = hdr_sz,
			.mss = cpu_to_be16(skb_shinfo(skb)->gso_size),
			.opcode = MLX5_OPCODE_LSO,
		};

		sq_stats->packets += skb_shinfo(skb)->gso_segs;
	}

	sq_stats->bytes += attr->num_bytes;
}
/* Compute how many data segments and WQEBBs a single-packet WQE for @skb
 * needs, given the attributes in @attr, and store the result in @wqe_attr.
 */
static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_attr *attr,
				   struct mlx5e_tx_wqe_attr *wqe_attr)
{
	u16 inl_ds = 0;
	u16 total_ds;

	if (attr->ihs) {
		/* INL_HDR_START_SZ bytes of the header are not counted here */
		u16 inl_bytes = attr->ihs - INL_HDR_START_SZ;

		inl_bytes += skb_vlan_tag_present(skb) ? VLAN_HLEN : 0;
		inl_ds = DIV_ROUND_UP(inl_bytes, MLX5_SEND_WQE_DS);
	}

	/* Empty WQE + linear part (if any) + one per fragment + inline part */
	total_ds = MLX5E_TX_WQE_EMPTY_DS_COUNT + !!attr->headlen +
		   skb_shinfo(skb)->nr_frags + inl_ds;

	*wqe_attr = (struct mlx5e_tx_wqe_attr) {
		.num_wqebbs = DIV_ROUND_UP(total_ds, MLX5_SEND_WQEBB_NUM_DS),
		.ds_cnt_inl = inl_ds,
		.ds_cnt = total_ds,
	};
}
/* If the SKB has SKBTX_HW_TSTAMP set in its tx_flags, also set
 * SKBTX_IN_PROGRESS; otherwise leave the flags alone.
 */
static void mlx5e_tx_skb_update_hwts_flags(struct sk_buff *skb)
{
	if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
		return;

	skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
}
/* Stop the netdev TX queue and count the event when the SQ no longer has
 * sq->stop_room free WQEBBs between its consumer and producer counters.
 */
static void mlx5e_tx_check_stop(struct mlx5e_txqsq *sq)
{
	if (likely(mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room)))
		return;

	netif_tx_stop_queue(sq->txq);
	sq->stats->stopped++;
}
static inline void
mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma,
const struct mlx5e_tx_attr *attr,
const struct mlx5e_tx_wqe_attr *wqe_attr, u8 num_dma,
struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg,
bool xmit_more)
{
struct mlx5_wq_cyc *wq = &sq->wq;
bool send_doorbell;
wi->num_bytes = num_bytes;
wi->num_dma = num_dma;
wi->num_wqebbs = num_wqebbs;
wi->skb = skb;
*wi = (struct mlx5e_tx_wqe_info) {
.skb = skb,
.num_bytes = attr->num_bytes,
.num_dma = num_dma,
.num_wqebbs = wqe_attr->num_wqebbs,
.num_fifo_pkts = 0,
};
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | attr->opcode);
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | wqe_attr->ds_cnt);
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
mlx5e_tx_skb_update_hwts_flags(skb);
sq->pc += wi->num_wqebbs;
if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, sq->stop_room))) {
netif_tx_stop_queue(sq->txq);
sq->stats->stopped++;
}
send_doorbell = __netdev_tx_sent_queue(sq->txq, num_bytes,
xmit_more);
mlx5e_tx_check_stop(sq);
send_doorbell = __netdev_tx_sent_queue(sq->txq, attr->num_bytes, xmit_more);
if (send_doorbell)
mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
}
void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
static void
mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
const struct mlx5e_tx_attr *attr, const struct mlx5e_tx_wqe_attr *wqe_attr,
struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
{
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5_wqe_ctrl_seg *cseg;
struct mlx5_wqe_eth_seg *eseg;
struct mlx5_wqe_data_seg *dseg;
struct mlx5e_tx_wqe_info *wi;
struct mlx5e_sq_stats *stats = sq->stats;
u16 headlen, ihs, contig_wqebbs_room;
u16 ds_cnt, ds_cnt_inl = 0;
u8 num_wqebbs, opcode;
u32 num_bytes;
int num_dma;
__be16 mss;
/* Calc ihs and ds cnt, no writes to wqe yet */
ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
if (skb_is_gso(skb)) {
opcode = MLX5_OPCODE_LSO;
mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
ihs = mlx5e_tx_get_gso_ihs(sq, skb);
num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
stats->packets += skb_shinfo(skb)->gso_segs;
} else {
u8 mode = mlx5e_tx_wqe_inline_mode(sq, &wqe->ctrl, skb);
opcode = MLX5_OPCODE_SEND;
mss = 0;
ihs = mlx5e_calc_min_inline(mode, skb);
num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
stats->packets++;
}
stats->bytes += num_bytes;
stats->xmit_more += xmit_more;
headlen = skb->len - ihs - skb->data_len;
ds_cnt += !!headlen;
ds_cnt += skb_shinfo(skb)->nr_frags;
if (ihs) {
ihs += !!skb_vlan_tag_present(skb) * VLAN_HLEN;
ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS);
ds_cnt += ds_cnt_inl;
}
num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
if (unlikely(contig_wqebbs_room < num_wqebbs)) {
#ifdef CONFIG_MLX5_EN_IPSEC
struct mlx5_wqe_eth_seg cur_eth = wqe->eth;
#endif
#ifdef CONFIG_MLX5_EN_TLS
struct mlx5_wqe_ctrl_seg cur_ctrl = wqe->ctrl;
#endif
mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
#ifdef CONFIG_MLX5_EN_IPSEC
wqe->eth = cur_eth;
#endif
#ifdef CONFIG_MLX5_EN_TLS
wqe->ctrl = cur_ctrl;
#endif
}
/* fill wqe */
wi = &sq->db.wqe_info[pi];
cseg = &wqe->ctrl;
eseg = &wqe->eth;
dseg = wqe->data;
#if IS_ENABLED(CONFIG_GENEVE)
if (skb->encapsulation)
mlx5e_tx_tunnel_accel(skb, eseg);
#endif
mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
eseg->mss = mss;
eseg->mss = attr->mss;
if (ihs) {
eseg->inline_hdr.sz = cpu_to_be16(ihs);
if (attr->ihs) {
if (skb_vlan_tag_present(skb)) {
ihs -= VLAN_HLEN;
mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs);
eseg->inline_hdr.sz = cpu_to_be16(attr->ihs + VLAN_HLEN);
mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs);
stats->added_vlan_packets++;
} else {
memcpy(eseg->inline_hdr.start, skb->data, ihs);
eseg->inline_hdr.sz = cpu_to_be16(attr->ihs);
memcpy(eseg->inline_hdr.start, skb->data, attr->ihs);
}
dseg += ds_cnt_inl;
dseg += wqe_attr->ds_cnt_inl;
} else if (skb_vlan_tag_present(skb)) {
eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN);
if (skb->vlan_proto == cpu_to_be16(ETH_P_8021AD))
......@@ -365,12 +414,12 @@ void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
stats->added_vlan_packets++;
}
num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg);
num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs,
attr->headlen, dseg);
if (unlikely(num_dma < 0))
goto err_drop;
mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
num_dma, wi, cseg, xmit_more);
mlx5e_txwqe_complete(sq, skb, attr, wqe_attr, num_dma, wi, cseg, xmit_more);
return;
......@@ -379,10 +428,172 @@ void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
dev_kfree_skb_any(skb);
}
static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
{
return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs;
}
/*
 * Compare the first MLX5E_ACCEL_ESEG_LEN bytes of @eseg with the Ethernet
 * segment of the WQE owned by the current MPWQE session.
 *
 * The caller must make sure a session is running (session->wqe is valid);
 * no check is done here.
 *
 * Return: true when the compared bytes are identical.
 */
static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg)
{
	const struct mlx5_wqe_eth_seg *cur_eseg = &sq->mpwqe.wqe->eth;

	return memcmp(cur_eseg, eseg, MLX5E_ACCEL_ESEG_LEN) == 0;
}
/*
 * Begin a new MPWQE session on @sq.
 *
 * Obtains a producer index from mlx5e_txqsq_get_next_pi() sized for
 * MLX5E_TX_MPW_MAX_WQEBBS, fetches the WQE at that index, resets the session
 * state to "empty" and copies MLX5E_ACCEL_ESEG_LEN bytes of @eseg into the
 * WQE's Ethernet segment. Also bumps the mpwqe_blks statistic.
 */
static void mlx5e_tx_mpwqe_session_start(struct mlx5e_txqsq *sq,
					 struct mlx5_wqe_eth_seg *eseg)
{
	struct mlx5e_tx_mpwqe *mpw = &sq->mpwqe;
	struct mlx5e_tx_wqe *new_wqe;
	u16 slot;

	slot = mlx5e_txqsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
	new_wqe = MLX5E_TX_FETCH_WQE(sq, slot);
	prefetchw(new_wqe->data);

	/* Members not named here (byte/packet counters, inline_on) start at zero. */
	*mpw = (struct mlx5e_tx_mpwqe) {
		.wqe = new_wqe,
		.ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
	};

	memcpy(&new_wqe->eth, eseg, MLX5E_ACCEL_ESEG_LEN);

	sq->stats->mpwqe_blks++;
}
static bool mlx5e_tx_mpwqe_session_is_active(struct mlx5e_txqsq *sq)
{
return sq->mpwqe.wqe;
}
/*
 * Append one packet, described by @txd, to the running MPWQE session.
 *
 * Writes a data segment (address, length, lkey) at position ds_count of the
 * session WQE, then updates the session counters and the mpwqe_pkts
 * statistic.
 */
static void mlx5e_tx_mpwqe_add_dseg(struct mlx5e_txqsq *sq, struct mlx5e_xmit_data *txd)
{
	struct mlx5e_tx_mpwqe *mpw = &sq->mpwqe;
	struct mlx5_wqe_data_seg *slot;

	/* View the WQE as an array of data-segment-sized units. */
	slot = &((struct mlx5_wqe_data_seg *)mpw->wqe)[mpw->ds_count];

	slot->addr = cpu_to_be64(txd->dma_addr);
	slot->byte_count = cpu_to_be32(txd->len);
	slot->lkey = sq->mkey_be;

	mpw->ds_count++;
	mpw->pkt_count++;
	mpw->bytes_count += txd->len;

	sq->stats->mpwqe_pkts++;
}
/*
 * Close the running MPWQE session on @sq.
 *
 * Fills the control segment of the session WQE, records the completion
 * bookkeeping in the wqe_info slot that matches the current producer
 * counter, advances sq->pc by the number of WQEBBs used, marks the session
 * inactive and calls mlx5e_tx_check_stop().
 *
 * Return: the control segment of the WQE that was just finalised.
 */
static struct mlx5_wqe_ctrl_seg *mlx5e_tx_mpwqe_session_complete(struct mlx5e_txqsq *sq)
{
	struct mlx5e_tx_mpwqe *mpw = &sq->mpwqe;
	struct mlx5_wqe_ctrl_seg *ctrl = &mpw->wqe->ctrl;
	struct mlx5e_tx_wqe_info *info;
	u8 nds = mpw->ds_count;

	/* Both fields below use sq->pc as it is before it is advanced. */
	ctrl->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
	ctrl->qpn_ds = cpu_to_be32((sq->sqn << 8) | nds);

	info = &sq->db.wqe_info[mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc)];
	*info = (struct mlx5e_tx_wqe_info) {
		/* No single owner skb: the packets live in the skb fifo. */
		.skb = NULL,
		.num_bytes = mpw->bytes_count,
		.num_wqebbs = DIV_ROUND_UP(nds, MLX5_SEND_WQEBB_NUM_DS),
		.num_dma = mpw->pkt_count,
		.num_fifo_pkts = mpw->pkt_count,
	};

	sq->pc += info->num_wqebbs;

	/* A NULL wqe is what marks the session as inactive. */
	mpw->wqe = NULL;

	mlx5e_tx_check_stop(sq);

	return ctrl;
}
/*
 * Transmit a linear SKB through the MPWQE path.
 *
 * @sq:        send queue to post on
 * @skb:       packet to send; consumed (queued on the skb fifo or freed)
 * @eseg:      Ethernet segment built for this packet; packets share a
 *             session only while their Ethernet segments match
 * @xmit_more: whether the stack signalled that more packets follow
 *
 * The packet data is DMA-mapped before the session is touched, so that a
 * mapping failure drops the SKB without starting (or rotating to) a session
 * that would then hold no packets.
 */
static void
mlx5e_sq_xmit_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
		    struct mlx5_wqe_eth_seg *eseg, bool xmit_more)
{
	struct mlx5_wqe_ctrl_seg *cseg;
	struct mlx5e_xmit_data txd;

	sq->stats->xmit_more += xmit_more;

	txd.data = skb->data;
	txd.len = skb->len;

	txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
		goto err_unmap;

	/* Only now that the packet is certain to be added, pick its session. */
	if (!mlx5e_tx_mpwqe_session_is_active(sq)) {
		mlx5e_tx_mpwqe_session_start(sq, eseg);
	} else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) {
		mlx5e_tx_mpwqe_session_complete(sq);
		mlx5e_tx_mpwqe_session_start(sq, eseg);
	}

	mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE);
	mlx5e_skb_fifo_push(sq, skb);
	mlx5e_tx_mpwqe_add_dseg(sq, &txd);
	mlx5e_tx_skb_update_hwts_flags(skb);

	if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe))) {
		/* Might stop the queue and affect the retval of __netdev_tx_sent_queue. */
		cseg = mlx5e_tx_mpwqe_session_complete(sq);

		if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more))
			mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
	} else if (__netdev_tx_sent_queue(sq->txq, txd.len, xmit_more)) {
		/* Might stop the queue, but we were asked to ring the doorbell anyway. */
		cseg = mlx5e_tx_mpwqe_session_complete(sq);

		mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
	}

	return;

err_unmap:
	/*
	 * NOTE(review): this function has not pushed anything to the DMA fifo
	 * when it gets here - confirm that mlx5e_dma_unmap_wqe_err(sq, 1) is
	 * the intended cleanup for a failed dma_map_single().
	 */
	mlx5e_dma_unmap_wqe_err(sq, 1);
	sq->stats->dropped++;
	dev_kfree_skb_any(skb);
}
/*
 * Close the MPWQE session on @sq if one is currently open; do nothing
 * otherwise.
 */
void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
{
	/* Unlikely in non-MPWQE workloads; not important in MPWQE workloads. */
	if (likely(!mlx5e_tx_mpwqe_session_is_active(sq)))
		return;

	mlx5e_tx_mpwqe_session_complete(sq);
}
/*
 * Build the Ethernet segment @eseg for @skb.
 *
 * mlx5e_accel_tx_eseg() runs first; only when it succeeds are the checksum
 * fields filled in by mlx5e_txwqe_build_eseg_csum().
 *
 * Return: false when mlx5e_accel_tx_eseg() failed, true otherwise.
 */
static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
				   struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
{
	bool accel_ok = mlx5e_accel_tx_eseg(priv, sq, skb, eseg);

	if (likely(accel_ok))
		mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);

	return accel_ok;
}
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_accel_tx_state accel = {};
struct mlx5e_tx_wqe_attr wqe_attr;
struct mlx5e_tx_attr attr;
struct mlx5e_tx_wqe *wqe;
struct mlx5e_txqsq *sq;
u16 pi;
......@@ -391,21 +602,91 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
/* May send SKBs and WQEs. */
if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel)))
goto out;
return NETDEV_TX_OK;
pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr);
if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) {
if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) {
struct mlx5_wqe_eth_seg eseg = {};
if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &eseg)))
return NETDEV_TX_OK;
mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more());
return NETDEV_TX_OK;
}
mlx5e_tx_mpwqe_ensure_complete(sq);
}
mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
/* May update the WQE, but may not post other WQEs. */
if (unlikely(!mlx5e_accel_tx_finish(priv, sq, skb, wqe, &accel)))
goto out;
mlx5e_accel_tx_finish(sq, wqe, &accel);
if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &wqe->eth)))
return NETDEV_TX_OK;
mlx5e_sq_xmit(sq, skb, wqe, pi, netdev_xmit_more());
mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more());
out:
return NETDEV_TX_OK;
}
/*
 * Send @skb on @sq as a regular (single-packet) WQE.
 *
 * No acceleration state is passed to mlx5e_sq_xmit_prepare() (NULL), and
 * only the checksum part of the Ethernet segment is built before the WQE is
 * handed to mlx5e_sq_xmit_wqe().
 */
void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more)
{
	struct mlx5e_tx_wqe_attr layout;
	struct mlx5e_tx_attr tx_attr;
	struct mlx5e_tx_wqe *desc;
	u16 prod_idx;

	/* Derive per-packet attributes, then the WQE size they imply. */
	mlx5e_sq_xmit_prepare(sq, skb, NULL, &tx_attr);
	mlx5e_sq_calc_wqe_attr(skb, &tx_attr, &layout);

	/* Get a producer index for that many WQEBBs and fetch the WQE there. */
	prod_idx = mlx5e_txqsq_get_next_pi(sq, layout.num_wqebbs);
	desc = MLX5E_TX_FETCH_WQE(sq, prod_idx);

	mlx5e_txwqe_build_eseg_csum(sq, skb, &desc->eth);

	mlx5e_sq_xmit_wqe(sq, skb, &tx_attr, &layout, desc, prod_idx, xmit_more);
}
/*
 * Unmap the wi->num_dma DMA fifo entries that belong to @wi.
 *
 * @dma_fifo_cc is the caller's DMA fifo consumer counter; it is advanced by
 * one for every entry taken.
 */
static void mlx5e_tx_wi_dma_unmap(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
				  u32 *dma_fifo_cc)
{
	int done = 0;

	while (done < wi->num_dma) {
		u32 fifo_idx = *dma_fifo_cc;
		struct mlx5e_sq_dma *entry;

		*dma_fifo_cc = fifo_idx + 1;
		entry = mlx5e_dma_get(sq, fifo_idx);
		mlx5e_tx_dma_unmap(sq->pdev, entry);

		done++;
	}
}
static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5_cqe64 *cqe, int napi_budget)
{
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
struct skb_shared_hwtstamps hwts = {};
u64 ts = get_cqe_ts(cqe);
hwts.hwtstamp = mlx5_timecounter_cyc2time(sq->clock, ts);
skb_tstamp_tx(skb, &hwts);
}
napi_consume_skb(skb, napi_budget);
}
/*
 * Pop wi->num_fifo_pkts SKBs from the skb fifo of @sq and release each one
 * through mlx5e_consume_skb(), using the same @cqe for all of them.
 */
static void mlx5e_tx_wi_consume_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi,
					  struct mlx5_cqe64 *cqe, int napi_budget)
{
	int popped = 0;

	while (popped < wi->num_fifo_pkts) {
		mlx5e_consume_skb(sq, mlx5e_skb_fifo_pop(sq), cqe, napi_budget);
		popped++;
	}
}
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
{
struct mlx5e_sq_stats *stats;
......@@ -451,42 +732,33 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
wqe_counter = be16_to_cpu(cqe->wqe_counter);
do {
struct sk_buff *skb;
int j;
last_wqe = (sqcc == wqe_counter);
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
skb = wi->skb;
if (unlikely(!skb)) {
mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
sqcc += wi->num_wqebbs;
continue;
}
sqcc += wi->num_wqebbs;
if (unlikely(skb_shinfo(skb)->tx_flags &
SKBTX_HW_TSTAMP)) {
struct skb_shared_hwtstamps hwts = {};
if (likely(wi->skb)) {
mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
mlx5e_consume_skb(sq, wi->skb, cqe, napi_budget);
hwts.hwtstamp =
mlx5_timecounter_cyc2time(sq->clock,
get_cqe_ts(cqe));
skb_tstamp_tx(skb, &hwts);
npkts++;
nbytes += wi->num_bytes;
continue;
}
for (j = 0; j < wi->num_dma; j++) {
struct mlx5e_sq_dma *dma =
mlx5e_dma_get(sq, dma_fifo_cc++);
if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi,
&dma_fifo_cc)))
continue;
mlx5e_tx_dma_unmap(sq->pdev, dma);
}
if (wi->num_fifo_pkts) {
mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
mlx5e_tx_wi_consume_fifo_skbs(sq, wi, cqe, napi_budget);
npkts++;
nbytes += wi->num_bytes;
sqcc += wi->num_wqebbs;
napi_consume_skb(skb, napi_budget);
npkts += wi->num_fifo_pkts;
nbytes += wi->num_bytes;
}
} while (!last_wqe);
if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
......@@ -525,13 +797,19 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
/*
 * Pop wi->num_fifo_pkts SKBs from the skb fifo of @sq and free each one with
 * dev_kfree_skb_any().
 */
static void mlx5e_tx_wi_kfree_fifo_skbs(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi)
{
	int popped = 0;

	while (popped < wi->num_fifo_pkts) {
		struct sk_buff *victim = mlx5e_skb_fifo_pop(sq);

		dev_kfree_skb_any(victim);
		popped++;
	}
}
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
{
struct mlx5e_tx_wqe_info *wi;
u32 dma_fifo_cc, nbytes = 0;
u16 ci, sqcc, npkts = 0;
struct sk_buff *skb;
int i;
sqcc = sq->cc;
dma_fifo_cc = sq->dma_fifo_cc;
......@@ -539,25 +817,28 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
while (sqcc != sq->pc) {
ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
skb = wi->skb;
if (!skb) {
mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
sqcc += wi->num_wqebbs;
sqcc += wi->num_wqebbs;
if (likely(wi->skb)) {
mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
dev_kfree_skb_any(wi->skb);
npkts++;
nbytes += wi->num_bytes;
continue;
}
for (i = 0; i < wi->num_dma; i++) {
struct mlx5e_sq_dma *dma =
mlx5e_dma_get(sq, dma_fifo_cc++);
if (unlikely(mlx5e_ktls_tx_try_handle_resync_dump_comp(sq, wi, &dma_fifo_cc)))
continue;
mlx5e_tx_dma_unmap(sq->pdev, dma);
}
if (wi->num_fifo_pkts) {
mlx5e_tx_wi_dma_unmap(sq, wi, &dma_fifo_cc);
mlx5e_tx_wi_kfree_fifo_skbs(sq, wi);
dev_kfree_skb_any(skb);
npkts++;
nbytes += wi->num_bytes;
sqcc += wi->num_wqebbs;
npkts += wi->num_fifo_pkts;
nbytes += wi->num_bytes;
}
}
sq->dma_fifo_cc = dma_fifo_cc;
......@@ -576,9 +857,34 @@ mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey,
dseg->av.key.qkey.qkey = cpu_to_be32(dqkey);
}
/*
 * Compute the size attributes of a struct mlx5i_tx_wqe for @skb.
 *
 * The data-segment count starts from the fixed WQE size, adds one segment
 * when there is linear data (attr->headlen != 0), one per page fragment,
 * and the segments needed for inline header bytes beyond INL_HDR_START_SZ
 * when attr->ihs is set. The result is written to @wqe_attr.
 */
static void mlx5i_sq_calc_wqe_attr(struct sk_buff *skb,
				   const struct mlx5e_tx_attr *attr,
				   struct mlx5e_tx_wqe_attr *wqe_attr)
{
	u16 total_ds = sizeof(struct mlx5i_tx_wqe) / MLX5_SEND_WQE_DS;
	u16 inline_ds = 0;

	if (attr->headlen)
		total_ds++;
	total_ds += skb_shinfo(skb)->nr_frags;

	if (attr->ihs) {
		/* Only the part that does not fit in inline_hdr.start needs extra DS. */
		u16 extra_inline = attr->ihs - INL_HDR_START_SZ;

		inline_ds = DIV_ROUND_UP(extra_inline, MLX5_SEND_WQE_DS);
		total_ds += inline_ds;
	}

	*wqe_attr = (struct mlx5e_tx_wqe_attr) {
		.ds_cnt = total_ds,
		.ds_cnt_inl = inline_ds,
		.num_wqebbs = DIV_ROUND_UP(total_ds, MLX5_SEND_WQEBB_NUM_DS),
	};
}
void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more)
{
struct mlx5e_tx_wqe_attr wqe_attr;
struct mlx5e_tx_attr attr;
struct mlx5i_tx_wqe *wqe;
struct mlx5_wqe_datagram_seg *datagram;
......@@ -588,47 +894,17 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe_info *wi;
struct mlx5e_sq_stats *stats = sq->stats;
u16 ds_cnt, ds_cnt_inl = 0;
u8 num_wqebbs, opcode;
u16 headlen, ihs, pi;
u32 num_bytes;
int num_dma;
__be16 mss;
u16 pi;
/* Calc ihs and ds cnt, no writes to wqe yet */
ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
if (skb_is_gso(skb)) {
opcode = MLX5_OPCODE_LSO;
mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
ihs = mlx5e_tx_get_gso_ihs(sq, skb);
num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
stats->packets += skb_shinfo(skb)->gso_segs;
} else {
u8 mode = mlx5e_tx_wqe_inline_mode(sq, NULL, skb);
mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr);
mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr);
opcode = MLX5_OPCODE_SEND;
mss = 0;
ihs = mlx5e_calc_min_inline(mode, skb);
num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
stats->packets++;
}
pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs);
wqe = MLX5I_SQ_FETCH_WQE(sq, pi);
stats->bytes += num_bytes;
stats->xmit_more += xmit_more;
headlen = skb->len - ihs - skb->data_len;
ds_cnt += !!headlen;
ds_cnt += skb_shinfo(skb)->nr_frags;
if (ihs) {
ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS);
ds_cnt += ds_cnt_inl;
}
num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
wqe = MLX5I_SQ_FETCH_WQE(sq, pi);
/* fill wqe */
wi = &sq->db.wqe_info[pi];
cseg = &wqe->ctrl;
......@@ -640,20 +916,20 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
eseg->mss = mss;
eseg->mss = attr.mss;
if (ihs) {
memcpy(eseg->inline_hdr.start, skb->data, ihs);
eseg->inline_hdr.sz = cpu_to_be16(ihs);
dseg += ds_cnt_inl;
if (attr.ihs) {
memcpy(eseg->inline_hdr.start, skb->data, attr.ihs);
eseg->inline_hdr.sz = cpu_to_be16(attr.ihs);
dseg += wqe_attr.ds_cnt_inl;
}
num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + ihs, headlen, dseg);
num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr.ihs,
attr.headlen, dseg);
if (unlikely(num_dma < 0))
goto err_drop;
mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
num_dma, wi, cseg, xmit_more);
mlx5e_txwqe_complete(sq, skb, &attr, &wqe_attr, num_dma, wi, cseg, xmit_more);
return;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment