Commit 5e0d2eef authored by Tariq Toukan's avatar Tariq Toukan Committed by Saeed Mahameed

net/mlx5e: XDP, Support Enhanced Multi-Packet TX WQE

Add support for the HW feature of multi-packet WQE in XDP
xmit flow.

The conventional TX descriptor (WQE, Work Queue Element) serves
a single packet. Our HW has support for multi-packet WQE (MPWQE)
in which a single descriptor serves multiple TX packets.

This reduces both the PCI overhead and the CPU cycles wasted on
writing them.

In this patch we add support for the HW feature, which is supported
starting from ConnectX-5.

Performance:
Tested packet rate for UDP 64Byte multi-stream over ConnectX-5 NICs.
CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz

XDP_TX:
We see a huge gain on single port ConnectX-5, and reach the 100 Mpps
milestone.
* Single-port HCA:
	Before:   70 Mpps
	After:   100 Mpps (+42.8%)

* Dual-port HCA:
	Before: 51.7 Mpps
	After:  57.3 Mpps (+10.8%)

* In both cases we tested traffic on one port and for now On Dual-port HCAs
  we see only small gain, we are working to overcome this bottleneck, but
  for the moment only with experimental firmware on dual port HCAs we can
  reach the wanted numbers as seen on Single-port HCAs.

XDP_REDIRECT:
Redirect from (A) ConnectX-5 to (B) ConnectX-5.
Due to a setup limitation, (A) and (B) are on different NUMA nodes,
so absolute performance numbers are not optimal.
Note:
  Below is the transmit rate of (B), not the redirect rate of (A)
  which is in some cases higher.

* (B) is single-port:
	Before:   77 Mpps
	After:    90 Mpps (+16.8%)

* (B) is dual-port:
	Before:  61 Mpps
	After:   72 Mpps (+18%)
Signed-off-by: default avatarTariq Toukan <tariqt@mellanox.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@mellanox.com>
parent 1feeab80
......@@ -416,6 +416,16 @@ struct mlx5e_xdp_wqe_info {
u8 num_ds;
};
struct mlx5e_xdp_mpwqe {
/* Current MPWQE session */
struct mlx5e_tx_wqe *wqe;
u8 ds_count;
u8 max_ds_count;
};
struct mlx5e_xdpsq;
typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*,
struct mlx5e_xdp_info*);
struct mlx5e_xdpsq {
/* data path */
......@@ -428,12 +438,14 @@ struct mlx5e_xdpsq {
u32 xdpi_fifo_pc ____cacheline_aligned_in_smp;
u16 pc;
struct mlx5_wqe_ctrl_seg *doorbell_cseg;
struct mlx5e_xdp_mpwqe mpwqe;
struct mlx5e_cq cq;
/* read only */
struct mlx5_wq_cyc wq;
struct mlx5e_xdpsq_stats *stats;
mlx5e_fp_xmit_xdp_frame xmit_xdp_frame;
struct {
struct mlx5e_xdp_wqe_info *wqe_info;
struct mlx5e_xdp_info_fifo xdpi_fifo;
......
......@@ -47,7 +47,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di,
xdpi.xdpf->len, PCI_DMA_TODEVICE);
xdpi.di = *di;
return mlx5e_xmit_xdp_frame(sq, &xdpi);
return sq->xmit_xdp_frame(sq, &xdpi);
}
/* returns true if packet was consumed by xdp */
......@@ -102,7 +102,98 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
}
}
bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
{
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
struct mlx5_wq_cyc *wq = &sq->wq;
u8 wqebbs;
u16 pi;
mlx5e_xdpsq_fetch_wqe(sq, &session->wqe);
prefetchw(session->wqe->data);
session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
* (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
* We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a
* full-session WQE be cache-aligned.
*/
#if L1_CACHE_BYTES < 128
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
#else
#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
#endif
wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi),
MLX5E_XDP_MPW_MAX_WQEBBS);
session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs;
}
static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
{
struct mlx5_wq_cyc *wq = &sq->wq;
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
u16 ds_count = session->ds_count;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi];
cseg->opmod_idx_opcode =
cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_ENHANCED_MPSW);
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_count);
wi->num_wqebbs = DIV_ROUND_UP(ds_count, MLX5_SEND_WQEBB_NUM_DS);
wi->num_ds = ds_count - MLX5E_XDP_TX_EMPTY_DS_COUNT;
sq->pc += wi->num_wqebbs;
sq->doorbell_cseg = cseg;
session->wqe = NULL; /* Close session */
}
static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
struct mlx5e_xdp_info *xdpi)
{
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
dma_addr_t dma_addr = xdpi->dma_addr;
struct xdp_frame *xdpf = xdpi->xdpf;
unsigned int dma_len = xdpf->len;
if (unlikely(sq->hw_mtu < dma_len)) {
stats->err++;
return false;
}
if (unlikely(!session->wqe)) {
if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
MLX5_SEND_WQE_MAX_WQEBBS))) {
/* SQ is full, ring doorbell */
mlx5e_xmit_xdp_doorbell(sq);
stats->full++;
return false;
}
mlx5e_xdp_mpwqe_session_start(sq);
}
mlx5e_xdp_mpwqe_add_dseg(sq, dma_addr, dma_len);
if (unlikely(session->ds_count == session->max_ds_count))
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
stats->xmit++;
return true;
}
static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi)
{
struct mlx5_wq_cyc *wq = &sq->wq;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
......@@ -304,7 +395,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
xdpi.xdpf = xdpf;
if (unlikely(!mlx5e_xmit_xdp_frame(sq, &xdpi))) {
if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) {
dma_unmap_single(sq->pdev, xdpi.dma_addr,
xdpf->len, DMA_TO_DEVICE);
xdp_return_frame_rx_napi(xdpf);
......@@ -312,8 +403,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
}
}
if (flags & XDP_XMIT_FLUSH)
if (flags & XDP_XMIT_FLUSH) {
if (sq->mpwqe.wqe)
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xmit_xdp_doorbell(sq);
}
return n - drops;
}
......@@ -322,6 +416,9 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
{
struct mlx5e_xdpsq *xdpsq = &rq->xdpsq;
if (xdpsq->mpwqe.wqe)
mlx5e_xdp_mpwqe_complete(xdpsq);
mlx5e_xmit_xdp_doorbell(xdpsq);
if (xdpsq->redirect_flush) {
......@@ -329,3 +426,10 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq)
xdpsq->redirect_flush = false;
}
}
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw)
{
sq->xmit_xdp_frame = is_mpw ?
mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame;
}
......@@ -37,15 +37,16 @@
#define MLX5E_XDP_MAX_MTU ((int)(PAGE_SIZE - \
MLX5_SKB_FRAG_SZ(XDP_PACKET_HEADROOM)))
#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
#define MLX5E_XDP_TX_DS_COUNT \
((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
#define MLX5E_XDP_TX_EMPTY_DS_COUNT \
(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
#define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
void *va, u16 *rx_headroom, u32 *len);
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq);
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq);
void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw);
void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi);
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
u32 flags);
......@@ -57,6 +58,28 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
}
}
static inline void
mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, dma_addr_t dma_addr, u16 dma_len)
{
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
struct mlx5_wqe_data_seg *dseg =
(struct mlx5_wqe_data_seg *)session->wqe + session->ds_count++;
dseg->addr = cpu_to_be64(dma_addr);
dseg->byte_count = cpu_to_be32(dma_len);
dseg->lkey = sq->mkey_be;
}
static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq,
struct mlx5e_tx_wqe **wqe)
{
struct mlx5_wq_cyc *wq = &sq->wq;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
*wqe = mlx5_wq_cyc_get_wqe(wq, pi);
memset(*wqe, 0, sizeof(**wqe));
}
static inline void
mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
struct mlx5e_xdp_info *xi)
......
......@@ -61,6 +61,7 @@ struct mlx5e_rq_param {
struct mlx5e_sq_param {
u32 sqc[MLX5_ST_SZ_DW(sqc)];
struct mlx5_wq_param wq;
bool is_mpw;
};
struct mlx5e_cq_param {
......@@ -1586,11 +1587,8 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
struct mlx5e_xdpsq *sq,
bool is_redirect)
{
unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
struct mlx5e_create_sq_param csp = {};
unsigned int inline_hdr_sz = 0;
int err;
int i;
err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect);
if (err)
......@@ -1606,27 +1604,35 @@ static int mlx5e_open_xdpsq(struct mlx5e_channel *c,
if (err)
goto err_free_xdpsq;
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
ds_cnt++;
}
mlx5e_set_xmit_fp(sq, param->is_mpw);
if (!param->is_mpw) {
unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT;
unsigned int inline_hdr_sz = 0;
int i;
/* Pre initialize fixed WQE fields */
for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[i];
struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
struct mlx5_wqe_data_seg *dseg;
if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
ds_cnt++;
}
/* Pre initialize fixed WQE fields */
for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[i];
struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i);
struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
struct mlx5_wqe_eth_seg *eseg = &wqe->eth;
struct mlx5_wqe_data_seg *dseg;
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
dseg->lkey = sq->mkey_be;
dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
dseg->lkey = sq->mkey_be;
wi->num_wqebbs = 1;
wi->num_ds = 1;
wi->num_wqebbs = 1;
wi->num_ds = 1;
}
}
return 0;
......@@ -2335,6 +2341,7 @@ static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
mlx5e_build_sq_param_common(priv, param);
MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
param->is_mpw = MLX5_CAP_ETH(priv->mdev, enhanced_multi_pkt_send_wqe);
}
static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
......
......@@ -421,6 +421,7 @@ enum {
MLX5_OPCODE_ATOMIC_MASKED_FA = 0x15,
MLX5_OPCODE_BIND_MW = 0x18,
MLX5_OPCODE_CONFIG_CMD = 0x1f,
MLX5_OPCODE_ENHANCED_MPSW = 0x29,
MLX5_RECV_OPCODE_RDMA_WRITE_IMM = 0x00,
MLX5_RECV_OPCODE_SEND = 0x01,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment