Commit 8e4b53f6 authored by Maxim Mikityanskiy's avatar Maxim Mikityanskiy Committed by Saeed Mahameed

net/mlx5e: Refactor xmit functions

A huge function mlx5e_sq_xmit was split into several to achieve multiple
goals:

1. Reuse the code in IPoIB.

2. Better intergrate with TLS, IPSEC, GENEVE and checksum offloads. Now
it's possible to reserve space in the WQ before running eseg-based
offloads, so:

2.1. It's not needed to copy cseg and eseg after mlx5e_fill_sq_frag_edge
anymore.

2.2. mlx5e_txqsq_get_next_pi will be used instead of the legacy
mlx5e_fill_sq_frag_edge for better code maintainability and reuse.

3. Prepare for the upcoming TX MPWQE for SKBs. It will intervene after
mlx5e_sq_calc_wqe_attr to check if it's possible to use MPWQE, and the
code flow will split into two paths: MPWQE and non-MPWQE.

Two high-level functions are provided to send packets:

* mlx5e_xmit is called by the networking stack, runs offloads and sends
the packet. In one of the following patches, MPWQE support will be added
to this flow.

* mlx5e_sq_xmit_simple is called by the TLS offload, runs only the
checksum offload and sends the packet.

This change has no performance impact in TCP single stream test and
XDP_TX single stream test.

When compiled with a recent GCC, this change shows no visible
performance impact on UDP pktgen (burst 32) single stream test either:
  Packet rate: 16.86 Mpps (±0.15 Mpps) -> 16.95 Mpps (±0.15 Mpps)
  Instructions per packet: 434 -> 429
  Cycles per packet: 158 -> 160
  Instructions per cycle: 2.75 -> 2.69

CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz (x86_64)
NIC: Mellanox ConnectX-6 Dx
GCC 10.2.0
Signed-off-by: default avatarMaxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: default avatarTariq Toukan <tariqt@mellanox.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent d02dfcd5
...@@ -41,8 +41,6 @@ void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq); ...@@ -41,8 +41,6 @@ void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev); struct net_device *sb_dev);
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
...@@ -188,23 +186,6 @@ static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size) ...@@ -188,23 +186,6 @@ static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size)
return pi; return pi;
} }
static inline void
mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
u16 pi, u16 nnops)
{
struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
edge_wi = wi + nnops;
/* fill sq frag edge with nops to avoid wqe wrapping two pages */
for (; wi < edge_wi; wi++) {
memset(wi, 0, sizeof(*wi));
wi->num_wqebbs = 1;
mlx5e_post_nop(wq, sq->sqn, &sq->pc);
}
sq->stats->nop += nnops;
}
static inline void static inline void
mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map, mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
struct mlx5_wqe_ctrl_seg *ctrl) struct mlx5_wqe_ctrl_seg *ctrl)
...@@ -263,6 +244,8 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) ...@@ -263,6 +244,8 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
} }
} }
void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more);
static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
{ {
if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
......
...@@ -145,6 +145,11 @@ static inline bool mlx5e_accel_tx_finish(struct mlx5e_priv *priv, ...@@ -145,6 +145,11 @@ static inline bool mlx5e_accel_tx_finish(struct mlx5e_priv *priv,
} }
#endif #endif
#if IS_ENABLED(CONFIG_GENEVE)
if (skb->encapsulation)
mlx5e_tx_tunnel_accel(skb, &wqe->eth);
#endif
return true; return true;
} }
......
...@@ -189,12 +189,10 @@ static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context, ...@@ -189,12 +189,10 @@ static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
struct mlx5e_tls *tls) struct mlx5e_tls *tls)
{ {
u32 tcp_seq = ntohl(tcp_hdr(skb)->seq); u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
struct mlx5e_tx_wqe *wqe;
struct sync_info info; struct sync_info info;
struct sk_buff *nskb; struct sk_buff *nskb;
int linear_len = 0; int linear_len = 0;
int headln; int headln;
u16 pi;
int i; int i;
sq->stats->tls_ooo++; sq->stats->tls_ooo++;
...@@ -246,9 +244,7 @@ static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context, ...@@ -246,9 +244,7 @@ static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
sq->stats->tls_resync_bytes += nskb->len; sq->stats->tls_resync_bytes += nskb->len;
mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln, mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
cpu_to_be64(info.rcd_sn)); cpu_to_be64(info.rcd_sn));
pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); mlx5e_sq_xmit_simple(sq, nskb, true);
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
mlx5e_sq_xmit(sq, nskb, wqe, pi, true);
return true; return true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment