Commit 5be01904 authored by Raed Salem's avatar Raed Salem Committed by Saeed Mahameed

net/mlx5e: IPsec: Add Connect-X IPsec Tx data path offload

In the TX data path, spot packets with xfrm stack IPsec offload
indication.

Fill Software-Parser segment in TX descriptor so that the hardware
may parse the ESP protocol, and perform TX checksum offload on the
inner payload.

Support GSO, by providing the trailer data and ICV placeholder
so HW can fill it post encryption operation.

Padding alignment cannot be performed in HW (ConnectX-6Dx) due to
a bug. Software can overcome this limitation by adding NETIF_F_HW_ESP to
the gso_partial_features field in netdev so the packets being
aligned by the stack.

l4_inner_checksum cannot be offloaded by HW for IPsec tunnel type packet.

Note that for GSO SKBs, the stack does not include an ESP trailer,
unlike the non-GSO case.

Below is the iperf3 performance report on two server of 24 cores
Intel(R) Xeon(R) CPU E5-2620 v3 @ 2.40GHz with ConnectX6-DX.
All the bandwidth test uses iperf3 TCP traffic with packet size 128KB.
Each tunnel uses one iperf3 stream with one thread (option -P1).
TX crypto offload shows improvements on both bandwidth
and CPU utilization.

----------------------------------------------------------------------
Mode            |  Num tunnel | BW     | Send CPU util | Recv CPU util
                |             | (Gbps) | (Average %)   | (Average %)
----------------------------------------------------------------------
Cryto offload   |             |        |               |
(RX only)       | 1           | 4.7    | 4.2           | 3.5
----------------------------------------------------------------------
Cryto offload   |             |        |               |
(RX only)       | 24          | 15.6   | 20            | 10
----------------------------------------------------------------------
Non-offload     | 1           | 4.6    | 4             | 5
----------------------------------------------------------------------
Non-offload     | 24          | 11.9   | 16            | 12
----------------------------------------------------------------------
Cryto offload   |             |        |               |
(TX & RX)       | 1           | 11.9   | 2.1           | 5.9
----------------------------------------------------------------------
Cryto offload   |             |        |               |
(TX & RX)       | 24          | 38     | 9.5           | 27.5
----------------------------------------------------------------------
Cryto offload   |             |        |               |
(TX only)       | 1           | 4.7    | 0.7           | 5
----------------------------------------------------------------------
Cryto offload   |             |        |               |
(TX only)       | 24          | 14.5   | 6             | 20

Regression tests show no degradation on non-ipsec and
non-offload-ipsec traffics. The packet rate test uses pktgen UDP to
transmit on single CPU, the instructions and cycles are measured on
the transmit CPU.

before:
----------------------------------------------------------------------
Non-offload             | 1           | 4.7    | 4.2           | 5.1
----------------------------------------------------------------------
Non-offload             | 24          | 11.2   | 14            | 15
----------------------------------------------------------------------
Non-ipsec               | 1           | 28     | 4             | 5.7
----------------------------------------------------------------------
Non-ipsec               | 24          | 68.3   | 17.8          | 39.7
----------------------------------------------------------------------
Non-ipsec packet rate(BURST=1000 BC=5 NCPUS=1 SIZE=60)
13.56Mpps, 456 instructions/pkt, 191 cycles/pkt

after:
----------------------------------------------------------------------
Non-offload             | 1           | 4.69    | 4.2          | 5
----------------------------------------------------------------------
Non-offload             | 24          | 11.9   | 13.5          | 15.1
----------------------------------------------------------------------
Non-ipsec               | 1           | 29     | 3.2           | 5.5
----------------------------------------------------------------------
Non-ipsec               | 24          | 68.2   | 18.5          | 39.8
----------------------------------------------------------------------
Non-ipsec packet rate: 13.56Mpps, 472 instructions/pkt, 191 cycles/pkt
Signed-off-by: default avatarRaed Salem <raeds@mellanox.com>
Signed-off-by: default avatarHuy Nguyen <huyn@mellanox.com>
Reviewed-by: default avatarMaxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: default avatarTariq Toukan <tariqt@nvidia.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent 9b9d454d
......@@ -107,6 +107,9 @@ struct mlx5e_accel_tx_state {
#ifdef CONFIG_MLX5_EN_TLS
struct mlx5e_accel_tx_tls_state tls;
#endif
#ifdef CONFIG_MLX5_EN_IPSEC
struct mlx5e_accel_tx_ipsec_state ipsec;
#endif
};
static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
......@@ -125,22 +128,46 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
}
#endif
#ifdef CONFIG_MLX5_EN_IPSEC
if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) && xfrm_offload(skb)) {
if (unlikely(!mlx5e_ipsec_handle_tx_skb(dev, skb, &state->ipsec)))
return false;
}
#endif
return true;
}
static inline bool mlx5e_accel_tx_is_ipsec_flow(struct mlx5e_accel_tx_state *state)
{
#ifdef CONFIG_MLX5_EN_IPSEC
return mlx5e_ipsec_is_tx_flow(&state->ipsec);
#endif
return false;
}
static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq,
struct mlx5e_accel_tx_state *state)
{
#ifdef CONFIG_MLX5_EN_IPSEC
if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state))
return mlx5e_ipsec_tx_ids_len(&state->ipsec);
#endif
return 0;
}
/* Part of the eseg touched by TX offloads */
#define MLX5E_ACCEL_ESEG_LEN offsetof(struct mlx5_wqe_eth_seg, mss)
static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
struct mlx5e_txqsq *sq,
struct sk_buff *skb,
struct mlx5_wqe_eth_seg *eseg)
{
#ifdef CONFIG_MLX5_EN_IPSEC
if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) {
if (unlikely(!mlx5e_ipsec_handle_tx_skb(priv, eseg, skb)))
return false;
}
if (xfrm_offload(skb))
mlx5e_ipsec_tx_build_eseg(priv, skb, eseg);
#endif
#if IS_ENABLED(CONFIG_GENEVE)
......@@ -153,11 +180,18 @@ static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe *wqe,
struct mlx5e_accel_tx_state *state)
struct mlx5e_accel_tx_state *state,
struct mlx5_wqe_inline_seg *inlseg)
{
#ifdef CONFIG_MLX5_EN_TLS
mlx5e_tls_handle_tx_wqe(sq, &wqe->ctrl, &state->tls);
#endif
#ifdef CONFIG_MLX5_EN_IPSEC
if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) &&
state->ipsec.xo && state->ipsec.tailen)
mlx5e_ipsec_handle_tx_wqe(wqe, &state->ipsec, inlseg);
#endif
}
static inline int mlx5e_accel_init_rx(struct mlx5e_priv *priv)
......
......@@ -560,6 +560,9 @@ void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
return;
}
if (mlx5_is_ipsec_device(mdev))
netdev->gso_partial_features |= NETIF_F_GSO_ESP;
mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
netdev->features |= NETIF_F_GSO_ESP;
netdev->hw_features |= NETIF_F_GSO_ESP;
......
......@@ -34,7 +34,7 @@
#include <crypto/aead.h>
#include <net/xfrm.h>
#include <net/esp.h>
#include "accel/ipsec_offload.h"
#include "en_accel/ipsec_rxtx.h"
#include "en_accel/ipsec.h"
#include "accel/accel.h"
......@@ -233,18 +233,94 @@ static void mlx5e_ipsec_set_metadata(struct sk_buff *skb,
ntohs(mdata->content.tx.seq));
}
bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv,
struct mlx5_wqe_eth_seg *eseg,
struct sk_buff *skb)
void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe,
struct mlx5e_accel_tx_ipsec_state *ipsec_st,
struct mlx5_wqe_inline_seg *inlseg)
{
inlseg->byte_count = cpu_to_be32(ipsec_st->tailen | MLX5_INLINE_SEG);
esp_output_fill_trailer((u8 *)inlseg->data, 0, ipsec_st->plen, ipsec_st->xo->proto);
}
static int mlx5e_ipsec_set_state(struct mlx5e_priv *priv,
struct sk_buff *skb,
struct xfrm_state *x,
struct xfrm_offload *xo,
struct mlx5e_accel_tx_ipsec_state *ipsec_st)
{
unsigned int blksize, clen, alen, plen;
struct crypto_aead *aead;
unsigned int tailen;
ipsec_st->x = x;
ipsec_st->xo = xo;
if (mlx5_is_ipsec_device(priv->mdev)) {
aead = x->data;
alen = crypto_aead_authsize(aead);
blksize = ALIGN(crypto_aead_blocksize(aead), 4);
clen = ALIGN(skb->len + 2, blksize);
plen = max_t(u32, clen - skb->len, 4);
tailen = plen + alen;
ipsec_st->plen = plen;
ipsec_st->tailen = tailen;
}
return 0;
}
void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb,
struct mlx5_wqe_eth_seg *eseg)
{
struct xfrm_offload *xo = xfrm_offload(skb);
struct mlx5e_ipsec_metadata *mdata;
struct mlx5e_ipsec_sa_entry *sa_entry;
struct xfrm_encap_tmpl *encap;
struct xfrm_state *x;
struct sec_path *sp;
u8 l3_proto;
if (!xo)
return true;
sp = skb_sec_path(skb);
if (unlikely(sp->len != 1))
return;
x = xfrm_input_state(skb);
if (unlikely(!x))
return;
if (unlikely(!x->xso.offload_handle ||
(skb->protocol != htons(ETH_P_IP) &&
skb->protocol != htons(ETH_P_IPV6))))
return;
mlx5e_ipsec_set_swp(skb, eseg, x->props.mode, xo);
l3_proto = (x->props.family == AF_INET) ?
((struct iphdr *)skb_network_header(skb))->protocol :
((struct ipv6hdr *)skb_network_header(skb))->nexthdr;
if (mlx5_is_ipsec_device(priv->mdev)) {
eseg->flow_table_metadata |= cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC);
eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER);
encap = x->encap;
if (!encap) {
eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC) :
cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC);
} else if (encap->encap_type == UDP_ENCAP_ESPINUDP) {
eseg->trailer |= (l3_proto == IPPROTO_ESP) ?
cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC) :
cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC);
}
}
}
bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
struct sk_buff *skb,
struct mlx5e_accel_tx_ipsec_state *ipsec_st)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
struct xfrm_offload *xo = xfrm_offload(skb);
struct mlx5e_ipsec_sa_entry *sa_entry;
struct mlx5e_ipsec_metadata *mdata;
struct xfrm_state *x;
struct sec_path *sp;
sp = skb_sec_path(skb);
if (unlikely(sp->len != 1)) {
......@@ -270,16 +346,22 @@ bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv,
atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_trailer);
goto drop;
}
if (MLX5_CAP_GEN(priv->mdev, fpga)) {
mdata = mlx5e_ipsec_add_metadata(skb);
if (IS_ERR(mdata)) {
atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
goto drop;
}
mlx5e_ipsec_set_swp(skb, eseg, x->props.mode, xo);
}
sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
sa_entry->set_iv_op(skb, x, xo);
if (MLX5_CAP_GEN(priv->mdev, fpga))
mlx5e_ipsec_set_metadata(skb, mdata, xo);
mlx5e_ipsec_set_state(priv, skb, x, xo, ipsec_st);
return true;
drop:
......
......@@ -43,6 +43,13 @@
#define MLX5_IPSEC_METADATA_SYNDROM_MASK (0x7F)
#define MLX5_IPSEC_METADATA_HANDLE(metadata) (((metadata) >> 8) & 0xFF)
struct mlx5e_accel_tx_ipsec_state {
struct xfrm_offload *xo;
struct xfrm_state *x;
u32 tailen;
u32 plen;
};
#ifdef CONFIG_MLX5_EN_IPSEC
struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev,
......@@ -55,16 +62,32 @@ void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
struct xfrm_offload *xo);
void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
struct xfrm_offload *xo);
bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv,
struct mlx5_wqe_eth_seg *eseg,
struct sk_buff *skb);
bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
struct sk_buff *skb,
struct mlx5e_accel_tx_ipsec_state *ipsec_st);
void mlx5e_ipsec_handle_tx_wqe(struct mlx5e_tx_wqe *wqe,
struct mlx5e_accel_tx_ipsec_state *ipsec_st,
struct mlx5_wqe_inline_seg *inlseg);
void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
struct sk_buff *skb,
struct mlx5_cqe64 *cqe);
static inline unsigned int mlx5e_ipsec_tx_ids_len(struct mlx5e_accel_tx_ipsec_state *ipsec_st)
{
return ipsec_st->tailen;
}
static inline bool mlx5_ipsec_is_rx_flow(struct mlx5_cqe64 *cqe)
{
return !!(MLX5_IPSEC_METADATA_MARKER_MASK & be32_to_cpu(cqe->ft_metadata));
}
static inline bool mlx5e_ipsec_is_tx_flow(struct mlx5e_accel_tx_ipsec_state *ipsec_st)
{
return ipsec_st->x;
}
void mlx5e_ipsec_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb,
struct mlx5_wqe_eth_seg *eseg);
#else
static inline
void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
......
......@@ -144,9 +144,29 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz);
}
/* RM 2311217: no L4 inner checksum for IPsec tunnel type packet */
static void
ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5_wqe_eth_seg *eseg)
{
eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
if (skb->encapsulation) {
eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
sq->stats->csum_partial_inner++;
} else {
eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
sq->stats->csum_partial++;
}
}
static inline void
mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
{
if (unlikely(eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC))) {
ipsec_txwqe_build_eseg_csum(sq, skb, eseg);
return;
}
if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
if (skb->encapsulation) {
......@@ -237,12 +257,14 @@ struct mlx5e_tx_attr {
u16 headlen;
u16 ihs;
__be16 mss;
u16 insz;
u8 opcode;
};
struct mlx5e_tx_wqe_attr {
u16 ds_cnt;
u16 ds_cnt_inl;
u16 ds_cnt_ids;
u8 num_wqebbs;
};
......@@ -299,6 +321,7 @@ static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb,
stats->packets++;
}
attr->insz = mlx5e_accel_tx_ids_len(sq, accel);
stats->bytes += attr->num_bytes;
}
......@@ -307,9 +330,13 @@ static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_at
{
u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT;
u16 ds_cnt_inl = 0;
u16 ds_cnt_ids = 0;
ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags;
if (attr->insz)
ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz,
MLX5_SEND_WQE_DS);
ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags + ds_cnt_ids;
if (attr->ihs) {
u16 inl = attr->ihs - INL_HDR_START_SZ;
......@@ -323,6 +350,7 @@ static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_at
*wqe_attr = (struct mlx5e_tx_wqe_attr) {
.ds_cnt = ds_cnt,
.ds_cnt_inl = ds_cnt_inl,
.ds_cnt_ids = ds_cnt_ids,
.num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
};
}
......@@ -398,11 +426,11 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
if (attr->ihs) {
if (skb_vlan_tag_present(skb)) {
eseg->inline_hdr.sz = cpu_to_be16(attr->ihs + VLAN_HLEN);
eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs + VLAN_HLEN);
mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs);
stats->added_vlan_packets++;
} else {
eseg->inline_hdr.sz = cpu_to_be16(attr->ihs);
eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs);
memcpy(eseg->inline_hdr.start, skb->data, attr->ihs);
}
dseg += wqe_attr->ds_cnt_inl;
......@@ -414,6 +442,7 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
stats->added_vlan_packets++;
}
dseg += wqe_attr->ds_cnt_ids;
num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs,
attr->headlen, dseg);
if (unlikely(num_dma < 0))
......@@ -430,7 +459,8 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
{
return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs;
return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs &&
!attr->insz;
}
static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg)
......@@ -580,7 +610,7 @@ void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
{
if (unlikely(!mlx5e_accel_tx_eseg(priv, sq, skb, eseg)))
if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg)))
return false;
mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
......@@ -625,7 +655,8 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
/* May update the WQE, but may not post other WQEs. */
mlx5e_accel_tx_finish(sq, wqe, &accel);
mlx5e_accel_tx_finish(sq, wqe, &accel,
(struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl));
if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &wqe->eth)))
return NETDEV_TX_OK;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment