Commit 4f4edcc2 authored by Ariel Levkovich's avatar Ariel Levkovich Committed by Saeed Mahameed

net/mlx5: E-Switch, Add ovs internal port mapping to metadata support

Adding infrastructure to map ovs internal port device to vport
match metadata to support offload of rules with internal port as
the filter device or as the destination device.

The infrastructure allows adding and removing internal port device
to an eswitch database and getting a unique vport metadata value to
be placed and match on in reg_c0 when offloading rules that are coming
from or going to an internal port.

The new int port metadata can be written to the source port register
in HW to indicate that current source port of the packet is the
internal port and not one of the actual HW vports (uplink or VF).
Using this method, it is possible to offload TC rules with an OVS
internal port as their destination port (overwriting the src vport
register) or as the filter port (matching on the value of the src
vport register and making sure it matches to the internal port's
value).

There is also a need to handle a miss case where the packet's
src port value was changed in HW to an internal port but a following
rule which matches on this new src port value wasn't found in HW.

In such case, the packet will be forwarded to the driver with
metadata which allows driver to restore the info of the internal
port's netdevice. Once this info is restored, the uplink driver
can forward the packet to the relevant netdevice in SW.
Signed-off-by: default avatarAriel Levkovich <lariel@nvidia.com>
Reviewed-by: default avatarVlad Buslov <vladbu@nvidia.com>
Reviewed-by: default avatarRoi Dayan <roid@nvidia.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent 189ce08e
......@@ -45,7 +45,7 @@ mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
esw/indir_table.o en/tc_tun_encap.o \
en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \
en/tc/post_act.o
en/tc/post_act.o en/tc/int_port.o
mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o
mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o
......
......@@ -20,6 +20,7 @@
#include "lib/port_tun.h"
#include "en/tc/sample.h"
#include "en_accel/ipsec_rxtx.h"
#include "en/tc/int_port.h"
struct mlx5e_rep_indr_block_priv {
struct net_device *netdev;
......@@ -672,12 +673,43 @@ static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *sk
mlx5_rep_tc_post_napi_receive(tc_priv);
}
static bool mlx5e_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb,
struct mlx5_mapped_obj *mapped_obj,
struct mlx5e_tc_update_priv *tc_priv,
bool *forward_tx,
u32 reg_c1)
{
u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *uplink_rpriv;
/* Tunnel restore takes precedence over int port restore */
if (tunnel_id)
return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
uplink_priv = &uplink_rpriv->uplink_priv;
if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb,
mapped_obj->int_port_metadata, forward_tx)) {
/* Set fwd_dev for future dev_put */
tc_priv->fwd_dev = skb->dev;
return true;
}
return false;
}
void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
struct sk_buff *skb)
{
u32 reg_c1 = be32_to_cpu(cqe->ft_metadata);
struct mlx5e_tc_update_priv tc_priv = {};
struct mlx5_mapped_obj mapped_obj;
struct mlx5_eswitch *esw;
bool forward_tx = false;
struct mlx5e_priv *priv;
u32 reg_c0;
int err;
......@@ -702,21 +734,26 @@ void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
}
if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
u32 reg_c1 = be32_to_cpu(cqe->ft_metadata);
if (!mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, &tc_priv) &&
!mlx5_ipsec_is_rx_flow(cqe))
goto free_skb;
} else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
mlx5e_restore_skb_sample(priv, skb, &mapped_obj, &tc_priv);
goto free_skb;
} else if (mapped_obj.type == MLX5_MAPPED_OBJ_INT_PORT_METADATA) {
if (!mlx5e_restore_skb_int_port(priv, skb, &mapped_obj, &tc_priv,
&forward_tx, reg_c1))
goto free_skb;
} else {
netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
goto free_skb;
}
forward:
napi_gro_receive(rq->cq.napi, skb);
if (forward_tx)
dev_queue_xmit(skb);
else
napi_gro_receive(rq->cq.napi, skb);
mlx5_rep_tc_post_napi_receive(&tc_priv);
......
......@@ -64,7 +64,6 @@ static inline int
mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data) { return -EOPNOTSUPP; }
struct mlx5e_tc_update_priv;
static inline void
mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq,
struct sk_buff *skb) {}
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#ifndef __MLX5_EN_TC_INT_PORT_H__
#define __MLX5_EN_TC_INT_PORT_H__
#include "en.h"
struct mlx5e_tc_int_port;
struct mlx5e_tc_int_port_priv;
enum mlx5e_tc_int_port_type {
MLX5E_TC_INT_PORT_INGRESS,
MLX5E_TC_INT_PORT_EGRESS,
};
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw);
struct mlx5e_tc_int_port_priv *
mlx5e_tc_int_port_init(struct mlx5e_priv *priv);
void
mlx5e_tc_int_port_cleanup(struct mlx5e_tc_int_port_priv *priv);
void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv);
void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv);
bool
mlx5e_tc_int_port_dev_fwd(struct mlx5e_tc_int_port_priv *priv,
struct sk_buff *skb, u32 int_vport_metadata,
bool *forward_tx);
struct mlx5e_tc_int_port *
mlx5e_tc_int_port_get(struct mlx5e_tc_int_port_priv *priv,
int ifindex,
enum mlx5e_tc_int_port_type type);
void
mlx5e_tc_int_port_put(struct mlx5e_tc_int_port_priv *priv,
struct mlx5e_tc_int_port *int_port);
u32 mlx5e_tc_int_port_get_metadata(struct mlx5e_tc_int_port *int_port);
u32 mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port);
int mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port);
#else /* CONFIG_MLX5_CLS_ACT */
static inline u32
mlx5e_tc_int_port_get_metadata_for_match(struct mlx5e_tc_int_port *int_port)
{
return 0;
}
static inline int
mlx5e_tc_int_port_get_flow_source(struct mlx5e_tc_int_port *int_port)
{
return 0;
}
static inline bool mlx5e_tc_int_port_supported(const struct mlx5_eswitch *esw)
{
return false;
}
static inline void mlx5e_tc_int_port_init_rep_rx(struct mlx5e_priv *priv) {}
static inline void mlx5e_tc_int_port_cleanup_rep_rx(struct mlx5e_priv *priv) {}
#endif /* CONFIG_MLX5_CLS_ACT */
#endif /* __MLX5_EN_TC_INT_PORT_H__ */
......@@ -53,6 +53,7 @@
#define CREATE_TRACE_POINTS
#include "diag/en_rep_tracepoint.h"
#include "en_accel/ipsec.h"
#include "en/tc/int_port.h"
#define MLX5E_REP_PARAMS_DEF_LOG_SQ_SIZE \
max(0x7, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
......@@ -857,12 +858,22 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv)
{
int err;
mlx5e_create_q_counters(priv);
return mlx5e_init_rep_rx(priv);
err = mlx5e_init_rep_rx(priv);
if (err)
goto out;
mlx5e_tc_int_port_init_rep_rx(priv);
out:
return err;
}
static void mlx5e_cleanup_ul_rep_rx(struct mlx5e_priv *priv)
{
mlx5e_tc_int_port_cleanup_rep_rx(priv);
mlx5e_cleanup_rep_rx(priv);
mlx5e_destroy_q_counters(priv);
}
......
......@@ -58,6 +58,7 @@ struct mlx5e_neigh_update_table {
};
struct mlx5_tc_ct_priv;
struct mlx5_tc_int_port_priv;
struct mlx5e_rep_bond;
struct mlx5e_tc_tun_encap;
struct mlx5e_post_act;
......@@ -98,6 +99,9 @@ struct mlx5_rep_uplink_priv {
/* tc tunneling encapsulation private data */
struct mlx5e_tc_tun_encap *encap;
/* OVS internal port support */
struct mlx5e_tc_int_port_priv *int_port_priv;
};
struct mlx5e_rep_priv {
......
......@@ -5073,6 +5073,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
MLX5_FLOW_NAMESPACE_FDB,
uplink_priv->post_act);
uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev));
uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
......@@ -5120,6 +5122,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
mapping_destroy(uplink_priv->tunnel_mapping);
err_tun_mapping:
mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
mlx5_tc_ct_clean(uplink_priv->ct_priv);
netdev_warn(priv->netdev,
"Failed to initialize tc (eswitch), err: %d", err);
......@@ -5140,6 +5143,7 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
mapping_destroy(uplink_priv->tunnel_mapping);
mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
mlx5_tc_ct_clean(uplink_priv->ct_priv);
mlx5e_tc_post_act_destroy(uplink_priv->post_act);
}
......
......@@ -38,6 +38,7 @@
#include "eswitch.h"
#include "en/tc_ct.h"
#include "en/tc_tun.h"
#include "en/tc/int_port.h"
#include "en_rep.h"
#define MLX5E_TC_FLOW_ID_MASK 0x0000ffff
......@@ -104,6 +105,8 @@ struct mlx5_rx_tun_attr {
#define MLX5E_TC_TABLE_CHAIN_TAG_BITS 16
#define MLX5E_TC_TABLE_CHAIN_TAG_MASK GENMASK(MLX5E_TC_TABLE_CHAIN_TAG_BITS - 1, 0)
#define MLX5E_TC_MAX_INT_PORT_NUM (8)
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
struct tunnel_match_key {
......
......@@ -51,6 +51,7 @@
enum mlx5_mapped_obj_type {
MLX5_MAPPED_OBJ_CHAIN,
MLX5_MAPPED_OBJ_SAMPLE,
MLX5_MAPPED_OBJ_INT_PORT_METADATA,
};
struct mlx5_mapped_obj {
......@@ -63,6 +64,7 @@ struct mlx5_mapped_obj {
u32 trunc_size;
u32 tunnel_id;
} sample;
u32 int_port_metadata;
};
};
......@@ -88,6 +90,7 @@ enum {
MAPPING_TYPE_TUNNEL_ENC_OPTS,
MAPPING_TYPE_LABELS,
MAPPING_TYPE_ZONE,
MAPPING_TYPE_INT_PORT,
};
struct vport_ingress {
......@@ -336,6 +339,9 @@ void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata);
int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps);
bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw);
int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
......
......@@ -1857,6 +1857,17 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
atomic64_set(&esw->user_count, 0);
}
static int esw_get_offloads_ft_size(struct mlx5_eswitch *esw)
{
int nvports;
nvports = esw->total_vports + MLX5_ESW_MISS_FLOWS;
if (mlx5e_tc_int_port_supported(esw))
nvports += MLX5E_TC_MAX_INT_PORT_NUM;
return nvports;
}
static int esw_create_offloads_table(struct mlx5_eswitch *esw)
{
struct mlx5_flow_table_attr ft_attr = {};
......@@ -1871,7 +1882,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw)
return -EOPNOTSUPP;
}
ft_attr.max_fte = esw->total_vports + MLX5_ESW_MISS_FLOWS;
ft_attr.max_fte = esw_get_offloads_ft_size(esw);
ft_attr.prio = 1;
ft_offloads = mlx5_create_flow_table(ns, &ft_attr);
......@@ -1900,7 +1911,7 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw)
int nvports;
int err = 0;
nvports = esw->total_vports + MLX5_ESW_MISS_FLOWS;
nvports = esw_get_offloads_ft_size(esw);
flow_group_in = kvzalloc(inlen, GFP_KERNEL);
if (!flow_group_in)
return -ENOMEM;
......@@ -2805,7 +2816,8 @@ bool mlx5_esw_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw)
{
u32 vport_end_ida = (1 << ESW_VPORT_BITS) - 1;
u32 max_pf_num = (1 << ESW_PFNUM_BITS) - 1;
/* Reserve 0xf for internal port offload */
u32 max_pf_num = (1 << ESW_PFNUM_BITS) - 2;
u32 pf_num;
int id;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment