Commit f97d139a authored by David S. Miller

Merge tag 'mlx5-updates-2019-11-12' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2019-11-12

1) Merge mlx5-next for devlink reload and flowtable offloads dependencies
2) Devlink reload support
3) TC Flowtable offloads
4) Misc cleanup
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents d0db136f 84179981
......@@ -154,6 +154,27 @@ User command examples:
values:
cmode runtime value smfs
enable_roce: RoCE enablement state
----------------------------------
RoCE enablement state controls driver support for RoCE traffic.
When RoCE is disabled, there is no GID table; only raw Ethernet QPs are supported, and traffic on the well-known UDP RoCE port is handled as raw Ethernet traffic.
To change the RoCE enablement state, a user must set the driverinit cmode value and run devlink reload.
User command examples:
- Disable RoCE::
$ devlink dev param set pci/0000:06:00.0 name enable_roce value false cmode driverinit
$ devlink dev reload pci/0000:06:00.0
- Read RoCE enablement state::
$ devlink dev param show pci/0000:06:00.0 name enable_roce
pci/0000:06:00.0:
name enable_roce type generic
values:
cmode driverinit value true
Devlink health reporters
========================
......
flow_steering_mode [DEVICE, DRIVER-SPECIFIC]
Controls the flow steering mode of the driver.
Two modes are supported:
1. 'dmfs' - Device managed flow steering.
2. 'smfs' - Software/Driver managed flow steering.
In DMFS mode, the HW steering entities are created and
managed through the Firmware.
In SMFS mode, the HW steering entities are created and
managed by the driver directly in hardware, without
firmware intervention.
Type: String
Configuration mode: runtime
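A runtime mode switch would look like this (illustrative PCI address, mirroring the enable_roce examples above):
$ devlink dev param set pci/0000:06:00.0 name flow_steering_mode value "smfs" cmode runtime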
enable_roce [DEVICE, GENERIC]
Enable handling of RoCE traffic in the device.
Enabled by default.
Configuration mode: driverinit
......@@ -65,3 +65,7 @@ reset_dev_on_drv_probe [DEVICE, GENERIC]
Reset only if device firmware can be found in the
filesystem.
Type: u8
enable_roce [DEVICE, GENERIC]
Enable handling of RoCE traffic in the device.
Type: Boolean
......@@ -35,7 +35,7 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
int vport_index;
if (rep->vport == MLX5_VPORT_UPLINK)
profile = &uplink_rep_profile;
profile = &raw_eth_profile;
else
return mlx5_ib_set_vport_rep(dev, rep);
......
......@@ -10,7 +10,7 @@
#include "mlx5_ib.h"
#ifdef CONFIG_MLX5_ESWITCH
extern const struct mlx5_ib_profile uplink_rep_profile;
extern const struct mlx5_ib_profile raw_eth_profile;
u8 mlx5_ib_eswitch_mode(struct mlx5_eswitch *esw);
struct mlx5_ib_dev *mlx5_ib_get_rep_ibdev(struct mlx5_eswitch *esw,
......
......@@ -1031,7 +1031,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, cd))
props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
if (!mlx5_core_is_pf(mdev))
if (mlx5_core_is_vf(mdev))
props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
if (mlx5_ib_port_link_layer(ibdev, 1) ==
......@@ -5145,8 +5145,7 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep);
if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
......@@ -5249,11 +5248,9 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
{
int err;
if (MLX5_CAP_GEN(dev->mdev, roce)) {
err = mlx5_nic_vport_enable_roce(dev->mdev);
if (err)
return err;
}
err = mlx5_nic_vport_enable_roce(dev->mdev);
if (err)
return err;
err = mlx5_eth_lag_init(dev);
if (err)
......@@ -5262,8 +5259,7 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
return 0;
err_disable_roce:
if (MLX5_CAP_GEN(dev->mdev, roce))
mlx5_nic_vport_disable_roce(dev->mdev);
mlx5_nic_vport_disable_roce(dev->mdev);
return err;
}
......@@ -5271,8 +5267,7 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
{
mlx5_eth_lag_cleanup(dev);
if (MLX5_CAP_GEN(dev->mdev, roce))
mlx5_nic_vport_disable_roce(dev->mdev);
mlx5_nic_vport_disable_roce(dev->mdev);
}
struct mlx5_ib_counter {
......@@ -6444,7 +6439,7 @@ static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = {
.query_port = mlx5_ib_rep_query_port,
};
static int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev)
{
ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops);
return 0;
......@@ -6484,7 +6479,7 @@ static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
mlx5_remove_netdev_notifier(dev, port_num);
}
static int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
static int mlx5_ib_stage_raw_eth_roce_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
......@@ -6500,7 +6495,7 @@ static int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
return err;
}
static void mlx5_ib_stage_rep_roce_cleanup(struct mlx5_ib_dev *dev)
static void mlx5_ib_stage_raw_eth_roce_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_stage_common_roce_cleanup(dev);
}
......@@ -6807,7 +6802,7 @@ static const struct mlx5_ib_profile pf_profile = {
mlx5_ib_stage_delay_drop_cleanup),
};
const struct mlx5_ib_profile uplink_rep_profile = {
const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
mlx5_ib_stage_init_init,
mlx5_ib_stage_init_cleanup),
......@@ -6818,11 +6813,11 @@ const struct mlx5_ib_profile uplink_rep_profile = {
mlx5_ib_stage_caps_init,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
mlx5_ib_stage_rep_non_default_cb,
mlx5_ib_stage_raw_eth_non_default_cb,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_rep_roce_init,
mlx5_ib_stage_rep_roce_cleanup),
mlx5_ib_stage_raw_eth_roce_init,
mlx5_ib_stage_raw_eth_roce_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_SRQ,
mlx5_init_srq_table,
mlx5_cleanup_srq_table),
......@@ -6898,6 +6893,7 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
const struct mlx5_ib_profile *profile;
enum rdma_link_layer ll;
struct mlx5_ib_dev *dev;
int port_type_cap;
......@@ -6933,7 +6929,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->mdev = mdev;
dev->num_ports = num_ports;
return __mlx5_ib_add(dev, &pf_profile);
if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_enabled(mdev))
profile = &raw_eth_profile;
else
profile = &pf_profile;
return __mlx5_ib_add(dev, profile);
}
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
......
......@@ -85,6 +85,22 @@ mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
return 0;
}
static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
return mlx5_unload_one(dev, false);
}
static int mlx5_devlink_reload_up(struct devlink *devlink,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
return mlx5_load_one(dev, false);
}
static const struct devlink_ops mlx5_devlink_ops = {
#ifdef CONFIG_MLX5_ESWITCH
.eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
......@@ -96,6 +112,8 @@ static const struct devlink_ops mlx5_devlink_ops = {
#endif
.flash_update = mlx5_devlink_flash_update,
.info_get = mlx5_devlink_info_get,
.reload_down = mlx5_devlink_reload_down,
.reload_up = mlx5_devlink_reload_up,
};
struct devlink *mlx5_devlink_alloc(void)
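With these ops registered, a devlink reload first calls .reload_down (mlx5_unload_one()) and then .reload_up (mlx5_load_one()), which is how driverinit parameters such as enable_roce take effect; the trigger is the command already shown in the documentation above (illustrative address):

$ devlink dev reload pci/0000:06:00.0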
......@@ -177,12 +195,29 @@ enum mlx5_devlink_param_id {
MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE,
};
static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id,
union devlink_param_value val,
struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
bool new_state = val.vbool;
if (new_state && !MLX5_CAP_GEN(dev, roce)) {
NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE");
return -EOPNOTSUPP;
}
return 0;
}
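When validation rejects a value, the extack string is reported back over netlink; a hedged sketch of the resulting interaction (exact CLI output may vary by iproute2 version):

$ devlink dev param set pci/0000:06:00.0 name enable_roce value true cmode driverinit
Error: mlx5_core: Device doesn't support RoCE.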
static const struct devlink_param mlx5_devlink_params[] = {
DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE,
"flow_steering_mode", DEVLINK_PARAM_TYPE_STRING,
BIT(DEVLINK_PARAM_CMODE_RUNTIME),
mlx5_devlink_fs_mode_get, mlx5_devlink_fs_mode_set,
mlx5_devlink_fs_mode_validate),
DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
NULL, NULL, mlx5_devlink_enable_roce_validate),
};
static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
......@@ -197,6 +232,11 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
devlink_param_driverinit_value_set(devlink,
MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE,
value);
value.vbool = MLX5_CAP_GEN(dev, roce);
devlink_param_driverinit_value_set(devlink,
DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
value);
}
int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
......@@ -213,6 +253,7 @@ int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
goto params_reg_err;
mlx5_devlink_set_params_init_values(devlink);
devlink_params_publish(devlink);
devlink_reload_enable(devlink);
return 0;
params_reg_err:
......@@ -222,6 +263,7 @@ int mlx5_devlink_register(struct devlink *devlink, struct device *dev)
void mlx5_devlink_unregister(struct devlink *devlink)
{
devlink_reload_disable(devlink);
devlink_params_unregister(devlink, mlx5_devlink_params,
ARRAY_SIZE(mlx5_devlink_params));
devlink_unregister(devlink);
......
......@@ -77,8 +77,8 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
struct neighbour **out_n,
u8 *out_ttl)
{
struct neighbour *n;
struct rtable *rt;
struct neighbour *n = NULL;
#if IS_ENABLED(CONFIG_INET)
struct mlx5_core_dev *mdev = priv->mdev;
......@@ -138,8 +138,8 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
struct neighbour **out_n,
u8 *out_ttl)
{
struct neighbour *n = NULL;
struct dst_entry *dst;
struct neighbour *n;
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
int ret;
......@@ -212,8 +212,8 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
const struct ip_tunnel_key *tun_key = &e->tun_info->key;
struct net_device *out_dev, *route_dev;
struct neighbour *n = NULL;
struct flowi4 fl4 = {};
struct neighbour *n;
int ipv4_encap_size;
char *encap_header;
u8 nud_state, ttl;
......@@ -239,12 +239,15 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
if (max_encap_size < ipv4_encap_size) {
mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
ipv4_encap_size, max_encap_size);
return -EOPNOTSUPP;
err = -EOPNOTSUPP;
goto out;
}
encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
if (!encap_header)
return -ENOMEM;
if (!encap_header) {
err = -ENOMEM;
goto out;
}
/* used by mlx5e_detach_encap to lookup a neigh entry in the
* neigh hash table when a user deletes a rule
......@@ -328,9 +331,9 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
const struct ip_tunnel_key *tun_key = &e->tun_info->key;
struct net_device *out_dev, *route_dev;
struct neighbour *n = NULL;
struct flowi6 fl6 = {};
struct ipv6hdr *ip6h;
struct neighbour *n;
int ipv6_encap_size;
char *encap_header;
u8 nud_state, ttl;
......@@ -355,12 +358,15 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
if (max_encap_size < ipv6_encap_size) {
mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
ipv6_encap_size, max_encap_size);
return -EOPNOTSUPP;
err = -EOPNOTSUPP;
goto out;
}
encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
if (!encap_header)
return -ENOMEM;
if (!encap_header) {
err = -ENOMEM;
goto out;
}
/* used by mlx5e_detach_encap to lookup a neigh entry in the
* neigh hash table when a user deletes a rule
......
......@@ -63,6 +63,7 @@
#include "en/xsk/rx.h"
#include "en/xsk/tx.h"
#include "en/hv_vhca_stats.h"
#include "lib/mlx5.h"
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
......@@ -5427,6 +5428,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
return NULL;
}
dev_net_set(netdev, mlx5_core_net(mdev));
priv = netdev_priv(netdev);
err = mlx5e_attach(mdev, priv);
......
......@@ -47,6 +47,7 @@
#include "en/tc_tun.h"
#include "fs_core.h"
#include "lib/port_tun.h"
#include "lib/mlx5.h"
#define CREATE_TRACE_POINTS
#include "diag/en_rep_tracepoint.h"
......@@ -1243,21 +1244,60 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
}
}
static LIST_HEAD(mlx5e_rep_block_cb_list);
static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
void *cb_priv)
{
struct flow_cls_offload *f = type_data;
struct flow_cls_offload cls_flower;
struct mlx5e_priv *priv = cb_priv;
struct mlx5_eswitch *esw;
unsigned long flags;
int err;
flags = MLX5_TC_FLAG(INGRESS) |
MLX5_TC_FLAG(ESW_OFFLOAD) |
MLX5_TC_FLAG(FT_OFFLOAD);
esw = priv->mdev->priv.eswitch;
switch (type) {
case TC_SETUP_CLSFLOWER:
if (!mlx5_eswitch_prios_supported(esw) || f->common.chain_index)
return -EOPNOTSUPP;
/* Re-use tc offload path by moving the ft flow to the
* reserved ft chain.
*/
memcpy(&cls_flower, f, sizeof(*f));
cls_flower.common.chain_index = FDB_FT_CHAIN;
err = mlx5e_rep_setup_tc_cls_flower(priv, &cls_flower, flags);
memcpy(&f->stats, &cls_flower.stats, sizeof(f->stats));
return err;
default:
return -EOPNOTSUPP;
}
}
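The net effect of the rewrite above: an FT rule submitted on chain 0 is replayed through the flower offload path as if it targeted the reserved FDB_FT_CHAIN (one past the last tc chain, per the eswitch.h defines below), and only the stats are copied back to the original request.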
static LIST_HEAD(mlx5e_rep_block_tc_cb_list);
static LIST_HEAD(mlx5e_rep_block_ft_cb_list);
static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct flow_block_offload *f = type_data;
f->unlocked_driver_cb = true;
switch (type) {
case TC_SETUP_BLOCK:
f->unlocked_driver_cb = true;
return flow_block_cb_setup_simple(type_data,
&mlx5e_rep_block_cb_list,
&mlx5e_rep_block_tc_cb_list,
mlx5e_rep_setup_tc_cb,
priv, priv, true);
case TC_SETUP_FT:
return flow_block_cb_setup_simple(type_data,
&mlx5e_rep_block_ft_cb_list,
mlx5e_rep_setup_ft_cb,
priv, priv, true);
default:
return -EOPNOTSUPP;
}
......@@ -1877,6 +1917,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
return -EINVAL;
}
dev_net_set(netdev, mlx5_core_net(dev));
rpriv->netdev = netdev;
rep->rep_data[REP_ETH].priv = rpriv;
INIT_LIST_HEAD(&rpriv->vport_sqs_list);
......
......@@ -74,6 +74,7 @@ enum {
MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT,
MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT,
MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
MLX5E_TC_FLOW_FLAG_FT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE,
MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1,
......@@ -276,6 +277,11 @@ static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
return flow_flag_test(flow, ESWITCH);
}
static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
{
return flow_flag_test(flow, FT);
}
static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
return flow_flag_test(flow, OFFLOADED);
......@@ -1074,7 +1080,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
slow_attr->split_count = 0;
slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
slow_attr->dest_chain = FDB_TC_SLOW_PATH_CHAIN;
rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
if (!IS_ERR(rule))
......@@ -1091,7 +1097,7 @@ mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
slow_attr->split_count = 0;
slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
slow_attr->dest_chain = FDB_TC_SLOW_PATH_CHAIN;
mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
flow_flag_clear(flow, SLOW);
}
......@@ -1168,7 +1174,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
}
if (attr->chain > max_chain) {
/* We check chain range only for tc flows.
* For ft flows, we checked attr->chain was originally 0 and set it to
* FDB_FT_CHAIN which is outside tc range.
* See mlx5e_rep_setup_ft_cb().
*/
if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
return -EOPNOTSUPP;
}
......@@ -3217,6 +3228,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
const struct ip_tunnel_info *info = NULL;
bool ft_flow = mlx5e_is_ft_flow(flow);
const struct flow_action_entry *act;
bool encap = false;
u32 action = 0;
......@@ -3261,6 +3273,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
return -EINVAL;
}
if (ft_flow && out_dev == priv->netdev) {
/* Ignore forward to self rules generated
* by adding both mlx5 devs to the flow table
* block on a normal nft offload setup.
*/
return -EOPNOTSUPP;
}
if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
NL_SET_ERR_MSG_MOD(extack,
"can't support more output ports, can't offload forwarding");
......@@ -3385,6 +3405,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
u32 dest_chain = act->chain_index;
u32 max_chain = mlx5_eswitch_get_chain_range(esw);
if (ft_flow) {
NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
return -EOPNOTSUPP;
}
if (dest_chain <= attr->chain) {
NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported");
return -EOPNOTSUPP;
......@@ -3475,6 +3499,8 @@ static void get_flags(int flags, unsigned long *flow_flags)
__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
*flow_flags = __flow_flags;
}
......
......@@ -44,7 +44,8 @@ enum {
MLX5E_TC_FLAG_EGRESS_BIT,
MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
MLX5E_TC_FLAG_LAST_EXPORTED_BIT = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
MLX5E_TC_FLAG_LAST_EXPORTED_BIT = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
};
#define MLX5_TC_FLAG(flag) BIT(MLX5E_TC_FLAG_##flag##_BIT)
......
......@@ -43,6 +43,16 @@
#include <linux/mlx5/fs.h>
#include "lib/mpfs.h"
#define FDB_TC_MAX_CHAIN 3
#define FDB_FT_CHAIN (FDB_TC_MAX_CHAIN + 1)
#define FDB_TC_SLOW_PATH_CHAIN (FDB_FT_CHAIN + 1)
/* The index of the last real chain (FT) + 1 as chain zero is valid as well */
#define FDB_NUM_CHAINS (FDB_FT_CHAIN + 1)
#define FDB_TC_MAX_PRIO 16
#define FDB_TC_LEVELS_PER_PRIO 2
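/* Illustrative arithmetic with the values above: tc chains occupy
 * fdb_sub_ns indices 0..FDB_TC_MAX_CHAIN (0..3) and the FT chain sits
 * at FDB_FT_CHAIN (4), so FDB_NUM_CHAINS = 5 namespaces are allocated;
 * FDB_TC_SLOW_PATH_CHAIN (5) is only used as a jump destination.
 * Each chain spans FDB_TC_MAX_PRIO * FDB_TC_LEVELS_PER_PRIO =
 * 16 * 2 = 32 levels, matching the levels math in create_fdb_chains().
 */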
#ifdef CONFIG_MLX5_ESWITCH
#define MLX5_MAX_UC_PER_VPORT(dev) \
......@@ -59,21 +69,22 @@
#define mlx5_esw_has_fwd_fdb(dev) \
MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table)
#define FDB_MAX_CHAIN 3
#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
#define FDB_MAX_PRIO 16
struct vport_ingress {
struct mlx5_flow_table *acl;
struct mlx5_flow_group *allow_untagged_spoofchk_grp;
struct mlx5_flow_group *allow_spoofchk_only_grp;
struct mlx5_flow_group *allow_untagged_only_grp;
struct mlx5_flow_group *drop_grp;
struct mlx5_modify_hdr *modify_metadata;
struct mlx5_flow_handle *modify_metadata_rule;
struct mlx5_flow_handle *allow_rule;
struct mlx5_flow_handle *drop_rule;
struct mlx5_fc *drop_counter;
struct mlx5_flow_handle *allow_rule;
struct {
struct mlx5_flow_group *allow_spoofchk_only_grp;
struct mlx5_flow_group *allow_untagged_spoofchk_grp;
struct mlx5_flow_group *allow_untagged_only_grp;
struct mlx5_flow_group *drop_grp;
struct mlx5_flow_handle *drop_rule;
struct mlx5_fc *drop_counter;
} legacy;
struct {
struct mlx5_flow_group *metadata_grp;
struct mlx5_modify_hdr *modify_metadata;
struct mlx5_flow_handle *modify_metadata_rule;
} offloads;
};
struct vport_egress {
......@@ -81,8 +92,10 @@ struct vport_egress {
struct mlx5_flow_group *allowed_vlans_grp;
struct mlx5_flow_group *drop_grp;
struct mlx5_flow_handle *allowed_vlan;
struct mlx5_flow_handle *drop_rule;
struct mlx5_fc *drop_counter;
struct {
struct mlx5_flow_handle *drop_rule;
struct mlx5_fc *drop_counter;
} legacy;
};
struct mlx5_vport_drop_stats {
......@@ -139,7 +152,6 @@ enum offloads_fdb_flags {
extern const unsigned int ESW_POOLS[4];
#define PRIO_LEVELS 2
struct mlx5_eswitch_fdb {
union {
struct legacy_fdb {
......@@ -166,7 +178,7 @@ struct mlx5_eswitch_fdb {
struct {
struct mlx5_flow_table *fdb;
u32 num_rules;
} fdb_prio[FDB_MAX_CHAIN + 1][FDB_MAX_PRIO + 1][PRIO_LEVELS];
} fdb_prio[FDB_NUM_CHAINS][FDB_TC_MAX_PRIO + 1][FDB_TC_LEVELS_PER_PRIO];
/* Protects fdb_prio table */
struct mutex fdb_prio_lock;
......@@ -217,8 +229,8 @@ enum {
struct mlx5_eswitch {
struct mlx5_core_dev *dev;
struct mlx5_nb nb;
/* legacy data structures */
struct mlx5_eswitch_fdb fdb_table;
/* legacy data structures */
struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
struct esw_mc_addr mc_promisc;
/* end of legacy */
......@@ -251,18 +263,16 @@ void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
int esw_offloads_init_reps(struct mlx5_eswitch *esw);
void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
int esw_vport_create_ingress_acl_table(struct mlx5_eswitch *esw,
struct mlx5_vport *vport,
int table_size);
void esw_vport_destroy_ingress_acl_table(struct mlx5_vport *vport);
void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
u32 rate_mbps);
......@@ -292,9 +302,11 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
struct ifla_vf_stats *vf_stats);
void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
int mlx5_eswitch_modify_esw_vport_context(struct mlx5_core_dev *dev, u16 vport,
bool other_vport,
void *in, int inlen);
int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport,
int mlx5_eswitch_query_esw_vport_context(struct mlx5_core_dev *dev, u16 vport,
bool other_vport,
void *out, int outlen);
struct mlx5_flow_spec;
......@@ -421,6 +433,10 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
u16 vport, u16 vlan, u8 qos, u8 set_flags);
int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw,
struct mlx5_vport *vport,
u16 vlan_id, u32 flow_action);
static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev,
u8 vlan_depth)
{
......@@ -459,6 +475,12 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev)
MLX5_VPORT_ECPF : MLX5_VPORT_PF;
}
static inline bool
mlx5_esw_is_manager_vport(const struct mlx5_eswitch *esw, u16 vport_num)
{
return esw->manager_vport == vport_num;
}
static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev)
{
return mlx5_core_is_ecpf_esw_manager(dev) ?
......@@ -593,11 +615,18 @@ bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num);
void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs);
int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data);
void
int
mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
enum mlx5_eswitch_vport_event enabled_events);
void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw);
int
esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
void
esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
struct mlx5_vport *vport);
#else /* CONFIG_MLX5_ESWITCH */
/* eswitch API stubs */
static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; }
......@@ -613,10 +642,6 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {}
#define FDB_MAX_CHAIN 1
#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
#define FDB_MAX_PRIO 1
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
......@@ -2400,9 +2400,17 @@ static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level)
int acc_level_ns = acc_level;
prio->start_level = acc_level;
fs_for_each_ns(ns, prio)
fs_for_each_ns(ns, prio) {
/* This updates start_level and num_levels of ns's priority descendants */
acc_level_ns = set_prio_attrs_in_ns(ns, acc_level);
/* If this is a prio with chains, we can jump from one chain
* (namespace) to another, so we accumulate the levels.
*/
if (prio->node.type == FS_TYPE_PRIO_CHAINS)
acc_level = acc_level_ns;
}
if (!prio->num_levels)
prio->num_levels = acc_level_ns - prio->start_level;
WARN_ON(prio->num_levels < acc_level_ns - prio->start_level);
......@@ -2591,58 +2599,109 @@ static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering)
steering->rdma_rx_root_ns = NULL;
return err;
}
static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
/* FT and tc chains are stored in the same array so we can re-use the
* mlx5_get_fdb_sub_ns() and tc API for FT chains.
* When creating a new ns for each chain, store it in the first available slot.
* Assume tc chains are created and stored first and only then the FT chain.
*/
static void store_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering,
struct mlx5_flow_namespace *ns)
{
int chain = 0;
while (steering->fdb_sub_ns[chain])
++chain;
steering->fdb_sub_ns[chain] = ns;
}
static int create_fdb_sub_ns_prio_chain(struct mlx5_flow_steering *steering,
struct fs_prio *maj_prio)
{
struct mlx5_flow_namespace *ns;
struct fs_prio *maj_prio;
struct fs_prio *min_prio;
int prio;
ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF);
if (IS_ERR(ns))
return PTR_ERR(ns);
for (prio = 0; prio < FDB_TC_MAX_PRIO; prio++) {
min_prio = fs_create_prio(ns, prio, FDB_TC_LEVELS_PER_PRIO);
if (IS_ERR(min_prio))
return PTR_ERR(min_prio);
}
store_fdb_sub_ns_prio_chain(steering, ns);
return 0;
}
static int create_fdb_chains(struct mlx5_flow_steering *steering,
int fs_prio,
int chains)
{
struct fs_prio *maj_prio;
int levels;
int chain;
int prio;
int err;
steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
if (!steering->fdb_root_ns)
return -ENOMEM;
levels = FDB_TC_LEVELS_PER_PRIO * FDB_TC_MAX_PRIO * chains;
maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns,
fs_prio,
levels);
if (IS_ERR(maj_prio))
return PTR_ERR(maj_prio);
for (chain = 0; chain < chains; chain++) {
err = create_fdb_sub_ns_prio_chain(steering, maj_prio);
if (err)
return err;
}
return 0;
}
steering->fdb_sub_ns = kzalloc(sizeof(steering->fdb_sub_ns) *
(FDB_MAX_CHAIN + 1), GFP_KERNEL);
static int create_fdb_fast_path(struct mlx5_flow_steering *steering)
{
int err;
steering->fdb_sub_ns = kcalloc(FDB_NUM_CHAINS,
sizeof(*steering->fdb_sub_ns),
GFP_KERNEL);
if (!steering->fdb_sub_ns)
return -ENOMEM;
err = create_fdb_chains(steering, FDB_TC_OFFLOAD, FDB_TC_MAX_CHAIN + 1);
if (err)
return err;
err = create_fdb_chains(steering, FDB_FT_OFFLOAD, 1);
if (err)
return err;
return 0;
}
static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
{
struct fs_prio *maj_prio;
int err;
steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
if (!steering->fdb_root_ns)
return -ENOMEM;
maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BYPASS_PATH,
1);
if (IS_ERR(maj_prio)) {
err = PTR_ERR(maj_prio);
goto out_err;
}
levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1);
maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns,
FDB_FAST_PATH,
levels);
if (IS_ERR(maj_prio)) {
err = PTR_ERR(maj_prio);
err = create_fdb_fast_path(steering);
if (err)
goto out_err;
}
for (chain = 0; chain <= FDB_MAX_CHAIN; chain++) {
ns = fs_create_namespace(maj_prio, MLX5_FLOW_TABLE_MISS_ACTION_DEF);
if (IS_ERR(ns)) {
err = PTR_ERR(ns);
goto out_err;
}
for (prio = 0; prio < FDB_MAX_PRIO * (chain + 1); prio++) {
min_prio = fs_create_prio(ns, prio, 2);
if (IS_ERR(min_prio)) {
err = PTR_ERR(min_prio);
goto out_err;
}
}
steering->fdb_sub_ns[chain] = ns;
}
maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_SLOW_PATH, 1);
if (IS_ERR(maj_prio)) {
......
......@@ -84,4 +84,9 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev,
void *key, u32 sz_bytes, u32 *p_key_id);
void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id);
static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev)
{
return devlink_net(priv_to_devlink(dev));
}
#endif
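mlx5_core_net() returns the net namespace that the devlink instance is currently bound to (relevant now that reload_down carries a netns_change flag); the call sites added in this series use it when creating netdevs, e.g.:

dev_net_set(netdev, mlx5_core_net(mdev));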
......@@ -1168,7 +1168,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
mlx5_put_uars_page(dev, dev->priv.uar);
}
static int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
{
int err = 0;
......@@ -1226,7 +1226,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
return err;
}
static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
{
if (cleanup) {
mlx5_unregister_device(dev);
......
......@@ -243,4 +243,7 @@ enum {
u8 mlx5_get_nic_state(struct mlx5_core_dev *dev);
void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state);
int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup);
int mlx5_load_one(struct mlx5_core_dev *dev, bool boot);
#endif /* __MLX5_CORE_H__ */
......@@ -108,10 +108,10 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
return 0;
}
static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev, bool clear_vf)
static void
mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
int num_vfs = pci_num_vf(dev->pdev);
int err;
int vf;
......@@ -147,7 +147,7 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
err = pci_enable_sriov(pdev, num_vfs);
if (err) {
mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err);
mlx5_device_disable_sriov(dev, true);
mlx5_device_disable_sriov(dev, num_vfs, true);
}
return err;
}
......@@ -155,9 +155,10 @@ static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
static void mlx5_sriov_disable(struct pci_dev *pdev)
{
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int num_vfs = pci_num_vf(dev->pdev);
pci_disable_sriov(pdev);
mlx5_device_disable_sriov(dev, true);
mlx5_device_disable_sriov(dev, num_vfs, true);
}
int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
......@@ -192,7 +193,7 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev)
if (!mlx5_core_is_pf(dev))
return;
mlx5_device_disable_sriov(dev, false);
mlx5_device_disable_sriov(dev, pci_num_vf(dev->pdev), false);
}
static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
......
......@@ -154,7 +154,7 @@ int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher,
nic_matcher->num_of_builders =
nic_matcher->num_of_builders_arr[outer_ipv][inner_ipv];
if (!nic_matcher->ste_builder) {
if (!nic_matcher->num_of_builders) {
mlx5dr_dbg(matcher->tbl->dmn,
"Rule not supported on this matcher due to IP related fields\n");
return -EINVAL;
......
......@@ -1121,6 +1121,11 @@ static inline bool mlx5_core_is_pf(const struct mlx5_core_dev *dev)
return dev->coredev_type == MLX5_COREDEV_PF;
}
static inline bool mlx5_core_is_vf(const struct mlx5_core_dev *dev)
{
return dev->coredev_type == MLX5_COREDEV_VF;
}
static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev)
{
return dev->caps.embedded_cpu;
......@@ -1186,4 +1191,15 @@ enum {
MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32,
};
static inline bool mlx5_is_roce_enabled(struct mlx5_core_dev *dev)
{
struct devlink *devlink = priv_to_devlink(dev);
union devlink_param_value val;
devlink_param_driverinit_value_get(devlink,
DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
&val);
return val.vbool;
}
#endif /* MLX5_DRIVER_H */
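The helper reads the devlink driverinit value, which the driver samples at probe/reload time. A minimal usage sketch, mirroring the profile selection in mlx5_ib_add() above:

if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_enabled(mdev))
	profile = &raw_eth_profile; /* RoCE disabled: raw Ethernet only */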
......@@ -80,7 +80,8 @@ enum mlx5_flow_namespace_type {
enum {
FDB_BYPASS_PATH,
FDB_FAST_PATH,
FDB_TC_OFFLOAD,
FDB_FT_OFFLOAD,
FDB_SLOW_PATH,
};
......
......@@ -1153,7 +1153,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 log_max_srq[0x5];
u8 reserved_at_b0[0x10];
u8 reserved_at_c0[0x8];
u8 max_sgl_for_optimized_performance[0x8];
u8 log_max_cq_sz[0x8];
u8 reserved_at_d0[0xb];
u8 log_max_cq[0x5];
......
......@@ -402,6 +402,7 @@ enum devlink_param_generic_id {
DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE,
DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
/* add new param generic ids above here*/
__DEVLINK_PARAM_GENERIC_ID_MAX,
......@@ -436,6 +437,9 @@ enum devlink_param_generic_id {
"reset_dev_on_drv_probe"
#define DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE DEVLINK_PARAM_TYPE_U8
#define DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME "enable_roce"
#define DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE DEVLINK_PARAM_TYPE_BOOL
#define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \
{ \
.id = DEVLINK_PARAM_GENERIC_ID_##_id, \
......
......@@ -3006,6 +3006,11 @@ static const struct devlink_param devlink_param_generic[] = {
.name = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME,
.type = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE,
},
{
.id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
.name = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME,
.type = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE,
},
};
static int devlink_param_generic_verify(const struct devlink_param *param)
......