Commit 53d94892 authored by David S. Miller

Merge branch 'mlx5-bulk-flow-stats-sriov-tc-offloads'

Saeed Mahameed says:

====================
Mellanox 100G mlx5 Bulk flow statistics and SRIOV TC offloads

This series from Amir and Or adds two enhancements to the mlx5 TC offloads.

The first two patches add bulk reading of flow counters: a few bulk counter queries are
issued instead of thousands of firmware commands per second to get the statistics of all
the flows programmed into HW.
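
For illustration only (this sketch is not part of the patches), here is roughly how the new
bulk helpers compose; dev, first_id and last_id are placeholder names, and the real driver
code additionally aligns the first id down to a multiple of 4 and caps the batch size at
1 << log_max_flow_counter_bulk:

	/* Hedged sketch: read counters [first_id, last_id] with a single
	 * firmware command instead of one mlx5_cmd_fc_query() per counter.
	 */
	struct mlx5_cmd_fc_bulk *b;
	u64 packets, bytes;
	u16 id;
	int err;

	b = mlx5_cmd_fc_bulk_alloc(dev, first_id, last_id - first_id + 1);
	if (!b)
		return -ENOMEM;

	err = mlx5_cmd_fc_bulk_query(dev, b);	/* one QUERY_FLOW_COUNTER command */
	if (err)
		goto out;

	for (id = first_id; id <= last_id; id++)	/* read back the cached results */
		mlx5_cmd_fc_bulk_get(dev, b, id, &packets, &bytes);
out:
	mlx5_cmd_fc_bulk_free(b);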

The next patches add TC-based SRIOV offloading to mlx5, as a follow-up to the e-switch
offloads mode and the VF representors. When the e-switch is set to the (new) "offloads"
mode, we can now offload TC/flower drop and forward rules; the forward action we offload
is TC mirred/redirect.
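
As a rough, assumption-laden sketch (not text from the patches), the two supported actions
end up as e-switch FDB rules via the new mlx5_eswitch_add_offloaded_rule() helper; vf_vport
and out_vport are placeholder names for the ingress VF representor vport and the redirect
target vport:

	struct mlx5_flow_rule *rule;

	/* TC drop: drop in HW, with a counter attached so flower stats can be read back */
	rule = mlx5_eswitch_add_offloaded_rule(esw, spec,
					       MLX5_FLOW_CONTEXT_ACTION_DROP |
					       MLX5_FLOW_CONTEXT_ACTION_COUNT,
					       vf_vport, 0);

	/* TC mirred/redirect to another port of the same e-switch:
	 * forward to the destination representor's vport
	 */
	rule = mlx5_eswitch_add_offloaded_rule(esw, spec,
					       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
					       vf_vport, out_vport);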

The above is done by having the VF representor netdevices export the setup_tc ndo; from
there we reuse and enhance the existing mlx5 TC offloads sub-module, which now works for
both the NIC and the SRIOV cases.

The series was applied on top of b38a75d2 ('mlxsw: core: Trace EMAD messages')
and has no merge issues with the ongoing net submission ('mlx5 tx timeout watchdog fixes').

V2:
    - Fixed compilation warning.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 18017723 d957b4e3
@@ -33,9 +33,11 @@
 #include <generated/utsrelease.h>
 #include <linux/mlx5/fs.h>
 #include <net/switchdev.h>
+#include <net/pkt_cls.h>
 #include "eswitch.h"
 #include "en.h"
+#include "en_tc.h"
 
 static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
@@ -201,6 +203,10 @@ void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
 	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
 		mlx5e_remove_sqs_fwd_rules(priv);
+
+	/* clean (and re-init) existing uplink offloaded TC rules */
+	mlx5e_tc_cleanup(priv);
+	mlx5e_tc_init(priv);
 }
 
 static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
@@ -217,6 +223,29 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
 	return 0;
 }
 
+static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle,
+				  __be16 proto, struct tc_to_netdev *tc)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+		return -EOPNOTSUPP;
+
+	switch (tc->type) {
+	case TC_SETUP_CLSFLOWER:
+		switch (tc->cls_flower->command) {
+		case TC_CLSFLOWER_REPLACE:
+			return mlx5e_configure_flower(priv, proto, tc->cls_flower);
+		case TC_CLSFLOWER_DESTROY:
+			return mlx5e_delete_flower(priv, tc->cls_flower);
+		case TC_CLSFLOWER_STATS:
+			return mlx5e_stats_flower(priv, tc->cls_flower);
+		}
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
 	.switchdev_port_attr_get = mlx5e_attr_get,
 };
@@ -226,6 +255,7 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = {
 	.ndo_stop = mlx5e_close,
 	.ndo_start_xmit = mlx5e_xmit,
 	.ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name,
+	.ndo_setup_tc = mlx5e_rep_ndo_setup_tc,
 	.ndo_get_stats64 = mlx5e_get_stats,
 };
@@ -279,7 +309,8 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
 	netdev->switchdev_ops = &mlx5e_rep_switchdev_ops;
 #endif
 
-	netdev->features |= NETIF_F_VLAN_CHALLENGED;
+	netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC;
+	netdev->hw_features |= NETIF_F_HW_TC;
 
 	eth_hw_addr_random(netdev);
 }
@@ -323,8 +354,14 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 	}
 	rep->vport_rx_rule = flow_rule;
 
+	err = mlx5e_tc_init(priv);
+	if (err)
+		goto err_del_flow_rule;
+
 	return 0;
 
+err_del_flow_rule:
+	mlx5_del_flow_rule(rep->vport_rx_rule);
 err_destroy_direct_tirs:
 	mlx5e_destroy_direct_tirs(priv);
 err_destroy_direct_rqts:
@@ -338,6 +375,7 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
 	struct mlx5_eswitch_rep *rep = priv->ppriv;
 	int i;
 
+	mlx5e_tc_cleanup(priv);
 	mlx5_del_flow_rule(rep->vport_rx_rule);
 	mlx5e_destroy_direct_tirs(priv);
 	for (i = 0; i < priv->params.num_channels; i++)
......
@@ -37,8 +37,11 @@
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/device.h>
 #include <linux/rhashtable.h>
+#include <net/switchdev.h>
+#include <net/tc_act/tc_mirred.h>
 #include "en.h"
 #include "en_tc.h"
+#include "eswitch.h"
 
 struct mlx5e_tc_flow {
 	struct rhash_head node;
@@ -49,9 +52,9 @@ struct mlx5e_tc_flow {
 #define MLX5E_TC_TABLE_NUM_ENTRIES 1024
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
 
-static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv,
-						struct mlx5_flow_spec *spec,
-						u32 action, u32 flow_tag)
+static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
+						    struct mlx5_flow_spec *spec,
+						    u32 action, u32 flow_tag)
 {
 	struct mlx5_core_dev *dev = priv->mdev;
 	struct mlx5_flow_destination dest = { 0 };
@@ -62,7 +65,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 	if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
 		dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 		dest.ft = priv->fs.vlan.ft.t;
-	} else {
+	} else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
 		counter = mlx5_fc_create(dev, true);
 		if (IS_ERR(counter))
 			return ERR_CAST(counter);
@@ -109,6 +112,22 @@ static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 	return rule;
 }
 
+static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
+						    struct mlx5_flow_spec *spec,
+						    u32 action, u32 dst_vport)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+	u32 src_vport;
+
+	if (rep->vport) /* set source vport for the flow */
+		src_vport = rep->vport;
+	else
+		src_vport = FDB_UPLINK_VPORT;
+
+	return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport);
+}
+
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
			      struct mlx5_flow_rule *rule)
 {
@@ -120,7 +139,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 		mlx5_fc_destroy(priv->mdev, counter);
 
-	if (!mlx5e_tc_num_filters(priv)) {
+	if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
 		mlx5_destroy_flow_table(priv->fs.tc.t);
 		priv->fs.tc.t = NULL;
 	}
@@ -295,8 +314,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
 	return 0;
 }
 
-static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
-			    u32 *action, u32 *flow_tag)
+static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
+				u32 *action, u32 *flow_tag)
 {
 	const struct tc_action *a;
@@ -339,16 +358,66 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 	return 0;
 }
 
+static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
+				u32 *action, u32 *dest_vport)
+{
+	const struct tc_action *a;
+
+	if (tc_no_actions(exts))
+		return -EINVAL;
+
+	*action = 0;
+
+	tc_for_each_action(a, exts) {
+		/* Only support a single action per rule */
+		if (*action)
+			return -EINVAL;
+
+		if (is_tcf_gact_shot(a)) {
+			*action = MLX5_FLOW_CONTEXT_ACTION_DROP |
+				  MLX5_FLOW_CONTEXT_ACTION_COUNT;
+			continue;
+		}
+
+		if (is_tcf_mirred_redirect(a)) {
+			int ifindex = tcf_mirred_ifindex(a);
+			struct net_device *out_dev;
+			struct mlx5e_priv *out_priv;
+			struct mlx5_eswitch_rep *out_rep;
+
+			out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
+
+			if (!switchdev_port_same_parent_id(priv->netdev, out_dev)) {
+				pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
+				       priv->netdev->name, out_dev->name);
+				return -EINVAL;
+			}
+
+			out_priv = netdev_priv(out_dev);
+			out_rep = out_priv->ppriv;
+			if (out_rep->vport == 0)
+				*dest_vport = FDB_UPLINK_VPORT;
+			else
+				*dest_vport = out_rep->vport;
+			*action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+			continue;
+		}
+
+		return -EINVAL;
+	}
+	return 0;
+}
+
 int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
			   struct tc_cls_flower_offload *f)
 {
 	struct mlx5e_tc_table *tc = &priv->fs.tc;
 	int err = 0;
-	u32 flow_tag;
-	u32 action;
+	u32 flow_tag, action, dest_vport = 0;
 	struct mlx5e_tc_flow *flow;
 	struct mlx5_flow_spec *spec;
 	struct mlx5_flow_rule *old = NULL;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 
 	flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
				      tc->ht_params);
@@ -369,28 +438,35 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 	if (err < 0)
 		goto err_free;
 
-	err = parse_tc_actions(priv, f->exts, &action, &flow_tag);
-	if (err < 0)
-		goto err_free;
+	if (esw && esw->mode == SRIOV_OFFLOADS) {
+		err = parse_tc_fdb_actions(priv, f->exts, &action, &dest_vport);
+		if (err < 0)
+			goto err_free;
+		flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, action, dest_vport);
+	} else {
+		err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag);
+		if (err < 0)
+			goto err_free;
+		flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag);
+	}
+
+	if (IS_ERR(flow->rule)) {
+		err = PTR_ERR(flow->rule);
+		goto err_free;
+	}
 
 	err = rhashtable_insert_fast(&tc->ht, &flow->node,
				     tc->ht_params);
 	if (err)
-		goto err_free;
-
-	flow->rule = mlx5e_tc_add_flow(priv, spec, action, flow_tag);
-	if (IS_ERR(flow->rule)) {
-		err = PTR_ERR(flow->rule);
-		goto err_hash_del;
-	}
+		goto err_del_rule;
 
 	if (old)
 		mlx5e_tc_del_flow(priv, old);
 
 	goto out;
 
-err_hash_del:
-	rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
+err_del_rule:
+	mlx5_del_flow_rule(flow->rule);
 
 err_free:
 	if (!old)
......
@@ -145,6 +145,7 @@ struct mlx5_eswitch_fdb {
 	} legacy;
 
 	struct offloads_fdb {
+		struct mlx5_flow_table *fdb;
 		struct mlx5_flow_group *send_to_vport_grp;
 		struct mlx5_flow_group *miss_grp;
 		struct mlx5_flow_rule *miss_rule;
@@ -221,6 +222,12 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
				 int vport,
				 struct ifla_vf_stats *vf_stats);
 
+struct mlx5_flow_spec;
+
+struct mlx5_flow_rule *
+mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
+				struct mlx5_flow_spec *spec,
+				u32 action, u32 src_vport, u32 dst_vport);
+
 struct mlx5_flow_rule *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
......
@@ -38,6 +38,54 @@
 #include "mlx5_core.h"
 #include "eswitch.h"
 
+enum {
+	FDB_FAST_PATH = 0,
+	FDB_SLOW_PATH
+};
+
+struct mlx5_flow_rule *
+mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
+				struct mlx5_flow_spec *spec,
+				u32 action, u32 src_vport, u32 dst_vport)
+{
+	struct mlx5_flow_destination dest = { 0 };
+	struct mlx5_fc *counter = NULL;
+	struct mlx5_flow_rule *rule;
+	void *misc;
+
+	if (esw->mode != SRIOV_OFFLOADS)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+		dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+		dest.vport_num = dst_vport;
+		action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	} else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+		counter = mlx5_fc_create(esw->dev, true);
+		if (IS_ERR(counter))
+			return ERR_CAST(counter);
+		dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+		dest.counter = counter;
+	}
+
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
+	MLX5_SET(fte_match_set_misc, misc, source_port, src_vport);
+
+	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
+	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
+
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
+				      MLX5_MATCH_MISC_PARAMETERS;
+
+	rule = mlx5_add_flow_rule((struct mlx5_flow_table *)esw->fdb_table.fdb,
+				  spec, action, 0, &dest);
+
+	if (IS_ERR(rule))
+		mlx5_fc_destroy(esw->dev, counter);
+
+	return rule;
+}
+
 static struct mlx5_flow_rule *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn)
 {
@@ -149,7 +197,7 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 	dest.vport_num = 0;
 
-	flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, spec,
+	flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec,
				       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
				       0, &dest);
 	if (IS_ERR(flow_rule)) {
@@ -165,6 +213,8 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 }
 
 #define MAX_PF_SQ 256
+#define ESW_OFFLOADS_NUM_ENTRIES (1 << 13) /* 8K */
+#define ESW_OFFLOADS_NUM_GROUPS  4
 
 static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
 {
@@ -190,15 +240,25 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n",
		  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
 
-	table_size = nvports + MAX_PF_SQ + 1;
-	fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0);
+	fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
+						  ESW_OFFLOADS_NUM_ENTRIES,
+						  ESW_OFFLOADS_NUM_GROUPS, 0);
 	if (IS_ERR(fdb)) {
 		err = PTR_ERR(fdb);
-		esw_warn(dev, "Failed to create FDB Table err %d\n", err);
-		goto fdb_err;
+		esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err);
+		goto fast_fdb_err;
 	}
 	esw->fdb_table.fdb = fdb;
 
+	table_size = nvports + MAX_PF_SQ + 1;
+	fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0);
+	if (IS_ERR(fdb)) {
+		err = PTR_ERR(fdb);
+		esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err);
+		goto slow_fdb_err;
+	}
+	esw->fdb_table.offloads.fdb = fdb;
+
 	/* create send-to-vport group */
 	memset(flow_group_in, 0, inlen);
 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
@@ -247,8 +307,10 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
 miss_err:
 	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
 send_vport_err:
-	mlx5_destroy_flow_table(fdb);
-fdb_err:
+	mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb);
+slow_fdb_err:
+	mlx5_destroy_flow_table(esw->fdb_table.fdb);
+fast_fdb_err:
 ns_err:
 	kvfree(flow_group_in);
 	return err;
@@ -264,6 +326,7 @@ static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw)
 	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
 	mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 
+	mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb);
 	mlx5_destroy_flow_table(esw->fdb_table.fdb);
 }
......
@@ -413,3 +413,70 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
 
 	return 0;
 }
+
+struct mlx5_cmd_fc_bulk {
+	u16 id;
+	int num;
+	int outlen;
+	u32 out[0];
+};
+
+struct mlx5_cmd_fc_bulk *
+mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num)
+{
+	struct mlx5_cmd_fc_bulk *b;
+	int outlen = sizeof(*b) +
+		MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+		MLX5_ST_SZ_BYTES(traffic_counter) * num;
+
+	b = kzalloc(outlen, GFP_KERNEL);
+	if (!b)
+		return NULL;
+
+	b->id = id;
+	b->num = num;
+	b->outlen = outlen;
+
+	return b;
+}
+
+void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b)
+{
+	kfree(b);
+}
+
+int
+mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b)
+{
+	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)];
+
+	memset(in, 0, sizeof(in));
+
+	MLX5_SET(query_flow_counter_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
+	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
+	MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id);
+	MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num);
+
+	return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
+					  b->out, b->outlen);
+}
+
+void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
+			  struct mlx5_cmd_fc_bulk *b, u16 id,
+			  u64 *packets, u64 *bytes)
+{
+	int index = id - b->id;
+	void *stats;
+
+	if (index < 0 || index >= b->num) {
+		mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n",
+			       id, b->id, b->id + b->num - 1);
+		return;
+	}
+
+	stats = MLX5_ADDR_OF(query_flow_counter_out, b->out,
+			     flow_statistics[index]);
+	*packets = MLX5_GET64(traffic_counter, stats, packets);
+	*bytes = MLX5_GET64(traffic_counter, stats, octets);
+}
@@ -76,4 +76,16 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id);
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id);
 int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
		      u64 *packets, u64 *bytes);
 
+struct mlx5_cmd_fc_bulk;
+
+struct mlx5_cmd_fc_bulk *
+mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num);
+void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b);
+int
+mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b);
+void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
+			  struct mlx5_cmd_fc_bulk *b, u16 id,
+			  u64 *packets, u64 *bytes);
+
 #endif
@@ -1712,15 +1712,21 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
 	if (!steering->fdb_root_ns)
 		return -ENOMEM;
 
-	/* Create single prio */
 	prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 1);
-	if (IS_ERR(prio)) {
-		cleanup_root_ns(steering->fdb_root_ns);
-		steering->fdb_root_ns = NULL;
-		return PTR_ERR(prio);
-	} else {
-		return 0;
-	}
+	if (IS_ERR(prio))
+		goto out_err;
+
+	prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1);
+	if (IS_ERR(prio))
+		goto out_err;
+
+	set_prio_attrs(steering->fdb_root_ns);
+	return 0;
+
+out_err:
+	cleanup_root_ns(steering->fdb_root_ns);
+	steering->fdb_root_ns = NULL;
+	return PTR_ERR(prio);
 }
 
 static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering)
......
@@ -111,6 +111,7 @@ struct mlx5_fc_cache {
 };
 
 struct mlx5_fc {
+	struct rb_node node;
 	struct list_head list;
 
 	/* last{packets,bytes} members are used when calculating the delta since
......
@@ -32,6 +32,7 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/fs.h>
+#include <linux/rbtree.h>
 #include "mlx5_core.h"
 #include "fs_core.h"
 #include "fs_cmd.h"
@@ -68,32 +69,108 @@
  * elapsed, the thread will actually query the hardware.
  */
 
+static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter)
+{
+	struct rb_node **new = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*new) {
+		struct mlx5_fc *this = container_of(*new, struct mlx5_fc, node);
+		int result = counter->id - this->id;
+
+		parent = *new;
+		if (result < 0)
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&counter->node, parent, new);
+	rb_insert_color(&counter->node, root);
+}
+
+static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
+					   struct mlx5_fc *first,
+					   u16 last_id)
+{
+	struct mlx5_cmd_fc_bulk *b;
+	struct rb_node *node = NULL;
+	u16 afirst_id;
+	int num;
+	int err;
+	int max_bulk = 1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk);
+
+	/* first id must be aligned to 4 when using bulk query */
+	afirst_id = first->id & ~0x3;
+
+	/* number of counters to query inc. the last counter */
+	num = ALIGN(last_id - afirst_id + 1, 4);
+	if (num > max_bulk) {
+		num = max_bulk;
+		last_id = afirst_id + num - 1;
+	}
+
+	b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num);
+	if (!b) {
+		mlx5_core_err(dev, "Error allocating resources for bulk query\n");
+		return NULL;
+	}
+
+	err = mlx5_cmd_fc_bulk_query(dev, b);
+	if (err) {
+		mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
+		goto out;
+	}
+
+	for (node = &first->node; node; node = rb_next(node)) {
+		struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node);
+		struct mlx5_fc_cache *c = &counter->cache;
+
+		if (counter->id > last_id)
+			break;
+
+		mlx5_cmd_fc_bulk_get(dev, b,
+				     counter->id, &c->packets, &c->bytes);
+	}
+
+out:
+	mlx5_cmd_fc_bulk_free(b);
+	return node;
+}
+
 static void mlx5_fc_stats_work(struct work_struct *work)
 {
 	struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
						 priv.fc_stats.work.work);
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
 	unsigned long now = jiffies;
-	struct mlx5_fc *counter;
-	struct mlx5_fc *tmp;
-	int err = 0;
+	struct mlx5_fc *counter = NULL;
+	struct mlx5_fc *last = NULL;
+	struct rb_node *node;
+	LIST_HEAD(tmplist);
 
 	spin_lock(&fc_stats->addlist_lock);
 
-	list_splice_tail_init(&fc_stats->addlist, &fc_stats->list);
+	list_splice_tail_init(&fc_stats->addlist, &tmplist);
 
-	if (!list_empty(&fc_stats->list))
+	if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters))
 		queue_delayed_work(fc_stats->wq, &fc_stats->work, MLX5_FC_STATS_PERIOD);
 
 	spin_unlock(&fc_stats->addlist_lock);
 
-	list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) {
-		struct mlx5_fc_cache *c = &counter->cache;
-		u64 packets;
-		u64 bytes;
+	list_for_each_entry(counter, &tmplist, list)
+		mlx5_fc_stats_insert(&fc_stats->counters, counter);
+
+	node = rb_first(&fc_stats->counters);
+	while (node) {
+		counter = rb_entry(node, struct mlx5_fc, node);
+
+		node = rb_next(node);
 
 		if (counter->deleted) {
-			list_del(&counter->list);
+			rb_erase(&counter->node, &fc_stats->counters);
 
 			mlx5_cmd_fc_free(dev, counter->id);
 
@@ -101,26 +178,20 @@ static void mlx5_fc_stats_work(struct work_struct *work)
 			continue;
 		}
 
-		if (time_before(now, fc_stats->next_query))
-			continue;
+		last = counter;
+	}
 
-		err = mlx5_cmd_fc_query(dev, counter->id, &packets, &bytes);
-		if (err) {
-			pr_err("Error querying stats for counter id %d\n",
-			       counter->id);
-			continue;
-		}
+	if (time_before(now, fc_stats->next_query) || !last)
+		return;
 
-		if (packets == c->packets)
-			continue;
-
-		c->lastuse = jiffies;
-		c->packets = packets;
-		c->bytes = bytes;
+	node = rb_first(&fc_stats->counters);
+	while (node) {
+		counter = rb_entry(node, struct mlx5_fc, node);
+
+		node = mlx5_fc_stats_query(dev, counter, last->id);
 	}
 
-	if (time_after_eq(now, fc_stats->next_query))
-		fc_stats->next_query = now + MLX5_FC_STATS_PERIOD;
+	fc_stats->next_query = now + MLX5_FC_STATS_PERIOD;
 }
 
 struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
@@ -176,7 +247,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 {
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
 
-	INIT_LIST_HEAD(&fc_stats->list);
+	fc_stats->counters = RB_ROOT;
 	INIT_LIST_HEAD(&fc_stats->addlist);
 	spin_lock_init(&fc_stats->addlist_lock);
@@ -194,20 +265,32 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
 	struct mlx5_fc *counter;
 	struct mlx5_fc *tmp;
+	struct rb_node *node;
 
 	cancel_delayed_work_sync(&dev->priv.fc_stats.work);
 	destroy_workqueue(dev->priv.fc_stats.wq);
 	dev->priv.fc_stats.wq = NULL;
 
-	list_splice_tail_init(&fc_stats->addlist, &fc_stats->list);
-
-	list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) {
+	list_for_each_entry_safe(counter, tmp, &fc_stats->addlist, list) {
 		list_del(&counter->list);
 
 		mlx5_cmd_fc_free(dev, counter->id);
 
 		kfree(counter);
 	}
+
+	node = rb_first(&fc_stats->counters);
+	while (node) {
+		counter = rb_entry(node, struct mlx5_fc, node);
+
+		node = rb_next(node);
+
+		rb_erase(&counter->node, &fc_stats->counters);
+
+		mlx5_cmd_fc_free(dev, counter->id);
+
+		kfree(counter);
+	}
 }
 
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
......
@@ -469,7 +469,7 @@ struct mlx5_irq_info {
 };
 
 struct mlx5_fc_stats {
-	struct list_head list;
+	struct rb_root counters;
 	struct list_head addlist;
 	/* protect addlist add/splice operations */
 	spinlock_t addlist_lock;
......
@@ -893,7 +893,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_330[0xb];
 	u8         log_max_xrcd[0x5];
 
-	u8         reserved_at_340[0x20];
+	u8         reserved_at_340[0x8];
+	u8         log_max_flow_counter_bulk[0x8];
+	u8         max_flow_counter[0x10];
 
 	u8         reserved_at_360[0x3];
 	u8         log_max_rq[0x5];
@@ -980,7 +983,8 @@ struct mlx5_ifc_dest_format_struct_bits {
 };
 
 struct mlx5_ifc_flow_counter_list_bits {
-	u8         reserved_at_0[0x10];
+	u8         clear[0x1];
+	u8         num_of_counters[0xf];
 	u8         flow_counter_id[0x10];
 
 	u8         reserved_at_20[0x20];
......
@@ -227,6 +227,8 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
				 struct net_device *group_dev,
				 bool joining);
 
+bool switchdev_port_same_parent_id(struct net_device *a,
+				   struct net_device *b);
+
 #else
 
 static inline void switchdev_deferred_process(void)
@@ -351,6 +353,12 @@ static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
 {
 }
 
+static inline bool switchdev_port_same_parent_id(struct net_device *a,
+						 struct net_device *b)
+{
+	return false;
+}
+
 #endif
 
 #endif /* _LINUX_SWITCHDEV_H_ */
@@ -1286,8 +1286,8 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi)
 }
 EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
 
-static bool switchdev_port_same_parent_id(struct net_device *a,
-					  struct net_device *b)
+bool switchdev_port_same_parent_id(struct net_device *a,
+				   struct net_device *b)
 {
 	struct switchdev_attr a_attr = {
		.orig_dev = a,
@@ -1323,6 +1323,7 @@ static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
 
 	return dev->ifindex;
 }
+EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
 
 static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
					   u32 old_mark, u32 *reset_mark)
......