Commit 7937963a authored by David S. Miller

Merge branch 'mlx5-enhanced-flow-steering'

Or Gerlitz says:

====================
net/mlx5_core: Enhance flow steering support

v0 --> v1 changes:
  - fixed improperly formatted comments.
  - compare value of ib_spec->eth.mask.ether_type in network byte order
     in ('IB/mlx5: Add flow steering utilities').

v1 --> v2 changes:
  - made sure that service functions added in the IB driver are only static-fied
    on the last commit, to make sure bisection with -Werror works fine.

v2 --> v3 changes:
   - squashed patches 11 and 12 into one patch, s.t Dave's comment
     on unused static functions gcc complaints during bisection is
     correctly addressed.

v3 has been generated against net-next commit c9c99311 "Merge tag
'batman-adv-for-davem' of git://git.open-mesh.org/linux-merge"

The series is signed by Matan, who was recently assigned as a maintainer for
the mlx5_core and IB drivers (this is a 4.5-rc1 change to the maintainers file coming
from the rdma tree) -- as such I didn't see a need to add my signature (Or).

This series adds three new functionalities to the driver flow-steering
infrastructure: auto-grouped flow tables, chaining of flow tables and
updates for the root flow table.

1. Auto-grouped flow tables - Flow table with auto grouping management.
When a flow table is created, hints regarding the number of rule types
and the number of rules are given in advance. Thus, a flow table is
divided into #NUM_TYPES+1 groups, each containing
(#NUM_RULES)/(#NUM_TYPES+1) rules. The first #NUM_TYPES parts are groups
which are filled if the added rule matches the group specification or
the group is empty. The last part is filled by rules that can't fit
any of the former groups.

2. Chaining flow tables - Flow tables from different priorities are chained
together: if there is no match in the flow table of priority i, we continue
searching for a match in priority i+1. This holds whether or not priorities
i and i+1 belong to the same namespace.

3. Updating the root flow table - the root flow table is the flow table
with the lowest level. The hardware starts searching for a match in the
root flow table and continues according to the matches it finds along
the way.

The first usage for the new functionality is flow steering for user-space
ConnectX-4 offloaded HW Eth RX queues done through the mlx5 IB driver.

When the mlx5 core driver is loaded, it opens three flow namespaces:
1. By-pass namespace (used by mlx5 IB driver).
2. Kernel namespace (used in order to get packets to the networking stack
through mlx5 EN driver).
3. Leftovers namespace (used by mlx5 IB and future sniffer)

The series is built as follows:

Patch #1 introduces auto-grouped flow tables support.

Patch #2 adds utility functions for finding the next and the previous
flow tables in different priorities. This is used in order to chain
the flow tables in a downstream patch.

Patch #3 introduces a firmware command for updating the root flow table.

Patch #4 introduces the modify flow table firmware command; this command is used
when we want to change the next flow table of an existing flow table.
This is used for chaining flow tables as well.

Patch #5 connects/disconnects flow tables. This is the actual chaining
process used when we want to link flow tables. This means that if we couldn't
find a match in the first flow table, we'll continue in the chained
flow table.

Patch #6 updates the priority attributes that are required for flow table
level allocation. We update both the max_fts (the number of allowed FTs
in the sub-tree of this priority) and the start_level (which is the first
level we'll assign to the flow-tables created inside the priority).

Patch #7 adds checking of required device capabilities. Some namespaces
can only be created if the hardware supports certain attributes.
This is especially true for the Bypass and leftovers namespaces. This
adds a generic mechanism to check these required attributes.

Patch #8 creates two additional namespaces:
	a. Bypass flow rules (has nine priorities)
	b. Leftovers packets (has one priority) - for unmatched packets.

Patch #9 re-factors ipv4/ipv6 match fields in the mlx5 firmware interface
header to be more clear.

Patch #10 exports the flow steering API for mlx5_ib usage

Patch #11 implements the required support in mlx5_ib in order
to support the RDMA flow steering verbs.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 23c09c26 038d2ef8
This diff is collapsed.
......@@ -105,6 +105,36 @@ struct mlx5_ib_pd {
u32 pdn;
};
/* Index of the last bypass priority (MLX5_BY_PASS_NUM_PRIOS - 1);
 * presumably the slot used for multicast rules - TODO confirm against users.
 */
#define MLX5_IB_FLOW_MCAST_PRIO (MLX5_BY_PASS_NUM_PRIOS - 1)
/* Index immediately below MLX5_IB_FLOW_MCAST_PRIO. */
#define MLX5_IB_FLOW_LAST_PRIO (MLX5_IB_FLOW_MCAST_PRIO - 1)
/* Build-time guard: MLX5_IB_FLOW_LAST_PRIO must be strictly positive,
 * i.e. MLX5_BY_PASS_NUM_PRIOS has to be at least 3.
 */
#if (MLX5_IB_FLOW_LAST_PRIO <= 0)
#error "Invalid number of bypass priorities"
#endif
/* Index one past the bypass priorities; by its name, the slot used for
 * the leftovers flow table.
 */
#define MLX5_IB_FLOW_LEFTOVERS_PRIO (MLX5_IB_FLOW_MCAST_PRIO + 1)
/* Number of slots needed to cover indices 0..MLX5_IB_FLOW_LEFTOVERS_PRIO;
 * sizes the prios[] array of struct mlx5_ib_flow_db.
 */
#define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1)
/* Per-priority flow steering state; one instance per entry of
 * mlx5_ib_flow_db.prios[].
 */
struct mlx5_ib_flow_prio {
/* Flow table associated with this priority slot. */
struct mlx5_flow_table *flow_table;
/* Reference counter for this slot.
 * NOTE(review): presumably counts the rules using flow_table - confirm
 * against the add/del flow code, which is not visible here.
 */
unsigned int refcount;
};
/* Driver-side wrapper around a struct ib_flow that pairs it with the
 * underlying mlx5 flow rule.
 */
struct mlx5_ib_flow_handler {
/* List linkage; NOTE(review): the list this is chained on is not
 * visible here.
 */
struct list_head list;
/* Embedded IB-core flow object. */
struct ib_flow ibflow;
/* Priority value for this handler; presumably an index into
 * mlx5_ib_flow_db.prios[] - TODO confirm.
 */
unsigned int prio;
/* mlx5 core flow rule backing this handler. */
struct mlx5_flow_rule *rule;
};
/* Flow steering database: one mlx5_ib_flow_prio slot per index in
 * 0..MLX5_IB_NUM_FLOW_FT-1, plus the mutex serializing updates.
 */
struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT];
/* Protects the flow steering bypass flow tables while flow rules
 * are added or deleted.
 * Only a single add/removal of a flow steering rule can be done
 * at a time.
 */
struct mutex lock;
};
/* Use macros here so that don't have to duplicate
* enum ib_send_flags and enum ib_qp_type for low-level driver
*/
......@@ -171,9 +201,21 @@ struct mlx5_ib_pfault {
struct mlx5_pagefault mpfault;
};
/* Receive-queue state kept for a raw packet QP (see
 * struct mlx5_ib_raw_packet_qp).
 */
struct mlx5_ib_rq {
/* NOTE(review): presumably the hardware TIR number for this RQ - confirm. */
u32 tirn;
};
/* Raw packet QP state; currently holds only the receive queue part. */
struct mlx5_ib_raw_packet_qp {
struct mlx5_ib_rq rq;
};
struct mlx5_ib_qp {
struct ib_qp ibqp;
struct mlx5_core_qp mqp;
union {
struct mlx5_core_qp mqp;
struct mlx5_ib_raw_packet_qp raw_packet_qp;
};
struct mlx5_buf buf;
struct mlx5_db db;
......@@ -431,6 +473,7 @@ struct mlx5_ib_dev {
*/
struct srcu_struct mr_srcu;
#endif
struct mlx5_ib_flow_db flow_db;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
......
......@@ -38,9 +38,28 @@
#include "fs_cmd.h"
#include "mlx5_core.h"
/*
 * Issue the SET_FLOW_TABLE_ROOT firmware command for @ft.
 *
 * @dev: device the command is executed on.
 * @ft:  flow table whose type and id are written into the command.
 *
 * Returns the result of mlx5_cmd_exec_check_status().
 */
int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
			    struct mlx5_flow_table *ft)
{
	/* Both mailboxes start out fully zeroed. */
	u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0};

	/* Identify the table that the command refers to. */
	MLX5_SET(set_flow_table_root_in, in, table_id, ft->id);
	MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
	MLX5_SET(set_flow_table_root_in, in, opcode,
		 MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);

	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
					  sizeof(out));
}
int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
enum fs_flow_table_type type, unsigned int level,
unsigned int log_size, unsigned int *table_id)
unsigned int log_size, struct mlx5_flow_table
*next_ft, unsigned int *table_id)
{
u32 out[MLX5_ST_SZ_DW(create_flow_table_out)];
u32 in[MLX5_ST_SZ_DW(create_flow_table_in)];
......@@ -51,6 +70,10 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
MLX5_SET(create_flow_table_in, in, opcode,
MLX5_CMD_OP_CREATE_FLOW_TABLE);
if (next_ft) {
MLX5_SET(create_flow_table_in, in, table_miss_mode, 1);
MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id);
}
MLX5_SET(create_flow_table_in, in, table_type, type);
MLX5_SET(create_flow_table_in, in, level, level);
MLX5_SET(create_flow_table_in, in, log_size, log_size);
......@@ -83,6 +106,33 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
sizeof(out));
}
/*
 * Issue the MODIFY_FLOW_TABLE firmware command to change the miss
 * table of @ft.
 *
 * @dev:     device the command is executed on.
 * @ft:      flow table being modified (its type and id are written into
 *           the command).
 * @next_ft: table to use on a miss, or NULL. When non-NULL,
 *           table_miss_mode is set to 1 and table_miss_id to next_ft->id;
 *           when NULL, table_miss_mode is set to 0.
 *
 * Returns the result of mlx5_cmd_exec_check_status().
 */
int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
			       struct mlx5_flow_table *ft,
			       struct mlx5_flow_table *next_ft)
{
	/* Both mailboxes start out fully zeroed. */
	u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {0};

	MLX5_SET(modify_flow_table_in, in, opcode,
		 MLX5_CMD_OP_MODIFY_FLOW_TABLE);
	MLX5_SET(modify_flow_table_in, in, modify_field_select,
		 MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
	MLX5_SET(modify_flow_table_in, in, table_id, ft->id);
	MLX5_SET(modify_flow_table_in, in, table_type, ft->type);

	if (!next_ft) {
		/* No follow-up table: miss mode 0. */
		MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0);
	} else {
		/* Chain to next_ft: miss mode 1 plus the id of that table. */
		MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1);
		MLX5_SET(modify_flow_table_in, in, table_miss_id, next_ft->id);
	}

	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
					  sizeof(out));
}
int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
u32 *in,
......
......@@ -35,11 +35,16 @@
int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
enum fs_flow_table_type type, unsigned int level,
unsigned int log_size, unsigned int *table_id);
unsigned int log_size, struct mlx5_flow_table
*next_ft, unsigned int *table_id);
int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft);
int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
struct mlx5_flow_table *next_ft);
int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
u32 *in, unsigned int *group_id);
......@@ -62,4 +67,6 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
unsigned int index);
int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft);
#endif
......@@ -77,6 +77,11 @@ struct mlx5_flow_table {
unsigned int max_fte;
unsigned int level;
enum fs_flow_table_type type;
struct {
bool active;
unsigned int required_groups;
unsigned int num_groups;
} autogroup;
};
/* Type of children is mlx5_flow_rule */
......@@ -124,6 +129,9 @@ struct mlx5_flow_root_namespace {
struct mlx5_flow_namespace ns;
enum fs_flow_table_type table_type;
struct mlx5_core_dev *dev;
struct mlx5_flow_table *root_ft;
/* Should be held when chaining flow tables */
struct mutex chain_lock;
};
int mlx5_init_fs(struct mlx5_core_dev *dev);
......@@ -143,6 +151,12 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev);
/* Iterate 'pos' over the children of namespace 'ns' via
 * fs_list_for_each_entry().
 */
#define fs_for_each_prio(pos, ns) \
fs_list_for_each_entry(pos, &(ns)->node.children)
/* Iterate 'pos' over the children of priority 'prio'. Expands to the same
 * list walk as fs_for_each_ft(); NOTE(review): presumably only the type of
 * 'pos' differs between the two - confirm against fs_list_for_each_entry().
 */
#define fs_for_each_ns(pos, prio) \
fs_list_for_each_entry(pos, &(prio)->node.children)
/* Iterate 'pos' over the children of priority 'prio' (see the note on
 * fs_for_each_ns(): both macros have the same expansion).
 */
#define fs_for_each_ft(pos, prio) \
fs_list_for_each_entry(pos, &(prio)->node.children)
/* Iterate 'pos' over the children of flow table 'ft'. */
#define fs_for_each_fg(pos, ft) \
fs_list_for_each_entry(pos, &(ft)->node.children)
......
......@@ -1258,4 +1258,6 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz;
}
/* Number of priorities in the bypass flow namespace. */
#define MLX5_BY_PASS_NUM_PRIOS 9
#endif /* MLX5_DEVICE_H */
......@@ -38,8 +38,20 @@
#define MLX5_FS_DEFAULT_FLOW_TAG 0x0
#define LEFTOVERS_RULE_NUM	 2

/*
 * Fill in the parameters used for the leftovers flow table.
 *
 * @priority: set to 0 (priority of leftovers_prio-0).
 * @n_ent:    set to LEFTOVERS_RULE_NUM.
 * @n_grp:    set to LEFTOVERS_RULE_NUM.
 *
 * All three pointers are written unconditionally and must be valid.
 */
static inline void build_leftovers_ft_param(int *priority,
					    int *n_ent,
					    int *n_grp)
{
	const int leftovers_rules = LEFTOVERS_RULE_NUM;

	/* Priority of leftovers_prio-0 */
	*priority = 0;
	/* Entry count and group count are both the leftovers rule count. */
	*n_ent = leftovers_rules;
	*n_grp = leftovers_rules;
}
/* Flow namespace selector passed to mlx5_get_flow_namespace(). */
enum mlx5_flow_namespace_type {
/* By-pass namespace (used by the mlx5 IB driver). */
MLX5_FLOW_NAMESPACE_BYPASS,
/* Kernel namespace (used to get packets to the networking stack
 * through the mlx5 EN driver).
 */
MLX5_FLOW_NAMESPACE_KERNEL,
/* Leftovers namespace (used by mlx5 IB and a future sniffer). */
MLX5_FLOW_NAMESPACE_LEFTOVERS,
/* NOTE(review): presumably the forwarding database namespace - confirm. */
MLX5_FLOW_NAMESPACE_FDB,
};
......@@ -61,6 +73,12 @@ struct mlx5_flow_namespace *
mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
enum mlx5_flow_namespace_type type);
/* Create a flow table with auto-grouping management inside priority
 * 'prio' of namespace 'ns'.
 *
 * num_flow_table_entries and max_num_groups are hints given in advance
 * at creation time.
 * NOTE(review): presumably the table is split into max_num_groups + 1
 * groups of equal size, the last one taking rules that fit no other
 * group - confirm against the implementation, which is not visible here.
 */
struct mlx5_flow_table *
mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
int prio,
int num_flow_table_entries,
int max_num_groups);
struct mlx5_flow_table *
mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
int prio,
......
......@@ -185,6 +185,7 @@ enum {
MLX5_CMD_OP_MODIFY_RQT = 0x917,
MLX5_CMD_OP_DESTROY_RQT = 0x918,
MLX5_CMD_OP_QUERY_RQT = 0x919,
MLX5_CMD_OP_SET_FLOW_TABLE_ROOT = 0x92f,
MLX5_CMD_OP_CREATE_FLOW_TABLE = 0x930,
MLX5_CMD_OP_DESTROY_FLOW_TABLE = 0x931,
MLX5_CMD_OP_QUERY_FLOW_TABLE = 0x932,
......@@ -193,7 +194,8 @@ enum {
MLX5_CMD_OP_QUERY_FLOW_GROUP = 0x935,
MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY = 0x936,
MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY = 0x937,
MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938
MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY = 0x938,
MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c
};
struct mlx5_ifc_flow_table_fields_supported_bits {
......@@ -258,7 +260,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 ft_support[0x1];
u8 reserved_0[0x2];
u8 flow_modify_en[0x1];
u8 reserved_1[0x1c];
u8 modify_root[0x1];
u8 identified_miss_table_mode[0x1];
u8 flow_table_modify[0x1];
u8 reserved_1[0x19];
u8 reserved_2[0x2];
u8 log_max_ft_size[0x6];
......@@ -293,6 +298,22 @@ struct mlx5_ifc_odp_per_transport_service_cap_bits {
u8 reserved_1[0x1a];
};
/* IPv4 address layout: 0x60 reserved units followed by a 0x20-wide ipv4
 * field, 0x80 in total. NOTE(review): array sizes in these *_bits layouts
 * appear to be bit widths - confirm.
 */
struct mlx5_ifc_ipv4_layout_bits {
u8 reserved_0[0x60];
u8 ipv4[0x20];
};
/* IPv6 address layout: 16 elements of width 0x8 (0x80 in total). */
struct mlx5_ifc_ipv6_layout_bits {
u8 ipv6[16][0x8];
};
/* Overlay of the IPv6 and IPv4 layouts in a single 0x80-wide slot. With
 * the reserved prefix of the IPv4 layout, the ipv4 field overlaps the last
 * 0x20 units of the ipv6 field.
 */
union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
u8 reserved_0[0x80];
};
struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
u8 smac_47_16[0x20];
......@@ -323,9 +344,9 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
u8 udp_sport[0x10];
u8 udp_dport[0x10];
u8 src_ip[4][0x20];
union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
u8 dst_ip[4][0x20];
union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
};
struct mlx5_ifc_fte_match_set_misc_bits {
......@@ -5667,12 +5688,16 @@ struct mlx5_ifc_create_flow_table_in_bits {
u8 reserved_4[0x20];
u8 reserved_5[0x8];
u8 reserved_5[0x4];
u8 table_miss_mode[0x4];
u8 level[0x8];
u8 reserved_6[0x8];
u8 log_size[0x8];
u8 reserved_7[0x120];
u8 reserved_7[0x8];
u8 table_miss_id[0x18];
u8 reserved_8[0x100];
};
struct mlx5_ifc_create_flow_group_out_bits {
......@@ -6946,4 +6971,72 @@ union mlx5_ifc_uplink_pci_interface_document_bits {
u8 reserved_0[0x20060];
};
/* Output layout of the SET_FLOW_TABLE_ROOT command (0x80 units in total):
 * status and syndrome fields separated by reserved space.
 */
struct mlx5_ifc_set_flow_table_root_out_bits {
u8 status[0x8];
u8 reserved_0[0x18];
u8 syndrome[0x20];
u8 reserved_1[0x40];
};
/* Input layout of the SET_FLOW_TABLE_ROOT command (0x200 units in total).
 * mlx5_cmd_update_root_ft() fills in opcode, table_type and table_id and
 * leaves every other field zero.
 */
struct mlx5_ifc_set_flow_table_root_in_bits {
u8 opcode[0x10];
u8 reserved_0[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
u8 reserved_2[0x40];
/* Type of the flow table (written from ft->type). */
u8 table_type[0x8];
u8 reserved_3[0x18];
u8 reserved_4[0x8];
/* Id of the flow table (written from ft->id). */
u8 table_id[0x18];
u8 reserved_5[0x140];
};
/* Values for the modify_field_select field of the MODIFY_FLOW_TABLE
 * command input.
 */
enum {
/* Written by mlx5_cmd_modify_flow_table() when it updates the miss table. */
MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID = 0x1,
};
/* Output layout of the MODIFY_FLOW_TABLE command (0x80 units in total):
 * status and syndrome fields separated by reserved space.
 */
struct mlx5_ifc_modify_flow_table_out_bits {
u8 status[0x8];
u8 reserved_0[0x18];
u8 syndrome[0x20];
u8 reserved_1[0x40];
};
/* Input layout of the MODIFY_FLOW_TABLE command (0x200 units in total).
 * mlx5_cmd_modify_flow_table() fills in opcode, modify_field_select,
 * table_type, table_id, table_miss_mode and, when a next table is given,
 * table_miss_id.
 */
struct mlx5_ifc_modify_flow_table_in_bits {
u8 opcode[0x10];
u8 reserved_0[0x10];
u8 reserved_1[0x10];
u8 op_mod[0x10];
u8 reserved_2[0x20];
u8 reserved_3[0x10];
/* Selects which attribute is modified; see
 * MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID.
 */
u8 modify_field_select[0x10];
/* Type of the flow table being modified (written from ft->type). */
u8 table_type[0x8];
u8 reserved_4[0x18];
u8 reserved_5[0x8];
/* Id of the flow table being modified (written from ft->id). */
u8 table_id[0x18];
u8 reserved_6[0x4];
/* Written as 1 when a next table is supplied, 0 otherwise. */
u8 table_miss_mode[0x4];
u8 reserved_7[0x18];
u8 reserved_8[0x8];
/* Id of the table to continue in on a miss (written from next_ft->id). */
u8 table_miss_id[0x18];
u8 reserved_9[0x100];
};
#endif /* MLX5_IFC_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment