Commit b6dfff21 authored by Paul Blakey, committed by Saeed Mahameed

net/mlx5e: Fix matching on modified inner ip_ecn bits

Tunnel device follows RFC 6040, and during decapsulation inner
ip_ecn might change depending on inner and outer ip_ecn as follows:

 +---------+----------------------------------------+
 |Arriving |         Arriving Outer Header          |
 |   Inner +---------+---------+---------+----------+
 |  Header | Not-ECT | ECT(0)  | ECT(1)  |   CE     |
 +---------+---------+---------+---------+----------+
 | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop>   |
 |  ECT(0) |  ECT(0) | ECT(0)  | ECT(1)  |   CE*    |
 |  ECT(1) |  ECT(1) | ECT(1)  | ECT(1)* |   CE*    |
 |    CE   |   CE    |  CE     | CE      |   CE     |
 +---------+---------+---------+---------+----------+

Cells marked above are changed from original inner packet ip_ecn value.

Tc then matches on the modified inner ip_ecn, but hw offload, which
matches the inner ip_ecn value before decap, will fail.

Fix that by mapping all the cases of outer and inner ip_ecn matching,
and only supporting cases where we know inner wouldn't be changed by
decap, or in the outer ip_ecn=CE case, inner ip_ecn didn't matter.

Fixes: bcef735c ("net/mlx5e: Offload TC matching on tos/ttl for ip tunnels")
Signed-off-by: Paul Blakey <paulb@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Eli Cohen <elic@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
parent 01c3fd11
......@@ -1949,6 +1949,111 @@ u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
return ip_version;
}
/* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
 * And changes inner ip_ecn depending on inner and outer ip_ecn as follows:
 *      +---------+----------------------------------------+
 *      |Arriving |         Arriving Outer Header          |
 *      |   Inner +---------+---------+---------+----------+
 *      |  Header | Not-ECT | ECT(0)  | ECT(1)  |   CE     |
 *      +---------+---------+---------+---------+----------+
 *      | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop>   |
 *      |  ECT(0) |  ECT(0) | ECT(0)  | ECT(1)  |   CE*    |
 *      |  ECT(1) |  ECT(1) | ECT(1)  | ECT(1)* |   CE*    |
 *      |    CE   |   CE    |  CE     | CE      |   CE     |
 *      +---------+---------+---------+---------+----------+
 *
 * Tc matches on inner after decapsulation on tunnel device, but hw offload matches
 * the inner ip_ecn value before hardware decap action.
 *
 * Cells marked with '*' are changed from the original inner packet ip_ecn value during
 * decap, and so matching those values on inner ip_ecn before decap will fail.
 *
 * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn,
 * except for the outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE,
 * and as such we can drop the inner ip_ecn=CE match.
 */
static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
struct flow_cls_offload *f,
bool *match_inner_ecn)
{
u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct netlink_ext_ack *extack = f->common.extack;
struct flow_match_ip match;
*match_inner_ecn = true;
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
flow_rule_match_enc_ip(rule, &match);
outer_ecn_key = match.key->tos & INET_ECN_MASK;
outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
flow_rule_match_ip(rule, &match);
inner_ecn_key = match.key->tos & INET_ECN_MASK;
inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
}
if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
return -EOPNOTSUPP;
}
if (!outer_ecn_mask) {
if (!inner_ecn_mask)
return 0;
NL_SET_ERR_MSG_MOD(extack,
"Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
netdev_warn(priv->netdev,
"Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
return -EOPNOTSUPP;
}
if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
NL_SET_ERR_MSG_MOD(extack,
"Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
netdev_warn(priv->netdev,
"Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
return -EOPNOTSUPP;
}
if (!inner_ecn_mask)
return 0;
/* Both inner and outer have full mask on ecn */
if (outer_ecn_key == INET_ECN_ECT_1) {
/* inner ecn might change by DECAP action */
NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
return -EOPNOTSUPP;
}
if (outer_ecn_key != INET_ECN_CE)
return 0;
if (inner_ecn_key != INET_ECN_CE) {
/* Can't happen in software, as packet ecn will be changed to CE after decap */
NL_SET_ERR_MSG_MOD(extack,
"Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
netdev_warn(priv->netdev,
"Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
return -EOPNOTSUPP;
}
/* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE in anycase,
* drop match on inner ecn
*/
*match_inner_ecn = false;
return 0;
}
static int parse_tunnel_attr(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_spec *spec,
......@@ -2144,6 +2249,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct flow_dissector *dissector = rule->match.dissector;
enum fs_flow_table_type fs_type;
bool match_inner_ecn = true;
u16 addr_type = 0;
u8 ip_proto = 0;
u8 *match_level;
......@@ -2197,6 +2303,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
headers_c = get_match_inner_headers_criteria(spec);
headers_v = get_match_inner_headers_value(spec);
}
err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
if (err)
return err;
}
err = mlx5e_flower_parse_meta(filter_dev, f);
......@@ -2420,10 +2530,12 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
struct flow_match_ip match;
flow_rule_match_ip(rule, &match);
if (match_inner_ecn) {
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
match.mask->tos & 0x3);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
match.key->tos & 0x3);
}
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
match.mask->tos >> 2);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment