Commit ba79e9a7 authored by David S. Miller

Merge branch 'sfc-decap'

Edward Cree says:

====================
sfc: more flexible encap matches on TC decap rules

This series extends the TC offload support on EF100 to support optionally
 matching on the IP ToS and UDP source port of the outer header in rules
 performing tunnel decapsulation.  Both of these fields allow masked
 matches if the underlying hardware supports it (current EF100 hardware
 supports masking on ToS, but only exact-match on source port).
Given that the source port is typically populated from a hash of inner
 header entropy, it's not clear whether filtering on it is useful, but
 since we can support it we may as well expose the capability.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents d3616dc7 b6583d5e
...@@ -482,12 +482,14 @@ int efx_mae_match_check_caps(struct efx_nic *efx, ...@@ -482,12 +482,14 @@ int efx_mae_match_check_caps(struct efx_nic *efx,
rc; \ rc; \
}) })
/* Checks that the fields needed for encap-rule matches are supported by the /* Checks that the fields needed for encap-rule matches are supported by the
* MAE. All the fields are exact-match. * MAE. All the fields are exact-match, except possibly ENC_IP_TOS.
*/ */
int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6, int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6,
u8 ip_tos_mask, __be16 udp_sport_mask,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
u8 *supported_fields = efx->tc->caps->outer_rule_fields; u8 *supported_fields = efx->tc->caps->outer_rule_fields;
enum mask_type typ;
int rc; int rc;
if (CHECK(ENC_ETHER_TYPE)) if (CHECK(ENC_ETHER_TYPE))
...@@ -504,6 +506,22 @@ int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6, ...@@ -504,6 +506,22 @@ int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6,
if (CHECK(ENC_L4_DPORT) || if (CHECK(ENC_L4_DPORT) ||
CHECK(ENC_IP_PROTO)) CHECK(ENC_IP_PROTO))
return rc; return rc;
typ = classify_mask((const u8 *)&udp_sport_mask, sizeof(udp_sport_mask));
rc = efx_mae_match_check_cap_typ(supported_fields[MAE_FIELD_ENC_L4_SPORT],
typ);
if (rc) {
NL_SET_ERR_MSG_FMT_MOD(extack, "No support for %s mask in field %s",
mask_type_name(typ), "enc_src_port");
return rc;
}
typ = classify_mask(&ip_tos_mask, sizeof(ip_tos_mask));
rc = efx_mae_match_check_cap_typ(supported_fields[MAE_FIELD_ENC_IP_TOS],
typ);
if (rc) {
NL_SET_ERR_MSG_FMT_MOD(extack, "No support for %s mask in field %s",
mask_type_name(typ), "enc_ip_tos");
return rc;
}
return 0; return 0;
} }
#undef CHECK #undef CHECK
...@@ -1001,8 +1019,16 @@ int efx_mae_register_encap_match(struct efx_nic *efx, ...@@ -1001,8 +1019,16 @@ int efx_mae_register_encap_match(struct efx_nic *efx,
encap->udp_dport); encap->udp_dport);
MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_DPORT_BE_MASK, MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_DPORT_BE_MASK,
~(__be16)0); ~(__be16)0);
MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_DPORT_BE,
encap->udp_sport);
MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_L4_DPORT_BE_MASK,
encap->udp_sport_mask);
MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_PROTO, IPPROTO_UDP); MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_PROTO, IPPROTO_UDP);
MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_PROTO_MASK, ~0); MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_PROTO_MASK, ~0);
MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_TOS,
encap->ip_tos);
MCDI_STRUCT_SET_BYTE(match_crit, MAE_ENC_FIELD_PAIRS_ENC_IP_TOS_MASK,
encap->ip_tos_mask);
rc = efx_mcdi_rpc(efx, MC_CMD_MAE_OUTER_RULE_INSERT, inbuf, rc = efx_mcdi_rpc(efx, MC_CMD_MAE_OUTER_RULE_INSERT, inbuf,
sizeof(inbuf), outbuf, sizeof(outbuf), &outlen); sizeof(inbuf), outbuf, sizeof(outbuf), &outlen);
if (rc) if (rc)
......
...@@ -82,6 +82,7 @@ int efx_mae_match_check_caps(struct efx_nic *efx, ...@@ -82,6 +82,7 @@ int efx_mae_match_check_caps(struct efx_nic *efx,
const struct efx_tc_match_fields *mask, const struct efx_tc_match_fields *mask,
struct netlink_ext_ack *extack); struct netlink_ext_ack *extack);
int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6, int efx_mae_check_encap_match_caps(struct efx_nic *efx, bool ipv6,
u8 ip_tos_mask, __be16 udp_sport_mask,
struct netlink_ext_ack *extack); struct netlink_ext_ack *extack);
int efx_mae_check_encap_type_supported(struct efx_nic *efx, int efx_mae_check_encap_type_supported(struct efx_nic *efx,
enum efx_encap_type typ); enum efx_encap_type typ);
......
...@@ -132,23 +132,6 @@ static void efx_tc_free_action_set_list(struct efx_nic *efx, ...@@ -132,23 +132,6 @@ static void efx_tc_free_action_set_list(struct efx_nic *efx,
/* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */ /* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */
} }
/* Dispose of a TC flow rule that is still present at teardown.
 *
 * @ptr: the &struct efx_tc_flow_rule to dispose of (passed as void *)
 * @arg: the owning &struct efx_nic (passed as void *)
 *
 * Logs an error naming the rule's cookie, removes the rule identified by
 * its fw_id via efx_mae_delete_rule(), frees its action-set list and
 * finally frees the rule itself.
 */
static void efx_tc_flow_free(void *ptr, void *arg)
{
	struct efx_nic *efx = arg;
	struct efx_tc_flow_rule *stale = ptr;

	netif_err(efx, drv, efx->net_dev,
		  "tc rule %lx still present at teardown, removing\n",
		  stale->cookie);

	efx_mae_delete_rule(efx, stale->fw_id);

	/* Subsidiary table entries go first; the action-set list is part of
	 * the rule object, so it must be torn down before the kfree() below.
	 */
	efx_tc_free_action_set_list(efx, &stale->acts, true);
	kfree(stale);
}
/* Boilerplate for the simple 'copy a field' cases */ /* Boilerplate for the simple 'copy a field' cases */
#define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field) \ #define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field) \
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) { \ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) { \
...@@ -219,6 +202,7 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx, ...@@ -219,6 +202,7 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx,
BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
BIT(FLOW_DISSECTOR_KEY_TCP) | BIT(FLOW_DISSECTOR_KEY_TCP) |
...@@ -363,20 +347,48 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx, ...@@ -363,20 +347,48 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx,
return 0; return 0;
} }
/* Drop one reference on an encap match, destroying it on the last put.
 *
 * @efx: NIC owning the encap match hashtable
 * @encap: encap match whose reference is being released
 *
 * When the refcount reaches zero: a direct (EFX_TC_EM_DIRECT) entry is
 * unregistered via efx_mae_unregister_encap_match(); the entry is removed
 * from the software hashtable; the reference it held on its pseudo entry
 * (if any) is released in turn; and the entry is freed.
 */
static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
					      struct efx_tc_encap_match *encap)
{
	struct efx_tc_encap_match *shadow;

	if (!refcount_dec_and_test(&encap->ref))
		return; /* other users remain */

	if (encap->type == EFX_TC_EM_DIRECT) {
		int err = efx_mae_unregister_encap_match(efx, encap);

		/* Nothing useful can be done on failure: report it, then
		 * continue so that the SW tables are cleaned up regardless.
		 */
		if (err) {
			netif_err(efx, drv, efx->net_dev,
				  "Failed to release encap match %#x, rc %d\n",
				  encap->fw_id, err);
		}
	}

	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
			       efx_tc_encap_match_ht_params);

	/* Release the pseudo entry we referenced, before freeing ourselves */
	shadow = encap->pseudo;
	if (shadow)
		efx_tc_flower_release_encap_match(efx, shadow);

	kfree(encap);
}
static int efx_tc_flower_record_encap_match(struct efx_nic *efx, static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
struct efx_tc_match *match, struct efx_tc_match *match,
enum efx_encap_type type, enum efx_encap_type type,
enum efx_tc_em_pseudo_type em_type,
u8 child_ip_tos_mask,
__be16 child_udp_sport_mask,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
struct efx_tc_encap_match *encap, *old; struct efx_tc_encap_match *encap, *old, *pseudo = NULL;
bool ipv6 = false; bool ipv6 = false;
int rc; int rc;
/* We require that the socket-defining fields (IP addrs and UDP dest /* We require that the socket-defining fields (IP addrs and UDP dest
* port) are present and exact-match. Other fields are currently not * port) are present and exact-match. Other fields may only be used
* allowed. This meets what OVS will ask for, and means that we don't * if the field-set (and any masks) are the same for all encap
* need to handle difficult checks for overlapping matches as could * matches on the same <sip,dip,dport> tuple; this is enforced by
* come up if we allowed masks or varying sets of match fields. * pseudo encap matches.
*/ */
if (match->mask.enc_dst_ip | match->mask.enc_src_ip) { if (match->mask.enc_dst_ip | match->mask.enc_src_ip) {
if (!IS_ALL_ONES(match->mask.enc_dst_ip)) { if (!IS_ALL_ONES(match->mask.enc_dst_ip)) {
...@@ -414,29 +426,42 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, ...@@ -414,29 +426,42 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port"); NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port");
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
if (match->mask.enc_sport) { if (match->mask.enc_sport || match->mask.enc_ip_tos) {
NL_SET_ERR_MSG_MOD(extack, "Egress encap match on src UDP port not supported"); struct efx_tc_match pmatch = *match;
return -EOPNOTSUPP;
} if (em_type == EFX_TC_EM_PSEUDO_MASK) { /* can't happen */
if (match->mask.enc_ip_tos) { NL_SET_ERR_MSG_MOD(extack, "Bad recursion in egress encap match handler");
NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP ToS not supported"); return -EOPNOTSUPP;
return -EOPNOTSUPP; }
pmatch.value.enc_ip_tos = 0;
pmatch.mask.enc_ip_tos = 0;
pmatch.value.enc_sport = 0;
pmatch.mask.enc_sport = 0;
rc = efx_tc_flower_record_encap_match(efx, &pmatch, type,
EFX_TC_EM_PSEUDO_MASK,
match->mask.enc_ip_tos,
match->mask.enc_sport,
extack);
if (rc)
return rc;
pseudo = pmatch.encap;
} }
if (match->mask.enc_ip_ttl) { if (match->mask.enc_ip_ttl) {
NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported"); NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported");
return -EOPNOTSUPP; rc = -EOPNOTSUPP;
goto fail_pseudo;
} }
rc = efx_mae_check_encap_match_caps(efx, ipv6, extack); rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos,
if (rc) { match->mask.enc_sport, extack);
NL_SET_ERR_MSG_FMT_MOD(extack, "MAE hw reports no support for IPv%d encap matches", if (rc)
ipv6 ? 6 : 4); goto fail_pseudo;
return -EOPNOTSUPP;
}
encap = kzalloc(sizeof(*encap), GFP_USER); encap = kzalloc(sizeof(*encap), GFP_USER);
if (!encap) if (!encap) {
return -ENOMEM; rc = -ENOMEM;
goto fail_pseudo;
}
encap->src_ip = match->value.enc_src_ip; encap->src_ip = match->value.enc_src_ip;
encap->dst_ip = match->value.enc_dst_ip; encap->dst_ip = match->value.enc_dst_ip;
#ifdef CONFIG_IPV6 #ifdef CONFIG_IPV6
...@@ -445,12 +470,66 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, ...@@ -445,12 +470,66 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
#endif #endif
encap->udp_dport = match->value.enc_dport; encap->udp_dport = match->value.enc_dport;
encap->tun_type = type; encap->tun_type = type;
encap->ip_tos = match->value.enc_ip_tos;
encap->ip_tos_mask = match->mask.enc_ip_tos;
encap->child_ip_tos_mask = child_ip_tos_mask;
encap->udp_sport = match->value.enc_sport;
encap->udp_sport_mask = match->mask.enc_sport;
encap->child_udp_sport_mask = child_udp_sport_mask;
encap->type = em_type;
encap->pseudo = pseudo;
old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht, old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht,
&encap->linkage, &encap->linkage,
efx_tc_encap_match_ht_params); efx_tc_encap_match_ht_params);
if (old) { if (old) {
/* don't need our new entry */ /* don't need our new entry */
kfree(encap); kfree(encap);
if (pseudo) /* don't need our new pseudo either */
efx_tc_flower_release_encap_match(efx, pseudo);
/* check old and new em_types are compatible */
switch (old->type) {
case EFX_TC_EM_DIRECT:
/* old EM is in hardware, so mustn't overlap with a
* pseudo, but may be shared with another direct EM
*/
if (em_type == EFX_TC_EM_DIRECT)
break;
NL_SET_ERR_MSG_MOD(extack, "Pseudo encap match conflicts with existing direct entry");
return -EEXIST;
case EFX_TC_EM_PSEUDO_MASK:
/* old EM is protecting a ToS- or src port-qualified
* filter, so may only be shared with another pseudo
* for the same ToS and src port masks.
*/
if (em_type != EFX_TC_EM_PSEUDO_MASK) {
NL_SET_ERR_MSG_FMT_MOD(extack,
"%s encap match conflicts with existing pseudo(MASK) entry",
encap->type ? "Pseudo" : "Direct");
return -EEXIST;
}
if (child_ip_tos_mask != old->child_ip_tos_mask) {
NL_SET_ERR_MSG_FMT_MOD(extack,
"Pseudo encap match for TOS mask %#04x conflicts with existing pseudo(MASK) entry for TOS mask %#04x",
child_ip_tos_mask,
old->child_ip_tos_mask);
return -EEXIST;
}
if (child_udp_sport_mask != old->child_udp_sport_mask) {
NL_SET_ERR_MSG_FMT_MOD(extack,
"Pseudo encap match for UDP src port mask %#x conflicts with existing pseudo(MASK) entry for mask %#x",
child_udp_sport_mask,
old->child_udp_sport_mask);
return -EEXIST;
}
break;
default: /* Unrecognised pseudo-type. Just say no */
NL_SET_ERR_MSG_FMT_MOD(extack,
"%s encap match conflicts with existing pseudo(%d) entry",
encap->type ? "Pseudo" : "Direct",
old->type);
return -EEXIST;
}
/* check old and new tun_types are compatible */
if (old->tun_type != type) { if (old->tun_type != type) {
NL_SET_ERR_MSG_FMT_MOD(extack, NL_SET_ERR_MSG_FMT_MOD(extack,
"Egress encap match with conflicting tun_type %u != %u", "Egress encap match with conflicting tun_type %u != %u",
...@@ -462,10 +541,12 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, ...@@ -462,10 +541,12 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
/* existing entry found */ /* existing entry found */
encap = old; encap = old;
} else { } else {
rc = efx_mae_register_encap_match(efx, encap); if (em_type == EFX_TC_EM_DIRECT) {
if (rc) { rc = efx_mae_register_encap_match(efx, encap);
NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW"); if (rc) {
goto fail; NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW");
goto fail;
}
} }
refcount_set(&encap->ref, 1); refcount_set(&encap->ref, 1);
} }
...@@ -475,30 +556,12 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx, ...@@ -475,30 +556,12 @@ static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage, rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
efx_tc_encap_match_ht_params); efx_tc_encap_match_ht_params);
kfree(encap); kfree(encap);
fail_pseudo:
if (pseudo)
efx_tc_flower_release_encap_match(efx, pseudo);
return rc; return rc;
} }
/* Drop one reference on an encap match, destroying it on the last put.
 *
 * @efx: NIC owning the encap match hashtable
 * @encap: encap match whose reference is being released
 *
 * When the refcount reaches zero the entry is unregistered via
 * efx_mae_unregister_encap_match(), removed from the software hashtable
 * and freed.
 */
static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
					      struct efx_tc_encap_match *encap)
{
	if (refcount_dec_and_test(&encap->ref)) {
		int err = efx_mae_unregister_encap_match(efx, encap);

		/* Nothing useful can be done on failure: report it, then
		 * continue so that the SW tables are cleaned up regardless.
		 */
		if (err) {
			netif_err(efx, drv, efx->net_dev,
				  "Failed to release encap match %#x, rc %d\n",
				  encap->fw_id, err);
		}

		rhashtable_remove_fast(&efx->tc->encap_match_ht,
				       &encap->linkage,
				       efx_tc_encap_match_ht_params);
		kfree(encap);
	}
	/* else: still in use by another holder */
}
static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule) static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
{ {
efx_mae_delete_rule(efx, rule->fw_id); efx_mae_delete_rule(efx, rule->fw_id);
...@@ -652,6 +715,7 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, ...@@ -652,6 +715,7 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
} }
rc = efx_tc_flower_record_encap_match(efx, &match, type, rc = efx_tc_flower_record_encap_match(efx, &match, type,
EFX_TC_EM_DIRECT, 0, 0,
extack); extack);
if (rc) if (rc)
goto release; goto release;
...@@ -1454,6 +1518,21 @@ static void efx_tc_encap_match_free(void *ptr, void *__unused) ...@@ -1454,6 +1518,21 @@ static void efx_tc_encap_match_free(void *ptr, void *__unused)
kfree(encap); kfree(encap);
} }
/* Dispose of a TC flow rule that is still present at teardown.
 *
 * @ptr: the &struct efx_tc_flow_rule to dispose of (passed as void *)
 * @arg: the owning &struct efx_nic (passed as void *)
 *
 * Logs an error naming the rule's cookie, then deletes the rule through
 * efx_tc_delete_rule() and frees the rule object.
 */
static void efx_tc_flow_free(void *ptr, void *arg)
{
	struct efx_nic *efx = arg;
	struct efx_tc_flow_rule *stale = ptr;

	netif_err(efx, drv, efx->net_dev,
		  "tc rule %lx still present at teardown, removing\n",
		  stale->cookie);

	/* The delete helper also releases entries in subsidiary tables */
	efx_tc_delete_rule(efx, stale);
	kfree(stale);
}
int efx_init_struct_tc(struct efx_nic *efx) int efx_init_struct_tc(struct efx_nic *efx)
{ {
int rc; int rc;
......
...@@ -74,14 +74,41 @@ static inline bool efx_tc_match_is_encap(const struct efx_tc_match_fields *mask) ...@@ -74,14 +74,41 @@ static inline bool efx_tc_match_is_encap(const struct efx_tc_match_fields *mask)
mask->enc_ip_ttl || mask->enc_sport || mask->enc_dport; mask->enc_ip_ttl || mask->enc_sport || mask->enc_dport;
} }
/**
 * enum efx_tc_em_pseudo_type - &struct efx_tc_encap_match pseudo type
 * @EFX_TC_EM_DIRECT: not a pseudo; a real entry in the hardware Outer Rule
 *	table.  The hardware index in &struct efx_tc_encap_match.fw_id is
 *	valid.
 * @EFX_TC_EM_PSEUDO_MASK: registered on behalf of an encap match that
 *	includes a match on an optional field (currently ip_tos and/or
 *	udp_sport), in order to prevent an overlapping encap match
 *	_without_ optional fields.  Further encap matches with different
 *	values for those fields may reference the same pseudo entry, but
 *	every mask must equal the first one's (recorded in the child_*
 *	fields of the pseudo entry).
 *
 * A "pseudo" encap match has no corresponding hardware entry; it only
 * records that a section of the match space is in use by another Outer
 * Rule.
 */
enum efx_tc_em_pseudo_type {
	EFX_TC_EM_DIRECT = 0,
	EFX_TC_EM_PSEUDO_MASK = 1,
};
struct efx_tc_encap_match { struct efx_tc_encap_match {
__be32 src_ip, dst_ip; __be32 src_ip, dst_ip;
struct in6_addr src_ip6, dst_ip6; struct in6_addr src_ip6, dst_ip6;
__be16 udp_dport; __be16 udp_dport;
__be16 udp_sport, udp_sport_mask;
u8 ip_tos, ip_tos_mask;
struct rhash_head linkage; struct rhash_head linkage;
enum efx_encap_type tun_type; enum efx_encap_type tun_type;
u8 child_ip_tos_mask;
__be16 child_udp_sport_mask;
refcount_t ref; refcount_t ref;
enum efx_tc_em_pseudo_type type;
u32 fw_id; /* index of this entry in firmware encap match table */ u32 fw_id; /* index of this entry in firmware encap match table */
struct efx_tc_encap_match *pseudo; /* Referenced pseudo EM if needed */
}; };
struct efx_tc_match { struct efx_tc_match {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment