Commit 1909387f authored by Edward Cree, committed by David S. Miller

sfc: offload conntrack flow entries (match only) from CT zones

No handling yet for FLOW_ACTION_MANGLE (NAT or NAPT) actions.
Reviewed-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 94aa05bd
......@@ -18,12 +18,10 @@
#define IS_ALL_ONES(v) (!(typeof (v))~(v))
#ifdef CONFIG_IPV6
/* Return true iff every byte of @addr is 0xff, i.e. the mask is an
 * exact-match on a full IPv6 address.
 */
static inline bool efx_ipv6_addr_all_ones(struct in6_addr *addr)
{
	const unsigned char *byte = (const unsigned char *)addr;
	size_t left = sizeof(*addr);

	while (left--)
		if (*byte++ != 0xff)
			return false;
	return true;
}
#endif
struct efx_tc_encap_action; /* see tc_encap_actions.h */
......@@ -197,6 +195,7 @@ struct efx_tc_table_ct { /* TABLE_ID_CONNTRACK_TABLE */
* @encap_match_ht: Hashtable of TC encap matches
* @match_action_ht: Hashtable of TC match-action rules
* @ct_zone_ht: Hashtable of TC conntrack flowtable bindings
* @ct_ht: Hashtable of TC conntrack flow entries
* @neigh_ht: Hashtable of neighbour watches (&struct efx_neigh_binder)
* @meta_ct: MAE table layout for conntrack table
* @reps_mport_id: MAE port allocated for representor RX
......@@ -230,6 +229,7 @@ struct efx_tc_state {
struct rhashtable encap_match_ht;
struct rhashtable match_action_ht;
struct rhashtable ct_zone_ht;
struct rhashtable ct_ht;
struct rhashtable neigh_ht;
struct efx_tc_table_ct meta_ct;
u32 reps_mport_id, reps_mport_vport_id;
......
......@@ -21,6 +21,12 @@ static const struct rhashtable_params efx_tc_ct_zone_ht_params = {
.head_offset = offsetof(struct efx_tc_ct_zone, linkage),
};
/* Hashtable of offloaded conntrack entries.  key_offset 0 with
 * key_len = offsetof(linkage) makes the key every field preceding
 * @linkage in struct efx_tc_ct_entry — presumably just the TC cookie,
 * since lookups pass &tc->cookie (TODO confirm only the cookie lies
 * before @linkage in the struct layout).
 */
static const struct rhashtable_params efx_tc_ct_ht_params = {
	.key_len	= offsetof(struct efx_tc_ct_entry, linkage),
	.key_offset	= 0,
	.head_offset	= offsetof(struct efx_tc_ct_entry, linkage),
};
static void efx_tc_ct_zone_free(void *ptr, void *arg)
{
struct efx_tc_ct_zone *zone = ptr;
......@@ -34,24 +40,420 @@ static void efx_tc_ct_zone_free(void *ptr, void *arg)
kfree(zone);
}
/* rhashtable free callback: called for any conntrack entry still in
 * ct_ht at teardown, which indicates a driver-state leak.
 */
static void efx_tc_ct_free(void *ptr, void *arg)
{
	struct efx_nic *efx = arg;
	struct efx_tc_ct_entry *entry = ptr;

	netif_err(efx, drv, efx->net_dev,
		  "tc ct_entry %lx still present at teardown\n",
		  entry->cookie);

	/* The counter can still be released, but the entry cannot be
	 * removed from hardware: the conntrack table meta has already
	 * been torn down at this point.
	 */
	efx_tc_flower_release_counter(efx, entry->cnt);
	kfree(entry);
}
/* Set up the hashtables used for conntrack offload state: the zone
 * bindings table (ct_zone_ht) and the per-connection entry table (ct_ht).
 *
 * Returns 0 on success or a negative error code; on failure nothing is
 * left initialised.
 *
 * Fix: the source had both "return rc;" (as the if-body) and an
 * unconditional "goto fail_ct_zone_ht;" after the first rhashtable_init,
 * so the goto always fired, ct_ht was never initialised, and the
 * function returned 0 claiming success.  The goto must be the
 * conditional error path.
 */
int efx_tc_init_conntrack(struct efx_nic *efx)
{
	int rc;

	rc = rhashtable_init(&efx->tc->ct_zone_ht, &efx_tc_ct_zone_ht_params);
	if (rc < 0)
		goto fail_ct_zone_ht;
	rc = rhashtable_init(&efx->tc->ct_ht, &efx_tc_ct_ht_params);
	if (rc < 0)
		goto fail_ct_ht;
	return 0;
fail_ct_ht:
	rhashtable_destroy(&efx->tc->ct_zone_ht);
fail_ct_zone_ht:
	return rc;
}
/* Tear down the conntrack hashtables.  Entries still present are
 * reported by the free callbacks (efx_tc_ct_zone_free / efx_tc_ct_free),
 * which release what they can and free the memory — hardware state for
 * leftover entries cannot be removed at this point.
 */
void efx_tc_fini_conntrack(struct efx_nic *efx)
{
	rhashtable_free_and_destroy(&efx->tc->ct_zone_ht, efx_tc_ct_zone_free, NULL);
	rhashtable_free_and_destroy(&efx->tc->ct_ht, efx_tc_ct_free, efx);
}
/* Convert a TCP_FLAG_* constant (__be32 with the flag bits in the high
 * 16 bits) into the __be16 form used in struct flow_match_tcp.
 */
#define EFX_NF_TCP_FLAG(flg) cpu_to_be16(be32_to_cpu(TCP_FLAG_##flg) >> 16)

/* Parse the match part of a conntrack flow entry into @conn.
 *
 * The offloaded conntrack table appears to be exact-match only: every
 * key that is matched must carry an all-ones mask — ethertype, IP
 * protocol (TCP or UDP only), source/destination address and L4 ports.
 * Any other key, partial mask, or TCP flag match we cannot honour is
 * rejected with -EOPNOTSUPP.
 *
 * Returns 0 on success or a negative error code.
 */
static int efx_tc_ct_parse_match(struct efx_nic *efx, struct flow_rule *fr,
				 struct efx_tc_ct_entry *conn)
{
	struct flow_dissector *dissector = fr->match.dissector;
	unsigned char ipv = 0;	/* address family: 4, 6, or 0 = not determined */
	bool tcp = false;

	/* Determine the address family from the CONTROL key's addr_type */
	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control fm;

		flow_rule_match_control(fr, &fm);
		if (IS_ALL_ONES(fm.mask->addr_type))
			switch (fm.key->addr_type) {
			case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
				ipv = 4;
				break;
			case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
				ipv = 6;
				break;
			default:
				break;
			}
	}

	if (!ipv) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Conntrack missing ipv specification\n");
		return -EOPNOTSUPP;
	}

	/* Reject rules using any dissector key outside the supported set */
	if (dissector->used_keys &
	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
	      BIT_ULL(FLOW_DISSECTOR_KEY_META))) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Unsupported conntrack keys %#llx\n",
			  dissector->used_keys);
		return -EOPNOTSUPP;
	}

	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_match_basic fm;

		flow_rule_match_basic(fr, &fm);
		if (!IS_ALL_ONES(fm.mask->n_proto)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack eth_proto is not exact-match; mask %04x\n",
				  ntohs(fm.mask->n_proto));
			return -EOPNOTSUPP;
		}
		conn->eth_proto = fm.key->n_proto;
		/* eth_proto must agree with the addr family from CONTROL */
		if (conn->eth_proto != (ipv == 4 ? htons(ETH_P_IP)
						 : htons(ETH_P_IPV6))) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack eth_proto is not IPv%u, is %04x\n",
				  ipv, ntohs(conn->eth_proto));
			return -EOPNOTSUPP;
		}
		if (!IS_ALL_ONES(fm.mask->ip_proto)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ip_proto is not exact-match; mask %02x\n",
				  fm.mask->ip_proto);
			return -EOPNOTSUPP;
		}
		conn->ip_proto = fm.key->ip_proto;
		switch (conn->ip_proto) {
		case IPPROTO_TCP:
			tcp = true;
			break;
		case IPPROTO_UDP:
			break;
		default:
			/* only TCP and UDP connections can be offloaded */
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ip_proto not TCP or UDP, is %02x\n",
				  conn->ip_proto);
			return -EOPNOTSUPP;
		}
	} else {
		netif_dbg(efx, drv, efx->net_dev,
			  "Conntrack missing eth_proto, ip_proto\n");
		return -EOPNOTSUPP;
	}

	/* Addresses: exact-match src and dst for the detected family */
	if (ipv == 4 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
		struct flow_match_ipv4_addrs fm;

		flow_rule_match_ipv4_addrs(fr, &fm);
		if (!IS_ALL_ONES(fm.mask->src)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ipv4.src is not exact-match; mask %08x\n",
				  ntohl(fm.mask->src));
			return -EOPNOTSUPP;
		}
		conn->src_ip = fm.key->src;
		if (!IS_ALL_ONES(fm.mask->dst)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ipv4.dst is not exact-match; mask %08x\n",
				  ntohl(fm.mask->dst));
			return -EOPNOTSUPP;
		}
		conn->dst_ip = fm.key->dst;
	} else if (ipv == 6 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
		struct flow_match_ipv6_addrs fm;

		flow_rule_match_ipv6_addrs(fr, &fm);
		if (!efx_ipv6_addr_all_ones(&fm.mask->src)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ipv6.src is not exact-match; mask %pI6\n",
				  &fm.mask->src);
			return -EOPNOTSUPP;
		}
		conn->src_ip6 = fm.key->src;
		if (!efx_ipv6_addr_all_ones(&fm.mask->dst)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ipv6.dst is not exact-match; mask %pI6\n",
				  &fm.mask->dst);
			return -EOPNOTSUPP;
		}
		conn->dst_ip6 = fm.key->dst;
	} else {
		netif_dbg(efx, drv, efx->net_dev,
			  "Conntrack missing IPv%u addrs\n", ipv);
		return -EOPNOTSUPP;
	}

	/* L4 ports: exact-match src and dst are mandatory */
	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports fm;

		flow_rule_match_ports(fr, &fm);
		if (!IS_ALL_ONES(fm.mask->src)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ports.src is not exact-match; mask %04x\n",
				  ntohs(fm.mask->src));
			return -EOPNOTSUPP;
		}
		conn->l4_sport = fm.key->src;
		if (!IS_ALL_ONES(fm.mask->dst)) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack ports.dst is not exact-match; mask %04x\n",
				  ntohs(fm.mask->dst));
			return -EOPNOTSUPP;
		}
		conn->l4_dport = fm.key->dst;
	} else {
		netif_dbg(efx, drv, efx->net_dev, "Conntrack missing L4 ports\n");
		return -EOPNOTSUPP;
	}

	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_TCP)) {
		__be16 tcp_interesting_flags;
		struct flow_match_tcp fm;

		/* TCP key only makes sense when ip_proto was TCP */
		if (!tcp) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Conntrack matching on TCP keys but ipproto is not tcp\n");
			return -EOPNOTSUPP;
		}
		flow_rule_match_tcp(fr, &fm);
		tcp_interesting_flags = EFX_NF_TCP_FLAG(SYN) |
					EFX_NF_TCP_FLAG(RST) |
					EFX_NF_TCP_FLAG(FIN);
		/* If any of the tcp_interesting_flags is set, we always
		 * inhibit CT lookup in LHS (so SW can update CT table).
		 */
		if (fm.key->flags & tcp_interesting_flags) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Unsupported conntrack tcp.flags %04x/%04x\n",
				  ntohs(fm.key->flags), ntohs(fm.mask->flags));
			return -EOPNOTSUPP;
		}
		/* Other TCP flags cannot be filtered at CT */
		if (fm.mask->flags & ~tcp_interesting_flags) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Unsupported conntrack tcp.flags %04x/%04x\n",
				  ntohs(fm.key->flags), ntohs(fm.mask->flags));
			return -EOPNOTSUPP;
		}
	}
	return 0;
}
/* Offload a new conntrack flow entry for @ct_zone.
 *
 * Allocates the SW entry, claims the cookie in ct_ht, parses match and
 * actions, allocates a HW counter, inserts the entry into the MAE
 * conntrack table, and finally links it onto the zone's entry list.
 * On any failure, everything acquired so far is rolled back.
 *
 * Returns 0 on success or a negative error code (-EEXIST if the cookie
 * is already offloaded, -EOPNOTSUPP for unsupported matches/actions).
 */
static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone,
			     struct flow_cls_offload *tc)
{
	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
	struct efx_tc_ct_entry *conn, *old;
	struct efx_nic *efx = ct_zone->efx;
	const struct flow_action_entry *fa;
	struct efx_tc_counter *cnt;
	int rc, i;

	if (WARN_ON(!efx->tc))
		return -ENETDOWN;
	if (WARN_ON(!efx->tc->up))
		return -ENETDOWN;

	conn = kzalloc(sizeof(*conn), GFP_USER);
	if (!conn)
		return -ENOMEM;
	conn->cookie = tc->cookie;
	/* Insert into the hashtable first: this atomically detects a
	 * duplicate cookie (old != NULL) and claims the slot.
	 */
	old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_ht,
						&conn->linkage,
						efx_tc_ct_ht_params);
	if (old) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Already offloaded conntrack (cookie %lx)\n", tc->cookie);
		rc = -EEXIST;
		goto release;
	}

	/* Parse match */
	conn->zone = ct_zone;
	rc = efx_tc_ct_parse_match(efx, fr, conn);
	if (rc)
		goto release;

	/* Parse actions */
	flow_action_for_each(i, fa, &fr->action) {
		switch (fa->id) {
		case FLOW_ACTION_CT_METADATA:
			conn->mark = fa->ct_metadata.mark;
			/* non-zero CT labels cannot be offloaded */
			if (memchr_inv(fa->ct_metadata.labels, 0, sizeof(fa->ct_metadata.labels))) {
				netif_dbg(efx, drv, efx->net_dev,
					  "Setting CT label not supported\n");
				rc = -EOPNOTSUPP;
				goto release;
			}
			break;
		default:
			netif_dbg(efx, drv, efx->net_dev,
				  "Unhandled action %u for conntrack\n", fa->id);
			rc = -EOPNOTSUPP;
			goto release;
		}
	}

	/* fill in defaults for unmangled values */
	conn->nat_ip = conn->dnat ? conn->dst_ip : conn->src_ip;
	conn->l4_natport = conn->dnat ? conn->l4_dport : conn->l4_sport;

	cnt = efx_tc_flower_allocate_counter(efx, EFX_TC_COUNTER_TYPE_CT);
	if (IS_ERR(cnt)) {
		rc = PTR_ERR(cnt);
		goto release;
	}
	conn->cnt = cnt;

	rc = efx_mae_insert_ct(efx, conn);
	if (rc) {
		netif_dbg(efx, drv, efx->net_dev,
			  "Failed to insert conntrack, %d\n", rc);
		goto release;
	}
	/* Success: track the entry on the zone's list for bulk teardown */
	mutex_lock(&ct_zone->mutex);
	list_add_tail(&conn->list, &ct_zone->cts);
	mutex_unlock(&ct_zone->mutex);
	return 0;
release:
	/* conn->cnt is only non-NULL once counter allocation succeeded */
	if (conn->cnt)
		efx_tc_flower_release_counter(efx, conn->cnt);
	/* only un-insert if we (not a pre-existing entry) own the slot */
	if (!old)
		rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
				       efx_tc_ct_ht_params);
	kfree(conn);
	return rc;
}
/* Remove a conntrack entry from hardware and from the ct_ht hashtable.
 *
 * Does NOT free @conn or its counter: stats readers may still hold an
 * RCU reference from a hashtable lookup.
 * Caller must follow with efx_tc_ct_remove_finish() after RCU grace period!
 */
static void efx_tc_ct_remove(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
{
	int rc;

	/* Remove it from HW */
	rc = efx_mae_remove_ct(efx, conn);
	/* Delete it from SW */
	rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
			       efx_tc_ct_ht_params);
	if (rc) {
		/* HW removal failed; nothing more we can do but report it.
		 * The SW entry is gone either way.
		 */
		netif_err(efx, drv, efx->net_dev,
			  "Failed to remove conntrack %lx from hw, rc %d\n",
			  conn->cookie, rc);
	} else {
		netif_dbg(efx, drv, efx->net_dev, "Removed conntrack %lx\n",
			  conn->cookie);
	}
}
/* Second half of conntrack entry removal: free the counter and the
 * entry itself.  Must only run after an RCU grace period following
 * efx_tc_ct_remove(), so no concurrent efx_tc_ct_stats() can still be
 * using the entry.
 */
static void efx_tc_ct_remove_finish(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
{
	/* Remove related CT counter. This is delayed after the conn object we
	 * are working with has been successfully removed. This protects the
	 * counter from being used-after-free inside efx_tc_ct_stats.
	 */
	efx_tc_flower_release_counter(efx, conn->cnt);
	kfree(conn);
}
/* Handle FLOW_CLS_DESTROY for a conntrack entry: look it up by cookie,
 * unlink it from the zone's list, remove it from HW/SW, then free it
 * after an RCU grace period (see efx_tc_ct_remove_finish()).
 *
 * Returns 0 on success, -ENOENT if the cookie was never offloaded.
 */
static int efx_tc_ct_destroy(struct efx_tc_ct_zone *ct_zone,
			     struct flow_cls_offload *tc)
{
	struct efx_nic *efx = ct_zone->efx;
	struct efx_tc_ct_entry *conn;

	conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
				      efx_tc_ct_ht_params);
	if (!conn) {
		netif_warn(efx, drv, efx->net_dev,
			   "Conntrack %lx not found to remove\n", tc->cookie);
		return -ENOENT;
	}

	/* zone->mutex protects the zone's cts list against concurrent
	 * modification (e.g. zone unregister tearing down all entries)
	 */
	mutex_lock(&ct_zone->mutex);
	list_del(&conn->list);
	efx_tc_ct_remove(efx, conn);
	mutex_unlock(&ct_zone->mutex);
	/* wait out any stats readers that found conn before removal */
	synchronize_rcu();
	efx_tc_ct_remove_finish(efx, conn);
	return 0;
}
/* Handle FLOW_CLS_STATS for a conntrack entry: report the last time the
 * HW counter was touched, so conntrack can keep the flow alive.
 * Byte/packet counts are reported as zero — only the timestamp matters
 * here.  RCU read lock guards against the entry being freed by a
 * concurrent destroy (see efx_tc_ct_remove_finish()).
 *
 * Returns 0 on success, -ENOENT if the cookie was never offloaded.
 */
static int efx_tc_ct_stats(struct efx_tc_ct_zone *ct_zone,
			   struct flow_cls_offload *tc)
{
	struct efx_nic *efx = ct_zone->efx;
	struct efx_tc_ct_entry *conn;
	struct efx_tc_counter *cnt;

	rcu_read_lock();
	conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
				      efx_tc_ct_ht_params);
	if (!conn) {
		netif_warn(efx, drv, efx->net_dev,
			   "Conntrack %lx not found for stats\n", tc->cookie);
		rcu_read_unlock();
		return -ENOENT;
	}

	cnt = conn->cnt;
	/* cnt->lock protects cnt->touched against concurrent counter updates */
	spin_lock_bh(&cnt->lock);
	/* Report only last use */
	flow_stats_update(&tc->stats, 0, 0, 0, cnt->touched,
			  FLOW_ACTION_HW_STATS_DELAYED);
	spin_unlock_bh(&cnt->lock);
	rcu_read_unlock();

	return 0;
}
/* Flow-block callback registered (via nf_flow_table_offload_add_cb) on a
 * conntrack zone's nf_flowtable.  Dispatches flower classifier commands
 * for conntrack flow entries to the add/remove/stats handlers, with the
 * owning zone recovered from @cb_priv.
 *
 * Fix: removed the superfluous semicolon after the switch's closing
 * brace ("};"), a stray null statement flagged by checkpatch.
 */
static int efx_tc_flow_block(enum tc_setup_type type, void *type_data,
			     void *cb_priv)
{
	struct flow_cls_offload *tcb = type_data;
	struct efx_tc_ct_zone *ct_zone = cb_priv;

	if (type != TC_SETUP_CLSFLOWER)
		return -EOPNOTSUPP;

	switch (tcb->command) {
	case FLOW_CLS_REPLACE:
		return efx_tc_ct_replace(ct_zone, tcb);
	case FLOW_CLS_DESTROY:
		return efx_tc_ct_destroy(ct_zone, tcb);
	case FLOW_CLS_STATS:
		return efx_tc_ct_stats(ct_zone, tcb);
	default:
		break;
	}
	return -EOPNOTSUPP;
}
......@@ -81,6 +483,8 @@ struct efx_tc_ct_zone *efx_tc_ct_register_zone(struct efx_nic *efx, u16 zone,
}
ct_zone->nf_ft = ct_ft;
ct_zone->efx = efx;
INIT_LIST_HEAD(&ct_zone->cts);
mutex_init(&ct_zone->mutex);
rc = nf_flow_table_offload_add_cb(ct_ft, efx_tc_flow_block, ct_zone);
netif_dbg(efx, drv, efx->net_dev, "Adding new ct_zone for %u, rc %d\n",
zone, rc);
......@@ -98,11 +502,22 @@ struct efx_tc_ct_zone *efx_tc_ct_register_zone(struct efx_nic *efx, u16 zone,
void efx_tc_ct_unregister_zone(struct efx_nic *efx,
struct efx_tc_ct_zone *ct_zone)
{
struct efx_tc_ct_entry *conn, *next;
if (!refcount_dec_and_test(&ct_zone->ref))
return; /* still in use */
nf_flow_table_offload_del_cb(ct_zone->nf_ft, efx_tc_flow_block, ct_zone);
rhashtable_remove_fast(&efx->tc->ct_zone_ht, &ct_zone->linkage,
efx_tc_ct_zone_ht_params);
mutex_lock(&ct_zone->mutex);
list_for_each_entry(conn, &ct_zone->cts, list)
efx_tc_ct_remove(efx, conn);
synchronize_rcu();
/* need to use _safe because efx_tc_ct_remove_finish() frees conn */
list_for_each_entry_safe(conn, next, &ct_zone->cts, list)
efx_tc_ct_remove_finish(efx, conn);
mutex_unlock(&ct_zone->mutex);
mutex_destroy(&ct_zone->mutex);
netif_dbg(efx, drv, efx->net_dev, "Removed ct_zone for %u\n",
ct_zone->zone);
kfree(ct_zone);
......
......@@ -22,6 +22,8 @@ struct efx_tc_ct_zone {
refcount_t ref;
struct nf_flowtable *nf_ft;
struct efx_nic *efx;
struct mutex mutex; /* protects cts list */
struct list_head cts; /* list of efx_tc_ct_entry in this zone */
};
/* create/teardown hashtables */
......@@ -45,6 +47,7 @@ struct efx_tc_ct_entry {
struct efx_tc_ct_zone *zone;
u32 mark;
struct efx_tc_counter *cnt;
struct list_head list; /* entry on zone->cts */
};
#endif /* CONFIG_SFC_SRIOV */
......
......@@ -129,8 +129,8 @@ static void efx_tc_counter_work(struct work_struct *work)
/* Counter allocation */
static struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx,
int type)
struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx,
int type)
{
struct efx_tc_counter *cnt;
int rc, rc2;
......@@ -169,8 +169,8 @@ static struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx
return ERR_PTR(rc > 0 ? -EIO : rc);
}
static void efx_tc_flower_release_counter(struct efx_nic *efx,
struct efx_tc_counter *cnt)
void efx_tc_flower_release_counter(struct efx_nic *efx,
struct efx_tc_counter *cnt)
{
int rc;
......
......@@ -49,6 +49,10 @@ int efx_tc_init_counters(struct efx_nic *efx);
void efx_tc_destroy_counters(struct efx_nic *efx);
void efx_tc_fini_counters(struct efx_nic *efx);
struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx,
int type);
void efx_tc_flower_release_counter(struct efx_nic *efx,
struct efx_tc_counter *cnt);
struct efx_tc_counter_index *efx_tc_flower_get_counter_index(
struct efx_nic *efx, unsigned long cookie,
enum efx_tc_counter_type type);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment