Commit a2173131 authored by Oz Shlomo's avatar Oz Shlomo Committed by Saeed Mahameed

net/mlx5e: CT: manage the lifetime of the ct entry object

The ct entry object is accessed by the ct add, del, stats and restore
methods. In addition, it is referenced from several hash tables.

The lifetime of the ct entry object was not managed which triggered race
conditions as in the following kasan dump:
[ 3374.973945] ==================================================================
[ 3374.988552] BUG: KASAN: use-after-free in memcmp+0x4c/0x98
[ 3374.999590] Read of size 1 at addr ffff00036129ea55 by task ksoftirqd/1/15
[ 3375.016415] CPU: 1 PID: 15 Comm: ksoftirqd/1 Tainted: G           O      5.4.31+ #1
[ 3375.055301] Call trace:
[ 3375.060214]  dump_backtrace+0x0/0x238
[ 3375.067580]  show_stack+0x24/0x30
[ 3375.074244]  dump_stack+0xe0/0x118
[ 3375.081085]  print_address_description.isra.9+0x74/0x3d0
[ 3375.091771]  __kasan_report+0x198/0x1e8
[ 3375.099486]  kasan_report+0xc/0x18
[ 3375.106324]  __asan_load1+0x60/0x68
[ 3375.113338]  memcmp+0x4c/0x98
[ 3375.119409]  mlx5e_tc_ct_restore_flow+0x3a4/0x6f8 [mlx5_core]
[ 3375.131073]  mlx5e_rep_tc_update_skb+0x1d4/0x2f0 [mlx5_core]
[ 3375.142553]  mlx5e_handle_rx_cqe_rep+0x198/0x308 [mlx5_core]
[ 3375.154034]  mlx5e_poll_rx_cq+0x2a0/0x1060 [mlx5_core]
[ 3375.164459]  mlx5e_napi_poll+0x1d4/0xa78 [mlx5_core]
[ 3375.174453]  net_rx_action+0x28c/0x7a8
[ 3375.182004]  __do_softirq+0x1b4/0x5d0

Manage the lifetime of the ct entry object by using synchornization
mechanisms for concurrent access.

Fixes: ac991b48 ("net/mlx5e: CT: Offload established flows")
Signed-off-by: default avatarRoi Dayan <roid@nvidia.com>
Signed-off-by: default avatarOz Shlomo <ozsh@nvidia.com>
Signed-off-by: default avatarSaeed Mahameed <saeedm@nvidia.com>
parent edac23c2
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <net/flow_offload.h> #include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/refcount.h>
#include <linux/xarray.h> #include <linux/xarray.h>
#include "lib/fs_chains.h" #include "lib/fs_chains.h"
...@@ -51,11 +52,11 @@ struct mlx5_tc_ct_priv { ...@@ -51,11 +52,11 @@ struct mlx5_tc_ct_priv {
struct mlx5_flow_table *ct_nat; struct mlx5_flow_table *ct_nat;
struct mlx5_flow_table *post_ct; struct mlx5_flow_table *post_ct;
struct mutex control_lock; /* guards parallel adds/dels */ struct mutex control_lock; /* guards parallel adds/dels */
struct mutex shared_counter_lock;
struct mapping_ctx *zone_mapping; struct mapping_ctx *zone_mapping;
struct mapping_ctx *labels_mapping; struct mapping_ctx *labels_mapping;
enum mlx5_flow_namespace_type ns_type; enum mlx5_flow_namespace_type ns_type;
struct mlx5_fs_chains *chains; struct mlx5_fs_chains *chains;
spinlock_t ht_lock; /* protects ft entries */
}; };
struct mlx5_ct_flow { struct mlx5_ct_flow {
...@@ -124,6 +125,10 @@ struct mlx5_ct_counter { ...@@ -124,6 +125,10 @@ struct mlx5_ct_counter {
bool is_shared; bool is_shared;
}; };
enum {
MLX5_CT_ENTRY_FLAG_VALID,
};
struct mlx5_ct_entry { struct mlx5_ct_entry {
struct rhash_head node; struct rhash_head node;
struct rhash_head tuple_node; struct rhash_head tuple_node;
...@@ -134,6 +139,12 @@ struct mlx5_ct_entry { ...@@ -134,6 +139,12 @@ struct mlx5_ct_entry {
struct mlx5_ct_tuple tuple; struct mlx5_ct_tuple tuple;
struct mlx5_ct_tuple tuple_nat; struct mlx5_ct_tuple tuple_nat;
struct mlx5_ct_zone_rule zone_rules[2]; struct mlx5_ct_zone_rule zone_rules[2];
struct mlx5_tc_ct_priv *ct_priv;
struct work_struct work;
refcount_t refcnt;
unsigned long flags;
}; };
static const struct rhashtable_params cts_ht_params = { static const struct rhashtable_params cts_ht_params = {
...@@ -740,6 +751,87 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, ...@@ -740,6 +751,87 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
return err; return err;
} }
static bool
mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
{
return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
}
static struct mlx5_ct_entry *
mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
{
struct mlx5_ct_entry *entry;
entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
tuples_ht_params);
if (entry && mlx5_tc_ct_entry_valid(entry) &&
refcount_inc_not_zero(&entry->refcnt)) {
return entry;
} else if (!entry) {
entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
tuple, tuples_nat_ht_params);
if (entry && mlx5_tc_ct_entry_valid(entry) &&
refcount_inc_not_zero(&entry->refcnt))
return entry;
}
return entry ? ERR_PTR(-EINVAL) : NULL;
}
static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
{
struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
&entry->tuple_nat_node,
tuples_nat_ht_params);
rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
tuples_ht_params);
}
static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
{
struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;
mlx5_tc_ct_entry_del_rules(ct_priv, entry);
spin_lock_bh(&ct_priv->ht_lock);
mlx5_tc_ct_entry_remove_from_tuples(entry);
spin_unlock_bh(&ct_priv->ht_lock);
mlx5_tc_ct_counter_put(ct_priv, entry);
kfree(entry);
}
static void
mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
if (!refcount_dec_and_test(&entry->refcnt))
return;
mlx5_tc_ct_entry_del(entry);
}
static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
{
struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);
mlx5_tc_ct_entry_del(entry);
}
static void
__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
struct mlx5e_priv *priv;
if (!refcount_dec_and_test(&entry->refcnt))
return;
priv = netdev_priv(entry->ct_priv->netdev);
INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
queue_work(priv->wq, &entry->work);
}
static struct mlx5_ct_counter * static struct mlx5_ct_counter *
mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
{ {
...@@ -792,16 +884,26 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, ...@@ -792,16 +884,26 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
} }
/* Use the same counter as the reverse direction */ /* Use the same counter as the reverse direction */
mutex_lock(&ct_priv->shared_counter_lock); spin_lock_bh(&ct_priv->ht_lock);
rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple, rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
tuples_ht_params);
if (rev_entry) { if (IS_ERR(rev_entry)) {
if (refcount_inc_not_zero(&rev_entry->counter->refcount)) { spin_unlock_bh(&ct_priv->ht_lock);
mutex_unlock(&ct_priv->shared_counter_lock); goto create_counter;
return rev_entry->counter;
} }
if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
ct_dbg("Using shared counter entry=0x%p rev=0x%p\n", entry, rev_entry);
shared_counter = rev_entry->counter;
spin_unlock_bh(&ct_priv->ht_lock);
mlx5_tc_ct_entry_put(rev_entry);
return shared_counter;
} }
mutex_unlock(&ct_priv->shared_counter_lock);
spin_unlock_bh(&ct_priv->ht_lock);
create_counter:
shared_counter = mlx5_tc_ct_counter_create(ct_priv); shared_counter = mlx5_tc_ct_counter_create(ct_priv);
if (IS_ERR(shared_counter)) { if (IS_ERR(shared_counter)) {
...@@ -866,10 +968,14 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, ...@@ -866,10 +968,14 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
if (!meta_action) if (!meta_action)
return -EOPNOTSUPP; return -EOPNOTSUPP;
entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, spin_lock_bh(&ct_priv->ht_lock);
cts_ht_params); entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
if (entry) if (entry && refcount_inc_not_zero(&entry->refcnt)) {
return 0; spin_unlock_bh(&ct_priv->ht_lock);
mlx5_tc_ct_entry_put(entry);
return -EEXIST;
}
spin_unlock_bh(&ct_priv->ht_lock);
entry = kzalloc(sizeof(*entry), GFP_KERNEL); entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry) if (!entry)
...@@ -878,6 +984,8 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, ...@@ -878,6 +984,8 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
entry->tuple.zone = ft->zone; entry->tuple.zone = ft->zone;
entry->cookie = flow->cookie; entry->cookie = flow->cookie;
entry->restore_cookie = meta_action->ct_metadata.cookie; entry->restore_cookie = meta_action->ct_metadata.cookie;
refcount_set(&entry->refcnt, 2);
entry->ct_priv = ct_priv;
err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule); err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
if (err) if (err)
...@@ -888,35 +996,40 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, ...@@ -888,35 +996,40 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
if (err) if (err)
goto err_set; goto err_set;
err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht, spin_lock_bh(&ct_priv->ht_lock);
err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
cts_ht_params);
if (err)
goto err_entries;
err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
&entry->tuple_node, &entry->tuple_node,
tuples_ht_params); tuples_ht_params);
if (err) if (err)
goto err_tuple; goto err_tuple;
if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) { if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht, err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
&entry->tuple_nat_node, &entry->tuple_nat_node,
tuples_nat_ht_params); tuples_nat_ht_params);
if (err) if (err)
goto err_tuple_nat; goto err_tuple_nat;
} }
spin_unlock_bh(&ct_priv->ht_lock);
err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry, err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
ft->zone_restore_id); ft->zone_restore_id);
if (err) if (err)
goto err_rules; goto err_rules;
err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node, set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
cts_ht_params); mlx5_tc_ct_entry_put(entry); /* this function reference */
if (err)
goto err_insert;
return 0; return 0;
err_insert:
mlx5_tc_ct_entry_del_rules(ct_priv, entry);
err_rules: err_rules:
spin_lock_bh(&ct_priv->ht_lock);
if (mlx5_tc_ct_entry_has_nat(entry)) if (mlx5_tc_ct_entry_has_nat(entry))
rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
&entry->tuple_nat_node, tuples_nat_ht_params); &entry->tuple_nat_node, tuples_nat_ht_params);
...@@ -925,47 +1038,43 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, ...@@ -925,47 +1038,43 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
&entry->tuple_node, &entry->tuple_node,
tuples_ht_params); tuples_ht_params);
err_tuple: err_tuple:
rhashtable_remove_fast(&ft->ct_entries_ht,
&entry->node,
cts_ht_params);
err_entries:
spin_unlock_bh(&ct_priv->ht_lock);
err_set: err_set:
kfree(entry); kfree(entry);
netdev_warn(ct_priv->netdev, if (err != -EEXIST)
"Failed to offload ct entry, err: %d\n", err); netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
return err; return err;
} }
static void
mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5_ct_entry *entry)
{
mlx5_tc_ct_entry_del_rules(ct_priv, entry);
mutex_lock(&ct_priv->shared_counter_lock);
if (mlx5_tc_ct_entry_has_nat(entry))
rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
&entry->tuple_nat_node,
tuples_nat_ht_params);
rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
tuples_ht_params);
mutex_unlock(&ct_priv->shared_counter_lock);
mlx5_tc_ct_counter_put(ct_priv, entry);
}
static int static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
struct flow_cls_offload *flow) struct flow_cls_offload *flow)
{ {
struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
unsigned long cookie = flow->cookie; unsigned long cookie = flow->cookie;
struct mlx5_ct_entry *entry; struct mlx5_ct_entry *entry;
entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, spin_lock_bh(&ct_priv->ht_lock);
cts_ht_params); entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
if (!entry) if (!entry) {
spin_unlock_bh(&ct_priv->ht_lock);
return -ENOENT; return -ENOENT;
}
mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry); if (!mlx5_tc_ct_entry_valid(entry)) {
WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht, spin_unlock_bh(&ct_priv->ht_lock);
&entry->node, return -EINVAL;
cts_ht_params)); }
kfree(entry);
rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
mlx5_tc_ct_entry_remove_from_tuples(entry);
spin_unlock_bh(&ct_priv->ht_lock);
mlx5_tc_ct_entry_put(entry);
return 0; return 0;
} }
...@@ -974,19 +1083,30 @@ static int ...@@ -974,19 +1083,30 @@ static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
struct flow_cls_offload *f) struct flow_cls_offload *f)
{ {
struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
unsigned long cookie = f->cookie; unsigned long cookie = f->cookie;
struct mlx5_ct_entry *entry; struct mlx5_ct_entry *entry;
u64 lastuse, packets, bytes; u64 lastuse, packets, bytes;
entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, spin_lock_bh(&ct_priv->ht_lock);
cts_ht_params); entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
if (!entry) if (!entry) {
spin_unlock_bh(&ct_priv->ht_lock);
return -ENOENT; return -ENOENT;
}
if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
spin_unlock_bh(&ct_priv->ht_lock);
return -EINVAL;
}
spin_unlock_bh(&ct_priv->ht_lock);
mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse); mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
flow_stats_update(&f->stats, bytes, packets, 0, lastuse, flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
FLOW_ACTION_HW_STATS_DELAYED); FLOW_ACTION_HW_STATS_DELAYED);
mlx5_tc_ct_entry_put(entry);
return 0; return 0;
} }
...@@ -1478,11 +1598,9 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, ...@@ -1478,11 +1598,9 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
static void static void
mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
{ {
struct mlx5_tc_ct_priv *ct_priv = arg;
struct mlx5_ct_entry *entry = ptr; struct mlx5_ct_entry *entry = ptr;
mlx5_tc_ct_del_ft_entry(ct_priv, entry); mlx5_tc_ct_entry_put(entry);
kfree(entry);
} }
static void static void
...@@ -1960,6 +2078,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, ...@@ -1960,6 +2078,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
goto err_mapping_labels; goto err_mapping_labels;
} }
spin_lock_init(&ct_priv->ht_lock);
ct_priv->ns_type = ns_type; ct_priv->ns_type = ns_type;
ct_priv->chains = chains; ct_priv->chains = chains;
ct_priv->netdev = priv->netdev; ct_priv->netdev = priv->netdev;
...@@ -1994,7 +2113,6 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, ...@@ -1994,7 +2113,6 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
idr_init(&ct_priv->fte_ids); idr_init(&ct_priv->fte_ids);
mutex_init(&ct_priv->control_lock); mutex_init(&ct_priv->control_lock);
mutex_init(&ct_priv->shared_counter_lock);
rhashtable_init(&ct_priv->zone_ht, &zone_params); rhashtable_init(&ct_priv->zone_ht, &zone_params);
rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params); rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params); rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);
...@@ -2037,7 +2155,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) ...@@ -2037,7 +2155,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
rhashtable_destroy(&ct_priv->zone_ht); rhashtable_destroy(&ct_priv->zone_ht);
mutex_destroy(&ct_priv->control_lock); mutex_destroy(&ct_priv->control_lock);
mutex_destroy(&ct_priv->shared_counter_lock);
idr_destroy(&ct_priv->fte_ids); idr_destroy(&ct_priv->fte_ids);
kfree(ct_priv); kfree(ct_priv);
} }
...@@ -2059,14 +2176,22 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, ...@@ -2059,14 +2176,22 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone)) if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
return false; return false;
entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple, spin_lock(&ct_priv->ht_lock);
tuples_ht_params);
if (!entry) entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, if (!entry) {
&tuple, tuples_nat_ht_params); spin_unlock(&ct_priv->ht_lock);
if (!entry)
return false; return false;
}
if (IS_ERR(entry)) {
spin_unlock(&ct_priv->ht_lock);
return false;
}
spin_unlock(&ct_priv->ht_lock);
tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
__mlx5_tc_ct_entry_put(entry);
return true; return true;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment