Commit a5b72a08, authored by Davide Caratti, committed by David S. Miller

net/sched: add delete_empty() to filters and use it in cls_flower

Revert "net/sched: cls_u32: fix refcount leak in the error path of
u32_change()", and fix the u32 refcount leak in a more generic way that
preserves the semantics of rule dumping.
On tc filters that don't support lockless insertion/removal, there is no
need to guard against concurrent insertion when a removal is in progress.
Therefore, for most of them we can avoid a full walk() when deleting, and
just decrease the refcount, as was done on older Linux kernels.
This fixes situations where walk() wrongly detected a non-empty
filter, as happened with cls_u32 in the error path of change(), thus
leading to failures in the following tdc selftests:

 6aa7: (filter, u32) Add/Replace u32 with source match and invalid indev
 6658: (filter, u32) Add/Replace u32 with custom hash table and invalid handle
 74c2: (filter, u32) Add/Replace u32 filter with invalid hash table id

On cls_flower, and on (future) lockless filters, this check is necessary:
move all the check_empty() logic into a callback so that each filter
can have its own implementation. For cls_flower, it's sufficient to check
whether any IDRs have been allocated.

This reverts commit 275c44aa.

Changes since v1:
 - document the need for delete_empty() when TCF_PROTO_OPS_DOIT_UNLOCKED
   is used, thanks to Vlad Buslov
 - implement delete_empty() without doing fl_walk(), thanks to Vlad Buslov
 - squash revert and new fix in a single patch, to be nice with bisect
   tests that run tdc on u32 filter, thanks to Dave Miller

Fixes: 275c44aa ("net/sched: cls_u32: fix refcount leak in the error path of u32_change()")
Fixes: 6676d5e4 ("net: sched: set dedicated tcf_walker flag when tp is empty")
Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Suggested-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
Tested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 85369750
...@@ -308,6 +308,7 @@ struct tcf_proto_ops { ...@@ -308,6 +308,7 @@ struct tcf_proto_ops {
int (*delete)(struct tcf_proto *tp, void *arg, int (*delete)(struct tcf_proto *tp, void *arg,
bool *last, bool rtnl_held, bool *last, bool rtnl_held,
struct netlink_ext_ack *); struct netlink_ext_ack *);
bool (*delete_empty)(struct tcf_proto *tp);
void (*walk)(struct tcf_proto *tp, void (*walk)(struct tcf_proto *tp,
struct tcf_walker *arg, bool rtnl_held); struct tcf_walker *arg, bool rtnl_held);
int (*reoffload)(struct tcf_proto *tp, bool add, int (*reoffload)(struct tcf_proto *tp, bool add,
...@@ -336,6 +337,10 @@ struct tcf_proto_ops { ...@@ -336,6 +337,10 @@ struct tcf_proto_ops {
int flags; int flags;
}; };
/* Classifiers setting TCF_PROTO_OPS_DOIT_UNLOCKED in tcf_proto_ops->flags
* are expected to implement tcf_proto_ops->delete_empty(), otherwise race
* conditions can occur when filters are inserted/deleted simultaneously.
*/
enum tcf_proto_ops_flags { enum tcf_proto_ops_flags {
TCF_PROTO_OPS_DOIT_UNLOCKED = 1, TCF_PROTO_OPS_DOIT_UNLOCKED = 1,
}; };
......
...@@ -308,33 +308,12 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held, ...@@ -308,33 +308,12 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
tcf_proto_destroy(tp, rtnl_held, true, extack); tcf_proto_destroy(tp, rtnl_held, true, extack);
} }
static int walker_check_empty(struct tcf_proto *tp, void *fh, static bool tcf_proto_check_delete(struct tcf_proto *tp)
struct tcf_walker *arg)
{ {
if (fh) { if (tp->ops->delete_empty)
arg->nonempty = true; return tp->ops->delete_empty(tp);
return -1;
}
return 0;
}
static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
{
struct tcf_walker walker = { .fn = walker_check_empty, };
if (tp->ops->walk) {
tp->ops->walk(tp, &walker, rtnl_held);
return !walker.nonempty;
}
return true;
}
static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held) tp->deleting = true;
{
spin_lock(&tp->lock);
if (tcf_proto_is_empty(tp, rtnl_held))
tp->deleting = true;
spin_unlock(&tp->lock);
return tp->deleting; return tp->deleting;
} }
...@@ -1751,7 +1730,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, ...@@ -1751,7 +1730,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
* concurrently. * concurrently.
* Mark tp for deletion if it is empty. * Mark tp for deletion if it is empty.
*/ */
if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) { if (!tp_iter || !tcf_proto_check_delete(tp)) {
mutex_unlock(&chain->filter_chain_lock); mutex_unlock(&chain->filter_chain_lock);
return; return;
} }
......
...@@ -2773,6 +2773,17 @@ static void fl_bind_class(void *fh, u32 classid, unsigned long cl) ...@@ -2773,6 +2773,17 @@ static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
f->res.class = cl; f->res.class = cl;
} }
static bool fl_delete_empty(struct tcf_proto *tp)
{
struct cls_fl_head *head = fl_head_dereference(tp);
spin_lock(&tp->lock);
tp->deleting = idr_is_empty(&head->handle_idr);
spin_unlock(&tp->lock);
return tp->deleting;
}
static struct tcf_proto_ops cls_fl_ops __read_mostly = { static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.kind = "flower", .kind = "flower",
.classify = fl_classify, .classify = fl_classify,
...@@ -2782,6 +2793,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { ...@@ -2782,6 +2793,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.put = fl_put, .put = fl_put,
.change = fl_change, .change = fl_change,
.delete = fl_delete, .delete = fl_delete,
.delete_empty = fl_delete_empty,
.walk = fl_walk, .walk = fl_walk,
.reoffload = fl_reoffload, .reoffload = fl_reoffload,
.hw_add = fl_hw_add, .hw_add = fl_hw_add,
......
...@@ -1108,33 +1108,10 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ...@@ -1108,33 +1108,10 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return err; return err;
} }
static bool u32_hnode_empty(struct tc_u_hnode *ht, bool *non_root_ht)
{
int i;
if (!ht)
return true;
if (!ht->is_root) {
*non_root_ht = true;
return false;
}
if (*non_root_ht)
return false;
if (ht->refcnt < 2)
return true;
for (i = 0; i <= ht->divisor; i++) {
if (rtnl_dereference(ht->ht[i]))
return false;
}
return true;
}
static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg, static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
bool rtnl_held) bool rtnl_held)
{ {
struct tc_u_common *tp_c = tp->data; struct tc_u_common *tp_c = tp->data;
bool non_root_ht = false;
struct tc_u_hnode *ht; struct tc_u_hnode *ht;
struct tc_u_knode *n; struct tc_u_knode *n;
unsigned int h; unsigned int h;
...@@ -1147,8 +1124,6 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg, ...@@ -1147,8 +1124,6 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
ht = rtnl_dereference(ht->next)) { ht = rtnl_dereference(ht->next)) {
if (ht->prio != tp->prio) if (ht->prio != tp->prio)
continue; continue;
if (u32_hnode_empty(ht, &non_root_ht))
return;
if (arg->count >= arg->skip) { if (arg->count >= arg->skip) {
if (arg->fn(tp, ht, arg) < 0) { if (arg->fn(tp, ht, arg) < 0) {
arg->stop = 1; arg->stop = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment